def test_generate_knntriplets(k_genuine, k_impostor):
    """Check knn triplet construction when there are not enough neighbors.

    The call is expected to emit at least one ``UserWarning`` whose text
    contains one of the "not sufficient" messages built below, and the
    generated triplets (once sorted) must equal ``T_test``.

    Parameters
    ----------
    k_genuine : int
        Number of genuine neighbors requested.
    k_impostor : int
        Number of impostor neighbors requested.
    """
    T_test = [[0, 1, 3], [0, 1, 4], [0, 1, 5], [0, 2, 3], [0, 2, 4], [0, 2, 5],
              [1, 0, 3], [1, 0, 4], [1, 0, 5], [1, 2, 3], [1, 2, 4], [1, 2, 5],
              [2, 0, 3], [2, 0, 4], [2, 0, 5], [2, 1, 3], [2, 1, 4], [2, 1, 5],
              [3, 4, 0], [3, 4, 1], [3, 4, 2], [3, 5, 0], [3, 5, 1], [3, 5, 2],
              [4, 3, 0], [4, 3, 1], [4, 3, 2], [4, 5, 0], [4, 5, 1], [4, 5, 2],
              [5, 3, 0], [5, 3, 1], [5, 3, 2], [5, 4, 0], [5, 4, 1], [5, 4, 2]]

    X = np.array([[0, 0], [2, 2], [4, 4], [8, 8], [16, 16], [32, 32],
                  [33, 33]])
    # The last point is labelled -1; its index (6) appears in none of the
    # expected triplets.
    y = np.array([1, 1, 1, 2, 2, 2, -1])

    msg1 = ("The class 1 has 3 elements, which is not sufficient to "
            f"generate {k_genuine+1} genuine neighbors "
            "as specified by k_genuine")
    msg2 = ("The class 2 has 3 elements, which is not sufficient to "
            f"generate {k_genuine+1} genuine neighbors "
            "as specified by k_genuine")
    msg3 = ("The class 1 has 3 elements of other classes, which is "
            f"not sufficient to generate {k_impostor} impostor "
            "neighbors as specified by k_impostor")
    msg4 = ("The class 2 has 3 elements of other classes, which is "
            f"not sufficient to generate {k_impostor} impostor "
            "neighbors as specified by k_impostor")
    msgs = [msg1, msg2, msg3, msg4]

    with pytest.warns(UserWarning) as user_warning:
        T = Constraints(y).generate_knntriplets(X, k_genuine, k_impostor)

    # Flattened on purpose: any() over a list of non-empty lists is always
    # True, which would make this check pass regardless of the warning text.
    assert any(msg in str(warn.message)
               for warn in user_warning
               for msg in msgs)
    assert np.array_equal(sorted(T.tolist()), T_test)
def test_unknown_labels_not_in_chunks(num_chunks, chunk_size):
    """Verify that points with an unknown label end up in no chunk.

    Every position whose label is negative must also carry a negative
    chunk value in the array returned by ``Constraints.chunks``.
    """
    labels = gen_labels_for_chunks(num_chunks, chunk_size)
    chunks = Constraints(labels).chunks(
        num_chunks=num_chunks,
        chunk_size=chunk_size,
        random_state=SEED,
    )

    unknown_mask = labels < 0
    chunks_of_unknown = chunks[unknown_mask]
    assert np.all(chunks_of_unknown < 0)
def test_generate_knntriplets_under_edge(k_genuine, k_impostor, T_test):
    """Check knn triplet construction when enough neighbors are available.

    Parameters
    ----------
    k_genuine : int
        Number of genuine neighbors requested.
    k_impostor : int
        Number of impostor neighbors requested.
    T_test : list of list of int
        Expected triplets, compared against the sorted output.
    """
    labels = np.array([1, 1, 1, 2, 2, 2, -1])
    points = np.array([
        [0, 0],
        [2, 2],
        [4, 4],
        [8, 8],
        [16, 16],
        [32, 32],
        [33, 33],
    ])

    constraints = Constraints(labels)
    triplets = constraints.generate_knntriplets(points, k_genuine, k_impostor)

    # Sort so the comparison does not depend on generation order.
    ordered_triplets = triplets.tolist()
    ordered_triplets.sort()
    assert np.array_equal(ordered_triplets, T_test)
def test_exact_num_points_for_chunks(num_chunks, chunk_size):
    """Verify chunk generation when there are just enough points.

    Each distinct chunk value must occur exactly ``chunk_size`` times, and
    there must be exactly ``num_chunks`` distinct chunk values.
    """
    labels = gen_labels_for_chunks(num_chunks, chunk_size)
    chunks = Constraints(labels).chunks(
        num_chunks=num_chunks,
        chunk_size=chunk_size,
        random_state=SEED,
    )

    chunk_ids, chunk_sizes = np.unique(chunks, return_counts=True)

    np.testing.assert_array_equal(chunk_sizes, chunk_size)
    assert len(chunk_ids) == num_chunks
def test_triplet_diffs(self, n_samples, n_features, n_classes):
    """Check the number and shape of bases built from knn triplets.

    With a default ``SCML_Supervised`` model, ``_generate_bases_dist_diff``
    is expected to report ``n_features * 80`` bases and to return a basis
    array of shape ``(n_features * 80, n_features)``.
    """
    features, targets = make_classification(
        n_samples=n_samples,
        n_classes=n_classes,
        n_features=n_features,
        n_informative=n_features,
        n_redundant=0,
        n_repeated=0,
    )
    features = StandardScaler().fit_transform(features)

    model = SCML_Supervised()
    triplets = Constraints(targets).generate_knntriplets(
        features, model.k_genuine, model.k_impostor
    )

    basis, n_basis = model._generate_bases_dist_diff(triplets, features)

    expected_n_basis = n_features * 80
    assert n_basis == expected_n_basis
    assert basis.shape == (expected_n_basis, n_features)
def test_chunk_case_one_miss_point(num_chunks, chunk_size):
    """Verify that chunk generation fails when one point is missing.

    The first generated label is dropped before building the constraints;
    ``chunks`` must then raise a ``ValueError`` whose message reports that
    at most ``num_chunks - 1`` chunks can be formed.
    """
    labels = gen_labels_for_chunks(num_chunks, chunk_size)
    assert len(labels) >= 1

    # Drop a single point so one chunk can no longer be completed.
    constraints = Constraints(labels[1:])

    message_template = ('Not enough possible chunks of %d elements in each'
                        ' class to form expected %d chunks - maximum number'
                        ' of chunks is %d')
    expected_message = message_template % (
        chunk_size, num_chunks, num_chunks - 1
    )

    with pytest.raises(ValueError) as e:
        constraints.chunks(
            num_chunks=num_chunks,
            chunk_size=chunk_size,
            random_state=SEED,
        )

    assert str(e.value) == expected_message
def test_generate_knntriplets(k_genuine, k_impostor):
    """Check knn triplet construction when there are not enough neighbors.

    The generated triplets, once sorted, must equal the hard-coded list of
    expected triplets below.

    Parameters
    ----------
    k_genuine : int
        Number of genuine neighbors requested.
    k_impostor : int
        Number of impostor neighbors requested.
    """
    labels = np.array([1, 1, 1, 2, 2, 2, -1])
    points = np.array([
        [0, 0],
        [2, 2],
        [4, 4],
        [8, 8],
        [16, 16],
        [32, 32],
        [33, 33],
    ])

    # One row per (anchor, genuine) pair; the third entry varies in a row.
    expected_triplets = [
        [0, 1, 3], [0, 1, 4], [0, 1, 5],
        [0, 2, 3], [0, 2, 4], [0, 2, 5],
        [1, 0, 3], [1, 0, 4], [1, 0, 5],
        [1, 2, 3], [1, 2, 4], [1, 2, 5],
        [2, 0, 3], [2, 0, 4], [2, 0, 5],
        [2, 1, 3], [2, 1, 4], [2, 1, 5],
        [3, 4, 0], [3, 4, 1], [3, 4, 2],
        [3, 5, 0], [3, 5, 1], [3, 5, 2],
        [4, 3, 0], [4, 3, 1], [4, 3, 2],
        [4, 5, 0], [4, 5, 1], [4, 5, 2],
        [5, 3, 0], [5, 3, 1], [5, 3, 2],
        [5, 4, 0], [5, 4, 1], [5, 4, 2],
    ]

    constraints = Constraints(labels)
    triplets = constraints.generate_knntriplets(points, k_genuine, k_impostor)

    # Sort so the comparison does not depend on generation order.
    ordered_triplets = triplets.tolist()
    ordered_triplets.sort()
    assert np.array_equal(ordered_triplets, expected_triplets)
def test_triplet_diffs(self, n_samples, n_features, n_classes):
    """Check that the right ``n_basis`` is generated from knn triplets.

    The model is created with ``n_basis=None``; building the bases must
    then emit a ``UserWarning`` mentioning that no value was selected,
    report ``n_features * 80`` bases, and return a basis array of shape
    ``(n_features * 80, n_features)``.
    """
    features, targets = make_classification(
        n_samples=n_samples,
        n_classes=n_classes,
        n_features=n_features,
        n_informative=n_features,
        n_redundant=0,
        n_repeated=0,
    )
    features = StandardScaler().fit_transform(features)

    # n_basis is passed explicitly as None.
    model = SCML_Supervised(n_basis=None)
    triplets = Constraints(targets).generate_knntriplets(
        features, model.k_genuine, model.k_impostor
    )

    expected_fragment = "As no value for `n_basis` was selected, "
    with pytest.warns(UserWarning) as raised_warning:
        basis, n_basis = model._generate_bases_dist_diff(triplets, features)
    first_warning_text = str(raised_warning[0].message)
    assert expected_fragment in first_warning_text

    expected_n_basis = n_features * 80
    assert n_basis == expected_n_basis
    assert basis.shape == (expected_n_basis, n_features)
def test_generate_knntriplets_k_genuine():
    """Check the warning raised when ``k_genuine`` is too big.

    ``k_genuine`` is set to the size of the smallest class, and one expected
    message is built for every class of that size. Each warning captured
    during triplet generation must match one of those messages exactly.
    """
    X, y = shuffle(*make_blobs(random_state=SEED), random_state=SEED)
    label, labels_count = np.unique(y, return_counts=True)

    k_genuine = np.min(labels_count)
    smallest_idx = np.where(labels_count == k_genuine)[0]

    template = ("The class {} has {} elements, which is not sufficient "
                "to generate {} genuine neighbors as specified by "
                "k_genuine. Will generate {} genuine neighbors instead."
                "\n")
    warn_msgs = [
        template.format(label[idx], k_genuine, k_genuine + 1, k_genuine - 1)
        for idx in smallest_idx
    ]

    with pytest.warns(UserWarning) as raised_warning:
        Constraints(y).generate_knntriplets(X, k_genuine, 1)

    assert all(str(warn.message) in warn_msgs for warn in raised_warning)
def test_generate_knntriplets_k_impostor():
    """Check the warning raised when ``k_impostor`` is too big.

    ``k_impostor`` is set to one more than the number of points lying
    outside the largest class, and one expected message is built for every
    class of that size. Each warning captured during triplet generation
    must match one of those messages exactly.
    """
    X, y = shuffle(*make_blobs(random_state=SEED), random_state=SEED)
    label, labels_count = np.unique(y, return_counts=True)

    largest_count = np.max(labels_count)
    largest_idx = np.where(labels_count == largest_count)[0]
    k_impostor = len(y) - largest_count + 1

    template = ("The class {} has {} elements of other classes, which is"
                " not sufficient to generate {} impostor neighbors as "
                "specified by k_impostor. Will generate {} impostor "
                "neighbors instead.\n")
    warn_msgs = [
        template.format(label[idx], k_impostor - 1, k_impostor,
                        k_impostor - 1)
        for idx in largest_idx
    ]

    with pytest.warns(UserWarning) as raised_warning:
        Constraints(y).generate_knntriplets(X, 1, k_impostor)

    assert all(str(warn.message) in warn_msgs for warn in raised_warning)