def test_transformer_result():
    """Check shape, entry count, format and ordering of transformer graphs.

    Both neighbor transformers are exercised in "distance" and
    "connectivity" mode, on the training data (``fit_transform``) and on
    separate query data (``transform``).
    """
    n_neighbors, n_samples_fit, n_queries, n_features = 5, 20, 18, 10

    rng = np.random.RandomState(42)
    X = rng.randn(n_samples_fit, n_features)
    X2 = rng.randn(n_queries, n_features)
    radius = np.percentile(euclidean_distances(X), 10)

    def check_graph(graph, n_rows, mode, expect_knn_count):
        # In "distance" mode one extra entry per row is expected.
        entries_per_row = n_neighbors + (mode == "distance")
        assert graph.shape == (n_rows, n_samples_fit)
        has_knn_count = graph.data.shape == (n_rows * entries_per_row,)
        assert has_knn_count == expect_knn_count
        assert graph.format == "csr"
        assert _is_sorted_by_data(graph)

    # Fixed number of neighbors: the stored-entry count is known exactly.
    for mode in ("distance", "connectivity"):
        knn = KNeighborsTransformer(n_neighbors=n_neighbors, mode=mode)
        check_graph(knn.fit_transform(X), n_samples_fit, mode, True)
        check_graph(knn.transform(X2), n_queries, mode, True)

    # Radius-based neighbors: the stored-entry count must NOT match the
    # fixed-k count.
    for mode in ("distance", "connectivity"):
        rnn = RadiusNeighborsTransformer(radius=radius, mode=mode)
        check_graph(rnn.fit_transform(X), n_samples_fit, mode, False)
        check_graph(rnn.transform(X2), n_queries, mode, False)
def test_transformers():
    """Test that AnnoyTransformer and NMSlibTransformer agree with
    KNeighborsTransformer (default parameters, 5 decimals)."""
    X = np.random.RandomState(42).randn(10, 2)

    # Reference graph from KNeighborsTransformer.
    expected = KNeighborsTransformer().fit_transform(X)

    # Fit both approximate transformers before comparing anything, in the
    # same order as the comparisons below.
    annoy_graph = AnnoyTransformer().fit_transform(X)
    nmslib_graph = NMSlibTransformer().fit_transform(X)

    for candidate in (annoy_graph, nmslib_graph):
        assert_array_almost_equal(
            expected.toarray(), candidate.toarray(), decimal=5
        )
def get_kNN_score_torch(pairwise_distances, matching_matrix, n_neighbours=5):
    """Score how well same-label samples cluster in a k-NN graph.

    The score shows how the collections of persistent landscapes
    corresponding to each label are separated from each other, in the
    sense of L2 distance in the Hilbert space of persistent landscapes.

    Parameters
    ----------
    pairwise_distances : torch.Tensor of shape (n_samples, n_samples)
        Precomputed pairwise distances between samples.
    matching_matrix : numpy.ndarray of shape (n_samples, n_samples)
        1 where two samples have the same label, 0 otherwise.
    n_neighbours : int, default=5
        Number of nearest neighbours used to calculate the score.

    Returns
    -------
    kNN_score : float
        Intended to be a real number between 0 and 1.

    Notes
    -----
    ``n_samples`` is subtracted from both the numerator and the
    denominator, presumably to discount each sample being listed as its
    own neighbour; this relies on ``matching_matrix`` having ones on its
    diagonal (TODO confirm with callers). If the connectivity matrix
    sums to exactly ``n_samples`` the denominator is zero.
    """
    n_samples = pairwise_distances.size()[0]

    # ``Tensor.numpy()`` raises for tensors that require grad or live on a
    # non-CPU device, so detach and move to the CPU first. For a plain CPU
    # tensor this yields the same array as before.
    distances = pairwise_distances.detach().cpu().numpy()

    kNN_transformer = KNeighborsTransformer(
        mode="connectivity", metric="precomputed", n_neighbors=n_neighbours
    )
    connectivity_matrix = kNN_transformer.fit_transform(distances).toarray()

    same_label_links = np.sum(matching_matrix * connectivity_matrix) - n_samples
    all_links = np.sum(connectivity_matrix) - n_samples
    kNN_score = same_label_links / all_links
    return kNN_score
def test_explicit_diagonal():
    """Test that the diagonal is explicitly stored in the sparse graph."""
    n_neighbors = 5
    n_samples_fit, n_samples_transform, n_features = 20, 18, 10

    rng = np.random.RandomState(42)
    X = rng.randn(n_samples_fit, n_features)
    X2 = rng.randn(n_samples_transform, n_features)

    nnt = KNeighborsTransformer(n_neighbors=n_neighbors)

    # On the training data, both fit_transform and transform must store the
    # diagonal explicitly, and the first stored value of every row must be 0.
    for build_graph in (nnt.fit_transform, nnt.transform):
        graph = build_graph(X)
        assert _has_explicit_diagonal(graph)
        leading_values = graph.data.reshape(n_samples_fit, n_neighbors + 1)[:, 0]
        assert np.all(leading_values == 0)

    # Using transform on new data should not always have zero diagonal.
    new_data_graph = nnt.transform(X2)
    assert not _has_explicit_diagonal(new_data_graph)
def convert2graph(components):
    """Return a networkx ``Graph`` built from the 10-nearest-neighbour
    graph of ``components``.

    ``components`` is passed unchanged to
    ``KNeighborsTransformer.fit_transform``; the resulting sparse matrix
    is handed directly to ``nx.Graph``.
    """
    transformer = KNeighborsTransformer(n_neighbors=10, n_jobs=-1)
    neighbour_matrix = transformer.fit_transform(components)
    return nx.Graph(neighbour_matrix)