def test_transformer_equivalence():
    """Check that PyNNDescentTransformer reproduces a direct NNDescent query.

    An uncompressed NNDescent index is queried directly and its neighbour
    indices/distances are sorted per row by neighbour index. The result must
    match the ``indices``/``data`` of the transformer output after
    ``sorted_indices()``.
    """
    n_neighbors = 15
    epsilon = 0.15
    train, test = nn_data[:400], nn_data[:200]

    # The raw index is built with n_neighbors + 1 to conform to sklearn's
    # KNeighborsTransformer definition.
    index = NNDescent(
        data=train,
        n_neighbors=n_neighbors + 1,
        random_state=42,
        compressed=False,
    )
    knn_indices, knn_dists = index.query(test, k=n_neighbors, epsilon=epsilon)

    # Reorder every row so neighbours appear in ascending index order.
    order = np.argsort(knn_indices, axis=1)
    expected_indices = np.take_along_axis(knn_indices, order, axis=1)
    expected_dists = np.take_along_axis(knn_dists, order, axis=1)

    # The transformer itself receives the unshifted neighbour count.
    fitted = PyNNDescentTransformer(
        n_neighbors=n_neighbors,
        search_epsilon=epsilon,
        random_state=42,
    ).fit(train, compress_index=False)
    graph = fitted.transform(test).sorted_indices()

    assert np.all(graph.indices == expected_indices.flatten())
    assert np.allclose(graph.data, expected_dists.flat)
def to_graph(X, sigma, e, n_neighbors, similarity_matrix, knn_aprox, eps=1e-7):
    """Compute a similarity matrix for ``X``.

    Parameters
    ----------
    X : array-like or torch.Tensor
        Input data. A torch tensor is detached, moved to the CPU and
        converted to a numpy array first. With
        ``similarity_matrix='precomputed'`` this is returned as the result.
    sigma : {'max', 'mean'} or number
        Bandwidth of the Gaussian kernel applied to the neighbour distances.
        ``'max'`` uses each row's largest distance, ``'mean'`` uses each
        row's sum of distances divided by its number of stored entries; any
        other value is used directly.
    e : float
        Radius passed to ``radius_neighbors_graph`` (``'e-NG'`` only).
    n_neighbors : int
        Number of neighbours for the k-NN based graphs.
    similarity_matrix : str
        One of ``'e-NG'``, ``'full'``, ``'precomputed'``, ``'k-hNNG'``,
        ``'k-NNG'`` or ``'k-mNNG'``.
    knn_aprox : bool
        If true, build the k-NN graph with ``PyNNDescentTransformer``
        instead of ``kneighbors_graph``.
    eps : float, optional
        Small constant added to ``2 * sigma**2`` before dividing by it.

    Returns
    -------
    The similarity matrix: a connectivity graph for ``'e-NG'``, ``X`` itself
    for ``'precomputed'``, and a symmetrised Gaussian-weighted k-NN graph
    for the ``'k-*'`` modes (average, element-wise maximum or element-wise
    minimum of the graph and its transpose). ``None`` is returned for
    ``'full'`` and for any unrecognised mode.
    """
    # isinstance also catches Tensor subclasses, unlike an exact type check.
    if isinstance(X, torch.Tensor):
        X = X.detach().to("cpu").numpy()

    if similarity_matrix == 'e-NG':
        return radius_neighbors_graph(
            X, e, mode='connectivity', include_self=False, n_jobs=-1
        )
    if similarity_matrix == 'full':
        # NOTE(review): this mode has no implementation here; it yields None,
        # which is kept so existing callers see the same result.
        return None
    if similarity_matrix == 'precomputed':
        # The input already is the similarity matrix. (Previously this branch
        # returned an unbound local and raised UnboundLocalError.)
        return X

    # Sparse matrix of neighbour distances.
    if knn_aprox:
        A = PyNNDescentTransformer(
            n_neighbors=n_neighbors, metric="euclidean", n_jobs=-1
        ).fit_transform(X)
    else:
        A = kneighbors_graph(
            X, n_neighbors, mode='distance', include_self=False, n_jobs=-1
        )

    # 2 * sigma^2, per row for 'max'/'mean', global otherwise.
    if sigma == 'max':
        sigma_2 = 2 * np.power(A.max(axis=1).toarray(), 2) + eps
    elif sigma == 'mean':
        row_mean = A.sum(axis=1) / A.getnnz(axis=1).reshape(-1, 1)
        sigma_2 = 2 * np.power(row_mean, 2) + eps
    else:
        sigma_2 = 2 * np.power(sigma, 2) + eps

    # Gaussian kernel exp(-d^2 / (2 sigma^2)) on the stored entries only.
    A = A.power(2, dtype=np.float32).multiply(1 / sigma_2).tocsr()
    np.exp(-A.data, out=A.data)

    if knn_aprox:
        # NOTE(review): presumably the approximate graph stores each point as
        # its own neighbour (distance 0 -> weight 1), so the identity is
        # subtracted to clear the diagonal -- confirm against the transformer.
        A = A - sparse.identity(A.shape[0])

    if similarity_matrix == 'k-hNNG':
        return (A + A.T) / 2
    if similarity_matrix == 'k-NNG':
        return A.maximum(A.T)
    if similarity_matrix == 'k-mNNG':
        return A.minimum(A.T)
    # Unrecognised mode: keep the historical behaviour of returning None.
    return None
def test_transformer_pickle_unpickle():
    """Check that a fitted transformer survives a pickle round trip on disk.

    The transformer is fitted on one random sample, pickled to a temporary
    file, loaded back, and both copies must produce identical ``indices``
    and ``data`` when transforming a second random sample.
    """
    seed = np.random.RandomState(42)
    x1 = seed.normal(0, 100, (1000, 50))
    x2 = seed.normal(0, 100, (1000, 50))

    index1 = PyNNDescentTransformer(n_neighbors=10).fit(x1)
    result1 = index1.transform(x2)

    pickle_path = "test_tmp.pkl"
    try:
        # Context managers close the handles deterministically; an open
        # handle can otherwise block the removal below on some platforms.
        with open(pickle_path, "wb") as handle:
            pickle.dump(index1, handle)
        with open(pickle_path, "rb") as handle:
            index2 = pickle.load(handle)
    finally:
        # Clean up even when dumping or loading fails.
        if os.path.exists(pickle_path):
            os.remove(pickle_path)

    result2 = index2.transform(x2)

    np.testing.assert_equal(result1.indices, result2.indices)
    np.testing.assert_equal(result1.data, result2.data)
def test_transformer_pickle_unpickle():
    """Check that a fitted transformer survives an in-memory pickle round trip.

    The original and the unpickled transformer must yield identical
    ``indices`` and ``data`` when transforming the same query sample.
    """
    rng = np.random.RandomState(42)
    fit_sample = rng.normal(0, 100, (1000, 50))
    query_sample = rng.normal(0, 100, (1000, 50))

    fitted = PyNNDescentTransformer(n_neighbors=10).fit(fit_sample)
    expected = fitted.transform(query_sample)

    # Serialise into a byte buffer and read the object straight back.
    buffer = io.BytesIO(pickle.dumps(fitted))
    restored = pickle.load(buffer)
    actual = restored.transform(query_sample)

    for attribute in ("indices", "data"):
        np.testing.assert_equal(
            getattr(expected, attribute), getattr(actual, attribute)
        )
def test_transformer_equivalence():
    """Check that PyNNDescentTransformer reproduces a direct NNDescent query.

    Both are built with the same neighbour count, queue size and random
    state. The direct query result is sorted per row by neighbour index and
    compared with the ``indices``/``data`` of the transformer output after
    ``sorted_indices()``.
    """
    n_neighbors = 15
    queue_size = 5.0
    train, test = nn_data[:400], nn_data[:200]

    index = NNDescent(data=train, n_neighbors=n_neighbors, random_state=42)
    knn_indices, knn_dists = index.query(
        test, k=n_neighbors, queue_size=queue_size
    )

    # Reorder every row so neighbours appear in ascending index order.
    order = np.argsort(knn_indices, axis=1)
    expected_indices = np.take_along_axis(knn_indices, order, axis=1)
    expected_dists = np.take_along_axis(knn_dists, order, axis=1)

    fitted = PyNNDescentTransformer(
        n_neighbors=n_neighbors,
        search_queue_size=queue_size,
        random_state=42,
    ).fit(train)
    graph = fitted.transform(test).sorted_indices()

    assert np.all(graph.indices == expected_indices.flat)
    assert np.allclose(graph.data, expected_dists.flat)
def test_transformer_output_when_verbose_is_false():
    """A transformer created with ``verbose=False`` must print nothing."""
    params = dict(
        n_neighbors=4,
        metric="euclidean",
        metric_kwds={},
        random_state=np.random,
        n_trees=5,
        n_iters=2,
        verbose=False,
    )

    captured = io.StringIO()
    # Construction and fitting both happen while stdout is redirected.
    with redirect_stdout(captured):
        PyNNDescentTransformer(**params).fit_transform(spatial_data)

    printed = captured.getvalue().strip()
    assert_equal(len(printed), 0)
def test_transformer_output_when_verbose_is_false(spatial_data, seed):
    """A transformer created with ``verbose=False`` must print nothing.

    Uses the ``standardised_euclidean`` metric with a ``sigma`` of ones, one
    entry per column of ``spatial_data``, and a ``RandomState`` built from
    ``seed``.
    """
    params = dict(
        n_neighbors=4,
        metric="standardised_euclidean",
        metric_kwds={"sigma": np.ones(spatial_data.shape[1])},
        random_state=np.random.RandomState(seed),
        n_trees=5,
        n_iters=2,
        verbose=False,
    )

    captured = io.StringIO()
    # Construction and fitting both happen while stdout is redirected.
    with redirect_stdout(captured):
        PyNNDescentTransformer(**params).fit_transform(spatial_data)

    printed = captured.getvalue().strip()
    assert len(printed) == 0
def test_transformer_output_when_verbose_is_true():
    """A transformer created with ``verbose=True`` must report its progress.

    The captured stdout has to mention both "5 trees" and "2 iterations",
    matching the ``n_trees`` and ``n_iters`` settings used here.
    """
    params = dict(
        n_neighbors=4,
        metric="euclidean",
        metric_kwds={},
        random_state=np.random,
        n_trees=5,
        n_iters=2,
        verbose=True,
    )

    captured = io.StringIO()
    # Construction and fitting both happen while stdout is redirected.
    with redirect_stdout(captured):
        PyNNDescentTransformer(**params).fit_transform(spatial_data)

    printed = captured.getvalue()
    # DOTALL lets ".*" span the newlines preceding each expected message.
    for pattern in ("^.*5 trees", "^.*2 iterations"):
        assert_true(re.match(pattern, printed, re.DOTALL))