def testLearnInLocalCluster(self, *_):
    """Check ``NearestNeighbors.kneighbors`` on a local cluster against scikit-learn."""
    from mars.learn.neighbors import NearestNeighbors
    from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors

    with new_cluster(scheduler_n_process=2, worker_n_process=3,
                     shared_memory='20M') as cluster:
        rng = np.random.RandomState(0)
        train_data = rng.rand(10, 5)
        query_data = rng.rand(8, 5)

        train_tensor = mt.tensor(train_data, chunk_size=7)
        query_tensor = mt.tensor(query_data, chunk_size=(5, 3))

        # Result under test: executed through the cluster's session.
        mars_nn = NearestNeighbors(n_neighbors=3)
        mars_nn.fit(train_tensor)
        neighbors = mars_nn.kneighbors(query_tensor, session=cluster.session)

        # Reference result computed by scikit-learn on the raw arrays.
        sk_nn = SkNearestNeighbors(n_neighbors=3)
        sk_nn.fit(train_data)
        reference = sk_nn.kneighbors(query_data)

        fetched = [item.fetch() for item in neighbors]
        np.testing.assert_almost_equal(fetched[0], reference[0])
        np.testing.assert_almost_equal(fetched[1], reference[1])
def test_faiss_query(setup, X, Y, metric):
    """Query a ``'Flat'`` faiss index and compare with ``NearestNeighbors``.

    The lower-case ``x`` and ``y`` used below are not parameters of this
    function; presumably they are module-level arrays -- verify against the
    rest of the file.
    """
    n_neighbors = 5

    flat_index = build_faiss_index(X, 'Flat', None, metric=metric,
                                   random_state=0)
    dist_tensor, ind_tensor = faiss_query(flat_index, Y, n_neighbors, nprobe=10)
    distance, indices = fetch(*execute(dist_tensor, ind_tensor))

    reference = NearestNeighbors(metric=metric)
    reference.fit(x)
    expected_distance, expected_indices = reference.kneighbors(y, n_neighbors)

    np.testing.assert_array_equal(indices, expected_indices.fetch())
    np.testing.assert_almost_equal(distance, expected_distance.fetch(),
                                   decimal=4)

    # test other index
    X2 = X.astype(np.float64)
    Y2 = y.astype(np.float64)
    composite_index = build_faiss_index(X2, 'PCAR6,IVF8_HNSW32,SQ8', 10,
                                        random_state=0,
                                        return_index_type='object')
    dist_tensor, ind_tensor = faiss_query(composite_index, Y2, n_neighbors,
                                          nprobe=10)
    # test execute only
    execute(dist_tensor, ind_tensor)
def testFaissQuery(self):
    """Compare ``faiss_query`` on a ``'Flat'`` index with ``NearestNeighbors``.

    Runs for every combination of the chunking layouts and metrics below.
    """
    dim = 8
    n_samples = 50
    n_queries = 10
    n_neighbors = 5
    x = np.random.RandomState(0).rand(n_samples, dim).astype(np.float32)
    y = np.random.RandomState(1).rand(n_queries, dim).astype(np.float32)

    multi_chunk_case = (mt.tensor(x, chunk_size=(20, 5)),
                        mt.tensor(y, chunk_size=5))
    one_chunk_case = (mt.tensor(x, chunk_size=50),
                      mt.tensor(y, chunk_size=10))

    for X, Y in [multi_chunk_case, one_chunk_case]:
        for metric in ['l2', 'cosine']:
            faiss_index = build_faiss_index(X, 'Flat', None, metric=metric,
                                            random_state=0,
                                            return_index_type='object')
            dist_tensor, ind_tensor = faiss_query(faiss_index, Y, n_neighbors,
                                                  nprobe=10)
            distance, indices = self.executor.execute_tensors(
                [dist_tensor, ind_tensor])

            reference = NearestNeighbors(metric=metric)
            reference.fit(x)
            expected_distance, expected_indices = reference.kneighbors(
                y, n_neighbors)

            np.testing.assert_array_equal(indices, expected_indices.fetch())
            np.testing.assert_almost_equal(distance,
                                           expected_distance.fetch())
def testMarsKNN(self):
    """Run k-nearest-neighbors on a Mars cluster created through ``self.odps``.

    The server is stopped in ``finally`` so it is shut down even when an
    import or assertion fails.
    """
    client = self.odps.create_mars_cluster(1, 4, 8, name=str(uuid.uuid4()),
                                           scheduler_mem=12, scheduler_cpu=4)
    try:
        import numpy as np
        import mars.tensor as mt
        from mars.learn.neighbors import NearestNeighbors
        from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors

        rng = np.random.RandomState(0)
        train_data = rng.rand(10, 5)
        query_data = rng.rand(8, 5)

        train_tensor = mt.tensor(train_data, chunk_size=7)
        query_tensor = mt.tensor(query_data, chunk_size=(5, 3))

        mars_nn = NearestNeighbors(n_neighbors=3)
        mars_nn.fit(train_tensor)
        neighbors = mars_nn.kneighbors(query_tensor)

        # scikit-learn provides the reference distances and indices.
        sk_nn = SkNearestNeighbors(n_neighbors=3)
        sk_nn.fit(train_data)
        reference = sk_nn.kneighbors(query_data)

        fetched = [item.fetch() for item in neighbors]
        np.testing.assert_almost_equal(fetched[0], reference[0])
        np.testing.assert_almost_equal(fetched[1], reference[1])
    finally:
        client.stop_server()
def testLearnInLocalCluster(self, *_):
    """Check ``NearestNeighbors`` and ``KMeans`` on a local cluster against scikit-learn."""
    from mars.learn.cluster import KMeans
    from mars.learn.neighbors import NearestNeighbors
    from sklearn.cluster import KMeans as SK_KMEANS
    from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors

    with new_cluster(scheduler_n_process=2, worker_n_process=3,
                     shared_memory='20M') as cluster:
        # --- nearest neighbors ---
        rng = np.random.RandomState(0)
        train_data = rng.rand(10, 5)
        query_data = rng.rand(8, 5)

        train_tensor = mt.tensor(train_data, chunk_size=7)
        query_tensor = mt.tensor(query_data, chunk_size=(5, 3))

        mars_nn = NearestNeighbors(n_neighbors=3)
        mars_nn.fit(train_tensor)
        neighbors = mars_nn.kneighbors(query_tensor, session=cluster.session)

        sk_nn = SkNearestNeighbors(n_neighbors=3)
        sk_nn.fit(train_data)
        reference = sk_nn.kneighbors(query_data)

        fetched = [item.fetch() for item in neighbors]
        np.testing.assert_almost_equal(fetched[0], reference[0])
        np.testing.assert_almost_equal(fetched[1], reference[1])

        # --- k-means: both estimators get identical settings ---
        points = np.array([[1, 2], [1, 4], [1, 0],
                           [10, 2], [10, 4], [10, 0]])
        point_tensor = mt.array(points)
        mars_kmeans = KMeans(n_clusters=2, random_state=0,
                             init='k-means++').fit(point_tensor)
        sk_kmeans = SK_KMEANS(n_clusters=2, random_state=0,
                              init='k-means++').fit(points)
        np.testing.assert_allclose(mars_kmeans.cluster_centers_,
                                   sk_kmeans.cluster_centers_)
def testFaissNearestNeighborsExecution(self):
    """Check ``NearestNeighbors(algorithm='faiss')`` results against scikit-learn."""
    rng = np.random.RandomState(0)
    train_data = rng.rand(10, 5)
    query_data = rng.rand(8, 5)

    # test faiss execution
    train_tensor = mt.tensor(train_data, chunk_size=7)
    query_tensor = mt.tensor(query_data, chunk_size=(5, 3))

    faiss_nn = NearestNeighbors(n_neighbors=3, algorithm='faiss', metric='l2')
    faiss_nn.fit(train_tensor)
    neighbors = faiss_nn.kneighbors(query_tensor)

    sk_nn = SkNearestNeighbors(n_neighbors=3, algorithm='auto', metric='l2')
    sk_nn.fit(train_data)
    reference = sk_nn.kneighbors(query_data)

    fetched = [item.fetch() for item in neighbors]
    np.testing.assert_almost_equal(fetched[0], reference[0], decimal=6)
    np.testing.assert_almost_equal(fetched[1], reference[1])

    # test return_distance=False
    indices_only = faiss_nn.kneighbors(query_tensor, return_distance=False)
    np.testing.assert_almost_equal(indices_only.fetch(), reference[1])

    # test y is x
    neighbors = faiss_nn.kneighbors()
    reference = sk_nn.kneighbors()

    fetched = [item.fetch() for item in neighbors]
    np.testing.assert_almost_equal(fetched[0], reference[0], decimal=5)
    np.testing.assert_almost_equal(fetched[1], reference[1])
def testLearnInLocalCluster(self, *_):
    """Check ``NearestNeighbors`` and ``roc_curve``/``auc`` on a local cluster against scikit-learn."""
    from mars.learn.neighbors import NearestNeighbors
    from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors
    from mars.learn.metrics import roc_curve, auc
    from sklearn.metrics import roc_curve as sklearn_roc_curve, auc as sklearn_auc

    with new_cluster(scheduler_n_process=2, worker_n_process=3,
                     shared_memory='20M') as cluster:
        # --- nearest neighbors ---
        rng = np.random.RandomState(0)
        train_data = rng.rand(10, 5)
        query_data = rng.rand(8, 5)

        train_tensor = mt.tensor(train_data, chunk_size=7)
        query_tensor = mt.tensor(query_data, chunk_size=(5, 3))

        mars_nn = NearestNeighbors(n_neighbors=3)
        mars_nn.fit(train_tensor)
        neighbors = mars_nn.kneighbors(query_tensor, session=cluster.session)

        sk_nn = SkNearestNeighbors(n_neighbors=3)
        sk_nn.fit(train_data)
        reference = sk_nn.kneighbors(query_data)

        fetched = [item.fetch() for item in neighbors]
        np.testing.assert_almost_equal(fetched[0], reference[0])
        np.testing.assert_almost_equal(fetched[1], reference[1])

        # --- ROC curve / AUC ---
        rng = np.random.RandomState(0)
        # Column 'a' is drawn before 'b'; the draw order fixes the data.
        frame = pd.DataFrame({
            'a': rng.randint(0, 10, (10, )),
            'b': rng.rand(10)
        })
        mars_frame = md.DataFrame(frame)
        labels = mars_frame['a'].to_tensor().astype('int')
        scores = mars_frame['b'].to_tensor().astype('float')
        fpr, tpr, _thresholds = roc_curve(labels, scores, pos_label=2)
        area = auc(fpr, tpr)

        sk_fpr, sk_tpr, _sk_thresholds = sklearn_roc_curve(
            frame['a'].to_numpy().astype('int'),
            frame['b'].to_numpy().astype('float'),
            pos_label=2)
        expected_area = sklearn_auc(sk_fpr, sk_tpr)
        self.assertAlmostEqual(area.fetch(), expected_area)
def testAutoIndex(self):
    """Build a faiss index without naming an index type and check queried indices."""
    dim = 8
    n_samples = 50
    n_queries = 10
    x = np.random.RandomState(0).rand(n_samples, dim).astype(np.float32)
    y = np.random.RandomState(1).rand(n_queries, dim).astype(np.float32)

    for chunk_size in (50, 20):
        X = mt.tensor(x, chunk_size=chunk_size)

        faiss_index = build_faiss_index(X, random_state=0,
                                        return_index_type='object')
        # Only the index tensor is executed; the distances are not used.
        _dist_tensor, ind_tensor = faiss_query(faiss_index, y, 5, nprobe=10)
        indices = self.executor.execute_tensor(ind_tensor, concat=True)[0]

        reference = NearestNeighbors()
        reference.fit(x)
        expected_indices = reference.kneighbors(y, 5, return_distance=False)

        np.testing.assert_array_equal(indices, expected_indices)
def testGPUFaissNearestNeighborsExecution(self):
    """Check ``NearestNeighbors(algorithm='faiss')`` on ``to_gpu()`` tensors against scikit-learn."""
    rng = np.random.RandomState(0)
    train_data = rng.rand(10, 5)
    query_data = rng.rand(8, 5)

    # test faiss execution
    train_tensor = mt.tensor(train_data, chunk_size=7).to_gpu()
    query_tensor = mt.tensor(query_data, chunk_size=8).to_gpu()

    faiss_nn = NearestNeighbors(n_neighbors=3, algorithm='faiss', metric='l2')
    faiss_nn.fit(train_tensor)
    neighbors = faiss_nn.kneighbors(query_tensor)

    sk_nn = SkNearestNeighbors(n_neighbors=3, algorithm='auto', metric='l2')
    sk_nn.fit(train_data)
    reference = sk_nn.kneighbors(query_data)

    fetched = [item.fetch() for item in neighbors]
    # ``.get()`` is called on each fetched result before comparing.
    np.testing.assert_almost_equal(fetched[0].get(), reference[0], decimal=6)
    np.testing.assert_almost_equal(fetched[1].get(), reference[1])
def test_manual_build_faiss_index(setup):
    """Build faiss indexes with explicit index names and inspect the results.

    Covers brute-force ('Flat') search, trained IVF indexes with and without
    ``same_distribution``, a composite index string, and the ``ValueError``
    raised for an unknown index name or metric.
    """
    dim = 8
    n_samples = 50
    n_queries = 10
    samples = np.random.RandomState(0).rand(n_samples, dim).astype(np.float32)
    queries = np.random.RandomState(0).rand(n_queries, dim).astype(np.float32)

    reference = NearestNeighbors(algorithm='kd_tree')
    reference.fit(samples)
    _, expected_indices = reference.kneighbors(queries, 5)

    # test brute-force search
    X = mt.tensor(samples, chunk_size=10)
    index = build_faiss_index(X, 'Flat', None, random_state=0,
                              same_distribution=True)
    serialized_shards = index.execute().fetch()

    # Merge the per-chunk indexes into one searchable index.
    sharded_index = faiss.IndexShards(dim)
    for serialized in serialized_shards:
        shard = _load_index(serialized, -1)
        sharded_index.add_shard(shard)

    # NOTE(review): ``nprob`` looks like a typo of ``nprobe`` -- confirm.
    sharded_index.nprob = 10
    _, indices = sharded_index.search(queries, k=5)

    np.testing.assert_array_equal(indices, expected_indices.fetch())

    # test one chunk, brute force
    X = mt.tensor(samples, chunk_size=50)
    index = build_faiss_index(X, 'Flat', None, random_state=0,
                              same_distribution=True)
    loaded = _load_index(index.execute().fetch(), -1)

    loaded.nprob = 10
    _, indices = loaded.search(queries, k=5)

    np.testing.assert_array_equal(indices, expected_indices.fetch())

    # test train, same distribution
    X = mt.tensor(samples, chunk_size=10)
    index = build_faiss_index(X, 'IVF30,Flat', 30, random_state=0,
                              same_distribution=True)
    loaded = _load_index(index.execute().fetch(), -1)

    assert isinstance(loaded, faiss.IndexIVFFlat)
    assert loaded.ntotal == n_samples
    assert len(tile(index).chunks) == 1

    # test train, distributions are variant
    X = mt.tensor(samples, chunk_size=10)
    index = build_faiss_index(X, 'IVF10,Flat', None, random_state=0,
                              same_distribution=False)
    serialized_shards = index.execute().fetch()

    assert len(serialized_shards) == 5
    for serialized in serialized_shards:
        shard = _load_index(serialized, -1)
        assert isinstance(shard, faiss.IndexIVFFlat)
        assert shard.ntotal == 10

    # test more index type
    index = build_faiss_index(X, 'PCAR6,IVF8_HNSW32,SQ8', 10, random_state=0)
    serialized_shards = index.execute().fetch()

    assert len(serialized_shards) == 5
    for serialized in serialized_shards:
        shard = _load_index(serialized, -1)
        assert isinstance(shard, faiss.IndexPreTransform)
        assert shard.ntotal == 10

    # test one chunk, train
    X = mt.tensor(samples, chunk_size=50)
    index = build_faiss_index(X, 'IVF30,Flat', 30, random_state=0,
                              same_distribution=True)
    loaded = _load_index(index.execute().fetch(), -1)

    assert isinstance(loaded, faiss.IndexIVFFlat)
    assert loaded.ntotal == n_samples

    # test wrong index
    with pytest.raises(ValueError):
        build_faiss_index(X, 'unknown_index', None)

    # test unknown metric
    with pytest.raises(ValueError):
        build_faiss_index(X, 'Flat', None, metric='unknown_metric')
def testNearestNeighbors(self):
    """Check argument validation and fit-method selection of ``NearestNeighbors``."""
    rng = np.random.RandomState(0)
    dense_x = rng.rand(10, 5)
    dense_y = rng.rand(8, 5)
    X = mt.tensor(dense_x)
    Y = mt.tensor(dense_y)

    # The sparse inputs draw from the same random state, after the dense ones.
    csr_x = sps.random(10, 5, density=0.5, format='csr', random_state=rng)
    csr_y = sps.random(8, 5, density=0.4, format='csr', random_state=rng)
    X_sparse = mt.tensor(csr_x)
    Y_sparse = mt.tensor(csr_y)

    euclidean = lambda u, v: np.sqrt(((u - v)**2).sum())

    # --- constructor validation ---
    NearestNeighbors(algorithm='auto', metric='precomputed', metric_params={})

    rejected_before_warning = [
        dict(algorithm='unknown'),
        dict(algorithm='kd_tree', metric=euclidean),
        dict(algorithm='auto', metric='unknown'),
    ]
    for kwargs in rejected_before_warning:
        with self.assertRaises(ValueError):
            NearestNeighbors(**kwargs)

    assert_warns(SyntaxWarning, NearestNeighbors, metric_params={'p': 1})

    rejected_after_warning = [
        dict(metric='wminkowski', p=0),
        dict(algorithm='auto', metric='minkowski', p=0),
    ]
    for kwargs in rejected_after_warning:
        with self.assertRaises(ValueError):
            NearestNeighbors(**kwargs)

    # --- effective metric for the minkowski family ---
    minkowski_cases = [(1, 'manhattan'), (2, 'euclidean'), (np.inf, 'chebyshev')]
    for p, effective_metric in minkowski_cases:
        nn = NearestNeighbors(algorithm='auto', metric='minkowski', p=p)
        nn.fit(X)
        self.assertEqual(nn.effective_metric_, effective_metric)

    # --- fitting on an estimator / a scikit-learn tree ---
    # ``nn`` here is the last estimator fitted in the loop above (p=np.inf).
    nn2 = NearestNeighbors(algorithm='auto', metric='minkowski')
    nn2.fit(nn)
    self.assertEqual(nn2._fit_method, nn._fit_method)

    nn = NearestNeighbors(algorithm='auto', metric='minkowski')
    nn.fit(SkBallTree(dense_x))
    self.assertEqual(nn._fit_method, 'ball_tree')

    nn = NearestNeighbors(algorithm='auto', metric='minkowski')
    nn.fit(SkKDTree(dense_x))
    self.assertEqual(nn._fit_method, 'kd_tree')

    # --- invalid or unusual training input ---
    with self.assertRaises(ValueError):
        nn = NearestNeighbors()
        nn.fit(np.random.rand(0, 10))

    nn = NearestNeighbors(algorithm='ball_tree')
    assert_warns(UserWarning, nn.fit, X_sparse)

    nn = NearestNeighbors(metric='haversine')
    with self.assertRaises(ValueError):
        nn.fit(X_sparse)

    # --- fit-method selection ---
    nn = NearestNeighbors(metric=euclidean, n_neighbors=1)
    nn.fit(X)
    self.assertEqual(nn._fit_method, 'ball_tree')

    nn = NearestNeighbors(metric='sqeuclidean', n_neighbors=1)
    nn.fit(X)
    self.assertEqual(nn._fit_method, 'brute')

    # --- invalid n_neighbors given to the constructor ---
    for error_type, bad_value in [(ValueError, -1), (TypeError, 1.3)]:
        with self.assertRaises(error_type):
            nn = NearestNeighbors(n_neighbors=bad_value)
            nn.fit(X)

    # --- invalid n_neighbors given to kneighbors ---
    nn = NearestNeighbors()
    nn.fit(X)
    for error_type, bad_value in [(ValueError, -1), (TypeError, 1.3),
                                  (ValueError, 11)]:
        with self.assertRaises(error_type):
            nn.kneighbors(Y, n_neighbors=bad_value)

    # --- sparse query against a ball_tree fit ---
    nn = NearestNeighbors(algorithm='ball_tree')
    nn.fit(X)
    with self.assertRaises(ValueError):
        nn.kneighbors(Y_sparse)
def testNearestNeighborsExecution(self):
    """Execute ``NearestNeighbors.kneighbors`` and compare with scikit-learn.

    Covers several algorithm/metric combinations, a callable metric, sparse
    input, boolean-filtered input, fitting on a scikit-learn tree, and
    round-tripping the built graph through pb/json serialization.
    """
    def assert_pair_close(actual, wanted):
        # Compare (distances, indices) element by element.
        np.testing.assert_almost_equal(actual[0], wanted[0])
        np.testing.assert_almost_equal(actual[1], wanted[1])

    def assert_graph_roundtrips(tensor):
        # Serialization must keep the number of graph nodes unchanged.
        graph = tensor.build_graph()
        self.assertEqual(len(graph.from_pb(graph.to_pb())), len(graph))
        self.assertEqual(len(graph.from_json(graph.to_json())), len(graph))

    rng = np.random.RandomState(0)
    raw_X = rng.rand(10, 5)
    raw_Y = rng.rand(8, 5)

    X = mt.tensor(raw_X, chunk_size=7)
    Y = mt.tensor(raw_Y, chunk_size=(5, 3))

    for algo in ['brute', 'ball_tree', 'kd_tree', 'auto']:
        for metric in ['minkowski', 'manhattan']:
            mars_nn = NearestNeighbors(n_neighbors=3, algorithm=algo,
                                       metric=metric)
            mars_nn.fit(X)
            neighbors = mars_nn.kneighbors(Y)

            sk_nn = SkNearestNeighbors(n_neighbors=3, algorithm=algo,
                                       metric=metric)
            sk_nn.fit(raw_X)
            reference = sk_nn.kneighbors(raw_Y)

            assert_pair_close([item.fetch() for item in neighbors], reference)

            if mars_nn._tree is not None:
                self.assertIsInstance(mars_nn._tree.fetch(),
                                      type(sk_nn._tree))

            # test return_distance=False
            indices_only = mars_nn.kneighbors(Y, return_distance=False)
            np.testing.assert_almost_equal(indices_only.fetch(), reference[1])

            # test y is x
            neighbors = mars_nn.kneighbors()
            reference = sk_nn.kneighbors()
            assert_pair_close([item.fetch() for item in neighbors], reference)

            # test y is x, and return_distance=False
            indices_only = mars_nn.kneighbors(return_distance=False)
            np.testing.assert_almost_equal(indices_only.fetch(), reference[1])

    # test callable metric
    metric = lambda u, v: np.sqrt(((u - v)**2).sum())
    for algo in ['brute', 'ball_tree']:
        mars_nn = NearestNeighbors(n_neighbors=3, algorithm=algo,
                                   metric=metric)
        mars_nn.fit(X)
        neighbors = mars_nn.kneighbors(Y)

        sk_nn = SkNearestNeighbors(n_neighbors=3, algorithm=algo,
                                   metric=metric)
        sk_nn.fit(raw_X)
        reference = sk_nn.kneighbors(raw_Y)

        assert_pair_close([item.fetch() for item in neighbors], reference)

    # test sparse
    raw_sparse_x = sps.random(10, 5, density=0.5, format='csr',
                              random_state=rng)
    raw_sparse_y = sps.random(8, 5, density=0.4, format='csr',
                              random_state=rng)

    X = mt.tensor(raw_sparse_x, chunk_size=7)
    Y = mt.tensor(raw_sparse_y, chunk_size=5)

    mars_nn = NearestNeighbors(n_neighbors=3)
    mars_nn.fit(X)
    neighbors = mars_nn.kneighbors(Y)

    sk_nn = SkNearestNeighbors(n_neighbors=3)
    sk_nn.fit(raw_sparse_x)
    reference = sk_nn.kneighbors(raw_sparse_y)

    assert_pair_close([item.fetch() for item in neighbors], reference)

    # test input with unknown shape
    X = mt.tensor(raw_X, chunk_size=7)
    X = X[X[:, 0] > 0.1]
    Y = mt.tensor(raw_Y, chunk_size=(5, 3))
    Y = Y[Y[:, 0] > 0.1]

    mars_nn = NearestNeighbors(n_neighbors=3)
    mars_nn.fit(X)
    neighbors = mars_nn.kneighbors(Y)

    # Apply the same row filter to the raw arrays for the reference.
    filtered_x = raw_X[raw_X[:, 0] > 0.1]
    filtered_y = raw_Y[raw_Y[:, 0] > 0.1]
    sk_nn = SkNearestNeighbors(n_neighbors=3)
    sk_nn.fit(filtered_x)
    reference = sk_nn.kneighbors(filtered_y)

    fetched = neighbors.fetch()
    self.assertEqual(mars_nn._fit_method, sk_nn._fit_method)
    assert_pair_close(fetched, reference)

    # test serialization
    assert_graph_roundtrips(neighbors[0])

    # test fit a sklearn tree
    mars_nn = NearestNeighbors(n_neighbors=3)
    mars_nn.fit(sk_nn._tree)
    neighbors = mars_nn.kneighbors(Y)

    fetched = neighbors.fetch()
    self.assertEqual(mars_nn._fit_method, sk_nn._fit_method)
    assert_pair_close(fetched, reference)

    # test serialization
    assert_graph_roundtrips(neighbors[0])
def testManualBuildFaissIndex(self):
    """Build faiss indexes with explicit index names and inspect the results.

    The brute-force multi-chunk case is repeated for each return index type;
    the remaining cases use ``return_index_type='object'``.
    """
    dim = 8
    n_samples = 50
    n_queries = 10
    samples = np.random.RandomState(0).rand(n_samples, dim).astype(np.float32)
    queries = np.random.RandomState(0).rand(n_queries, dim).astype(np.float32)

    reference = NearestNeighbors(algorithm='kd_tree')
    reference.fit(samples)
    _, expected_indices = reference.kneighbors(queries, 5)

    for index_type in ['object', 'filename', 'bytes']:
        # test brute-force search
        X = mt.tensor(samples, chunk_size=10)
        index = build_faiss_index(X, 'Flat', None, random_state=0,
                                  same_distribution=True,
                                  return_index_type=index_type)
        chunk_results = self.executor.execute_tileable(index)

        # Merge the per-chunk indexes into one searchable index.
        sharded_index = faiss.IndexShards(dim)
        for chunk_result in chunk_results:
            shard = _load_index(None, index.op, chunk_result, -1)
            sharded_index.add_shard(shard)

        # NOTE(review): ``nprob`` looks like a typo of ``nprobe`` -- confirm.
        sharded_index.nprob = 10
        _, indices = sharded_index.search(queries, k=5)

        np.testing.assert_array_equal(indices, expected_indices.fetch())

    # test one chunk, brute force
    X = mt.tensor(samples, chunk_size=50)
    index = build_faiss_index(X, 'Flat', None, random_state=0,
                              same_distribution=True,
                              return_index_type='object')
    single_index = self.executor.execute_tileable(index)[0]

    single_index.nprob = 10
    _, indices = single_index.search(queries, k=5)

    np.testing.assert_array_equal(indices, expected_indices.fetch())

    # test train, same distribution
    X = mt.tensor(samples, chunk_size=10)
    index = build_faiss_index(X, 'IVF30,Flat', 30, random_state=0,
                              same_distribution=True,
                              return_index_type='object')
    single_index = self.executor.execute_tileable(index)[0]

    self.assertIsInstance(single_index, faiss.IndexIVFFlat)
    self.assertEqual(single_index.ntotal, n_samples)
    self.assertEqual(len(get_tiled(index).chunks), 1)

    # test train, distributions are variant
    X = mt.tensor(samples, chunk_size=10)
    index = build_faiss_index(X, 'IVF10,Flat', None, random_state=0,
                              same_distribution=False,
                              return_index_type='object')
    chunk_results = self.executor.execute_tileable(index)

    self.assertEqual(len(chunk_results), 5)
    for chunk_index in chunk_results:
        self.assertIsInstance(chunk_index, faiss.IndexIVFFlat)
        self.assertEqual(chunk_index.ntotal, 10)

    # test one chunk, train
    X = mt.tensor(samples, chunk_size=50)
    index = build_faiss_index(X, 'IVF30,Flat', 30, random_state=0,
                              same_distribution=True,
                              return_index_type='object')
    single_index = self.executor.execute_tileable(index)[0]

    self.assertIsInstance(single_index, faiss.IndexIVFFlat)
    self.assertEqual(single_index.ntotal, n_samples)

    # test wrong index
    with self.assertRaises(ValueError):
        build_faiss_index(X, 'unknown_index', None)

    # test unknown metric
    with self.assertRaises(ValueError):
        build_faiss_index(X, 'Flat', None, metric='unknown_metric')
def testNearestNeighborsExecution(self):
    """Execute ``NearestNeighbors.kneighbors`` and compare with scikit-learn.

    Covers several algorithm/metric combinations, a callable metric and
    sparse input.
    """
    def assert_pair_close(actual, wanted):
        # Compare (distances, indices) element by element.
        np.testing.assert_almost_equal(actual[0], wanted[0])
        np.testing.assert_almost_equal(actual[1], wanted[1])

    rng = np.random.RandomState(0)
    raw_X = rng.rand(10, 5)
    raw_Y = rng.rand(8, 5)

    X = mt.tensor(raw_X, chunk_size=7)
    Y = mt.tensor(raw_Y, chunk_size=(5, 3))

    for algo in ['brute', 'ball_tree', 'kd_tree', 'auto']:
        for metric in ['minkowski', 'manhattan']:
            mars_nn = NearestNeighbors(n_neighbors=3, algorithm=algo,
                                       metric=metric)
            mars_nn.fit(X)
            neighbors = mars_nn.kneighbors(Y)

            sk_nn = SkNearestNeighbors(n_neighbors=3, algorithm=algo,
                                       metric=metric)
            sk_nn.fit(raw_X)
            reference = sk_nn.kneighbors(raw_Y)

            assert_pair_close([item.fetch() for item in neighbors], reference)

            # test return_distance=False
            indices_only = mars_nn.kneighbors(Y, return_distance=False)
            np.testing.assert_almost_equal(indices_only.fetch(), reference[1])

            # test y is x
            neighbors = mars_nn.kneighbors()
            reference = sk_nn.kneighbors()
            assert_pair_close([item.fetch() for item in neighbors], reference)

            # test y is x, and return_distance=False
            indices_only = mars_nn.kneighbors(return_distance=False)
            np.testing.assert_almost_equal(indices_only.fetch(), reference[1])

    # test callable metric
    metric = lambda u, v: np.sqrt(((u - v)**2).sum())
    for algo in ['brute', 'ball_tree']:
        mars_nn = NearestNeighbors(n_neighbors=3, algorithm=algo,
                                   metric=metric)
        mars_nn.fit(X)
        neighbors = mars_nn.kneighbors(Y)

        sk_nn = SkNearestNeighbors(n_neighbors=3, algorithm=algo,
                                   metric=metric)
        sk_nn.fit(raw_X)
        reference = sk_nn.kneighbors(raw_Y)

        assert_pair_close([item.fetch() for item in neighbors], reference)

    # test sparse
    raw_sparse_x = sps.random(10, 5, density=0.5, format='csr',
                              random_state=rng)
    raw_sparse_y = sps.random(8, 5, density=0.4, format='csr',
                              random_state=rng)

    X = mt.tensor(raw_sparse_x, chunk_size=7)
    Y = mt.tensor(raw_sparse_y, chunk_size=5)

    mars_nn = NearestNeighbors(n_neighbors=3)
    mars_nn.fit(X)
    neighbors = mars_nn.kneighbors(Y)

    sk_nn = SkNearestNeighbors(n_neighbors=3)
    sk_nn.fit(raw_sparse_x)
    reference = sk_nn.kneighbors(raw_sparse_y)

    assert_pair_close([item.fetch() for item in neighbors], reference)