def test_faiss_query(setup, X, Y, metric):
    """Compare ``faiss_query`` results with ``NearestNeighbors`` for one metric.

    A ``'Flat'`` index is built over ``X`` and queried with ``Y`` for the five
    nearest neighbours. Indices must equal the reference exactly and distances
    must agree to four decimals. A composite index is then built on float64
    copies and queried; that second query is only executed, not checked.

    ``x`` and ``y`` are not defined inside this function; they are presumably
    module-level arrays backing ``X`` and ``Y`` -- TODO confirm.
    """
    top_k = 5

    # Brute-force index: results are compared against the reference.
    flat_index = build_faiss_index(
        X, 'Flat', None, metric=metric, random_state=0)
    dist_tensor, ind_tensor = faiss_query(flat_index, Y, top_k, nprobe=10)
    distance, indices = fetch(*execute(dist_tensor, ind_tensor))

    reference = NearestNeighbors(metric=metric)
    reference.fit(x)
    expected_distance, expected_indices = reference.kneighbors(y, top_k)

    np.testing.assert_array_equal(indices, expected_indices.fetch())
    np.testing.assert_almost_equal(
        distance, expected_distance.fetch(), decimal=4)

    # Composite index on float64 inputs: only verify that execution succeeds.
    X2 = X.astype(np.float64)
    Y2 = y.astype(np.float64)
    composite_index = build_faiss_index(
        X2, 'PCAR6,IVF8_HNSW32,SQ8', 10,
        random_state=0, return_index_type='object')
    dist_tensor, ind_tensor = faiss_query(
        composite_index, Y2, top_k, nprobe=10)
    execute(dist_tensor, ind_tensor)
def testFaissQuery(self):
    """Check ``faiss_query`` against ``NearestNeighbors`` on a 'Flat' index.

    Runs every combination of two chunk layouts (multi-chunk and one-chunk)
    and two metrics (``'l2'`` and ``'cosine'``), asserting that the returned
    indices and distances match the reference results.
    """
    dim, n_train, n_query, top_k = 8, 50, 10, 5
    x = np.random.RandomState(0).rand(n_train, dim).astype(np.float32)
    y = np.random.RandomState(1).rand(n_query, dim).astype(np.float32)

    multi_chunk = (mt.tensor(x, chunk_size=(20, 5)),
                   mt.tensor(y, chunk_size=5))
    one_chunk = (mt.tensor(x, chunk_size=50),
                 mt.tensor(y, chunk_size=10))

    for X, Y in (multi_chunk, one_chunk):
        for metric in ('l2', 'cosine'):
            flat_index = build_faiss_index(
                X, 'Flat', None, metric=metric,
                random_state=0, return_index_type='object')
            dist_tensor, ind_tensor = faiss_query(
                flat_index, Y, top_k, nprobe=10)
            distance, indices = self.executor.execute_tensors(
                [dist_tensor, ind_tensor])

            reference = NearestNeighbors(metric=metric)
            reference.fit(x)
            expected_distance, expected_indices = reference.kneighbors(
                y, top_k)

            np.testing.assert_array_equal(
                indices, expected_indices.fetch())
            np.testing.assert_almost_equal(
                distance, expected_distance.fetch())
def testAutoIndex(self):
    """Check that an index built without an explicit index name gives the
    same neighbour indices as ``NearestNeighbors``.

    The check is repeated for chunk sizes 50 and 20 of the training tensor.
    """
    dim, n_train, n_query, top_k = 8, 50, 10, 5
    x = np.random.RandomState(0).rand(n_train, dim).astype(np.float32)
    y = np.random.RandomState(1).rand(n_query, dim).astype(np.float32)

    for chunk_size in (50, 20):
        X = mt.tensor(x, chunk_size=chunk_size)

        auto_index = build_faiss_index(
            X, random_state=0, return_index_type='object')
        # Only the index tensor is executed; the distance tensor is unused.
        _, ind_tensor = faiss_query(auto_index, y, top_k, nprobe=10)
        indices = self.executor.execute_tensor(ind_tensor, concat=True)[0]

        reference = NearestNeighbors()
        reference.fit(x)
        expected_indices = reference.kneighbors(
            y, top_k, return_distance=False)

        np.testing.assert_array_equal(indices, expected_indices)
def test_manual_build_faiss_index(setup):
    """Exercise ``build_faiss_index`` across index types and chunk layouts.

    Covers brute-force search over several chunks (merged through
    ``faiss.IndexShards``) and over a single chunk, trained IVF indexes with
    ``same_distribution`` both True and False, a composite pre-transform
    index, and the ``ValueError`` raised for an unknown index name or metric.
    """
    dim, n_train, n_query, top_k = 8, 50, 10, 5
    x = np.random.RandomState(0).rand(n_train, dim).astype(np.float32)
    y = np.random.RandomState(0).rand(n_query, dim).astype(np.float32)

    reference = NearestNeighbors(algorithm='kd_tree')
    reference.fit(x)
    _, expected_indices = reference.kneighbors(y, top_k)

    # --- brute-force search over several chunks -------------------------
    X = mt.tensor(x, chunk_size=10)
    index = build_faiss_index(
        X, 'Flat', None, random_state=0, same_distribution=True)
    serialized_shards = index.execute().fetch()

    sharded_index = faiss.IndexShards(dim)
    for serialized in serialized_shards:
        shard = _load_index(serialized, -1)
        sharded_index.add_shard(shard)

    # NOTE(review): attribute name ``nprob`` is kept exactly as in the
    # original; it may be a misspelling of ``nprobe`` -- confirm.
    sharded_index.nprob = 10
    _, indices = sharded_index.search(y, k=5)
    np.testing.assert_array_equal(indices, expected_indices.fetch())

    # --- brute-force search, single chunk -------------------------------
    X = mt.tensor(x, chunk_size=50)
    index = build_faiss_index(
        X, 'Flat', None, random_state=0, same_distribution=True)
    flat_index = _load_index(index.execute().fetch(), -1)
    flat_index.nprob = 10
    _, indices = flat_index.search(y, k=5)
    np.testing.assert_array_equal(indices, expected_indices.fetch())

    # --- trained index, same distribution -------------------------------
    X = mt.tensor(x, chunk_size=10)
    index = build_faiss_index(
        X, 'IVF30,Flat', 30, random_state=0, same_distribution=True)
    trained_index = _load_index(index.execute().fetch(), -1)
    assert isinstance(trained_index, faiss.IndexIVFFlat)
    assert trained_index.ntotal == n_train
    assert len(tile(index).chunks) == 1

    # --- trained index, distributions are variant -----------------------
    X = mt.tensor(x, chunk_size=10)
    index = build_faiss_index(
        X, 'IVF10,Flat', None, random_state=0, same_distribution=False)
    per_chunk_indexes = index.execute().fetch()
    assert len(per_chunk_indexes) == 5
    for serialized in per_chunk_indexes:
        loaded = _load_index(serialized, -1)
        assert isinstance(loaded, faiss.IndexIVFFlat)
        assert loaded.ntotal == 10

    # --- another index type (reuses the chunk_size=10 tensor) -----------
    index = build_faiss_index(
        X, 'PCAR6,IVF8_HNSW32,SQ8', 10, random_state=0)
    per_chunk_indexes = index.execute().fetch()
    assert len(per_chunk_indexes) == 5
    for serialized in per_chunk_indexes:
        loaded = _load_index(serialized, -1)
        assert isinstance(loaded, faiss.IndexPreTransform)
        assert loaded.ntotal == 10

    # --- trained index, single chunk ------------------------------------
    X = mt.tensor(x, chunk_size=50)
    index = build_faiss_index(
        X, 'IVF30,Flat', 30, random_state=0, same_distribution=True)
    trained_index = _load_index(index.execute().fetch(), -1)
    assert isinstance(trained_index, faiss.IndexIVFFlat)
    assert trained_index.ntotal == n_train

    # --- invalid arguments ----------------------------------------------
    # test wrong index
    with pytest.raises(ValueError):
        build_faiss_index(X, 'unknown_index', None)

    # test unknown metric
    with pytest.raises(ValueError):
        build_faiss_index(X, 'Flat', None, metric='unknown_metric')
def testManualBuildFaissIndex(self):
    """Exercise ``build_faiss_index`` through ``self.executor``.

    Brute-force search over several chunks is repeated for each
    ``return_index_type`` ('object', 'filename', 'bytes'). The remaining
    sections use ``return_index_type='object'``: single-chunk brute force,
    trained IVF indexes with ``same_distribution`` True and False, and the
    ``ValueError`` raised for an unknown index name or metric.
    """
    dim, n_train, n_query, top_k = 8, 50, 10, 5
    x = np.random.RandomState(0).rand(n_train, dim).astype(np.float32)
    y = np.random.RandomState(0).rand(n_query, dim).astype(np.float32)

    reference = NearestNeighbors(algorithm='kd_tree')
    reference.fit(x)
    _, expected_indices = reference.kneighbors(y, top_k)

    # --- brute-force search over several chunks, per return type --------
    for index_type in ('object', 'filename', 'bytes'):
        X = mt.tensor(x, chunk_size=10)
        index = build_faiss_index(
            X, 'Flat', None, random_state=0,
            same_distribution=True, return_index_type=index_type)
        chunk_results = self.executor.execute_tileable(index)

        sharded_index = faiss.IndexShards(dim)
        for chunk_result in chunk_results:
            shard = _load_index(None, index.op, chunk_result, -1)
            sharded_index.add_shard(shard)

        # NOTE(review): attribute name ``nprob`` is kept exactly as in the
        # original; it may be a misspelling of ``nprobe`` -- confirm.
        sharded_index.nprob = 10
        _, indices = sharded_index.search(y, k=5)
        np.testing.assert_array_equal(indices, expected_indices.fetch())

    # --- brute-force search, single chunk -------------------------------
    X = mt.tensor(x, chunk_size=50)
    index = build_faiss_index(
        X, 'Flat', None, random_state=0,
        same_distribution=True, return_index_type='object')
    flat_index = self.executor.execute_tileable(index)[0]
    flat_index.nprob = 10
    _, indices = flat_index.search(y, k=5)
    np.testing.assert_array_equal(indices, expected_indices.fetch())

    # --- trained index, same distribution -------------------------------
    X = mt.tensor(x, chunk_size=10)
    index = build_faiss_index(
        X, 'IVF30,Flat', 30, random_state=0,
        same_distribution=True, return_index_type='object')
    trained_index = self.executor.execute_tileable(index)[0]
    self.assertIsInstance(trained_index, faiss.IndexIVFFlat)
    self.assertEqual(trained_index.ntotal, n_train)
    self.assertEqual(len(get_tiled(index).chunks), 1)

    # --- trained index, distributions are variant -----------------------
    X = mt.tensor(x, chunk_size=10)
    index = build_faiss_index(
        X, 'IVF10,Flat', None, random_state=0,
        same_distribution=False, return_index_type='object')
    per_chunk_indexes = self.executor.execute_tileable(index)
    self.assertEqual(len(per_chunk_indexes), 5)
    for chunk_index in per_chunk_indexes:
        self.assertIsInstance(chunk_index, faiss.IndexIVFFlat)
        self.assertEqual(chunk_index.ntotal, 10)

    # --- trained index, single chunk ------------------------------------
    X = mt.tensor(x, chunk_size=50)
    index = build_faiss_index(
        X, 'IVF30,Flat', 30, random_state=0,
        same_distribution=True, return_index_type='object')
    trained_index = self.executor.execute_tileable(index)[0]
    self.assertIsInstance(trained_index, faiss.IndexIVFFlat)
    self.assertEqual(trained_index.ntotal, n_train)

    # --- invalid arguments ----------------------------------------------
    # test wrong index
    with self.assertRaises(ValueError):
        build_faiss_index(X, 'unknown_index', None)

    # test unknown metric
    with self.assertRaises(ValueError):
        build_faiss_index(X, 'Flat', None, metric='unknown_metric')