def test_IVFPQ_non8bit(self):
    """Compare two IndexIVFPQ configurations against exact search.

    Both variants share one flat coarse quantizer. For each, the size of
    the intersection between its top-10 results and the exact top-10 is
    checked against hard-coded reference counts.
    """
    d = 16
    nlist = 64
    xt, xb, xq = get_dataset_2(d, 10000, 2000, 200)

    # exact search provides the ground truth
    exact = faiss.IndexFlat(d)
    exact.add(xb)
    _, gt_I = exact.search(xq, 10)

    quantizer = faiss.IndexFlat(d)
    # label -> last two positional arguments given to IndexIVFPQ
    variants = (('2x8', (8, 2)), ('8x2', (2, 8)))
    ninter = {}
    for v, (arg_a, arg_b) in variants:
        index = faiss.IndexIVFPQ(quantizer, d, nlist, arg_a, arg_b)
        index.train(xt)
        index.add(xb)
        # NOTE(review): `npobe` looks like a typo for `nprobe`. The
        # reference numbers below were presumably measured with this
        # spelling in place, so correcting it requires re-measuring them.
        index.npobe = 16

        _, I = index.search(xq, 10)
        ninter[v] = faiss.eval_intersection(I, gt_I)
    print('ninter=', ninter)

    # One would expect ninter['2x8'] > ninter['8x2'], but this is not
    # observed, probably because there are too few test points.
    # Reference numbers measured on 2019-11-02.
    assert abs(ninter['2x8'] - 458) < 4
    assert abs(ninter['8x2'] - 465) < 4
def subtest(self, d, K, metric):
    """Check the k-NN graph built by IndexNNDescentFlat against faiss.knn.

    Args:
        d: vector dimension.
        K: number of graph neighbors per point.
        metric: faiss metric constant (L1, L2 or inner product).
    """
    metric_names = {faiss.METRIC_L1: 'L1',
                    faiss.METRIC_L2: 'L2',
                    faiss.METRIC_INNER_PRODUCT: 'IP'}

    nb = 1000
    _, xb, _ = get_dataset_2(d, 0, nb, 0)

    # reference neighbors: ask for K + 1 and drop the first column
    _, exact = faiss.knn(xb, xb, K + 1, metric)
    exact = exact[:, 1:]

    index = faiss.IndexNNDescentFlat(d, K, metric)
    nnd = index.nndescent
    nnd.S = 10
    nnd.R = 32
    nnd.L = K + 20
    nnd.iter = 5
    index.verbose = True
    index.add(xb)

    # the final graph is exposed as a flat vector; view it as (nb, K)
    graph = faiss.vector_to_array(index.nndescent.final_graph).reshape(nb, K)

    # count graph entries that also appear among the reference neighbors
    hits = sum(
        1
        for i in range(nb)
        for j in range(K)
        if any(graph[i, j] == exact[i, k] for k in range(K))
    )
    recall = 1.0 * hits / (nb * K)
    print('Metric: {}, knng accuracy: {}'.format(metric_names[metric], recall))
    assert recall > 0.99
def do_mmappedIO(self, sparse, in_pretransform=False):
    """Write an IVF index to disk and read it back normally and via mmap.

    Args:
        sparse: when true, shift the training set so that the inverted
            lists end up sparse.
        in_pretransform: when true, wrap the IVF index in an
            IndexPreTransform before training.

    Both re-read indexes are checked against the in-memory one through
    ``self.compare_results``.
    """
    d = 10
    nb = 1000
    nq = 200
    nt = 200
    xt, xb, xq = get_dataset_2(d, nt, nb, nq)

    quantizer = faiss.IndexFlatL2(d)
    index1 = faiss.IndexIVFFlat(quantizer, d, 20)
    if sparse:
        # makes the inverted lists sparse because all elements get
        # assigned to the same invlist
        xt += (np.ones(d) * 1000).astype('float32')

    if in_pretransform:
        # make sure it still works when wrapped in an IndexPreTransform
        index1 = faiss.IndexPreTransform(index1)

    index1.train(xt)
    index1.add(xb)

    fd, fname = tempfile.mkstemp()
    # mkstemp hands back an open OS-level descriptor; the file is only
    # accessed by name below, so close it right away to avoid leaking it
    os.close(fd)
    try:
        faiss.write_index(index1, fname)

        index2 = faiss.read_index(fname)
        self.compare_results(index1, index2, xq)

        index3 = faiss.read_index(fname, faiss.IO_FLAG_MMAP)
        self.compare_results(index1, index3, xq)
    finally:
        if os.path.exists(fname):
            os.unlink(fname)
def test_IndexIVFPQ(self):
    """Check IndexIVFPQ.search argument validation and output shape."""
    d, nb, nt, nq = 32, 1000, 1500, 200
    xt, xb, xq = get_dataset_2(d, nt, nb, nq)

    coarse_quantizer = faiss.IndexFlatL2(d)
    index = faiss.IndexIVFPQ(coarse_quantizer, d, 32, 8, 8)
    index.cp.min_points_per_centroid = 5    # quiet warning
    index.train(xt)
    index.add(xb)

    # nprobe = 0 is rejected
    index.nprobe = 0
    with self.assertRaises(RuntimeError):
        index.search(xq, 10)

    # negative k is rejected
    index.nprobe = 4
    with self.assertRaises(AssertionError):
        index.search(xq, -10)

    # valid parameters: one row per query, k columns
    index.nprobe = 4
    k = 10
    D, _ = index.search(xq, k)
    self.assertEqual(D.shape[0], nq)
    self.assertEqual(D.shape[1], k)
def test_encoded(self):
    """Clustering on decoded data must match clustering on encoded data.

    Training on a compressed-then-decompressed dataset is compared with
    training that decompresses on the fly (``train_encoded``); the
    resulting centroids are required to be exactly equal.
    """
    d, k = 32, 5
    xt, _, _ = get_dataset_2(d, 1000, 0, 0)

    codec = faiss.IndexScalarQuantizer(d, faiss.ScalarQuantizer.QT_4bit)
    codec.train(xt)
    codes = codec.sa_encode(xt)
    xt2 = codec.sa_decode(codes)

    # reference: cluster the explicitly decoded vectors
    clus_ref = faiss.Clustering(d, k)
    clus_ref.niter = 0
    index_ref = faiss.IndexFlatL2(d)
    clus_ref.train(xt2, index_ref)
    ref_centroids = faiss.vector_to_array(clus_ref.centroids).reshape(-1, d)
    index_ref.search(xt2, 1)

    # same clustering, decoding the codes block by block
    clus_enc = faiss.Clustering(d, k)
    clus_enc.niter = 0
    clus_enc.decode_block_size = 120
    index_enc = faiss.IndexFlatL2(d)
    clus_enc.train_encoded(codes, codec, index_enc)
    new_centroids = faiss.vector_to_array(clus_enc.centroids).reshape(-1, d)
    index_enc.search(xt2, 1)

    # It's the same operation, so should be bit-exact the same
    self.assertTrue(np.all(ref_centroids == new_centroids))
def do_encode_twice(self, factory_key):
    """Check that encode -> decode -> encode is (nearly) a fixed point.

    Args:
        factory_key: index_factory string describing the codec.

    For non-IVF codecs the second encoding must equal the first; for IVF
    codecs a few rows are tolerated to differ.
    """
    d, nb, nq, nt = 96, 1000, 0, 2000
    xt, x, _ = get_dataset_2(d, nt, nb, nq)
    assert x.size > 0

    codec = faiss.index_factory(d, factory_key)
    codec.train(xt)

    codes = codec.sa_encode(x)
    x2 = codec.sa_decode(codes)
    codes2 = codec.sa_encode(x2)

    is_ivf = 'IVF' in factory_key

    if is_ivf:
        # some rows are not reconstructed exactly because they
        # flip into another quantization cell
        nrowdiff = (codes != codes2).any(axis=1).sum()
        self.assertTrue(nrowdiff < 10)
    else:
        self.assertTrue(np.all(codes == codes2))

    x3 = codec.sa_decode(codes2)

    if is_ivf:
        row_diffs = np.sort(np.abs(x2 - x3).sum(axis=1))
        avg = np.abs(x2).sum(axis=1).mean()
        # all but the largest few row differences must be negligible
        assert row_diffs[-10] < avg * 1e-5
    else:
        self.assertTrue(np.allclose(x2, x3))
def test_progressive_dim(self):
    """Progressive-dimension clustering should beat plain k-means.

    The final objective is compared for ProgressiveDimClustering used
    directly and through the Kmeans wrapper.
    """
    d, n, k = 32, 10000, 50
    xt, _, _ = get_dataset_2(d, n, 0, 0)

    # baseline: basic kmeans
    kmeans = faiss.Kmeans(d, k)
    kmeans.train(xt)

    pd_clus = faiss.ProgressiveDimClustering(d, k)
    # bare attribute reads: they only check that the fields are readable
    pd_clus.verbose
    pd_clus.verbose = True
    pd_clus.progressive_dim_steps
    pd_clus.progressive_dim_steps = 5
    factory = faiss.ProgressiveDimIndexFactory()
    pd_clus.train(n, faiss.swig_ptr(xt), factory)

    it_stats = pd_clus.iteration_stats
    objectives = np.array(
        [it_stats.at(i).obj for i in range(it_stats.size())]
    )

    # clustering objective should be a tad better
    self.assertLess(objectives[-1], kmeans.obj[-1])

    # same test w/ Kmeans wrapper
    kmeans2 = faiss.Kmeans(d, k, progressive_dim_steps=5)
    kmeans2.train(xt)
    self.assertLess(kmeans2.obj[-1], kmeans.obj[-1])
def test_stats(self):
    """Kmeans.obj must match the 'obj' entries of iteration_stats."""
    d, k = 32, 5
    xt, _, _ = get_dataset_2(d, 1000, 0, 0)

    km = faiss.Kmeans(d, k, niter=4)
    km.train(xt)

    objectives_from_stats = [st['obj'] for st in km.iteration_stats]
    assert list(km.obj) == objectives_from_stats
def test_slice_vstack(self):
    """Slicing inverted lists then stacking them back must be lossless.

    The 30 lists of an IVF index are cut into three slices of 10,
    recombined with VStackInvertedLists and searched; results must be
    identical to those of the original index.
    """
    d, nb, nq, nt = 10, 1000, 100, 200
    xt, xb, xq = get_dataset_2(d, nt, nb, nq)

    quantizer = faiss.IndexFlatL2(d)
    index = faiss.IndexIVFFlat(quantizer, d, 30)
    index.train(xt)
    index.add(xb)
    Dref, Iref = index.search(xq, 10)

    il0 = index.invlists
    # keep Python references to the slices for the whole test
    ils = [
        faiss.SliceInvertedLists(il0, start, start + 10)
        for start in (0, 10, 20)
    ]
    ilv = faiss.InvertedListsPtrVector()
    for piece in ils:
        ilv.push_back(piece)

    il2 = faiss.VStackInvertedLists(ilv.size(), ilv.data())

    index2 = faiss.IndexIVFFlat(quantizer, d, 30)
    index2.replace_invlists(il2)
    index2.ntotal = index.ntotal

    D, I = index2.search(xq, 10)
    assert (D == Dref).all()
    assert (I == Iref).all()
def test_IMI_2(self):
    """IVFPQ over a MultiIndexQuantizer2 coarse quantizer, with training.

    Counts how many top-10 entries coincide with the exact nearest
    neighbor and requires more than 165 matches.
    """
    d, nb, nt, nq = 32, 1000, 1500, 200
    xt, xb, xq = get_dataset_2(d, nt, nb, nq)
    d = xt.shape[1]

    # exact nearest neighbor of each query
    exact = faiss.IndexFlatL2(d)
    exact.add(xb)
    _, gt_nns = exact.search(xq, 1)

    nbits = 5
    half = d // 2
    ai0 = faiss.IndexFlatL2(half)
    ai1 = faiss.IndexFlatL2(half)

    coarse_quantizer = faiss.MultiIndexQuantizer2(d, nbits, ai0, ai1)
    nlist = (1 << nbits) ** 2
    index = faiss.IndexIVFPQ(coarse_quantizer, d, nlist, 8, 8)
    index.quantizer_trains_alone = 1
    index.train(xt)
    index.add(xb)
    index.nprobe = 100

    _, nns = index.search(xq, 10)
    # gt_nns has one column, so the comparison broadcasts over the 10 results
    n_ok = (nns == gt_nns).sum()

    self.assertGreater(n_ok, 165)
def test_serialize_to_vector(self):
    """Round-trip a flat index through VectorIOWriter / VectorIOReader.

    The serialized bytes are fed back two ways: by swapping the
    underlying vector, and by copying from an intermediate numpy array.
    """
    d, nb, nq, nt = 10, 1000, 200, 500
    xt, xb, xq = get_dataset_2(d, nt, nb, nq)

    index = faiss.IndexFlatL2(d)
    index.add(xb)
    Dref, Iref = index.search(xq, 5)

    writer = faiss.VectorIOWriter()
    faiss.write_index(index, writer)

    # take a numpy copy before the vector is swapped away below
    ar_data = faiss.vector_to_array(writer.data)

    # direct transfer of vector
    swap_reader = faiss.VectorIOReader()
    swap_reader.data.swap(writer.data)
    index2 = faiss.read_index(swap_reader)
    Dnew, Inew = index2.search(xq, 5)
    assert np.all(Dnew == Dref) and np.all(Inew == Iref)

    # from intermediate numpy array
    copy_reader = faiss.VectorIOReader()
    faiss.copy_array_to_vector(ar_data, copy_reader.data)
    index3 = faiss.read_index(copy_reader)
    Dnew, Inew = index3.search(xq, 5)
    assert np.all(Dnew == Dref) and np.all(Inew == Iref)
def subtest(self, d, metric, topk, search_L, threshold):
    """Check IndexNNDescentFlat search recall against exact search.

    Args:
        d: vector dimension.
        metric: faiss metric constant (L1, L2 or inner product).
        topk: number of results; overwritten with 10 below.
        search_L: value assigned to ``index.nndescent.search_L``.
        threshold: recall that must be strictly exceeded.
    """
    metric_names = {faiss.METRIC_L1: 'L1',
                    faiss.METRIC_L2: 'L2',
                    faiss.METRIC_INNER_PRODUCT: 'IP'}

    # NOTE(review): this discards the caller-supplied topk -- confirm
    # whether that is intended before removing it.
    topk = 10
    nt, nb, nq = 2000, 1000, 200
    xt, xb, xq = get_dataset_2(d, nt, nb, nq)

    exact = faiss.IndexFlat(d, metric)
    exact.add(xb)
    _, gt_I = exact.search(xq, topk)

    K = 16
    index = faiss.IndexNNDescentFlat(d, K, metric)
    nnd = index.nndescent
    nnd.S = 10
    nnd.R = 32
    nnd.L = K + 20
    nnd.iter = 5
    index.verbose = False
    nnd.search_L = search_L

    index.add(xb)
    _, I = index.search(xq, topk)

    # count returned ids that also appear in the exact result row
    hits = sum(
        1
        for i in range(nq)
        for j in range(topk)
        if any(I[i, j] == gt_I[i, k] for k in range(topk))
    )
    recall = 1.0 * hits / (nq * topk)
    print('Metric: {}, L: {}, Recall@{}: {}'.format(
        metric_names[metric], search_L, topk, recall))
    assert recall > threshold, '{} <= {}'.format(recall, threshold)
def compare_accuracy(self, lowac, highac, max_errs=(1e10, 1e10)):
    """Compare reconstruction error of two standalone codecs.

    Args:
        lowac: index_factory string of the codec expected to be less
            accurate.
        highac: index_factory string of the codec expected to be more
            accurate.
        max_errs: upper bounds on the squared error of (lowac, highac).
    """
    d, nb, nq, nt = 96, 1000, 0, 2000
    xt, x, _ = get_dataset_2(d, nt, nb, nq)

    errs = []
    for factory_string in (lowac, highac):
        codec = faiss.index_factory(d, factory_string)
        print('sa codec: code size %d' % codec.sa_code_size())
        codec.train(xt)

        x2 = codec.sa_decode(codec.sa_encode(x))
        # total squared reconstruction error
        errs.append(((x - x2) ** 2).sum())

    print(errs)
    low_err, high_err = errs
    self.assertGreater(low_err, high_err)
    self.assertGreater(max_errs[0], low_err)
    self.assertGreater(max_errs[1], high_err)

    if 'Lattice' not in highac:
        return

    # just a small IndexLattice I/O test; `codec` and `x2` still refer
    # to the highac codec from the last loop iteration
    codec2 = faiss.deserialize_index(faiss.serialize_index(codec))
    x3 = codec2.sa_decode(codec2.sa_encode(x))
    self.assertTrue(np.all(x2 == x3))
def __init__(self, *args, **kwargs):
    """Initialize the test case and prepare the shared dataset.

    Stores the base vectors in ``self.xb``, the queries in ``self.xq``
    and sets ``self.GK`` to 32.
    """
    unittest.TestCase.__init__(self, *args, **kwargs)
    self.GK = 32
    dim, n_train, n_base, n_query = 32, 0, 1500, 500
    dataset = get_dataset_2(dim, n_train, n_base, n_query)
    _, self.xb, self.xq = dataset
def test_polysemous_OOM(self):
    """Polysemous training with many bits must fail cleanly.

    This used to cause an out-of-memory condition; with a capped
    ``max_memory`` the training is expected to raise RuntimeError.
    """
    d = 32
    xt, _, _ = get_dataset_2(d, 10000, 0, 0)

    # NOTE(review): `M` is not defined in this function; presumably a
    # module-level constant -- confirm.
    index = faiss.IndexPQ(d, M, 13)
    index.do_polysemous_training = True
    index.pq.cp.niter = 0
    index.polysemous_training.max_memory = 128 * 1024 * 1024

    with self.assertRaises(RuntimeError):
        index.train(xt)
def __init__(self, *args, **kwargs):
    """Initialize the test case, dataset and exact 1-NN reference.

    Stores base vectors in ``self.xb``, queries in ``self.xq`` and the
    exact nearest-neighbor ids of the queries in ``self.Iref``.
    """
    unittest.TestCase.__init__(self, *args, **kwargs)
    dim, n_train, n_base, n_query = 32, 0, 1500, 500
    _, self.xb, self.xq = get_dataset_2(dim, n_train, n_base, n_query)

    exact = faiss.IndexFlatL2(dim)
    exact.add(self.xb)
    _, self.Iref = exact.search(self.xq, 1)
def test_init(self):
    """Seeding k-means with trained centroids must lower the first objective."""
    d, k = 32, 5
    xt, _, _ = get_dataset_2(d, 1000, 0, 0)

    km = faiss.Kmeans(d, k, niter=4)
    km.train(xt)

    # second run starts from the centroids found by the first one
    km2 = faiss.Kmeans(d, k, niter=4)
    km2.train(xt, init_centroids=km.centroids)

    # check that the initial objective is better for km2 than km
    first_obj = km.obj[0]
    seeded_first_obj = km2.obj[0]
    self.assertGreater(first_obj, seeded_first_obj * 1.01)
def test_4variants_ivf(self):
    """Compare 1-NN accuracy of IVF flat and IVF scalar-quantizer variants.

    All indexes share one coarse quantizer. For each variant the number
    of queries whose first result equals the exact nearest neighbor is
    recorded, and an expected ordering between variants is asserted.
    """
    d, nt, nq, nb = 32, 2500, 400, 5000
    xt, xb, xq = get_dataset_2(d, nt, nb, nq)

    # common quantizer
    quantizer = faiss.IndexFlatL2(d)
    ncent = 64

    index_gt = faiss.IndexFlatL2(d)
    index_gt.add(xb)
    _, I_ref = index_gt.search(xq, 10)
    top1_ref = I_ref[:, 0]

    nok = {}

    index = faiss.IndexIVFFlat(quantizer, d, ncent, faiss.METRIC_L2)
    index.cp.min_points_per_centroid = 5    # quiet warning
    index.nprobe = 4
    index.train(xt)
    index.add(xb)
    _, I = index.search(xq, 10)
    nok['flat'] = (I[:, 0] == top1_ref).sum()

    qnames = "QT_4bit QT_4bit_uniform QT_8bit QT_8bit_uniform QT_fp16".split()
    for qname in qnames:
        qtype = getattr(faiss.ScalarQuantizer, qname)
        index = faiss.IndexIVFScalarQuantizer(quantizer, d, ncent,
                                              qtype, faiss.METRIC_L2)
        index.nprobe = 4
        index.train(xt)
        index.add(xb)
        _, I = index.search(xq, 10)
        nok[qname] = (I[:, 0] == top1_ref).sum()

    print(nok, nq)

    self.assertGreaterEqual(nok['flat'], nq * 0.6)

    # The checks below are a bit fragile: the ordering between uniform
    # and non-uniform variants is sometimes reversed, probably because
    # the dataset is small, which introduces jitter.
    expected_order = (
        ('flat', 'QT_8bit'),
        ('QT_8bit', 'QT_4bit'),
        ('QT_8bit', 'QT_8bit_uniform'),
        ('QT_4bit', 'QT_4bit_uniform'),
        ('QT_fp16', 'QT_8bit'),
    )
    for better, worse in expected_order:
        self.assertGreaterEqual(nok[better], nok[worse])
def test_IMI(self):
    """IVFPQ over a MultiIndexQuantizer, then over an equivalent
    MultiIndexQuantizer2 built from explicit assignment indexes.

    In both configurations more than 165 top-10 entries must coincide
    with the exact nearest neighbor.
    """
    d, nb, nt, nq = 32, 1000, 1500, 200
    xt, xb, xq = get_dataset_2(d, nt, nb, nq)
    d = xt.shape[1]

    exact = faiss.IndexFlatL2(d)
    exact.add(xb)
    _, gt_nns = exact.search(xq, 1)

    nbits = 5
    coarse_quantizer = faiss.MultiIndexQuantizer(d, 2, nbits)
    nlist = (1 << nbits) ** 2
    index = faiss.IndexIVFPQ(coarse_quantizer, d, nlist, 8, 8)
    index.quantizer_trains_alone = 1
    index.train(xt)
    index.add(xb)
    index.nprobe = 100
    _, nns = index.search(xq, 10)
    n_ok = (nns == gt_nns).sum()

    # Should return 166 on mac, and 170 on linux.
    self.assertGreater(n_ok, 165)

    # Replace the coarse quantizer with explicit assignment indexes
    # that hold the centroids of the trained product quantizer.
    pq = coarse_quantizer.pq
    centroids = faiss.vector_to_array(pq.centroids).reshape(
        pq.M, pq.ksub, pq.dsub)

    ai0 = faiss.IndexFlatL2(pq.dsub)
    ai0.add(centroids[0])
    ai1 = faiss.IndexFlatL2(pq.dsub)
    ai1.add(centroids[1])

    coarse_quantizer_2 = faiss.MultiIndexQuantizer2(d, nbits, ai0, ai1)
    coarse_quantizer_2.pq = pq
    coarse_quantizer_2.is_trained = True

    index.quantizer = coarse_quantizer_2

    # re-add the database through the new quantizer
    index.reset()
    index.add(xb)

    _, nns = index.search(xq, 10)
    n_ok = (nns == gt_nns).sum()

    # should return the same result
    self.assertGreater(n_ok, 165)
def subtest_8bit_direct(self, metric_type, d):
    """QT_8bit_direct must be exact on integer-valued data in [0, 255].

    Args:
        metric_type: faiss metric constant.
        d: vector dimension.

    Checked for a plain scalar-quantizer index against IndexFlat, and
    for the IVF version against IndexIVFFlat.
    """
    xt, xb, xq = get_dataset_2(d, 500, 1000, 30)

    # rescale everything to get integer
    lo, hi = xt.min(), xt.max()

    def to_byte_range(x):
        scaled = np.floor((x - lo) * 256 / (hi - lo))
        scaled[scaled < 0] = 0
        scaled[scaled > 255] = 255
        return scaled

    xt = to_byte_range(xt)
    xb = to_byte_range(xb)
    xq = to_byte_range(xq)

    qtype = faiss.ScalarQuantizer.QT_8bit_direct

    # flat version
    flat_ref = faiss.IndexFlat(d, metric_type)
    flat_ref.add(xb)
    Dref, Iref = flat_ref.search(xq, 10)

    sq_index = faiss.IndexScalarQuantizer(d, qtype, metric_type)
    sq_index.add(xb)
    D, I = sq_index.search(xq, 10)

    assert np.all(I == Iref)
    assert np.all(D == Dref)

    # same, with IVF
    nlist = 64
    quantizer = faiss.IndexFlat(d, metric_type)

    ivf_ref = faiss.IndexIVFFlat(quantizer, d, nlist, metric_type)
    ivf_ref.nprobe = 4
    ivf_ref.train(xt)
    ivf_ref.add(xb)
    Dref, Iref = ivf_ref.search(xq, 10)

    ivf_sq = faiss.IndexIVFScalarQuantizer(
        quantizer, d, nlist, qtype, metric_type)
    ivf_sq.nprobe = 4
    ivf_sq.by_residual = False
    ivf_sq.train(xt)
    ivf_sq.add(xb)
    D, I = ivf_sq.search(xq, 10)

    assert np.all(I == Iref)
    assert np.all(D == Dref)
def do_reconstruct(self, by_residual):
    """Reported distances must match distances to reconstructed vectors.

    Args:
        by_residual: value assigned to ``index.by_residual``.
    """
    d = 32
    xt, xb, xq = get_dataset_2(d, 100, 5, 5)

    index = faiss.index_factory(d, "IVF10,SQ8")
    index.by_residual = by_residual
    index.train(xt)
    index.add(xb)
    index.nprobe = 10
    D, I = index.search(xq, 4)

    xb2 = index.reconstruct_n(0, index.ntotal)
    for qno in range(5):
        for rank in range(4):
            neighbor = xb2[I[qno, rank]]
            # squared L2 distance between the query and the decoded vector
            recomputed = ((xq[qno] - neighbor) ** 2).sum()
            self.assertAlmostEqual(recomputed, D[qno, rank], places=4)
def test_parallel_mode(self):
    """Search results must not depend on ``parallel_mode``."""
    d = 32
    xt, xb, xq = get_dataset_2(d, 2000, 1000, 200)

    index = faiss.index_factory(d, "IVF64,SQ8")
    index.train(xt)
    index.add(xb)
    index.nprobe = 4   # hopefully more robust than 1

    # results obtained before parallel_mode is changed
    Dref, Iref = index.search(xq, 10)

    for pm in range(1, 4):
        index.parallel_mode = pm
        Dnew, Inew = index.search(xq, 10)
        np.testing.assert_array_equal(Iref, Inew)
        np.testing.assert_array_equal(Dref, Dnew)
def test_rename(self):
    """An on-disk IVF index must be loadable after its files are moved.

    A plain read_index of the moved index is expected to raise
    RuntimeError; reading with IO_FLAG_ONDISK_SAME_DIR must succeed and
    return the same neighbors as before the move.
    """
    d, nb, nq, nt = 10, 500, 100, 100
    xt, xb, xq = get_dataset_2(d, nt, nb, nq)

    quantizer = faiss.IndexFlatL2(d)
    index1 = faiss.IndexIVFFlat(quantizer, d, 20)
    index1.train(xt)

    dirname = tempfile.mkdtemp()
    try:
        # make an index with ondisk invlists
        invlists = faiss.OnDiskInvertedLists(
            index1.nlist, index1.code_size,
            dirname + '/aa.ondisk')
        index1.replace_invlists(invlists)
        index1.add(xb)
        _, I1 = index1.search(xq, 10)
        faiss.write_index(index1, dirname + '/aa.ivf')

        # move the index elsewhere
        os.mkdir(dirname + '/1')
        for fname in ('aa.ondisk', 'aa.ivf'):
            os.rename(dirname + '/' + fname,
                      dirname + '/1/' + fname)

        moved_ivf = dirname + '/1/aa.ivf'

        # a plain read is expected to fail
        raised = False
        try:
            faiss.read_index(moved_ivf)
        except RuntimeError:
            raised = True
        assert raised

        # read it with magic flag
        index2 = faiss.read_index(moved_ivf,
                                  faiss.IO_FLAG_ONDISK_SAME_DIR)
        _, I2 = index2.search(xq, 10)
        assert np.all(I1 == I2)
    finally:
        shutil.rmtree(dirname)
def do_test(self, nq, metric_type=faiss.METRIC_L2, k=10):
    """Check IndexFlat k-NN and range search against numpy brute force.

    Args:
        nq: number of query vectors.
        metric_type: faiss.METRIC_L2 selects squared L2 distances; any
            other value is handled as a dot-product similarity.
        k: number of neighbors for the k-NN part.
    """
    d = 32
    nb = 1000
    nt = 0
    xt, xb, xq = get_dataset_2(d, nt, nb, nq)
    index = faiss.IndexFlat(d, metric_type)
    use_l2 = metric_type == faiss.METRIC_L2

    # --- k-NN search
    index.add(xb)
    D1, I1 = index.search(xq, k)

    if use_l2:
        pairwise_diff = xq.reshape(nq, 1, d) - xb.reshape(1, nb, d)
        all_dis = (pairwise_diff ** 2).sum(2)
        Iref = all_dis.argsort(axis=1)[:, :k]
    else:
        all_dis = np.dot(xq, xb.T)
        # largest similarity first
        Iref = all_dis.argsort(axis=1)[:, ::-1][:, :k]

    row_ids = np.arange(nq)[:, None]
    Dref = all_dis[row_ids, Iref]

    # a tiny fraction of id mismatches is tolerated
    n_mismatch = (Iref != I1).sum()
    self.assertLessEqual(n_mismatch, Iref.size * 0.0001)
    np.testing.assert_almost_equal(Dref, D1, decimal=5)

    # --- Range search
    radius = float(np.median(Dref[:, -1]))
    lims, D2, I2 = index.range_search(xq, radius)

    for i in range(nq):
        l0, l1 = lims[i], lims[i + 1]
        Il = I2[l0:l1]
        if use_l2:
            in_range = all_dis[i] < radius
        else:
            in_range = all_dis[i] > radius
        Ilref, = np.where(in_range)
        # Il is a slice of I2 and is sorted in place
        Il.sort()
        Ilref.sort()
        np.testing.assert_equal(Il, Ilref)
        np.testing.assert_almost_equal(
            all_dis[i, Ilref], D2[l0:l1],
            decimal=5
        )
def test_progressive_dim(self):
    """Progressive-dim k-means (gpu=True) on PCA data should beat plain k-means."""
    d, n, k = 32, 10000, 50
    xt, _, _ = get_dataset_2(d, n, 0, 0)

    # baseline: basic kmeans
    baseline = faiss.Kmeans(d, k, gpu=True)
    baseline.train(xt)

    # PCA transform with the same output dimension
    pca = faiss.PCAMatrix(d, d)
    pca.train(xt)
    xt_pca = pca.apply(xt)

    # same test w/ Kmeans wrapper
    progressive = faiss.Kmeans(d, k, progressive_dim_steps=5, gpu=True)
    progressive.train(xt_pca)
    self.assertLess(progressive.obj[-1], baseline.obj[-1])
def test_compute_GT(self):
    """knn_ground_truth over batches must match a single flat search."""
    d = 64
    xt, xb, xq = get_dataset_2(d, 0, 10000, 100)

    flat = faiss.IndexFlatL2(d)
    flat.add(xb)
    Dref, Iref = flat.search(xq, 10)

    # lazily feed the database in blocks of 1000 rows
    batch_size = 1000
    batches = (
        xb[start:start + batch_size]
        for start in range(0, xb.shape[0], batch_size)
    )

    Dnew, Inew = knn_ground_truth(xq, batches, 10)

    np.testing.assert_array_equal(Iref, Inew)
    np.testing.assert_almost_equal(Dref, Dnew, decimal=4)
def test_hnsw(self):
    """HNSW over an L1 flat index must report true L1 distances."""
    d, nb, nq, nt = 10, 1000, 100, 0
    xt, xb, xq = get_dataset_2(d, nt, nb, nq)

    mt = faiss.METRIC_L1

    storage = faiss.IndexFlat(d, mt)
    index = faiss.IndexHNSW(storage)
    index.add(xb)

    D, I = index.search(xq, 10)

    # full distance table, used as the reference
    dis = faiss.pairwise_distances(xq, xb, mt)

    for q in range(nq):
        expected = dis[q, I[q]]
        assert np.all(D[q] == expected)
def test_stop_words(self):
    """Exercise StopWordsInvertedLists wrapped around an IVF index.

    First the wrapper is built with a limit of ``maxsz + 1`` (above the
    largest list size) and search results must be unchanged. Then one
    vector is duplicated ``maxsz + 10`` times and the test asserts that
    its id no longer appears in the results once the wrapper is active.

    The order of the ``replace_invlists`` calls and the ownership flags
    matter here (see the inline comments about segfault / leak).
    """
    d = 10
    nb = 1000
    nq = 1
    nt = 200
    xt, xb, xq = get_dataset_2(d, nt, nb, nq)

    index = faiss.index_factory(d, "IVF32,Flat")
    index.nprobe = 4
    index.train(xt)
    index.add(xb)

    # reference results with the original inverted lists
    Dref, Iref = index.search(xq, 10)

    il = index.invlists
    # size of the largest inverted list
    maxsz = max(il.list_size(i) for i in range(il.nlist))
    # NOTE(review): presumably lists with at least maxsz + 1 entries are
    # treated as stop words, so nothing is filtered yet -- confirm.
    il2 = faiss.StopWordsInvertedLists(il, maxsz + 1)
    # bare attribute read: only checks that the field is accessible
    index.own_invlists
    index.own_invlists = False

    index.replace_invlists(il2, False)
    D1, I1 = index.search(xq, 10)
    np.testing.assert_array_equal(Dref, D1)
    np.testing.assert_array_equal(Iref, I1)

    # cleanup to avoid segfault on exit
    index.replace_invlists(il, False)

    # voluntarily unbalance one invlist
    i = int(I1[0, 0])
    # add maxsz + 10 copies of the first result's vector
    index.add(np.vstack([xb[i]] * (maxsz + 10)))

    # introduce stopwords again
    index.replace_invlists(il2, False)

    D2, I2 = index.search(xq, 10)
    # the duplicated vector's original id must not be returned any more
    self.assertFalse(i in list(I2.ravel()))

    # avoid mem leak
    index.replace_invlists(il, True)
def do_merge_then_remove(self, ondisk):
    """Merging an index and then removing the merged ids must be a no-op.

    Args:
        ondisk: when true, ``index1`` stores its inverted lists in an
            OnDiskInvertedLists backed by a temporary file.

    The first half of the database goes to ``index1`` and the second
    half to ``index2``. After ``merge_from`` followed by ``remove_ids``
    on the merged id range, searches on ``index1`` must return exactly
    the results obtained before the merge.
    """
    d = 10
    nb = 1000
    nq = 200
    nt = 200
    half = nb // 2

    xt, xb, xq = get_dataset_2(d, nt, nb, nq)

    quantizer = faiss.IndexFlatL2(d)

    index1 = faiss.IndexIVFFlat(quantizer, d, 20)
    index1.train(xt)

    filename = None
    try:
        if ondisk:
            fd, filename = tempfile.mkstemp()
            # mkstemp returns an open OS-level descriptor; the file is
            # only used by name, so close it to avoid leaking it
            os.close(fd)
            invlists = faiss.OnDiskInvertedLists(
                index1.nlist, index1.code_size,
                filename)
            index1.replace_invlists(invlists)

        index1.add(xb[:half])

        # shares the already-trained quantizer, so no training is needed
        index2 = faiss.IndexIVFFlat(quantizer, d, 20)
        assert index2.is_trained
        index2.add(xb[half:])

        Dref, Iref = index1.search(xq, 10)
        index1.merge_from(index2, half)

        assert index1.ntotal == nb

        index1.remove_ids(faiss.IDSelectorRange(half, nb))

        assert index1.ntotal == half
        Dnew, Inew = index1.search(xq, 10)

        assert np.all(Dnew == Dref)
        assert np.all(Inew == Iref)
    finally:
        # remove the temporary file even when an assertion above fails
        if filename is not None and os.path.exists(filename):
            os.unlink(filename)
def test_IndexFlat(self):
    """Check IndexFlat.search rejects negative k and returns (nq, k) results."""
    d, nb, nt, nq = 32, 1000, 0, 200
    xt, xb, xq = get_dataset_2(d, nt, nb, nq)

    index = faiss.IndexFlat(d, faiss.METRIC_L2)
    index.add(xb)

    # negative k is rejected
    with self.assertRaises(AssertionError):
        index.search(xq, -5)

    # valid k: one row per query, k columns
    k = 5
    D, _ = index.search(xq, k)
    self.assertEqual(D.shape[0], nq)
    self.assertEqual(D.shape[1], k)