def test_downcast_Refine(self):
    """An IndexRefineFlat round-tripped through serialize/deserialize
    must come back downcast to the same concrete wrapper type."""
    base = faiss.IndexScalarQuantizer(10, faiss.ScalarQuantizer.QT_8bit)
    refine = faiss.IndexRefineFlat(base)
    # serialize and deserialize
    restored = faiss.deserialize_index(faiss.serialize_index(refine))
    assert isinstance(restored, faiss.IndexRefineFlat)
def do_test_accuracy(self, by_residual, st):
    """Accuracy / I/O checks for IndexIVFResidualQuantizer.

    Verifies that (1) intersection with the ground truth behaves sanely
    as nprobe grows, (2) the index survives a serialize/deserialize
    round-trip with identical search results, and (3) with shared
    codebooks the IVF index agrees with a flat IndexResidualQuantizer.

    Fix: removed two dead no-op statements (`index.rq.train_type` and
    `iflat.rq.train_type` bare attribute reads that discarded their
    value).

    :param by_residual: whether the IVF index encodes residuals relative
        to the coarse centroids
    :param st: search-type argument forwarded to the RQ index constructors
    """
    ds = datasets.SyntheticDataset(32, 3000, 1000, 100)

    quantizer = faiss.IndexFlatL2(ds.d)
    index = faiss.IndexIVFResidualQuantizer(
        quantizer, ds.d, 100, 3, 4, faiss.METRIC_L2, st)
    index.by_residual = by_residual

    index.rq.train_type = faiss.ResidualQuantizer.Train_default
    index.rq.max_beam_size = 30

    index.train(ds.get_train())
    index.add(ds.get_database())

    inters = []
    for nprobe in 1, 2, 5, 10, 20, 50:
        index.nprobe = nprobe
        D, I = index.search(ds.get_queries(), 10)
        inter = faiss.eval_intersection(I, ds.get_groundtruth(10))
        # print(st, "nprobe=", nprobe, "inter=", inter)
        inters.append(inter)

    # do a little I/O test
    index2 = faiss.deserialize_index(faiss.serialize_index(index))
    D2, I2 = index2.search(ds.get_queries(), 10)
    np.testing.assert_array_equal(I2, I)
    np.testing.assert_array_equal(D2, D)

    inters = np.array(inters)
    if by_residual:
        # check that we have increasing intersection measures with nprobe
        self.assertTrue(np.all(inters[1:] >= inters[:-1]))
    else:
        # without residual encoding only the first few steps are monotone
        self.assertTrue(np.all(inters[1:3] >= inters[:2]))

    # check that we have the same result as the flat residual quantizer
    iflat = faiss.IndexResidualQuantizer(
        ds.d, 3, 4, faiss.METRIC_L2, st)
    iflat.rq.train_type = faiss.ResidualQuantizer.Train_default
    iflat.rq.max_beam_size = 30
    iflat.train(ds.get_train())
    # share codebooks so both indexes encode vectors identically
    iflat.rq.codebooks = index.rq.codebooks

    iflat.add(ds.get_database())
    Dref, Iref = iflat.search(ds.get_queries(), 10)

    index.nprobe = 100
    D2, I2 = index.search(ds.get_queries(), 10)
    np.testing.assert_array_almost_equal(Dref, D2, decimal=5)
    # there are many ties because the codes are so short
    self.assertLess((Iref != I2).sum(), Iref.size * 0.2)
def do_test(self, index_key):
    """Exercise reconstruct() with a hashtable DirectMap: direct lookup,
    after an I/O round-trip, and after removing ids."""
    d = 32
    index = faiss.index_factory(d, index_key)
    index.train(faiss.randn((100, d), 123))

    # reference reconstruction: two plain adds, sequential ids 0..199
    index.add(faiss.randn((100, d), 345))
    index.add(faiss.randn((100, d), 678))
    ref_recons = index.reconstruct_n(0, 200)

    # same vectors again, now under random ids with a hashtable map
    index.reset()
    rs = np.random.RandomState(123)
    ids = rs.choice(10000, size=200, replace=False).astype(np.int64)
    index.add_with_ids(faiss.randn((100, d), 345), ids[:100])
    index.set_direct_map_type(faiss.DirectMap.Hashtable)
    index.add_with_ids(faiss.randn((100, d), 678), ids[100:])

    def check_sample(idx):
        # spot-check a sample of ids against the reference reconstructions
        for pos in range(0, 200, 13):
            rec = idx.reconstruct(int(ids[pos]))
            self.assertTrue(np.all(rec == ref_recons[pos]))

    check_sample(index)

    # test I/O
    buf = faiss.serialize_index(index)
    index2 = faiss.deserialize_index(buf)
    check_sample(index2)

    # remove every third id, half via IDSelectorArray, half via the array
    toremove = np.ascontiguousarray(ids[0:200:3])
    sel = faiss.IDSelectorArray(50, faiss.swig_ptr(toremove[:50]))
    # test both ways of removing elements
    nremove = index2.remove_ids(sel)
    nremove += index2.remove_ids(toremove[50:])
    self.assertEqual(nremove, len(toremove))

    for pos in range(0, 200, 13):
        if pos % 3 == 0:
            self.assertRaises(
                RuntimeError, index2.reconstruct, int(ids[pos]))
        else:
            rec = index2.reconstruct(int(ids[pos]))
            self.assertTrue(np.all(rec == ref_recons[pos]))

    # an id that was never added should raise
    self.assertRaises(RuntimeError, index.reconstruct, 20000)
def test_factory(self):
    """An IVF index with a residual coarse quantizer built from a factory
    string must survive serialization with identical search results."""
    ds = datasets.SyntheticDataset(16, 500, 1000, 100)

    index = faiss.index_factory(ds.d, "IVF1024(RCQ2x5),Flat")
    index.train(ds.get_train())
    index.add(ds.get_database())
    Dref, Iref = index.search(ds.get_queries(), 10)

    restored = faiss.deserialize_index(faiss.serialize_index(index))
    Dnew, Inew = restored.search(ds.get_queries(), 10)

    np.testing.assert_equal(Dref, Dnew)
    np.testing.assert_equal(Iref, Inew)
def test_io(self):
    """sa_encode must produce identical codes after the index round-trips
    through serialize/deserialize."""
    ds = datasets.SyntheticDataset(32, 1000, 100, 0)
    xt = ds.get_train()
    xb = ds.get_database()

    index = faiss.IndexResidualQuantizer(ds.d, 3, 4)
    index.rq.train_type = faiss.ResidualQuantizer.Train_default
    index.train(xt)
    ref_codes = index.sa_encode(xb)

    clone = faiss.deserialize_index(faiss.serialize_index(index))
    np.testing.assert_array_equal(ref_codes, clone.sa_encode(xb))
def test_serialize(self):
    """GPU indexes moved to CPU, serialized, deserialized and moved back
    to GPU must return identical search results and accept further adds."""
    res = faiss.StandardGpuResources()
    d = 32
    k = 10
    nlist = 5

    train = make_t(10000, d)
    add = make_t(10000, d)
    query = make_t(10, d)

    # one representative index of each GPU index type
    indexes = [
        # Flat
        faiss.GpuIndexFlatL2(res, d),
        # IVFFlat
        faiss.GpuIndexIVFFlat(res, d, nlist, faiss.METRIC_L2),
        # IVFSQ
        faiss.GpuIndexIVFScalarQuantizer(
            res, d, nlist, faiss.ScalarQuantizer.QT_fp16),
        # IVFPQ
        faiss.GpuIndexIVFPQ(res, d, nlist, 4, 8, faiss.METRIC_L2),
    ]

    for index in indexes:
        index.train(train)
        index.add(add)
        orig_d, orig_i = index.search(query, k)

        ser = faiss.serialize_index(faiss.index_gpu_to_cpu(index))
        cpu_index = faiss.deserialize_index(ser)
        gpu_index_restore = faiss.index_cpu_to_gpu(res, 0, cpu_index)

        restore_d, restore_i = gpu_index_restore.search(query, k)
        self.assertTrue(np.array_equal(orig_d, restore_d))
        self.assertTrue(np.array_equal(orig_i, restore_i))

        # Make sure the index is in a state where we can add to it
        # without error
        gpu_index_restore.add(query)
def do_test(self, by_residual=False, metric=faiss.METRIC_L2, d=32, bbs=32):
    """Compare IndexIVFPQFastScan against a reference IVFPQ index and
    check its I/O round-trip.

    Fix: the original body immediately re-assigned ``bbs = 32``,
    silently ignoring the ``bbs`` parameter; the dead assignment is
    removed so callers can actually vary the block size. The default
    value is unchanged, so existing callers behave identically.

    :param by_residual: encode residuals relative to coarse centroids
    :param metric: faiss metric constant (METRIC_L2 or inner product)
    :param d: vector dimension (must be a key of ref_m3_tab below)
    :param bbs: FastScan block size passed to IndexIVFPQFastScan
    """
    ds = datasets.SyntheticDataset(d, 2000, 5000, 200)

    # reference: classical IVFPQ with the same coarse/fine parameters
    index = faiss.index_factory(d, f"IVF32,PQ{d//2}x4np", metric)
    index.by_residual = by_residual
    index.train(ds.get_train())
    index.add(ds.get_database())
    index.nprobe = 4
    Dref, Iref = index.search(ds.get_queries(), 10)

    index2 = faiss.IndexIVFPQFastScan(
        index.quantizer, d, 32, d // 2, 4, metric, bbs)
    index2.by_residual = by_residual
    index2.train(ds.get_train())
    index2.add(ds.get_database())
    index2.nprobe = 4
    Dnew, Inew = index2.search(ds.get_queries(), 10)

    m3 = three_metrics(Dref, Iref, Dnew, Inew)
    # print((by_residual, metric, d), ":", m3)
    # reference metric triples per (by_residual, metric, d) configuration
    ref_m3_tab = {
        (True, 1, 32): (0.995, 1.0, 9.91),
        (True, 0, 32): (0.99, 1.0, 9.91),
        (True, 1, 30): (0.99, 1.0, 9.885),
        (False, 1, 32): (0.99, 1.0, 9.875),
        (False, 0, 32): (0.99, 1.0, 9.92),
        (False, 1, 30): (1.0, 1.0, 9.895)
    }
    ref_m3 = ref_m3_tab[(by_residual, metric, d)]
    # allow 1% slack on each metric to absorb run-to-run noise
    self.assertGreater(m3[0], ref_m3[0] * 0.99)
    self.assertGreater(m3[1], ref_m3[1] * 0.99)
    self.assertGreater(m3[2], ref_m3[2] * 0.99)

    # Test I/O
    data = faiss.serialize_index(index2)
    index3 = faiss.deserialize_index(data)
    D3, I3 = index3.search(ds.get_queries(), 10)
    np.testing.assert_array_equal(I3, Inew)
    np.testing.assert_array_equal(D3, Dnew)
def test_rcq_LUT(self):
    """Residual coarse quantizer with LUT-based search must match exact
    search over the reconstructed centroids, and must survive an I/O
    round-trip.

    Fix: removed a duplicated ``quantizer2.search(...)`` call whose
    result was discarded before the identical call that feeds the
    assertions.
    """
    ds = datasets.SyntheticDataset(32, 3000, 1000, 100)
    xt = ds.get_train()
    xb = ds.get_database()

    # RQ 2x5 = 10 bits = 1024 centroids
    index = faiss.index_factory(ds.d, "IVF1024(RCQ2x5),SQ8")

    quantizer = faiss.downcast_index(index.quantizer)
    rq = quantizer.rq
    rq.train_type = faiss.ResidualQuantizer.Train_default
    index.train(xt)
    index.add(xb)
    index.nprobe = 10

    # reference: a flat coarse quantizer holding the exact reconstructed
    # centroids of the residual coarse quantizer
    all_centroids = quantizer.reconstruct_n(0, quantizer.ntotal)
    q2 = faiss.IndexFlatL2(32)
    q2.add(all_centroids)
    index.quantizer = q2
    Dref, Iref = index.search(ds.get_queries(), 10)
    index.quantizer = quantizer

    # search with LUT
    quantizer.set_beam_factor(-1)
    Dnew, Inew = index.search(ds.get_queries(), 10)
    np.testing.assert_array_almost_equal(Dref, Dnew, decimal=5)
    np.testing.assert_array_equal(Iref, Inew)

    # check i/o: the deserialized quantizer must search identically
    CDref, CIref = quantizer.search(ds.get_queries(), 10)
    quantizer2 = faiss.deserialize_index(faiss.serialize_index(quantizer))
    CDnew, CInew = quantizer2.search(ds.get_queries(), 10)
    np.testing.assert_array_almost_equal(CDref, CDnew, decimal=5)
    np.testing.assert_array_equal(CIref, CInew)
def do_test(self, key1, key2):
    """Check that a codec built from factory string key2, with its PQ and
    coarse quantizer grafted from a trained key1 codec, encodes and
    decodes identically to the key1 codec, including after an I/O
    round-trip.

    NOTE(review): the quantizer swap below relies on SWIG ownership
    semantics (`this.own()`, `own_fields`) and is order-sensitive — do
    not reorder those statements.
    """
    d = 96
    nb = 1000
    nq = 0
    nt = 2000

    xt, x, _ = get_dataset_2(d, nt, nb, nq)

    # reference codec: trained from scratch
    codec_ref = faiss.index_factory(d, key1)
    codec_ref.train(xt)

    code_ref = codec_ref.sa_encode(x)
    x_recons_ref = codec_ref.sa_decode(code_ref)

    # new codec: reuse the reference codec's trained PQ
    codec_new = faiss.index_factory(d, key2)
    codec_new.pq = codec_ref.pq

    # replace quantizer, avoiding mem leak
    # take Python-side ownership of the old quantizer so it is freed,
    # and release C++-side ownership before overwriting the pointer
    oldq = codec_new.q1.quantizer
    oldq.this.own()
    codec_new.q1.own_fields = False
    codec_new.q1.quantizer = codec_ref.quantizer
    codec_new.is_trained = True

    code_new = codec_new.sa_encode(x)
    x_recons_new = codec_new.sa_decode(code_new)

    # grafted codec must reproduce the reference exactly
    self.assertTrue(np.all(code_new == code_ref))
    self.assertTrue(np.all(x_recons_new == x_recons_ref))

    # the grafted codec must also survive serialization
    codec_new_2 = faiss.deserialize_index(
        faiss.serialize_index(codec_new))

    code_new = codec_new_2.sa_encode(x)
    x_recons_new = codec_new_2.sa_decode(code_new)
    self.assertTrue(np.all(code_new == code_ref))
    self.assertTrue(np.all(x_recons_new == x_recons_ref))
def do_test_knn(self, mt):
    """Compare IndexFlat search results with brute-force
    faiss.pairwise_distances for metric ``mt``, then check that labels
    survive an I/O round-trip.

    Fix: bare ``assert`` statements are stripped under ``python -O``,
    silently disabling the checks; they are converted to unittest
    assertions.
    """
    d = 10
    nb = 100
    nq = 50
    nt = 0
    xt, xb, xq = get_dataset_2(d, nt, nb, nq)

    index = faiss.IndexFlat(d, mt)
    index.add(xb)
    D, I = index.search(xq, 10)

    # brute-force reference ranking
    dis = faiss.pairwise_distances(xq, xb, mt)
    o = dis.argsort(axis=1)
    self.assertTrue(np.all(I == o[:, :10]))

    # returned distances must match the reference distance matrix
    for q in range(nq):
        self.assertTrue(np.all(D[q] == dis[q, I[q]]))

    index2 = faiss.deserialize_index(faiss.serialize_index(index))
    D2, I2 = index2.search(xq, 10)
    self.assertTrue(np.all(I == I2))
def save(self, path):
    """Pickle this object's state to ``path``.

    The faiss index is not directly picklable, so a serialized byte
    copy is substituted into a *copy* of ``__dict__`` before pickling.

    Fix: the original temporarily replaced ``self._index`` with the
    serialized bytes and restored it afterwards via
    ``deserialize_index`` — not exception-safe (a failure in
    ``pickle.dump`` left ``self._index`` as a bytes blob) and it paid a
    needless deserialize round-trip. Pickling a copied state dict
    leaves ``self`` untouched.

    :param path: filesystem path the pickle is written to
    """
    state = dict(self.__dict__)
    state['_index'] = faiss.serialize_index(self._index)
    with open(path, 'wb') as f:
        pickle.dump(state, f)