def do_test(self, by_residual, metric=faiss.METRIC_L2, d=32):
    """Check that FastScan ``implem = self.IMPLEM`` agrees with ``implem = 2``.

    An ``IVF32,PQ{d//2}x4np`` index is built on a synthetic dataset and
    wrapped in two ``IndexIVFPQFastScan`` objects: the reference uses
    ``implem = 2``, the tested one uses ``implem = self.IMPLEM``.  Their
    results are compared for k=4 through ``verify_with_draws`` and then for
    k=1, query by query.

    Args:
        by_residual: value assigned to ``index.by_residual`` before training.
        metric: metric handed to ``faiss.index_factory``.
        d: vector dimension; the factory string uses ``PQ{d//2}x4np``.
    """
    ds = datasets.SyntheticDataset(d, 2000, 5000, 200)

    base_index = faiss.index_factory(d, f"IVF32,PQ{d//2}x4np", metric)
    base_index.by_residual = by_residual
    base_index.train(ds.get_train())
    base_index.add(ds.get_database())
    base_index.nprobe = 4

    # The same query set is used for every search below.
    queries = ds.get_queries()

    # Reference results.
    reference = faiss.IndexIVFPQFastScan(base_index)
    reference.implem = 2
    Dref, Iref = reference.search(queries, 4)

    # Results of the implementation under test.
    candidate = faiss.IndexIVFPQFastScan(base_index)
    candidate.implem = self.IMPLEM
    Dnew, Inew = candidate.search(queries, 4)

    verify_with_draws(self, Dref, Iref, Dnew, Inew)

    # Reset the IVF statistics before the single-result search; the
    # distance counter is checked once that search is done.
    stats = faiss.cvar.indexIVF_stats
    stats.reset()

    # Single-result search with the implementation under test.
    Dnew, Inew = candidate.search(queries, 1)
    for q, ref_row in enumerate(Dref):
        if ref_row[1] == ref_row[0]:
            # The two best reference distances are tied: either id could
            # legitimately come first, so nothing can be concluded.
            continue
        self.assertEqual(Iref[q, 0], Inew[q, 0])
        np.testing.assert_almost_equal(ref_row[0], Dnew[q, 0], decimal=5)

    self.assertGreater(stats.ndis, 0)
def test_equiv_pq(self):
    """Check that an IVF1 PQ index and a plain PQ index give equal results.

    The IVF index has a single all-zero centroid and ``by_residual`` off.
    The plain PQ index borrows its product quantizer and the codes of
    inverted list 0.  Results must be exactly equal, both for the regular
    indexes and for their FastScan counterparts with ``implem = 12``.
    """
    dim, k = 32, 4
    ds = datasets.SyntheticDataset(dim, 2000, 200, 4)

    ivf = faiss.index_factory(dim, "IVF1,PQ16x4np")
    ivf.by_residual = False
    # Install the coarse quantizer content by hand (one zero vector)
    # before training.
    zero_centroid = np.zeros((1, 32), dtype='float32')
    ivf.quantizer.add(zero_centroid)
    ivf.train(ds.get_train())
    ivf.add(ds.get_database())

    queries = ds.get_queries()
    Dref, Iref = ivf.search(queries, k)

    # Plain PQ index re-using the IVF index's quantizer and stored codes.
    flat_pq = faiss.index_factory(dim, "PQ16x4np")
    flat_pq.pq = ivf.pq
    flat_pq.is_trained = True
    inverted_lists = faiss.downcast_InvertedLists(ivf.invlists)
    flat_pq.codes = inverted_lists.codes.at(0)
    flat_pq.ntotal = ivf.ntotal
    Dnew, Inew = flat_pq.search(queries, k)

    np.testing.assert_array_equal(Iref, Inew)
    np.testing.assert_array_equal(Dref, Dnew)

    # Same comparison between the two FastScan variants.
    flat_fs = faiss.IndexPQFastScan(flat_pq)
    flat_fs.implem = 12
    Dref, Iref = flat_fs.search(queries, k)

    ivf_fs = faiss.IndexIVFPQFastScan(ivf)
    ivf_fs.implem = 12
    Dnew, Inew = ivf_fs.search(queries, k)

    np.testing.assert_array_equal(Iref, Inew)
    np.testing.assert_array_equal(Dref, Dnew)
def eval_quant_loss(self, by_residual, metric=faiss.METRIC_L2):
    """Bound the accuracy lost when an IVFPQ index is turned into FastScan.

    Searches an ``IVF32,PQ16x4np`` index and its ``IndexIVFPQFastScan``
    conversion (``implem = 2``), summarises the agreement between both
    result sets with ``three_metrics`` and requires each metric to reach at
    least 99.5% of a recorded reference value.

    Args:
        by_residual: value assigned to ``index.by_residual``.
        metric: metric handed to ``faiss.index_factory``; also part of the
            lookup key of the reference table.
    """
    ds = datasets.SyntheticDataset(32, 2000, 5000, 1000)

    ivfpq = faiss.index_factory(32, "IVF32,PQ16x4np", metric)
    ivfpq.train(ds.get_train())
    ivfpq.add(ds.get_database())
    ivfpq.nprobe = 4
    # NOTE(review): by_residual is assigned only after train() and add().
    # The reference values below were presumably recorded with this order,
    # so it is kept unchanged -- confirm before moving it.
    ivfpq.by_residual = by_residual

    queries = ds.get_queries()
    Da, Ia = ivfpq.search(queries, 10)

    # loss due to int8 quantization of LUTs
    fast_scan = faiss.IndexIVFPQFastScan(ivfpq)
    fast_scan.implem = 2
    Db, Ib = fast_scan.search(queries, 10)

    m3 = three_metrics(Da, Ia, Db, Ib)

    # Reference values keyed by (by_residual, metric).  Presumably
    # (recall@1, recall@10, intersection@10) -- verify against
    # three_metrics.
    ref_results = {
        (True, 1): (0.985, 1.0, 9.872),
        (True, 0): (0.987, 1.0, 9.914),
        (False, 1): (0.991, 1.0, 9.907),
        (False, 0): (0.986, 1.0, 9.917),
    }
    ref = ref_results[(by_residual, metric)]

    # Each metric may fall at most 0.5% below its reference.
    for pos in range(3):
        self.assertGreaterEqual(m3[pos], ref[pos] * 0.995)
def do_test(self, by_residual=False, metric=faiss.METRIC_L2, d=32, bbs=32):
    """Check that adding to a FastScan index matches converting afterwards.

    One FastScan index is converted from an IVFPQ index holding the first
    1235 database vectors and then receives the remaining vectors through
    ``add``.  The other is converted once the IVFPQ index holds everything.
    Both must return identical search results and hold the same code for
    every id in each of the 32 inverted lists.

    Args:
        by_residual: value assigned to ``index.by_residual`` before training.
        metric: metric handed to ``faiss.index_factory``.
        d: vector dimension; the factory string uses ``PQ{d//2}x4np``.
        bbs: currently ignored, see the note below.
    """
    # NOTE(review): the bbs argument is overwritten here, so a caller
    # passing another value still runs with 32 -- confirm this is intended.
    bbs = 32
    split = 1235
    ds = datasets.SyntheticDataset(d, 2000, 5000, 200)

    source = faiss.index_factory(d, f"IVF32,PQ{d//2}x4np", metric)
    source.by_residual = by_residual
    source.train(ds.get_train())
    source.nprobe = 4

    xb = ds.get_database()
    head, tail = xb[:split], xb[split:]
    source.add(head)

    # Converted while only the first part is stored; completed below.
    incremental = faiss.IndexIVFPQFastScan(source, bbs)

    # Converted once the whole database is stored: the reference.
    source.add(tail)
    complete = faiss.IndexIVFPQFastScan(source, bbs)
    queries = ds.get_queries()
    Dref, Iref = complete.search(queries, 10)

    incremental.add(tail)
    Dnew, Inew = incremental.search(queries, 10)

    np.testing.assert_array_equal(Dref, Dnew)
    np.testing.assert_array_equal(Iref, Inew)

    def code_at(codes, pos):
        # Entry number `pos` of a list is read from block pos // bbs,
        # position pos % bbs along the last axis.
        block, lane = divmod(pos, bbs)
        return codes[block, :, lane]

    # direct verification of code content. Not sure the test is correct
    # if codes are shuffled.
    for list_no in range(32):
        ref_ids, ref_codes = get_invlist(complete.invlists, list_no)
        new_ids, new_codes = get_invlist(incremental.invlists, list_no)
        self.assertEqual(set(ref_ids), set(new_ids))

        # Look codes up by id so the storage order within a list does
        # not matter.
        new_code_per_id = {
            new_ids[pos]: code_at(new_codes, pos)
            for pos in range(new_ids.size)
        }
        for pos, the_id in enumerate(ref_ids):
            np.testing.assert_array_equal(
                code_at(ref_codes, pos), new_code_per_id[the_id])
def prepare_trained_index(preproc, coarse_quantizer, ncent, pqflat_str, is_gpu):
    """Build the IVF index described by ``pqflat_str`` and train it on CPU.

    Besides its arguments, the function reads the module-level names
    ``fmetric``, ``useFloat16``, ``xt`` and ``sanitize``.

    Args:
        preproc: preprocessing object; ``preproc.d_out`` gives the index
            dimension and ``preproc.apply_py`` transforms the training set.
        coarse_quantizer: quantizer wrapped by the ``Flat`` and ``PQ``
            variants.  In those cases its ownership is handed to the
            returned index.
        ncent: number of centroids passed to the index constructors.
        pqflat_str: ``'Flat'``, a string containing ``'SQ'`` followed by the
            quantizer suffix (``'16'`` selects ``QT_fp16``, anything else
            ``QT_<suffix>bit``), or ``'PQ<m>x<log2kstar>'``.
        is_gpu: when false and ``log2kstar == 4``, the PQ variant is built
            as ``IndexIVFPQFastScan`` instead of ``IndexIVFPQ``.

    Returns:
        The trained index, which owns the quantizer it wraps.

    Raises:
        AssertionError: malformed PQ spec, or ``m >= 56`` without
            ``useFloat16``.
        ValueError: the SQ variant is requested with a metric other than
            ``METRIC_L2`` or ``METRIC_INNER_PRODUCT``.
    """
    d = preproc.d_out
    # Quantizer actually wrapped by the index.  Ownership is transferred for
    # this object and no other, so nothing is leaked or owned twice.
    wrapped_quantizer = coarse_quantizer

    if pqflat_str == 'Flat':
        print("making an IVFFlat index")
        idx_model = faiss.IndexIVFFlat(coarse_quantizer, d, ncent, fmetric)
    elif 'SQ' in pqflat_str:
        print("making a SQ index")
        # NOTE(review): this variant ignores coarse_quantizer and wraps a
        # fresh flat quantizer instead -- confirm this is intended.
        if fmetric == faiss.METRIC_L2:
            wrapped_quantizer = faiss.IndexFlatL2(d)
        elif fmetric == faiss.METRIC_INNER_PRODUCT:
            wrapped_quantizer = faiss.IndexFlatIP(d)
        else:
            raise ValueError(
                "unsupported metric for SQ index: %r" % (fmetric,))
        sq_suffix = pqflat_str.split("SQ")[1]
        if sq_suffix == "16":
            name = "QT_fp16"
        else:
            name = "QT_" + sq_suffix + "bit"
        qtype = getattr(faiss.ScalarQuantizer, name)
        idx_model = faiss.IndexIVFScalarQuantizer(
            wrapped_quantizer, d, ncent, qtype, fmetric)
    else:
        key = pqflat_str[2:].split("x")
        assert len(key) == 2, "use format PQ(m)x(log2kstar)"
        m, log2kstar = map(int, key)
        assert m < 56 or useFloat16, "PQ%d will work only with -float16" % m
        if log2kstar == 4 and not is_gpu:
            idx_model = faiss.IndexIVFPQFastScan(
                coarse_quantizer, d, ncent, m, log2kstar, fmetric)
            print(
                "making an IndexIVFPQFastScan index, m = %d, log2kstar = %d"
                % (m, log2kstar))
        else:
            idx_model = faiss.IndexIVFPQ(
                coarse_quantizer, d, ncent, m, log2kstar, fmetric)
            print("making an IVFPQ index, m = %d, log2kstar = %d"
                  % (m, log2kstar))

    # Hand the wrapped quantizer over to the index: the Python proxy gives
    # up ownership and the index is flagged as owning its fields.
    wrapped_quantizer.this.disown()
    idx_model.own_fields = True

    # finish training on CPU
    t0 = time.time()
    print("Training vector codes")
    x = preproc.apply_py(sanitize(xt))
    idx_model.train(x)
    print(" done %.3f s" % (time.time() - t0))

    return idx_model
def do_test(self, by_residual, metric_type=faiss.METRIC_L2,
            use_precomputed_table=0):
    """Check that FastScan ``implem = 1`` reproduces the IVFPQ results.

    Searches an ``IVF32,PQ16x4np`` index and its ``IndexIVFPQFastScan``
    conversion with ``implem = 1``.  The returned ids must be identical and
    the distances equal to 5 decimals.

    Args:
        by_residual: value assigned to ``index.by_residual``.
        metric_type: metric handed to ``faiss.index_factory``.
        use_precomputed_table: value assigned to
            ``index.use_precomputed_table`` before training.
    """
    ds = datasets.SyntheticDataset(32, 2000, 5000, 1000)

    index = faiss.index_factory(32, "IVF32,PQ16x4np", metric_type)
    index.use_precomputed_table = use_precomputed_table
    index.train(ds.get_train())
    index.add(ds.get_database())
    index.nprobe = 4
    # NOTE(review): by_residual is assigned only after train() and add();
    # kept as is because changing the order may change the results being
    # compared -- confirm it is intended.
    index.by_residual = by_residual
    Da, Ia = index.search(ds.get_queries(), 10)

    index2 = faiss.IndexIVFPQFastScan(index)
    index2.implem = 1
    Db, Ib = index2.search(ds.get_queries(), 10)

    np.testing.assert_array_equal(Ia, Ib)
    np.testing.assert_almost_equal(Da, Db, decimal=5)
def do_test(self, by_residual=False, metric=faiss.METRIC_L2, d=32, bbs=32):
    """Check a directly trained FastScan index, including serialization.

    An ``IndexIVFPQFastScan`` is constructed on the quantizer of a reference
    ``IVF32,PQ{d//2}x4np`` index, then trained and filled with the same
    data.  Its agreement with the reference, summarised by
    ``three_metrics``, must exceed 99% of recorded values.  A
    serialize/deserialize round trip must then return exactly the same
    results.

    Args:
        by_residual: value assigned to ``by_residual`` on both indexes
            before training.
        metric: metric used for both indexes; part of the reference key.
        d: vector dimension; part of the reference key.
        bbs: currently ignored, see the note below.
    """
    # NOTE(review): the bbs argument is overwritten here, so a caller
    # passing another value still runs with 32 -- confirm this is intended.
    bbs = 32
    nlist, nprobe, k = 32, 4, 10
    ds = datasets.SyntheticDataset(d, 2000, 5000, 200)

    # Reference: regular IVFPQ index built through the factory.
    reference = faiss.index_factory(d, f"IVF32,PQ{d//2}x4np", metric)
    reference.by_residual = by_residual
    reference.train(ds.get_train())
    reference.add(ds.get_database())
    reference.nprobe = nprobe
    queries = ds.get_queries()
    Dref, Iref = reference.search(queries, k)

    # FastScan index built from scratch on the reference's quantizer.
    fast_scan = faiss.IndexIVFPQFastScan(
        reference.quantizer, d, nlist, d // 2, 4, metric, bbs)
    fast_scan.by_residual = by_residual
    fast_scan.train(ds.get_train())
    fast_scan.add(ds.get_database())
    fast_scan.nprobe = nprobe
    Dnew, Inew = fast_scan.search(queries, k)

    m3 = three_metrics(Dref, Iref, Dnew, Inew)

    # Reference metrics keyed by (by_residual, metric, d).
    ref_m3_tab = {
        (True, 1, 32): (0.995, 1.0, 9.91),
        (True, 0, 32): (0.99, 1.0, 9.91),
        (True, 1, 30): (0.99, 1.0, 9.885),
        (False, 1, 32): (0.99, 1.0, 9.875),
        (False, 0, 32): (0.99, 1.0, 9.92),
        (False, 1, 30): (1.0, 1.0, 9.895),
    }
    ref_m3 = ref_m3_tab[(by_residual, metric, d)]

    # Each metric must stay strictly above 99% of its reference.
    for pos in range(3):
        self.assertGreater(m3[pos], ref_m3[pos] * 0.99)

    # Test I/O
    restored = faiss.deserialize_index(faiss.serialize_index(fast_scan))
    D3, I3 = restored.search(queries, k)
    np.testing.assert_array_equal(I3, Inew)
    np.testing.assert_array_equal(D3, Dnew)