def test_shards(self):
    """Compare IndexBinaryShards against a single IndexBinaryFlat.

    Two ways of filling the sharded index are exercised:

    1. every shard is populated with its own contiguous slice of ``xb``
       before being attached;
    2. empty shards are attached first and ``xb`` is then added through
       the sharded index itself.

    In both cases the top-10 results are checked against the flat
    reference with ``compare_binary_result_lists``.
    """
    d = 32
    nq = 100
    nb = 200
    k = 10
    n_shards = 5

    _, xb, xq = make_binary_dataset(d, 0, nb, nq)

    # Reference results from one flat index holding all of xb.
    flat = faiss.IndexBinaryFlat(d)
    flat.add(xb)
    Dref, Iref = flat.search(xq, k)

    # Scenario 1: shards are filled individually, then attached.
    prefilled = faiss.IndexBinaryShards(d)
    for shard_no in range(n_shards):
        begin = shard_no * nb // n_shards
        end = (shard_no + 1) * nb // n_shards
        shard = faiss.IndexBinaryFlat(d)
        shard.add(xb[begin:end])
        prefilled.add_shard(shard)

    D, I = prefilled.search(xq, k)
    compare_binary_result_lists(Dref, Iref, D, I)

    # Scenario 2: empty shards are attached, data is added afterwards
    # through the sharded index.
    filled_later = faiss.IndexBinaryShards(d)
    for _ in range(n_shards):
        filled_later.add_shard(faiss.IndexBinaryFlat(d))
    filled_later.add(xb)

    D2, I2 = filled_later.search(xq, k)
    compare_binary_result_lists(Dref, Iref, D2, I2)
def test_replicas(self):
    """Check that IndexBinaryReplicas matches a single IndexBinaryFlat.

    The replicated index is built twice: once from sub-indexes that
    already contain ``xb``, and once from empty sub-indexes with ``xb``
    added through the replicated index. Both must return exactly the
    distances and labels of the flat reference for a top-10 search.
    """
    d = 32
    nq = 100
    nb = 200
    k = 10
    n_replicas = 5

    _, xb, xq = make_binary_dataset(d, 0, nb, nq)

    # Reference results.
    flat = faiss.IndexBinaryFlat(d)
    flat.add(xb)
    Dref, Iref = flat.search(xq, k)

    # Scenario 1: each replica already holds the full dataset.
    prefilled = faiss.IndexBinaryReplicas()
    for _ in range(n_replicas):
        replica = faiss.IndexBinaryFlat(d)
        replica.add(xb)
        prefilled.addIndex(replica)

    D, I = prefilled.search(xq, k)
    self.assertTrue((Dref == D).all())
    self.assertTrue((Iref == I).all())

    # Scenario 2: empty replicas, data added via the replicated index.
    filled_later = faiss.IndexBinaryReplicas()
    for _ in range(n_replicas):
        filled_later.addIndex(faiss.IndexBinaryFlat(d))
    filled_later.add(xb)

    D2, I2 = filled_later.search(xq, k)
    self.assertTrue((Dref == D2).all())
    self.assertTrue((Iref == I2).all())
def __init__(self, *args, **kwargs):
    """Initialise the test case and generate its binary dataset.

    Calls ``make_binary_dataset(32, 0, 1500, 500)`` and keeps the second
    and third returned values as ``self.xb`` and ``self.xq``; the first
    returned value is discarded.
    """
    unittest.TestCase.__init__(self, *args, **kwargs)

    dim, n_train, n_base, n_query = 32, 0, 1500, 500
    _, self.xb, self.xq = make_binary_dataset(dim, n_train, n_base, n_query)
def __init__(self, *args, **kwargs):
    """Initialise the test case, its dataset and reference distances.

    Stores the three arrays returned by
    ``make_binary_dataset(32, 200, 1500, 500)`` as ``self.xt``,
    ``self.xb`` and ``self.xq``. It then runs a top-10 search of
    ``self.xq`` on an ``IndexBinaryFlat`` filled with ``self.xb`` and
    keeps the resulting distance matrix as ``self.Dref``. The labels
    from that search are not retained.
    """
    unittest.TestCase.__init__(self, *args, **kwargs)

    dim, n_train, n_base, n_query = 32, 200, 1500, 500
    self.xt, self.xb, self.xq = make_binary_dataset(
        dim, n_train, n_base, n_query)

    flat = faiss.IndexBinaryFlat(dim)
    flat.add(self.xb)
    self.Dref, _ = flat.search(self.xq, 10)
def test_encode_to_binary(self):
    """Check that PQ-based float indexes reproduce binary distances.

    A ``ProductQuantizer`` with 8-bit sub-quantizers is given hand-built
    centroids derived from the byte values 0..255 via ``binary_to_float``,
    so that encoding the float version of ``xb`` must give back ``xb``
    itself. The same quantizer is then plugged into an ``IndexPQ`` and an
    ``IndexIVFPQ`` (without residual encoding), and every returned
    distance is checked against ``4 * binary_dis(query, result)``.

    unittest/NumPy assertions are used instead of bare ``assert`` so the
    checks still run under ``python -O`` and report the offending values.
    """
    d = 256
    nt = 256
    nb = 1500
    nq = 500
    n_bytes = d // 8
    (xt, xb, xq) = make_binary_dataset(d, nt, nb, nq)

    def check_distances(D, I):
        # Every reported distance must equal 4x the binary distance.
        # NOTE(review): the factor 4 presumably comes from how
        # binary_to_float maps bits to floats -- confirm against helper.
        for i in range(nq):
            for j, dj in zip(I[i], D[i]):
                ref_dis = binary_dis(xq[i], xb[j])
                self.assertEqual(4 * ref_dis, dj)

    # Product quantizer whose centroids are the float images of all 256
    # byte values, repeated once per sub-quantizer.
    pq = faiss.ProductQuantizer(d, n_bytes, 8)
    centroids = binary_to_float(
        np.tile(np.arange(256), n_bytes).astype('uint8').reshape(-1, 1))
    faiss.copy_array_to_vector(centroids.ravel(), pq.centroids)
    pq.is_trained = True

    # Encoding the float vectors must give back the original bytes.
    codes = pq.compute_codes(binary_to_float(xb))
    np.testing.assert_array_equal(codes, xb)

    # Flat PQ index using the hand-built quantizer.
    indexpq = faiss.IndexPQ(d, n_bytes, 8)
    indexpq.pq = pq
    indexpq.is_trained = True
    indexpq.add(binary_to_float(xb))
    D, I = indexpq.search(binary_to_float(xq), 3)
    check_distances(D, I)

    # IVF variant: the coarse quantizer is trained through a throw-away
    # IndexIVFFlat (pretext class for training).
    nlist = 32
    quantizer = faiss.IndexFlatL2(d)
    iflat = faiss.IndexIVFFlat(quantizer, d, nlist)
    iflat.train(binary_to_float(xt))

    indexivfpq = faiss.IndexIVFPQ(quantizer, d, nlist, n_bytes, 8)
    indexivfpq.pq = pq
    indexivfpq.is_trained = True
    indexivfpq.by_residual = False
    indexivfpq.add(binary_to_float(xb))
    indexivfpq.nprobe = 4
    D, I = indexivfpq.search(binary_to_float(xq), 3)
    check_distances(D, I)
def test_hash_and_multihash(self):
    """Compare k-NN recall of binary hash indexes with a flat index.

    For ``nh`` in (0, 1, 3, 5) and ``nbit`` in (4, 7), builds an
    ``IndexBinaryHash`` (when ``nh == 0``) or an ``IndexBinaryMultiHash``
    and counts how many of the flat index's top-``k`` labels it finds.

    Assertions:
      * no result row contains duplicate labels;
      * for each ``nh``, 4 bits find more results than 7 bits;
      * the last index built for each ``nh`` gives identical results
        after a serialize/deserialize round trip;
      * with 7 bits, ``nh == 0`` and ``nh == 1`` differ by less than 3
        hits, and the hit count grows from 1 to 3 to 5 hash tables.
    """
    d = 128
    nq = 100
    nb = 2000
    k = 10

    _, xb, xq = make_binary_dataset(d, 0, nb, nq)

    # Exhaustive reference labels.
    flat = faiss.IndexBinaryFlat(d)
    flat.add(xb)
    _, Iref = flat.search(xq, k)

    nfound = {}
    for nh in (0, 1, 3, 5):
        for nbit in (4, 7):
            index = (
                faiss.IndexBinaryHash(d, nbit)
                if nh == 0
                else faiss.IndexBinaryMultiHash(d, nh, nbit)
            )
            index.add(xb)
            index.nflip = 2
            Dnew, Inew = index.search(xq, k)

            hits = 0
            for row in range(nq):
                found = Inew[row]
                found_set = set(found)
                # no duplicates
                self.assertTrue(len(found) == len(found_set))
                hits += len(set(Iref[row]) & found_set)

            print('nfound', nh, nbit, hits)
            nfound[(nh, nbit)] = hits

        self.assertGreater(nfound[(nh, 4)], nfound[(nh, 7)])

        # test serialization (on the last index built, i.e. nbit == 7)
        blob = faiss.serialize_index_binary(index)
        restored = faiss.deserialize_index_binary(blob)
        D2, I2 = restored.search(xq, k)
        np.testing.assert_array_equal(Inew, I2)
        np.testing.assert_array_equal(Dnew, D2)

    print('nfound=', nfound)
    self.assertGreater(3, abs(nfound[(0, 7)] - nfound[(1, 7)]))
    self.assertGreater(nfound[(3, 7)], nfound[(1, 7)])
    self.assertGreater(nfound[(5, 7)], nfound[(3, 7)])
def subtest_result_order(self, nh):
    """Assert that hash-index search results are sorted by distance.

    Builds an ``IndexBinaryHash`` when ``nh == 0`` and an
    ``IndexBinaryMultiHash`` with ``nh`` as its second argument
    otherwise (10 bits, ``nflip = 5``). Runs a top-10 search and checks
    that each row of the distance matrix is non-decreasing.
    """
    d = 128
    nq = 10
    nb = 200
    nbit = 10
    k = 10

    _, xb, xq = make_binary_dataset(d, 0, nb, nq)

    index = (
        faiss.IndexBinaryHash(d, nbit)
        if nh == 0
        else faiss.IndexBinaryMultiHash(d, nh, nbit)
    )
    index.add(xb)
    index.nflip = 5

    Do, _ = index.search(xq, k)

    # Each column must be >= the column to its left.
    left, right = Do[:, :-1], Do[:, 1:]
    self.assertTrue(np.all(right >= left))
def test_replicas(self):
    """Check that IndexBinaryReplicas matches a single IndexBinaryFlat.

    The replicated index is built twice: once from sub-indexes that
    already contain ``xb``, and once from empty sub-indexes with ``xb``
    added through the replicated index. Both must return exactly the
    distances and labels of the flat reference for a top-10 search.

    On macOS with Clang 12 the test runs single-threaded to avoid an
    OpenMP bug. The previous thread count is restored in a ``finally``
    clause, so a failing assertion or exception cannot leave the
    process-wide thread setting at 1 for later tests.
    """
    d = 32
    nq = 100
    nb = 200

    (_, xb, xq) = make_binary_dataset(d, 0, nb, nq)

    index_ref = faiss.IndexBinaryFlat(d)
    index_ref.add(xb)

    Dref, Iref = index_ref.search(xq, 10)

    # there is a OpenMP bug in this configuration, so disable threading
    if sys.platform == "darwin" and "Clang 12" in sys.version:
        nthreads = faiss.omp_get_max_threads()
        faiss.omp_set_num_threads(1)
    else:
        nthreads = None

    try:
        nrep = 5

        # Scenario 1: each replica already holds the full dataset.
        index = faiss.IndexBinaryReplicas()
        for _i in range(nrep):
            sub_idx = faiss.IndexBinaryFlat(d)
            sub_idx.add(xb)
            index.addIndex(sub_idx)

        D, I = index.search(xq, 10)

        self.assertTrue((Dref == D).all())
        self.assertTrue((Iref == I).all())

        # Scenario 2: empty replicas, data added via the replicated index.
        index2 = faiss.IndexBinaryReplicas()
        for _i in range(nrep):
            sub_idx = faiss.IndexBinaryFlat(d)
            index2.addIndex(sub_idx)

        index2.add(xb)
        D2, I2 = index2.search(xq, 10)
    finally:
        # Always undo the global thread-count override, even on failure.
        if nthreads is not None:
            faiss.omp_set_num_threads(nthreads)

    self.assertTrue((Dref == D2).all())
    self.assertTrue((Iref == I2).all())
def test_multihash(self):
    """Check range search on IndexBinaryMultiHash against a flat index.

    For ``nh`` in (1, 3, 5), runs a range search with radius 55 on an
    ``IndexBinaryMultiHash`` (10 bits, ``nflip = 2``) and asserts, per
    query, that the returned labels contain no duplicates and are a
    subset of the labels returned by ``IndexBinaryFlat``. Finally it
    asserts that the total number of results does not decrease as
    ``nh`` grows.
    """
    d = 128
    nq = 100
    nb = 2000
    radius = 55

    _, xb, xq = make_binary_dataset(d, 0, nb, nq)

    # Exhaustive reference; Lref holds the per-query result offsets.
    flat = faiss.IndexBinaryFlat(d)
    flat.add(xb)
    Lref, _, Iref = flat.range_search(xq, radius)

    print("nb res: ", Lref[-1])

    nfound = []
    ndis = []

    for nh in (1, 3, 5):
        index = faiss.IndexBinaryMultiHash(d, nh, 10)
        index.add(xb)
        index.nflip = 2

        stats = faiss.cvar.indexBinaryHash_stats
        stats.reset()

        Lnew, _, Inew = index.range_search(xq, radius)

        for q in range(nq):
            expected = Iref[Lref[q]:Lref[q + 1]]
            found = Inew[Lnew[q]:Lnew[q + 1]]
            found_set = set(found)
            # no duplicates
            self.assertTrue(len(found) == len(found_set))
            # subset of real results
            self.assertTrue(found_set <= set(expected))

        nfound.append(Lnew[-1])
        ndis.append(stats.ndis)

    print('nfound=', nfound)
    print('ndis=', ndis)

    # Only monotonic growth is asserted here, not equality with the
    # exhaustive result count Lref[-1].
    totals = np.array(nfound)
    self.assertTrue(np.all(totals[1:] >= totals[:-1]))