def test_wrapped_quantizer(self):
    """Compare an IVF index using a wrapped float quantizer to a binary one.

    Two ``IndexBinaryIVF`` indexes are trained and filled with the same
    data. One uses an ``IndexBinaryFlat`` coarse quantizer, the other an
    ``IndexFlatL2`` wrapped in ``IndexBinaryFromFloat``. The distance
    matrices returned for the same queries must match exactly.
    """
    dim = 256
    n_train, n_base, n_query = 150, 1500, 500
    train_set, base_set, query_set = make_binary_dataset(
        dim, n_base, n_train, n_query)
    n_lists = 16

    # Reference: native binary coarse quantizer.
    ref_coarse = faiss.IndexBinaryFlat(dim)
    ref_ivf = faiss.IndexBinaryIVF(ref_coarse, dim, n_lists)
    ref_ivf.train(train_set)
    ref_ivf.add(base_set)

    # Under test: float coarse quantizer exposed through the binary wrapper.
    float_coarse = faiss.IndexFlatL2(dim)
    wrapped_coarse = faiss.IndexBinaryFromFloat(float_coarse)
    wrapped_ivf = faiss.IndexBinaryIVF(wrapped_coarse, dim, n_lists)
    wrapped_ivf.train(train_set)
    wrapped_ivf.add(base_set)

    expected_dist, _ = ref_ivf.search(query_set, 10)
    actual_dist, _ = wrapped_ivf.search(query_set, 10)

    np.testing.assert_array_equal(expected_dist, actual_dist)
def test_wrapped_quantizer_IMI(self):
    """Check recall of a binary IVF index built on a wrapped MultiIndexQuantizer.

    The float ``MultiIndexQuantizer`` is trained through its
    ``IndexBinaryFromFloat`` wrapper and then used as the coarse quantizer
    of an ``IndexBinaryIVF``. Results are compared against an exhaustive
    ``IndexBinaryFlat`` search.
    """
    dim = 256
    n_train, n_base, n_query = 3500, 10000, 500
    train_set, base_set, query_set = make_binary_dataset(
        dim, n_base, n_train, n_query)

    # Exhaustive search provides the ground truth.
    exact = faiss.IndexBinaryFlat(dim)
    exact.add(base_set)

    exponent = 6
    expected_cells = 2 ** (2 * exponent)

    imi = faiss.MultiIndexQuantizer(dim, 2, exponent)
    wrapped = faiss.IndexBinaryFromFloat(imi)
    wrapped.train(train_set)

    # Training through the wrapper must populate the float quantizer.
    assert expected_cells == imi.ntotal

    ivf = faiss.IndexBinaryIVF(wrapped, dim, imi.ntotal)
    ivf.nprobe = 2048
    assert ivf.is_trained
    ivf.add(base_set)

    ref_dist, _ = exact.search(query_set, 10)
    got_dist, _ = ivf.search(query_set, 10)

    # A query counts as a hit when the exact nearest distance shows up
    # among the distances returned by the IVF index.
    hits = 0
    for ref_row, got_row in zip(ref_dist, got_dist):
        if ref_row[0] in got_row[:10]:
            hits += 1
    recall = hits / float(ref_dist.shape[0])

    assert recall > 0.82, "recall = %g" % recall
def test_wrapped_quantizer_HNSW(self):
    """Check recall of a binary IVF index whose quantizer is a wrapped HNSW.

    Centroids are obtained by clustering a float expansion of the binary
    training set, stored in an ``IndexHNSWFlat``, and exposed to
    ``IndexBinaryIVF`` through ``IndexBinaryFromFloat``. Results are
    compared against an exhaustive ``IndexBinaryFlat`` search.

    The test runs with a single OpenMP thread (presumably to keep HNSW
    construction deterministic). The previous thread count is restored on
    exit so the setting does not leak into other tests.
    """
    saved_threads = faiss.omp_get_max_threads()
    faiss.omp_set_num_threads(1)
    try:
        def bin2float(v):
            # Expand every byte into 8 floats, lowest bit first:
            # a set bit becomes +1.0 and a cleared bit becomes -1.0.
            def byte2float(byte):
                return np.array(
                    [-1.0 + 2.0 * (byte & (1 << b) != 0)
                     for b in range(0, 8)])

            return np.hstack(
                [byte2float(byte) for byte in v]).astype('float32')

        def floatvec2nparray(v):
            # Copy the vector element by element, then shape it into rows
            # of ``d`` values (``d`` is the enclosing test's local).
            return np.array(
                [np.float32(v.at(i)) for i in range(0, v.size())]
            ).reshape(-1, d)

        d = 256
        nt = 12800
        nb = 10000
        nq = 500

        (xt, xb, xq) = make_binary_dataset(d, nb, nt, nq)

        # Exhaustive search provides the ground truth.
        index_ref = faiss.IndexBinaryFlat(d)
        index_ref.add(xb)

        nlist = 256
        clus = faiss.Clustering(d, nlist)
        clus_index = faiss.IndexFlatL2(d)

        xt_f = np.array([bin2float(v) for v in xt])
        clus.train(xt_f, clus_index)

        centroids = floatvec2nparray(clus.centroids)
        hnsw_quantizer = faiss.IndexHNSWFlat(d, 32)
        hnsw_quantizer.add(centroids)
        hnsw_quantizer.is_trained = True
        wrapped_quantizer = faiss.IndexBinaryFromFloat(hnsw_quantizer)

        # The wrapper must mirror the state of the float index it wraps.
        assert nlist == hnsw_quantizer.ntotal
        assert nlist == wrapped_quantizer.ntotal
        assert wrapped_quantizer.is_trained

        index = faiss.IndexBinaryIVF(wrapped_quantizer, d,
                                     hnsw_quantizer.ntotal)
        index.nprobe = 128

        assert index.is_trained

        index.add(xb)

        D_ref, I_ref = index_ref.search(xq, 10)
        D, I = index.search(xq, 10)

        # A query counts as a hit when the exact nearest distance shows up
        # among the distances returned by the IVF index.
        recall = sum(gti[0] in Di[:10] for gti, Di in zip(D_ref, D)) \
            / float(D_ref.shape[0])

        assert recall > 0.77, "recall = %g" % recall
    finally:
        # Undo the process-wide thread limit even when an assertion fails.
        faiss.omp_set_num_threads(saved_threads)
def test_wrapped_quantizer_HNSW(self):
    """Check recall of a binary IVF index whose quantizer is a wrapped HNSW.

    Centroids are obtained by clustering a +/-1 float expansion of the
    binary training set, stored in an ``IndexHNSWFlat``, and exposed to
    ``IndexBinaryIVF`` through ``IndexBinaryFromFloat``. Results are
    compared against an exhaustive ``IndexBinaryFlat`` search.
    """

    def bin2float2d(codes):
        # Unpack each byte into 8 values (lowest bit first) and map
        # bit 0 -> -1.0, bit 1 -> +1.0.
        n_rows, n_bytes = codes.shape
        shifts = np.arange(8)
        unpacked = ((codes.reshape(-1, 1) >> shifts) & 1).astype("float32")
        unpacked *= 2
        unpacked -= 1
        return unpacked.reshape(n_rows, n_bytes * 8)

    dim = 256
    n_train, n_base, n_query = 12800, 10000, 500
    train_set, base_set, query_set = make_binary_dataset(
        dim, n_base, n_train, n_query)

    # Exhaustive search provides the ground truth.
    exact = faiss.IndexBinaryFlat(dim)
    exact.add(base_set)

    n_lists = 256
    clustering = faiss.Clustering(dim, n_lists)
    assign_index = faiss.IndexFlatL2(dim)

    train_float = bin2float2d(train_set)
    clustering.train(train_float, assign_index)

    flat_centroids = faiss.vector_to_array(clustering.centroids)
    centroids = flat_centroids.reshape(-1, clustering.d)

    hnsw = faiss.IndexHNSWFlat(dim, 32)
    hnsw.add(centroids)
    hnsw.is_trained = True
    wrapped = faiss.IndexBinaryFromFloat(hnsw)

    # The wrapper must mirror the state of the float index it wraps.
    assert n_lists == hnsw.ntotal
    assert n_lists == wrapped.ntotal
    assert wrapped.is_trained

    ivf = faiss.IndexBinaryIVF(wrapped, dim, hnsw.ntotal)
    ivf.nprobe = 128

    assert ivf.is_trained

    ivf.add(base_set)

    ref_dist, _ = exact.search(query_set, 10)
    got_dist, _ = ivf.search(query_set, 10)

    # A query counts as a hit when the exact nearest distance shows up
    # among the distances returned by the IVF index.
    hits = sum(
        1 for ref_row, got_row in zip(ref_dist, got_dist)
        if ref_row[0] in got_row[:10]
    )
    recall = hits / float(ref_dist.shape[0])

    assert recall >= 0.77, "recall = %g" % recall
def test_index_from_float(self):
    """Compare IndexBinaryFromFloat over IndexFlatL2 with a plain float index.

    The reference index is an ``IndexFlatL2`` fed with
    ``binary_to_float``-converted vectors. The index under test receives
    the raw binary vectors through the wrapper. The wrapper's distances
    must match the reference L2 distances divided by 4 and truncated to
    int32.
    """
    dim = 256
    n_train = 0
    n_base = 1500
    n_query = 500

    # No training vectors are requested, so the first output is unused.
    _, base_set, query_set = make_binary_dataset(
        dim, n_base, n_train, n_query)

    float_ref = faiss.IndexFlatL2(dim)
    float_ref.add(binary_to_float(base_set))

    inner = faiss.IndexFlatL2(dim)
    wrapped = faiss.IndexBinaryFromFloat(inner)
    wrapped.add(base_set)

    ref_dist, _ = float_ref.search(binary_to_float(query_set), 10)
    got_dist, _ = wrapped.search(query_set, 10)

    # NOTE(review): the factor 4 presumably comes from a +/-1 float
    # encoding in binary_to_float -- confirm against that helper.
    expected = (ref_dist / 4.0).astype('int32')
    np.testing.assert_allclose(expected, got_dist)
def test_hnsw(self):
    """Compare IndexBinaryHNSW with a float HNSW wrapped for binary use.

    Both indexes are built from ``self.xb`` with a single OpenMP thread
    and queried with ``self.xq``; the returned distances must be identical.
    The previous thread count is restored even if index construction
    raises.
    """
    d = self.xq.shape[1] * 8

    # NOTE(hoss): Ensure the HNSW construction is deterministic.
    nthreads = faiss.omp_get_max_threads()
    faiss.omp_set_num_threads(1)
    try:
        index_hnsw_float = faiss.IndexHNSWFlat(d, 16)
        index_hnsw_ref = faiss.IndexBinaryFromFloat(index_hnsw_float)

        index_hnsw_bin = faiss.IndexBinaryHNSW(d, 16)

        index_hnsw_ref.add(self.xb)
        index_hnsw_bin.add(self.xb)
    finally:
        # Restore the process-wide setting on failure as well, so a
        # broken build does not leave later tests single-threaded.
        faiss.omp_set_num_threads(nthreads)

    # Only the distances are compared; the returned labels are ignored.
    Dref, _ = index_hnsw_ref.search(self.xq, 3)
    Dbin, _ = index_hnsw_bin.search(self.xq, 3)

    self.assertTrue((Dref == Dbin).all())
def test_binary_from_float(self):
    """Round-trip an IndexBinaryFromFloat through write/read_index_binary.

    A float HNSW index wrapped for binary use is filled with ``self.xb``,
    written to a temporary file and read back. The reloaded index must
    return the same labels and distances for ``self.xq`` as the original.
    """
    d = self.xq.shape[1] * 8

    float_index = faiss.IndexHNSWFlat(d, 16)
    index = faiss.IndexBinaryFromFloat(float_index)
    index.add(self.xb)
    D, I = index.search(self.xq, 3)

    fd, tmpnam = tempfile.mkstemp()
    try:
        # mkstemp hands back an already-open descriptor. faiss works with
        # the path only, so close it to avoid leaking the descriptor and
        # to allow os.remove on platforms that refuse to delete open files.
        os.close(fd)

        faiss.write_index_binary(index, tmpnam)
        index2 = faiss.read_index_binary(tmpnam)

        D2, I2 = index2.search(self.xq, 3)

        assert (I2 == I).all()
        assert (D2 == D).all()
    finally:
        os.remove(tmpnam)
def test_wrap(self):
    """Use an IndexBinaryFromFloat after a garbage-collection pass.

    The wrapped ``IndexFlatL2`` is created inline, so no Python name
    refers to it once the wrapper is built. After ``gc.collect()`` the
    wrapper must still accept vectors.
    """
    # Keep the float index as an unnamed temporary: binding it to a
    # variable would hold a reference and change what this test exercises.
    wrapped = faiss.IndexBinaryFromFloat(faiss.IndexFlatL2(dbin))

    gc.collect()

    wrapped.add(xbbin)