def test_wrapped_quantizer(self):
        d = 256
        nt = 150
        nb = 1500
        nq = 500
        (xt, xb, xq) = make_binary_dataset(d, nb, nt, nq)

        nlist = 16
        quantizer_ref = faiss.IndexBinaryFlat(d)
        index_ref = faiss.IndexBinaryIVF(quantizer_ref, d, nlist)
        index_ref.train(xt)

        index_ref.add(xb)

        unwrapped_quantizer = faiss.IndexFlatL2(d)
        quantizer = faiss.IndexBinaryFromFloat(unwrapped_quantizer)
        index = faiss.IndexBinaryIVF(quantizer, d, nlist)

        index.train(xt)

        index.add(xb)

        D_ref, I_ref = index_ref.search(xq, 10)
        D, I = index.search(xq, 10)

        np.testing.assert_array_equal(D_ref, D)
    def test_wrapped_quantizer_IMI(self):
        d = 256
        nt = 3500
        nb = 10000
        nq = 500
        (xt, xb, xq) = make_binary_dataset(d, nb, nt, nq)

        index_ref = faiss.IndexBinaryFlat(d)

        index_ref.add(xb)

        nlist_exp = 6
        nlist = 2**(2 * nlist_exp)
        float_quantizer = faiss.MultiIndexQuantizer(d, 2, nlist_exp)
        wrapped_quantizer = faiss.IndexBinaryFromFloat(float_quantizer)
        wrapped_quantizer.train(xt)

        assert nlist == float_quantizer.ntotal

        index = faiss.IndexBinaryIVF(wrapped_quantizer, d,
                                     float_quantizer.ntotal)
        index.nprobe = 2048
        assert index.is_trained

        index.add(xb)

        D_ref, I_ref = index_ref.search(xq, 10)
        D, I = index.search(xq, 10)

        recall = sum(gti[0] in Di[:10] for gti, Di in zip(D_ref, D)) \
                 / float(D_ref.shape[0])

        assert recall > 0.82, "recall = %g" % recall
    def test_wrapped_quantizer_HNSW(self):
        faiss.omp_set_num_threads(1)

        def bin2float(v):
            def byte2float(byte):
                return np.array(
                    [-1.0 + 2.0 * (byte & (1 << b) != 0) for b in range(0, 8)])

            return np.hstack([byte2float(byte)
                              for byte in v]).astype('float32')

        def floatvec2nparray(v):
            return np.array([np.float32(v.at(i)) for i in range(0, v.size())]) \
                     .reshape(-1, d)

        d = 256
        nt = 12800
        nb = 10000
        nq = 500
        (xt, xb, xq) = make_binary_dataset(d, nb, nt, nq)

        index_ref = faiss.IndexBinaryFlat(d)

        index_ref.add(xb)

        nlist = 256
        clus = faiss.Clustering(d, nlist)
        clus_index = faiss.IndexFlatL2(d)

        xt_f = np.array([bin2float(v) for v in xt])
        clus.train(xt_f, clus_index)

        centroids = floatvec2nparray(clus.centroids)
        hnsw_quantizer = faiss.IndexHNSWFlat(d, 32)
        hnsw_quantizer.add(centroids)
        hnsw_quantizer.is_trained = True
        wrapped_quantizer = faiss.IndexBinaryFromFloat(hnsw_quantizer)

        assert nlist == hnsw_quantizer.ntotal
        assert nlist == wrapped_quantizer.ntotal
        assert wrapped_quantizer.is_trained

        index = faiss.IndexBinaryIVF(wrapped_quantizer, d,
                                     hnsw_quantizer.ntotal)
        index.nprobe = 128

        assert index.is_trained

        index.add(xb)

        D_ref, I_ref = index_ref.search(xq, 10)
        D, I = index.search(xq, 10)

        recall = sum(gti[0] in Di[:10] for gti, Di in zip(D_ref, D)) \
                 / float(D_ref.shape[0])

        assert recall > 0.77, "recall = %g" % recall
Ejemplo n.º 4
0
    def test_wrapped_quantizer_HNSW(self):
        def bin2float2d(v):
            n, d = v.shape
            vf = ((v.reshape(-1, 1) >> np.arange(8)) & 1).astype("float32")
            vf *= 2
            vf -= 1
            return vf.reshape(n, d * 8)

        d = 256
        nt = 12800
        nb = 10000
        nq = 500
        (xt, xb, xq) = make_binary_dataset(d, nb, nt, nq)

        index_ref = faiss.IndexBinaryFlat(d)

        index_ref.add(xb)

        nlist = 256
        clus = faiss.Clustering(d, nlist)
        clus_index = faiss.IndexFlatL2(d)

        xt_f = bin2float2d(xt)
        clus.train(xt_f, clus_index)

        centroids = faiss.vector_to_array(clus.centroids).reshape(-1, clus.d)
        hnsw_quantizer = faiss.IndexHNSWFlat(d, 32)
        hnsw_quantizer.add(centroids)
        hnsw_quantizer.is_trained = True
        wrapped_quantizer = faiss.IndexBinaryFromFloat(hnsw_quantizer)

        assert nlist == hnsw_quantizer.ntotal
        assert nlist == wrapped_quantizer.ntotal
        assert wrapped_quantizer.is_trained

        index = faiss.IndexBinaryIVF(wrapped_quantizer, d,
                                     hnsw_quantizer.ntotal)
        index.nprobe = 128

        assert index.is_trained

        index.add(xb)

        D_ref, I_ref = index_ref.search(xq, 10)
        D, I = index.search(xq, 10)

        recall = sum(gti[0] in Di[:10] for gti, Di in zip(D_ref, D)) \
                 / float(D_ref.shape[0])

        assert recall >= 0.77, "recall = %g" % recall
    def test_index_from_float(self):
        d = 256
        nt = 0
        nb = 1500
        nq = 500
        (xt, xb, xq) = make_binary_dataset(d, nb, nt, nq)

        index_ref = faiss.IndexFlatL2(d)
        index_ref.add(binary_to_float(xb))

        index = faiss.IndexFlatL2(d)
        index_bin = faiss.IndexBinaryFromFloat(index)
        index_bin.add(xb)

        D_ref, I_ref = index_ref.search(binary_to_float(xq), 10)
        D, I = index_bin.search(xq, 10)

        np.testing.assert_allclose((D_ref / 4.0).astype('int32'), D)
Ejemplo n.º 6
0
    def test_hnsw(self):
        d = self.xq.shape[1] * 8

        # NOTE(hoss): Ensure the HNSW construction is deterministic.
        nthreads = faiss.omp_get_max_threads()
        faiss.omp_set_num_threads(1)

        index_hnsw_float = faiss.IndexHNSWFlat(d, 16)
        index_hnsw_ref = faiss.IndexBinaryFromFloat(index_hnsw_float)

        index_hnsw_bin = faiss.IndexBinaryHNSW(d, 16)

        index_hnsw_ref.add(self.xb)
        index_hnsw_bin.add(self.xb)

        faiss.omp_set_num_threads(nthreads)

        Dref, Iref = index_hnsw_ref.search(self.xq, 3)
        Dbin, Ibin = index_hnsw_bin.search(self.xq, 3)

        self.assertTrue((Dref == Dbin).all())
Ejemplo n.º 7
0
    def test_binary_from_float(self):
        d = self.xq.shape[1] * 8

        float_index = faiss.IndexHNSWFlat(d, 16)
        index = faiss.IndexBinaryFromFloat(float_index)
        index.add(self.xb)
        D, I = index.search(self.xq, 3)

        _, tmpnam = tempfile.mkstemp()

        try:
            faiss.write_index_binary(index, tmpnam)

            index2 = faiss.read_index_binary(tmpnam)

            D2, I2 = index2.search(self.xq, 3)

            assert (I2 == I).all()
            assert (D2 == D).all()

        finally:
            os.remove(tmpnam)
Ejemplo n.º 8
0
 def test_wrap(self):
     index = faiss.IndexBinaryFromFloat(faiss.IndexFlatL2(dbin))
     gc.collect()
     index.add(xbbin)