Ejemplo n.º 1
0
    def test_equiv_sh(self):
        """ make sure that the IVFSpectralHash sa_encode function gives the same
        result as the concatenated RQ + LSH index sa_encode """
        ds = SyntheticDataset(32, 500, 100, 0)
        index1 = faiss.index_factory(ds.d, "RQ1x4,Refine(ITQ16,LSH)")
        index1.train(ds.get_train())

        # reproduce this in an IndexIVFSpectralHash
        coarse_quantizer = faiss.IndexFlat(ds.d)
        rq = faiss.downcast_index(index1.base_index).rq
        centroids = get_additive_quantizer_codebooks(rq)[0]
        coarse_quantizer.add(centroids)

        encoder = faiss.downcast_index(index1.refine_index)

        # larger than the magnitude of the vectors
        # negative because otherwise the bits are flipped
        period = -100000.0

        index2 = faiss.IndexIVFSpectralHash(coarse_quantizer, ds.d,
                                            coarse_quantizer.ntotal,
                                            encoder.sa_code_size() * 8, period)

        # replace with the vt of the encoder. Binarization is performed by
        # the IndexIVFSpectralHash itself
        index2.replace_vt(encoder)

        codes1 = index1.sa_encode(ds.get_database())
        codes2 = index2.sa_encode(ds.get_database())

        np.testing.assert_array_equal(codes1, codes2)
Ejemplo n.º 2
0
    def test_precomp(self):
        ds = datasets.SyntheticDataset(32, 1000, 1000, 0)

        # make sure it work with varying nb of bits
        nbits = faiss.UInt64Vector()
        nbits.push_back(5)
        nbits.push_back(6)
        nbits.push_back(7)

        rq = faiss.ResidualQuantizer(ds.d, nbits)
        rq.train_type = faiss.ResidualQuantizer.Train_default
        rq.train(ds.get_train())

        codebooks = get_additive_quantizer_codebooks(rq)
        precomp = precomp_codebooks(codebooks)
        codebook_cross_prods_ref, cent_norms_ref = precomp

        # check C++ precomp tables
        codebook_cross_prods_ref = np.hstack([
            np.vstack(c) for c in codebook_cross_prods_ref])

        rq.compute_codebook_tables()
        codebook_cross_prods = faiss.vector_to_array(
            rq.codebook_cross_products)
        codebook_cross_prods = codebook_cross_prods.reshape(
            rq.total_codebook_size, rq.total_codebook_size)
        cent_norms = faiss.vector_to_array(rq.cent_norms)

        np.testing.assert_array_almost_equal(
            codebook_cross_prods, codebook_cross_prods_ref, decimal=5)
        np.testing.assert_array_almost_equal(
            np.hstack(cent_norms_ref), cent_norms, decimal=5)

        # validate that the python tab-based encoding works
        xb = ds.get_database()
        ref_codes, _, _ = beam_search_encoding_ref(codebooks, xb, 7)
        new_codes, _ = beam_search_encoding_tab(codebooks, xb, 7, precomp)
        np.testing.assert_array_equal(ref_codes, new_codes)

        # validate the C++ beam_search_encode_step_tab function
        beam_search_encoding_tab(codebooks, xb, 7, precomp, implem="ref cpp")

        # check implem w/ residuals
        n = ref_codes.shape[0]
        sp = faiss.swig_ptr
        ref_codes_packed = np.zeros((n, rq.code_size), dtype='uint8')
        ref_codes_int32 = ref_codes.astype('int32')
        rq.pack_codes(
            n, sp(ref_codes_int32),
            sp(ref_codes_packed), rq.M * ref_codes.shape[1]
        )

        rq.max_beam_size = 7
        codes_ref_residuals = rq.compute_codes(xb)
        np.testing.assert_array_equal(ref_codes_packed, codes_ref_residuals)

        rq.use_beam_LUT = 1
        codes_new = rq.compute_codes(xb)
        np.testing.assert_array_equal(codes_ref_residuals, codes_new)
Ejemplo n.º 3
0
    def test_equiv_rq(self):
        """
        make sure it is equivalent to search a RQ and to search an IVF
        with RCQ + RQ with the same codebooks.
        """
        ds = datasets.SyntheticDataset(32, 3000, 1000, 50)

        # make a flat RQ
        iflat = faiss.IndexResidualQuantizer(ds.d, 5, 4)
        iflat.rq.train_type = faiss.ResidualQuantizer.Train_default
        iflat.train(ds.get_train())
        iflat.add(ds.get_database())

        # ref search result
        Dref, Iref = iflat.search(ds.get_queries(), 10)

        # get its codebooks + encoded version of the dataset
        codebooks = get_additive_quantizer_codebooks(iflat.rq)
        codes = faiss.vector_to_array(iflat.codes).reshape(-1, iflat.code_size)

        # make an IVF with 2x4 + 3x4 = 5x4 bits
        ivf = faiss.index_factory(ds.d, "IVF256(RCQ2x4),RQ3x4")

        # initialize the codebooks
        rcq = faiss.downcast_index(ivf.quantizer)
        faiss.copy_array_to_vector(
            np.vstack(codebooks[:rcq.rq.M]).ravel(),
            rcq.rq.codebooks
        )
        rcq.rq.is_trained = True
        # translation of AdditiveCoarseQuantizer::train
        rcq.ntotal = 1 << rcq.rq.tot_bits
        rcq.centroid_norms.resize(rcq.ntotal)
        rcq.rq.compute_centroid_norms(rcq.centroid_norms.data())
        rcq.is_trained = True

        faiss.copy_array_to_vector(
            np.vstack(codebooks[rcq.rq.M:]).ravel(),
            ivf.rq.codebooks
        )
        ivf.rq.is_trained = True
        ivf.is_trained = True

        # add the codes (this works because 2x4 is a multiple of 8 bits)
        ivf.add_sa_codes(codes)

        # perform exhaustive search
        ivf.nprobe = ivf.nlist

        Dnew, Inew = ivf.search(ds.get_queries(), 10)

        np.testing.assert_array_equal(Iref, Inew)
        np.testing.assert_array_almost_equal(Dref, Dnew, decimal=5)
Ejemplo n.º 4
0
    def test_training_with_limited_mem(self):
        """ make sure a different batch size gives the same result"""
        ds = datasets.SyntheticDataset(32, 3000, 1000, 0)

        xt = ds.get_train()

        rq0 = faiss.ResidualQuantizer(ds.d, 4, 6)
        rq0.train_type = faiss.ResidualQuantizer.Train_default
        rq0.max_beam_size = 5
        # rq0.verbose = True
        rq0.train(xt)
        cb0 = get_additive_quantizer_codebooks(rq0)

        rq1 = faiss.ResidualQuantizer(ds.d, 4, 6)
        rq1.train_type = faiss.ResidualQuantizer.Train_default
        rq1.max_beam_size = 5
        rq1.max_mem_distances
        rq1.max_mem_distances = 3000 * ds.d * 4 * 3
        # rq1.verbose = True
        rq1.train(xt)
        cb1 = get_additive_quantizer_codebooks(rq1)

        for c0, c1 in zip(cb0, cb1):
            self.assertTrue(np.all(c0 == c1))