コード例 #1
0
def nanopq_to_faiss(pq_nanopq):
    """Create a ``faiss.IndexPQ`` that reuses the codewords of a :class:`nanopq.PQ`.

    The ``faiss`` module has to be importable for this function to work.

    Args:
        pq_nanopq (nanopq.PQ): Source quantizer; its ``codewords`` must
            already be set.

    Returns:
        faiss.IndexPQ: A new index, flagged as trained, whose product
        quantizer received the codewords of every sub-space of the input.

    """
    assert isinstance(pq_nanopq, PQ), "Error. pq_nanopq must be nanopq.pq"
    assert pq_nanopq.codewords is not None, "Error. pq_nanopq.codewords must have been set beforehand"

    dim = pq_nanopq.Ds * pq_nanopq.M
    # Code width in bits, looked up from the dtype nanopq stores its codes in.
    bits_per_dtype = {np.uint8: 8, np.uint16: 16, np.uint32: 32}
    nbits = bits_per_dtype[pq_nanopq.code_dtype]

    index = faiss.IndexPQ(dim, pq_nanopq.M, nbits)

    for sub in range(pq_nanopq.M):
        # Codewords of this sub-space, flattened to one dimension.
        flat_codewords = pq_nanopq.codewords[sub].reshape(-1)

        # Copy them into a faiss FloatVector so a raw data pointer is available.
        cpp_buffer = faiss.FloatVector()
        faiss.copy_array_to_vector(flat_codewords, cpp_buffer)

        # Hand the sub-space codewords to the ProductQuantizer inside IndexPQ.
        index.pq.set_params(centroids=cpp_buffer.data(), m=sub)

    index.is_trained = True
    return index
コード例 #2
0
    def do_test_codec(self, nbit):
        """Round-trip exactly representable vectors through a ProductQuantizer.

        A ProductQuantizer(16, 2, nbit) is filled with random centroids
        instead of being trained. The input vectors are concatenations of
        those centroids, so decoding their codes must give the inputs back
        unchanged. Encoding through an external ``IndexFlatL2`` assignment
        index must produce the same codes.
        """
        n_vec = 100
        ksub = 1 << nbit
        pq = faiss.ProductQuantizer(16, 2, nbit)

        # Stand-in for training: load random centroids straight into the PQ.
        rng = np.random.RandomState(123)
        centroids = rng.rand(2, ksub, 8).astype('float32')
        faiss.copy_array_to_vector(centroids.ravel(), pq.centroids)

        # Choose one centroid per sub-quantizer and glue the two halves
        # together, which makes every vector encodable without error.
        picks = rng.randint(ksub, size=(n_vec, 2))
        first_half = centroids[0, picks[:, 0]]
        second_half = centroids[1, picks[:, 1]]
        x = np.hstack((first_half, second_half))

        # Encode then decode with the PQ's own assignment.
        codes = pq.compute_codes(x)
        decoded = pq.decode(codes)
        assert np.all(decoded == x)

        # Encode again, this time through an externally supplied index.
        # The index is kept in a local so it stays referenced while pq uses it.
        flat_assigner = faiss.IndexFlatL2(8)
        pq.assign_index = flat_assigner
        codes_ext = np.empty((n_vec, pq.code_size), dtype='uint8')
        pq.compute_codes_with_assign_index(
            faiss.swig_ptr(x), faiss.swig_ptr(codes_ext), n_vec)
        assert np.all(codes == codes_ext)
コード例 #3
0
    def test_serialize_to_vector(self):
        """Write a flat index into an in-memory vector and read it back.

        The restored index is obtained two ways -- by swapping the writer's
        buffer into a reader, and by copying a numpy snapshot of the buffer
        into a fresh reader. Both must return exactly the distances and
        labels of the original index.
        """
        d, nb, nq, nt = 10, 1000, 200, 500
        _, xb, xq = get_dataset_2(d, nt, nb, nq)

        source = faiss.IndexFlatL2(d)
        source.add(xb)
        Dref, Iref = source.search(xq, 5)

        def assert_same_results(restored):
            # A restored index must reproduce the reference search exactly.
            Dnew, Inew = restored.search(xq, 5)
            assert np.all(Dnew == Dref) and np.all(Inew == Iref)

        writer = faiss.VectorIOWriter()
        faiss.write_index(source, writer)

        # Take the numpy snapshot now: the swap below exchanges writer.data
        # with the reader's buffer.
        serialized = faiss.vector_to_array(writer.data)

        # Path 1: hand the vector itself over to the reader.
        swapped_reader = faiss.VectorIOReader()
        swapped_reader.data.swap(writer.data)
        assert_same_results(faiss.read_index(swapped_reader))

        # Path 2: go through the intermediate numpy array.
        array_reader = faiss.VectorIOReader()
        faiss.copy_array_to_vector(serialized, array_reader.data)
        assert_same_results(faiss.read_index(array_reader))
コード例 #4
0
    def test_equiv_rq(self):
        """
        Check that searching a flat residual-quantizer index gives the same
        results as searching an IVF index (RCQ coarse quantizer + RQ codes)
        that was given the same codebooks and the same codes.

        Nothing in the IVF index is trained: the codebooks of the flat index
        are split between the coarse quantizer and the IVF's own RQ, and the
        codes of the flat index are added as-is.
        """
        # NOTE(review): argument meaning of SyntheticDataset is not visible
        # here -- presumably (d, nt, nb, nq); confirm against datasets module.
        ds = datasets.SyntheticDataset(32, 3000, 1000, 50)

        # make a flat RQ
        iflat = faiss.IndexResidualQuantizer(ds.d, 5, 4)
        iflat.rq.train_type = faiss.ResidualQuantizer.Train_default
        iflat.train(ds.get_train())
        iflat.add(ds.get_database())

        # ref search result
        Dref, Iref = iflat.search(ds.get_queries(), 10)

        # get its codebooks + encoded version of the dataset
        # (codes are reshaped to one row of code_size bytes per vector)
        codebooks = get_additive_quantizer_codebooks(iflat.rq)
        codes = faiss.vector_to_array(iflat.codes).reshape(-1, iflat.code_size)

        # make an IVF with 2x4 + 3x4 = 5x4 bits
        ivf = faiss.index_factory(ds.d, "IVF256(RCQ2x4),RQ3x4")

        # initialize the codebooks
        # the first rcq.rq.M codebooks of the flat index go to the coarse
        # quantizer ...
        rcq = faiss.downcast_index(ivf.quantizer)
        faiss.copy_array_to_vector(
            np.vstack(codebooks[:rcq.rq.M]).ravel(),
            rcq.rq.codebooks
        )
        rcq.rq.is_trained = True
        # translation of AdditiveCoarseQuantizer::train
        # (set the number of centroids, size the norm table, fill it, then
        # flag the coarse quantizer as trained -- the order matters)
        rcq.ntotal = 1 << rcq.rq.tot_bits
        rcq.centroid_norms.resize(rcq.ntotal)
        rcq.rq.compute_centroid_norms(rcq.centroid_norms.data())
        rcq.is_trained = True

        # ... and the remaining codebooks go to the IVF's residual quantizer
        faiss.copy_array_to_vector(
            np.vstack(codebooks[rcq.rq.M:]).ravel(),
            ivf.rq.codebooks
        )
        ivf.rq.is_trained = True
        ivf.is_trained = True

        # add the codes (this works because 2x4 is a multiple of 8 bits)
        ivf.add_sa_codes(codes)

        # perform exhaustive search
        # (probing every list, so no candidate is skipped)
        ivf.nprobe = ivf.nlist

        Dnew, Inew = ivf.search(ds.get_queries(), 10)

        # labels must match exactly, distances to 5 decimals
        np.testing.assert_array_equal(Iref, Inew)
        np.testing.assert_array_almost_equal(Dref, Dnew, decimal=5)
コード例 #5
0
ファイル: test_index_binary.py プロジェクト: zhyq/faiss
    def test_encode_to_binary(self):
        """Drive PQ-based indexes with hand-made centroids on binary data.

        A ProductQuantizer with one 8-bit sub-quantizer per byte is filled
        with centroids derived from the byte values 0..255, so the codes it
        computes for the float version of ``xb`` must equal ``xb`` itself.
        The same quantizer is then plugged into an IndexPQ and an IndexIVFPQ;
        for both, every returned distance must equal 4 times ``binary_dis``
        of the corresponding query / database pair.
        """
        d, nt, nb, nq = 256, 256, 1500, 500
        n_bytes = int(d / 8)
        (xt, xb, xq) = make_binary_dataset(d, nt, nb, nq)

        def check_distances(dists, labels):
            # Each reported distance must be 4x the reference binary distance.
            for i in range(nq):
                for j, dj in zip(labels[i], dists[i]):
                    expected = binary_dis(xq[i], xb[j])
                    assert 4 * expected == dj

        pq = faiss.ProductQuantizer(d, n_bytes, 8)

        # One centroid per possible byte value, repeated for every byte slot.
        byte_values = np.tile(np.arange(256), n_bytes).astype('uint8')
        centroids = binary_to_float(byte_values.reshape(-1, 1))
        faiss.copy_array_to_vector(centroids.ravel(), pq.centroids)
        pq.is_trained = True

        codes = pq.compute_codes(binary_to_float(xb))
        assert np.all(codes == xb)

        # --- flat PQ index using the hand-made quantizer ---
        indexpq = faiss.IndexPQ(d, n_bytes, 8)
        indexpq.pq = pq
        indexpq.is_trained = True
        indexpq.add(binary_to_float(xb))

        D, I = indexpq.search(binary_to_float(xq), 3)
        check_distances(D, I)

        # --- IVF-PQ index using the hand-made quantizer ---
        nlist = 32
        quantizer = faiss.IndexFlatL2(d)
        # IndexIVFFlat is only a vehicle for training: it is built on the
        # same `quantizer` object that the IVFPQ index below receives.
        iflat = faiss.IndexIVFFlat(quantizer, d, nlist)
        iflat.train(binary_to_float(xt))

        indexivfpq = faiss.IndexIVFPQ(quantizer, d, nlist, n_bytes, 8)
        indexivfpq.pq = pq
        indexivfpq.is_trained = True
        indexivfpq.by_residual = False
        indexivfpq.add(binary_to_float(xb))
        indexivfpq.nprobe = 4

        D, I = indexivfpq.search(binary_to_float(xq), 3)
        check_distances(D, I)
コード例 #6
0
def assign_beta_2(beta_centroids, x, rfn, Inn):
    """Return, for each row of ``x``, the code ``rfn`` assigns in one sub-quantizer.

    Only the third element of ``Inn`` (the sub-quantizer number ``sq``) is
    used. ``beta_centroids`` and ``x`` are embedded into zero-filled arrays
    covering all ``rfn.nsq`` sub-quantizers / all ``rfn.d`` dimensions, the
    codebook and codes of ``rfn`` are overwritten, and column ``sq`` of the
    resulting codes is returned. When ``rfn.k == 1`` there is nothing to
    choose and an all-zero integer array is returned without touching ``rfn``.
    """
    _, _, sq = Inn
    n_rows = len(x)

    if rfn.k == 1:
        return np.zeros(x.shape[0], dtype=int)

    # Zero-padded codebook: only slot `sq` holds real centroids.
    padded_centroids = np.zeros(
        (rfn.nsq, rfn.k, rfn.M + 1), dtype='float32')
    padded_centroids[sq] = beta_centroids

    # Zero-padded vectors: only the dimensions of slot `sq` hold real data.
    padded_x = np.zeros((n_rows, rfn.d), dtype='float32')
    first_dim = sq * rfn.dsub
    last_dim = (sq + 1) * rfn.dsub
    padded_x[:, first_dim:last_dim] = x

    # Start from an empty code store, then encode with the padded codebook.
    rfn.codes.clear()
    rfn.ntotal = 0
    faiss.copy_array_to_vector(padded_centroids.ravel(), rfn.codebook)
    rfn.add_codes(n_rows, faiss.swig_ptr(padded_x))

    per_vector_codes = faiss.vector_to_array(rfn.codes).reshape(-1, rfn.nsq)
    return per_vector_codes[:, sq]
コード例 #7
0
    def test_recons_orthogona_impossible(self):
        """reverse_transform must raise when the matrix is not orthonormal.

        A LinearTransform(20, 10, True) is filled with a random matrix and
        bias. After ``set_is_orthonormal()`` the transform must report that
        it is not orthonormal, and calling ``reverse_transform`` on
        transformed data is expected to raise.
        """
        lt = faiss.LinearTransform(20, 10, True)
        rs = np.random.RandomState(10)
        A = rs.randn(10 * 20).astype('float32')
        faiss.copy_array_to_vector(A.ravel(), lt.A)
        faiss.copy_array_to_vector(rs.randn(10).astype('float32'), lt.b)

        lt.set_is_orthonormal()
        # assertFalse instead of a bare assert so the check survives `-O`.
        self.assertFalse(lt.is_orthonormal)

        x = rs.rand(30, 20).astype('float32')
        xt = lt.apply_py(x)

        # The test fails if no exception is raised; the exception type is
        # deliberately left broad, as in the original check.
        with self.assertRaises(Exception):
            lt.reverse_transform(xt)
コード例 #8
0
    def test_recons_orthonormal(self):
        """Check reverse_transform on a transform with orthonormal rows.

        The matrix is the first 10 rows of the Q factor of a random 20x20
        matrix. After ``set_is_orthonormal()`` the transform must report
        itself as orthonormal, and applying it to its own reverse-transformed
        output must give back the transformed data up to a summed squared
        error below 1e-5.
        """
        lt = faiss.LinearTransform(20, 10, True)
        rng = np.random.RandomState(10)

        # Keep 10 rows of a QR-derived orthogonal basis as the matrix.
        q_factor, _ = np.linalg.qr(rng.randn(20, 20))
        rows = q_factor[:10].astype('float32')
        faiss.copy_array_to_vector(rows.ravel(), lt.A)

        bias = rng.randn(10).astype('float32')
        faiss.copy_array_to_vector(bias, lt.b)

        lt.set_is_orthonormal()
        assert lt.is_orthonormal

        x = rng.rand(30, 20).astype('float32')
        forward = lt.apply_py(x)
        backward = lt.reverse_transform(forward)
        forward_again = lt.apply_py(backward)

        residual = forward - forward_again
        err = (residual ** 2).sum()

        self.assertGreater(1e-5, err)
コード例 #9
0
    def subtest_add2col(self, xb, xq, index, qname):
        """Search a copy of ``index`` widened by two dimensions.

        The extra dimensions are meant to exercise the non-SIMD code path.
        Nothing is retrained: the database, the queries, the coarse
        centroids and (for the '8bit' / '4bit' quantizers) the trained
        ScalarQuantizer table are each extended by two columns.

        Returns the result of searching the widened index for the 10 nearest
        neighbours of the widened queries.
        """
        _, d = xb.shape
        d2 = d + 2

        xb2 = self.add2columns(xb)
        xq2 = self.add2columns(xq)

        # Rebuild the coarse quantizer from the widened centroids.
        nlist = index.nlist
        src_quantizer = faiss.downcast_index(index.quantizer)
        quantizer2 = faiss.IndexFlat(d2, index.metric_type)
        centroids = faiss.vector_to_array(src_quantizer.xb).reshape(nlist, d)
        quantizer2.add(self.add2columns(centroids))

        index2 = faiss.IndexIVFScalarQuantizer(
            quantizer2, d2, nlist, index.sq.qtype, index.metric_type)
        index2.nprobe = 4

        if qname not in ('8bit', '4bit'):
            # Other quantizer types: reuse the trained table unchanged.
            index2.sq.trained = index.sq.trained
        else:
            # The trained table has 2 rows: vmins and vdiffs.
            trained = faiss.vector_to_array(index.sq.trained).reshape(2, -1)
            nt = trained.shape[1]
            new_nt = int(nt * d2 / d)
            padding = np.zeros((2, new_nt - nt), dtype='float32')
            trained2 = np.hstack((trained, padding))
            # vdiff of the added entries is 1 to avoid a division by zero.
            trained2[1, nt:] = 1.0
            faiss.copy_array_to_vector(trained2.ravel(), index2.sq.trained)

        index2.is_trained = True
        index2.add(xb2)
        return index2.search(xq2, 10)
コード例 #10
0
    def resume(self, inc, resume_full_path):
        """
        Reload a previously saved REMIND model so that training can continue.
        :param inc: increment number whose saved files should be loaded
        :param resume_full_path: directory that holds the saved files
        :return: (classifier checkpoint dict, latent dict, rehearsal ixs list,
            class id to item ix dict, product quantizer)
        """

        print(f'\nResuming REMIND model from {resume_full_path}')

        # Restore the classifier weights and the optimizer state.
        classifier_file = os.path.join(
            resume_full_path, 'remind_classifier_F_%d.pth' % inc)
        state = torch.load(classifier_file)
        self.classifier_F.load_state_dict(state['model_state_dict'])
        self.optimizer.load_state_dict(state['optimizer_state_dict'])

        # Restore the replay buffer contents.
        # NOTE(review): pickle.load runs arbitrary code from the file; only
        # resume from checkpoints you trust.
        buffer_file = os.path.join(
            resume_full_path, 'remind_buffer_%d.pkl' % inc)
        with open(buffer_file, 'rb') as f:
            d = pickle.load(f)

        # Rebuild the product quantizer and load the saved centroids into it.
        nbits = int(np.log2(self.codebook_size))
        pq = faiss.ProductQuantizer(
            self.num_channels, self.num_codebooks, nbits)
        faiss.copy_array_to_vector(d['pq_centroids'].ravel(), pq.centroids)

        latent_dict = d['latent_dict']
        rehearsal_ixs = d['rehearsal_ixs']
        class_id_to_item_ix_dict = d['class_id_to_item_ix_dict']
        return state, latent_dict, rehearsal_ixs, class_id_to_item_ix_dict, pq
コード例 #11
0
    print nrun


# Auto-tuning setup: configure the parameter space `ps` from the command-line
# options, then build the criterion used to score each experiment.
if parametersets == ['autotune']:

    ps.n_experiments = args.n_autotune
    ps.min_test_duration = args.min_test_duration

    # Each entry is "name:max"; drop the values of that parameter range that
    # are not strictly below the maximum.
    for kv in args.autotune_max:
        k, vmax = kv.split(':')
        vmax = float(vmax)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("limiting %s to %g" % (k, vmax))
        pr = ps.add_range(k)
        values = faiss.vector_to_array(pr.values)
        values = np.array([v for v in values if v < vmax])
        faiss.copy_array_to_vector(values, pr.values)

    # Each entry is "name:v1,v2,..."; replace the values of that parameter
    # range with the given list.
    for kv in args.autotune_range:
        k, vals = kv.split(':')
        vals = np.fromstring(vals, sep=',')
        print("setting %s to %s" % (k, vals))
        pr = ps.add_range(k)
        faiss.copy_array_to_vector(vals, pr.values)

    # setup the Criterion object: optimize for 1-R@1
    crit = faiss.OneRecallAtRCriterion(nq, 1)

    # by default, the criterion will request only 1 NN
    crit.nnn = 100
    crit.set_groundtruth(None, gt.astype('int64'))
コード例 #12
0
        rfn = faiss.ReconstructFromNeighbors(index_hnsw, k, nsq)
    else:
        print "train beta centroids"
        rfn = faiss.ReconstructFromNeighbors(index_hnsw, args.beta_k,
                                             args.beta_nsq)

        xb_full = vec_transform(sanitize(xb[:args.beta_ntrain]))

        beta_centroids = neighbor_codec.train_beta_codebook(
            rfn, xb_full, niter=args.beta_niter)

        print "  storing", args.beta_centroids
        np.save(args.beta_centroids, beta_centroids)

    faiss.copy_array_to_vector(beta_centroids.ravel(), rfn.codebook)
    index_hnsw.reconstruct_from_neighbors = rfn

    if rfn.k == 1:
        pass  # no codes to take care of
    elif os.path.exists(args.neigh_recons_codes):
        print "loading neigh codes", args.neigh_recons_codes
        codes = np.load(args.neigh_recons_codes)
        assert codes.size == rfn.code_size * index.ntotal
        faiss.copy_array_to_vector(codes.astype('uint8'), rfn.codes)
        rfn.ntotal = index.ntotal
    else:
        print "encoding neigh codes"
        t0 = time.time()

        bs = 1000000 if args.add_bs == -1 else args.add_bs
コード例 #13
0
# Run k-means in slices of `args.eval_freq` iterations and report the
# quantization error on `xb` after each slice.
if ngpu > 0:
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("moving index to GPU")
    index = faiss.index_cpu_to_all_gpus(index)


clustering = faiss.Clustering(d, args.k)

clustering.verbose = True
clustering.seed = args.seed
clustering.max_points_per_centroid = 10**6
clustering.min_points_per_centroid = 1


for iter0 in range(0, args.niter, args.eval_freq):
    # This slice covers iterations [iter0, iter1).
    iter1 = min(args.niter, iter0 + args.eval_freq)
    clustering.niter = iter1 - iter0

    if iter0 > 0:
        # Put the centroids of the previous slice back before training again
        # (`centroids` is only defined from the second slice on).
        faiss.copy_array_to_vector(centroids.ravel(), clustering.centroids)

    clustering.train(sanitize(xt), index)

    # Rebuild the index so that it contains exactly the current centroids.
    index.reset()
    centroids = faiss.vector_to_array(clustering.centroids).reshape(args.k, d)
    index.add(centroids)

    # Assign every vector of xb to its nearest centroid.
    _, I = index.search(sanitize(xb), 1)

    # Sum of squared differences between xb and its assigned centroids.
    error = ((xb - centroids[I.ravel()]) ** 2).sum()

    print("iter1=%d quantization error on test: %.4f" % (iter1, error))
コード例 #14
0
def deserialize_index(data):
    """Rebuild a faiss index from ``data``.

    ``data`` is copied into the buffer of a ``faiss.VectorIOReader`` and the
    index is read back from that reader.
    """
    vector_reader = faiss.VectorIOReader()
    faiss.copy_array_to_vector(data, vector_reader.data)
    index = faiss.read_index(vector_reader)
    return index