Python IndexPQ Exemples, faiss.IndexPQ Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : test_index_accuracy.py Projet : xiongziqi/faiss

    def test_OPQ(self):
        """Check that an OPQ-transformed PQ index scores higher than plain PQ.

        Both indexes are run through the ``Randu10kUnbalanced`` evaluator and
        the results returned by ``evalres`` are compared at keys 1, 10 and 100.
        """
        n_sub = 4

        evaluator = Randu10kUnbalanced()
        dim = evaluator.d

        # Baseline: plain product quantizer.
        plain_pq = faiss.IndexPQ(dim, n_sub, 8)
        e_pq = evaluator.evalres(evaluator.launch('PQ', plain_pq))

        # Same PQ configuration, placed behind an OPQMatrix pre-transform.
        inner_pq = faiss.IndexPQ(dim, n_sub, 8)
        opq_transform = faiss.OPQMatrix(dim, n_sub)
        opq_transform.niter = 10
        opq_transform.niter_pq = 4
        opq_index = faiss.IndexPreTransform(opq_transform, inner_pq)
        e_opq = evaluator.evalres(evaluator.launch('OPQ', opq_index))

        print('e_pq=%s' % e_pq)
        print('e_opq=%s' % e_opq)

        # OPQ must beat PQ at every evaluated key.
        for rank in (1, 10, 100):
            assert e_opq[rank] > e_pq[rank]

Exemple #2

0

Afficher le fichier

    def test_faiss_to_nanopq(self):
        """Convert a trained ``faiss.IndexPQ`` to nanopq and compare outputs.

        The converted quantizer must expose codewords of the expected shape,
        encode the database to the same codes, and rank neighbours the same
        way as the original faiss index.
        """
        D, M, Ks = 32, 4, 256
        Nt, Nb, Nq = 2000, 10000, 100
        nbits = int(np.log2(Ks))
        assert nbits == 8

        # Drawn in the order train, base, query.
        Xt, Xb, Xq = (
            np.random.rand(n_rows, D).astype(np.float32)
            for n_rows in (Nt, Nb, Nq)
        )

        pq_faiss = faiss.IndexPQ(D, M, nbits)
        pq_faiss.train(x=Xt)
        pq_faiss.add(x=Xb)

        pq_nanopq, Cb_faiss = nanopq.faiss_to_nanopq(pq_faiss=pq_faiss)
        expected_shape = (M, Ks, int(D / M))
        self.assertEqual(pq_nanopq.codewords.shape, expected_shape)

        # Both libraries must produce the same codes for the database.
        Cb_nanopq = pq_nanopq.encode(vecs=Xb)
        self.assertTrue(np.array_equal(Cb_nanopq, Cb_faiss))

        # Both libraries must return the same top-k ids per query.
        topk = 100
        _, ids1 = pq_faiss.search(x=Xq, k=topk)
        ranked = []
        for xq in Xq:
            dists = pq_nanopq.dtable(query=xq).adist(codes=Cb_nanopq)
            ranked.append(np.argsort(dists)[:topk])
        ids2 = np.array(ranked)
        self.assertTrue(np.array_equal(ids1, ids2))

Exemple #3

0

Afficher le fichier

Fichier : index.py Projet : sb1992/FiD

 def __init__(self, vector_sz, n_subquantizers=0, n_bits=8):
     """Create the faiss index and an empty index-id -> db-id table.

     Args:
         vector_sz: Vector dimensionality passed to the faiss index.
         n_subquantizers: When positive, an inner-product ``IndexPQ`` with
             this many sub-quantizers is built; otherwise ``IndexFlatIP``.
         n_bits: Third argument forwarded to ``IndexPQ`` (unused for the
             flat index).
     """
     use_pq = n_subquantizers > 0
     self.index = (
         faiss.IndexPQ(vector_sz, n_subquantizers, n_bits,
                       faiss.METRIC_INNER_PRODUCT)
         if use_pq
         else faiss.IndexFlatIP(vector_sz)
     )
     self.index_id_to_db_id = np.empty(0, dtype=np.int64)

Exemple #4

0

Afficher le fichier

    def test_faiss_nanopq_compare_accuracy(self):
        """Train faiss and nanopq PQs on the same data and compare errors.

        Each quantizer encodes and decodes the database; the relative
        reconstruction errors must differ by less than 1%.
        """
        D, M, Ks = 32, 4, 256
        Nt, Nb, Nq = 20000, 10000, 100
        nbits = int(np.log2(Ks))
        assert nbits == 8
        Xt = np.random.rand(Nt, D).astype(np.float32)
        Xb = np.random.rand(Nb, D).astype(np.float32)
        Xq = np.random.rand(Nq, D).astype(np.float32)

        def relative_error(reconstructed):
            # Squared reconstruction error normalised by the squared norm of Xb.
            return ((Xb - reconstructed) ** 2).sum() / (Xb ** 2).sum()

        # faiss round trip
        pq_faiss = faiss.IndexPQ(D, M, nbits)
        pq_faiss.train(x=Xt)
        Xb_faiss_ = pq_faiss.pq.decode(pq_faiss.pq.compute_codes(Xb))

        # nanopq round trip
        pq_nanopq = nanopq.PQ(M=M, Ks=Ks)
        pq_nanopq.fit(vecs=Xt)
        Xb_nanopq_ = pq_nanopq.decode(codes=pq_nanopq.encode(vecs=Xb))

        # Reconstruction error should be almost identical
        avg_relative_error_faiss = relative_error(Xb_faiss_)
        avg_relative_error_nanopq = relative_error(Xb_nanopq_)
        signed_diff = (
            avg_relative_error_faiss - avg_relative_error_nanopq
        ) / avg_relative_error_faiss
        diff_rel = np.sqrt(signed_diff ** 2)  # magnitude of the relative gap
        print("avg_rel_error_faiss:", avg_relative_error_faiss)
        print("avg_rel_error_nanopq:", avg_relative_error_nanopq)
        print("diff rel:", diff_rel)

        self.assertLess(diff_rel, 0.01)

Exemple #5

0

Afficher le fichier

Fichier : test_index_accuracy.py Projet : xiongziqi/faiss

 def test_IndexPQ_ip(self):
     """Evaluate an inner-product ``IndexPQ`` and check its score at key 10."""
     ip_index = faiss.IndexPQ(d, M, nbits_per_index,
                              faiss.METRIC_INNER_PRODUCT)
     scores = ev.evalres(ev.launch('FLAT / PQ IP', ip_index))
     # should give 0.070  0.230  0.260
     # (same result as regular PQ on normalized distances)
     assert scores[10] > 0.2

Exemple #6

0

Afficher le fichier

def nanopq_to_faiss(pq_nanopq):
    """Build a `faiss.IndexPQ <https://github.com/facebookresearch/faiss/blob/master/IndexPQ.h>`_ from a :class:`nanopq.PQ`.

    The faiss module must be installed; see the
    `install notes <https://github.com/facebookresearch/faiss/blob/master/INSTALL.md>`_.

    Args:
        pq_nanopq (nanopq.PQ): A PQ instance whose codewords are already set.

    Returns:
        faiss.IndexPQ: An index flagged as trained, loaded with the codewords
        of ``pq_nanopq``.

    """
    assert isinstance(pq_nanopq, PQ), "Error. pq_nanopq must be nanopq.pq"
    assert pq_nanopq.codewords is not None, "Error. pq_nanopq.codewords must have been set beforehand"

    total_dim = pq_nanopq.Ds * pq_nanopq.M
    bits_by_dtype = {np.uint8: 8, np.uint16: 16, np.uint32: 32}
    nbits = bits_by_dtype[pq_nanopq.code_dtype]

    pq_faiss = faiss.IndexPQ(total_dim, pq_nanopq.M, nbits)

    for m in range(pq_nanopq.M):
        # Flatten the m-th codebook from (Ks, Ds) to (Ks * Ds,) ...
        flat_codewords = pq_nanopq.codewords[m].reshape(-1)

        # ... and copy it into a C++ std::vector<float>.
        cpp_buffer = faiss.FloatVector()
        faiss.copy_array_to_vector(flat_codewords, cpp_buffer)

        # Install the codebook as the centroids of sub-quantizer m.
        pq_faiss.pq.set_params(centroids=cpp_buffer.data(), m=m)

    pq_faiss.is_trained = True
    return pq_faiss

Exemple #7

0

Afficher le fichier

    def pq_train(self, x, m, n_bits):
        """Create ``self.pq`` as a ``faiss.IndexPQ`` and train it on ``x``.

        Args:
            x: Training matrix; its second dimension is used as the vector size.
            m: Second argument forwarded to ``faiss.IndexPQ``.
            n_bits: Third argument forwarded to ``faiss.IndexPQ``.
        """
        # The index dimensionality comes from the training matrix itself.
        self.pq = faiss.IndexPQ(x.shape[1], m, n_bits)
        self.pq.train(x)

Exemple #8

0

Afficher le fichier

Fichier : faiss.py Projet : cynricshu/ann-benchmarks

    def fit(self, X):
        """Train an L2 ``faiss.IndexPQ`` on ``X``, add ``X`` to it, and keep it.

        The finished index is stored in ``self.index`` only after both
        training and adding have completed.
        """
        dim = X.shape[1]
        print("start to fit, X.shape[1]=%d, summary=%s" %
              (dim, self.__str__()))

        pq_index = faiss.IndexPQ(dim, self._n_M, self._n_bits,
                                 faiss.METRIC_L2)
        pq_index.train(X)
        pq_index.add(X)
        self.index = pq_index

Exemple #9

0

Afficher le fichier

Fichier : test_index_accuracy.py Projet : zhujindi/faiss

 def test_polysemous_OOM(self):
     """Polysemous training with 13-bit codes must raise, not exhaust memory.

     This configuration used to cause an OOM when training polysemous codes
     with a large number of bits.
     """
     dim = 32
     train_set, _, _ = get_dataset_2(dim, 10000, 0, 0)

     index = faiss.IndexPQ(dim, M, 13)
     index.do_polysemous_training = True
     index.pq.cp.niter = 0
     # Cap polysemous training memory at 128 MiB.
     index.polysemous_training.max_memory = 128 * 1024 * 1024

     with self.assertRaises(RuntimeError):
         index.train(train_set)

Exemple #10

0

Afficher le fichier

    def test_id_remap_idmap(self):
        """``IndexIDMap`` must return the caller-supplied ids from search.

        Vectors are added with reversed ids, so every returned label must
        equal ``nb - 1 - <label from the plain index>``.
        """
        k = 10

        # Reference run: plain index, implicit sequential ids.
        plain = faiss.IndexPQ(d, 8, 8)
        plain.train(xt)
        plain.add(xb)
        _, ref_labels = plain.search(xq, k)

        # Remapped run: same index type wrapped in an IndexIDMap, ids reversed.
        reversed_ids = np.arange(nb)[::-1].copy()
        sub_index = faiss.IndexPQ(d, 8, 8)
        remapped = faiss.IndexIDMap(sub_index)
        remapped.train(xt)
        remapped.add_with_ids(xb, reversed_ids)
        _, labels = remapped.search(xq, k)

        assert np.all(labels == nb - 1 - ref_labels)

Exemple #11

0

Afficher le fichier

Fichier : test_index_binary.py Projet : zhyq/faiss

    def test_encode_to_binary(self):
        """Check that a PQ with a hand-built codebook reproduces binary codes.

        The product quantizer's centroids are filled in directly instead of
        being trained. The test then asserts that (1) encoding the
        float-converted database gives back the binary database itself, and
        (2) distances returned by ``IndexPQ`` and ``IndexIVFPQ`` using that
        quantizer equal four times ``binary_dis`` of the corresponding
        binary vectors.
        """
        d = 256
        nt = 256
        nb = 1500
        nq = 500
        (xt, xb, xq) = make_binary_dataset(d, nt, nb, nq)
        pq = faiss.ProductQuantizer(d, int(d / 8), 8)

        # Float form of the byte values 0..255, repeated once per group of 8
        # dimensions.
        centroids = binary_to_float(
            np.tile(np.arange(256), int(d / 8)).astype('uint8').reshape(-1, 1))

        # Install the centroids by hand and mark the quantizer as trained.
        faiss.copy_array_to_vector(centroids.ravel(), pq.centroids)
        pq.is_trained = True

        codes = pq.compute_codes(binary_to_float(xb))

        # The PQ codes must be identical to the original binary vectors.
        assert np.all(codes == xb)

        # Same quantizer plugged into a flat PQ index.
        indexpq = faiss.IndexPQ(d, int(d / 8), 8)
        indexpq.pq = pq
        indexpq.is_trained = True

        indexpq.add(binary_to_float(xb))
        D, I = indexpq.search(binary_to_float(xq), 3)

        # Returned distances must equal 4x the reference binary distance.
        for i in range(nq):
            for j, dj in zip(I[i], D[i]):
                ref_dis = binary_dis(xq[i], xb[j])
                assert 4 * ref_dis == dj

        nlist = 32
        quantizer = faiss.IndexFlatL2(d)
        # pretext class for training
        # NOTE(review): iflat shares `quantizer` with indexivfpq below, so this
        # train() call is presumably what prepares the coarse quantizer - confirm.
        iflat = faiss.IndexIVFFlat(quantizer, d, nlist)
        iflat.train(binary_to_float(xt))

        indexivfpq = faiss.IndexIVFPQ(quantizer, d, nlist, int(d / 8), 8)

        # Reuse the hand-built PQ and mark the IVFPQ index trained without
        # calling train() on it.
        indexivfpq.pq = pq
        indexivfpq.is_trained = True
        indexivfpq.by_residual = False

        indexivfpq.add(binary_to_float(xb))
        indexivfpq.nprobe = 4

        D, I = indexivfpq.search(binary_to_float(xq), 3)

        # Same distance check as for the flat PQ index.
        for i in range(nq):
            for j, dj in zip(I[i], D[i]):
                ref_dis = binary_dis(xq[i], xb[j])
                assert 4 * ref_dis == dj

Exemple #12

0

Afficher le fichier

    def test_id_remap_idmap(self):
        """Search through ``IndexIDMap`` must report the ids given at add time.

        The database is added with ids in reverse order, so each label from
        the wrapped index must be ``nb - 1`` minus the label from an
        unwrapped reference index.
        """
        k = 10

        # Reference: index without remapping.
        baseline = faiss.IndexPQ(d, 8, 8)
        baseline.train(xt)
        baseline.add(xb)
        _, baseline_labels = baseline.search(xq, k)

        # Wrapped index fed with explicitly reversed ids.
        custom_ids = np.arange(nb)[::-1].copy()
        sub_index = faiss.IndexPQ(d, 8, 8)
        id_mapped = faiss.IndexIDMap(sub_index)
        id_mapped.train(xt)
        id_mapped.add_with_ids(xb, custom_ids)

        _, mapped_labels = id_mapped.search(xq, k)

        assert np.all(mapped_labels == nb - 1 - baseline_labels)

Exemple #13

0

Afficher le fichier

Fichier : test_index_accuracy.py Projet : xiongziqi/faiss

    def test_IndexPQ_refined(self):
        """Wrapping PQ in ``IndexRefineFlat`` must not lower the score at 10.

        Three runs are compared: plain PQ, refined PQ, and refined PQ with
        ``k_factor`` set to 4; each must score at least as high as the
        previous one.
        """
        base = faiss.IndexPQ(d, M, nbits_per_index)
        e = ev.evalres(ev.launch('PQ non-refined', base))
        # Empty the index before it is reused inside the refining wrapper.
        base.reset()

        refined = faiss.IndexRefineFlat(base)
        e2 = ev.evalres(ev.launch('PQ refined', refined))
        assert e2[10] >= e[10]

        refined.k_factor = 4
        e3 = ev.evalres(ev.launch('PQ refined*4', refined))
        assert e3[10] >= e2[10]

Exemple #14

0

Afficher le fichier

Fichier : test_build_blocks.py Projet : ifeherva/faiss

 def test_rand_vector(self):
     """Check that ``rand_smooth_vectors`` output compresses reasonably well
     with a small PQ (intersection with exact k-NN in a fixed window)."""
     data = faiss.rand_smooth_vectors(1300, 32)
     # Split into train / database / query parts.
     xt, xb, xq = data[:1000], data[1000:1200], data[1200:]

     _, gt = faiss.knn(xq, xb, 10)

     index = faiss.IndexPQ(32, 4, 4)
     index.train(xt)
     index.add(xb)
     _, found = index.search(xq, 10)

     ninter = faiss.eval_intersection(found, gt)
     # 445 for SyntheticDataset
     self.assertGreater(ninter, 420)
     self.assertLess(ninter, 460)

Exemple #15

0

Afficher le fichier

def qnnd(features_train, features_test, k, m, c):
    """Score test samples by PQ-approximated nearest-neighbour distance.

    A ``faiss.IndexPQ`` is trained on and filled with ``features_train``;
    ``features_test`` is then searched for its ``k`` nearest neighbours.

    Args:
        features_train: Training feature matrix (also used as the database).
        features_test: Query feature matrix.
        k: Number of neighbours requested from the search.
        m: Second argument forwarded to ``faiss.IndexPQ``.
        c: Third argument forwarded to ``faiss.IndexPQ``.

    Returns:
        tuple: ``(anomaly_scores, test_runtime)`` where ``anomaly_scores`` is
        the first column of the returned distance matrix and ``test_runtime``
        is the wall-clock search time in seconds.
    """
    # Build the index; dimensionality comes from the training features.
    pq_index = faiss.IndexPQ(features_train.shape[1], m, c)
    pq_index.train(features_train)
    pq_index.add(features_train)

    # Only the search call is timed.
    t_start = time.time()
    distances, _ = pq_index.search(features_test, k)
    test_runtime = time.time() - t_start

    return distances[:, 0], test_runtime

Exemple #16

0

Afficher le fichier

def main():
    """Export GIST vectors, queries and a trained PQ codebook to binary files.

    Reads the base, query and learn sets from ``D:\\data\\gist``, trains a
    ``faiss.IndexPQ`` with one 8-bit sub-quantizer per pair of dimensions on
    the learn set, and writes the base vectors, the query vectors and the
    reshaped centroids to ``gist_vector.bin``, ``gist_query.bin`` and
    ``gist_codebook.bin``.
    """
    # Raw strings: "\d" and "\g" are not valid escape sequences, so the
    # non-raw form only worked by accident and triggers a warning on modern
    # Python. The resulting path values are unchanged.
    xd = fvecs_read(r"D:\data\gist\gist_base.fvecs")
    xq = fvecs_read(r"D:\data\gist\gist_query.fvecs")
    xt = fvecs_read(r"D:\data\gist\gist_learn.fvecs")

    dim = xd.shape[1]
    n_sub = dim // 2  # one sub-quantizer per pair of dimensions

    # NOTE(review): this flat index is filled but not used again in the
    # visible code - confirm whether it can be dropped.
    index = faiss.IndexFlatL2(dim)
    index.add(sanitize(xd))

    pqidx = faiss.IndexPQ(dim, n_sub, 8)
    pqidx.train(sanitize(xt))
    centroids = faiss.vector_to_array(pqidx.pq.centroids)
    print(centroids.shape)
    # (sub-quantizer, centroid id, sub-vector component); 256 = 2 ** 8 bits.
    centroids = centroids.reshape(n_sub, 256, 2)

    DEFAULT_write("gist_vector.bin", xd)
    DEFAULT_write("gist_query.bin", xq)
    codebooks_write("gist_codebook.bin", centroids)

Exemple #17

0

Afficher le fichier

Fichier : test_index_accuracy.py Projet : PhilipBAdams/faiss-learned-termination-prior-weighted

    def test_polysemous(self):
        """Polysemous search must run full PQ on under 20% of the codes.

        The index is evaluated once as a normal PQ and once with the
        polysemous search type; the global ``indexPQ_stats`` counters are then
        used to check how many codes passed the Hamming filter.
        """
        index = faiss.IndexPQ(d, M, nbits_per_index)
        index.do_polysemous_training = True
        # reduce nb iterations to speed up training for the test
        index.polysemous_training.n_iter = 50000
        index.polysemous_training.n_redo = 1

        e_baseline = ev.evalres(ev.launch('normal PQ', index))

        # Switch the same index to polysemous search with a Hamming threshold.
        index.search_type = faiss.IndexPQ.ST_polysemous
        index.polysemous_ht = int(M / 16. * 58)

        # Start from clean global counters so they reflect this run only.
        stats = faiss.cvar.indexPQ_stats
        stats.reset()

        run_label = 'Polysemous ht=%d' % index.polysemous_ht
        e_polysemous = ev.evalres(ev.launch(run_label, index))
        print(e_baseline, e_polysemous, index.polysemous_ht)
        print(stats.n_hamming_pass, stats.ncode)

        # The randu dataset is difficult, so we are not too picky on
        # the results. Only the filtering ratio is asserted here: full PQ
        # must be computed on fewer than 20% of the data.
        assert stats.n_hamming_pass < stats.ncode / 5

Exemple #18

0

Afficher le fichier

Fichier : test_index_accuracy.py Projet : xiongziqi/faiss

 def test_IndexPQ(self):
     """Evaluate a default-metric ``IndexPQ`` and check its score at key 10."""
     pq_index = faiss.IndexPQ(d, M, nbits_per_index)
     scores = ev.evalres(ev.launch('FLAT / PQ L2', pq_index))
     # should give 0.070  0.230  0.260
     assert scores[10] > 0.2

Exemple #19

0

Afficher le fichier

 def __init__(self, d, nlist=100):
     """Create a PQ index for ``d``-dimensional vectors plus an ID-map wrapper.

     Args:
         d: Vector dimensionality passed to ``faiss.IndexPQ``.
         nlist: Not used anywhere in this constructor.
     """
     # 16 and 8 are forwarded as IndexPQ's second and third arguments.
     self.index = faiss.IndexPQ(d, 16, 8)
     # NOTE(review): nprobe is usually an IVF-index setting - confirm it has
     # any effect on a plain IndexPQ.
     self.index.nprobe = 10
     # Wrapper around self.index (the same underlying index object).
     self.index2 = faiss.IndexIDMap(self.index)

Exemple #20

0

Afficher le fichier

Fichier : faiss.py Projet : mayankanand007/pyserini

            with open(path) as f_in, open(os.path.join(args.output, 'docid'),
                                          'w') as f_out:
                for line in f_in:
                    info = json.loads(line)
                    docid = info['id']
                    vector = info['vector']
                    f_out.write(f'{docid}\n')
                    vectors.append(vector)
    vectors = np.array(vectors, dtype='float32')
    print(vectors.shape)

    # Pick the index type from the --hnsw / --pq flags; every variant is
    # configured for inner-product similarity.
    if args.hnsw and args.pq:
        index = faiss.IndexHNSWPQ(args.dim, args.pq_m, args.M)
        index.hnsw.efConstruction = args.efC
        index.metric_type = faiss.METRIC_INNER_PRODUCT
    elif args.hnsw:
        index = faiss.IndexHNSWFlat(args.dim, args.M,
                                    faiss.METRIC_INNER_PRODUCT)
        index.hnsw.efConstruction = args.efC
    elif args.pq:
        # NOTE(review): the HNSW+PQ branch above passes args.pq_m for the PQ
        # part and uses args.M separately; here args.M is passed as IndexPQ's
        # second argument - confirm this should not be args.pq_m.
        index = faiss.IndexPQ(args.dim, args.M, 8, faiss.METRIC_INNER_PRODUCT)
    else:
        index = faiss.IndexFlatIP(args.dim)
    index.verbose = True

    # Only the PQ-based variants are trained before vectors are added.
    if args.pq:
        index.train(vectors)

    index.add(vectors)
    # Persist the populated index as <output>/index.
    faiss.write_index(index, os.path.join(args.output, 'index'))

Exemple #21

0

Afficher le fichier

# Random codes (values below 2**31, stored as uint64) used as the database
# for the codec timing below.
codes = rs.randint(1 << 31, size=nb).astype('uint64')

print("code_size=%d nv=%d %.2f bits" %
      (codec.code_size, codec.nv, np.log2(codec.nv)))
assert codec.code_size == 8

# NOTE(review): dis and labels are allocated but not used in the visible
# code - confirm whether a later part of the script needs them.
dis = np.empty((nq, k), dtype='float32')
labels = np.empty((nq, k), dtype='int64')

# Time the codec's nearest-neighbour search over the random codes.
t0 = time.time()
codec.find_nn(codes, xq)
t1 = time.time()

print("time for code_size=%d nq=%d nb=%d: %.3f s (%.3f ms/query)" %
      (codec.code_size, nq, nb, t1 - t0, (t1 - t0) * 1000 / nq))

# Comparison point: an IndexPQ built with arguments (dim, 8, 8).
index = faiss.IndexPQ(dim, 8, 8)

# The same random Gaussian vectors are used for training and as database.
xb = rs.randn(nb, dim).astype('float32')
print("train")
index.train(xb)
print("add")
index.add(xb)

# Time a 1-nearest-neighbour search with the same queries.
t0 = time.time()
index.search(xq, 1)
t1 = time.time()

print("time for IndexPQ code_size=%d nq=%d nb=%d: %.3f s (%.3f ms/query)" %
      (index.pq.code_size, nq, nb, t1 - t0, (t1 - t0) * 1000 / nq))

Exemple #22

0

Afficher le fichier

Fichier : bench_polysemous_sift1m.py Projet : walkerlala/apppp

#!/usr/bin/env python2

from __future__ import print_function
import time
import numpy as np

import faiss
from datasets import load_sift1M, evaluate

print("load data")
# load_sift1M returns the database, query, training and ground-truth arrays,
# in that order.
xb, xq, xt, gt = load_sift1M()
nq, d = xq.shape

# index with 16 subquantizers, 8 bit each
index = faiss.IndexPQ(d, 16, 8)
# Enable polysemous training for the train() call below.
index.do_polysemous_training = True
index.verbose = True

print("train")

index.train(xt)

print("add vectors to index")

index.add(xb)

# Restrict faiss to a single OpenMP thread for the timings that follow.
nt = 1
faiss.omp_set_num_threads(1)

# end=' ' keeps the cursor on this line so later output continues it.
print("PQ baseline", end=' ')

Exemple #23

0

Afficher le fichier

                    for line in f_in:
                        info = json.loads(line)
                        docid = info['id']
                        vector = info['vector']
                        f_out.write(f'{docid}\n')
                        vectors.append(vector)
    vectors = np.array(vectors, dtype='float32')
    print(vectors.shape)

    # Pick the index type from the --hnsw / --pq flags; every variant is
    # configured for inner-product similarity.
    if args.hnsw and args.pq:
        index = faiss.IndexHNSWPQ(args.dim, args.pq_m, args.M)
        index.hnsw.efConstruction = args.efC
        index.metric_type = faiss.METRIC_INNER_PRODUCT
    elif args.hnsw:
        index = faiss.IndexHNSWFlat(args.dim, args.M,
                                    faiss.METRIC_INNER_PRODUCT)
        index.hnsw.efConstruction = args.efC
    elif args.pq:
        # Plain PQ: sub-quantizer count and bits per code come from the
        # dedicated --pq_m / --pq_nbits options.
        index = faiss.IndexPQ(args.dim, args.pq_m, args.pq_nbits,
                              faiss.METRIC_INNER_PRODUCT)
    else:
        index = faiss.IndexFlatIP(args.dim)
    index.verbose = True

    # Only the PQ-based variants are trained before vectors are added.
    if args.pq:
        index.train(vectors)

    index.add(vectors)
    # Report how many vectors the index now holds, then persist it as
    # <output>/index.
    print(index.ntotal)
    faiss.write_index(index, os.path.join(args.output, 'index'))

Exemple #24

0

Afficher le fichier

def PQCpu(config):
    """Benchmark CPU ``faiss.IndexPQ`` creation, batched adds and searches.

    Two modes, selected by ``config['test_batch_write']``:

    * batch-write mode: for each of ``db_num`` indexes, train on 10000
      random vectors, then time ``db_size / write_batch_num`` successive
      ``add`` calls of ``write_batch_num`` vectors each;
    * default mode: for each of ``db_num`` indexes, time train + add of
      ``db_size`` random vectors, then time 10 searches of ``query_num``
      random queries per index.

    Timings are printed; nothing is asserted about them.

    Args:
        config (dict): Uses the keys ``dimension``, ``db_size``,
            ``query_num``, ``top_k``, ``sub_quantizers``, ``bits_per_code``,
            ``test_batch_write``, ``db_num`` and, in batch-write mode,
            ``write_batch_num``.

    Returns:
        list: The populated ``faiss.IndexPQ`` objects; empty when ``db_num``
        is 0 (previously this raised ``ZeroDivisionError``).
    """
    print("PQCpu, ", config)
    d = config['dimension']  # dimension
    nb = config['db_size']  # database size
    nq = config['query_num']  # nb of queries
    topk = config['top_k']
    m = config['sub_quantizers']
    nbits = config['bits_per_code']
    search_repeat = 10

    index_list = []
    create_ave_duration = 0
    search_ave_duration = 0

    # Exact comparison kept on purpose: only True (or 1) selects this mode,
    # an arbitrary truthy value does not.
    if config['test_batch_write'] == True:  # noqa: E712
        batch_write_ave_duration = 0
        batch_write_num = config['write_batch_num']  # vectors per add() call
        batch_write_time = int(nb / batch_write_num)  # add() calls per index
        # Print the value the label names (it used to print batch_write_num).
        print("batch_write_time = ", batch_write_time)
        for i in range(config['db_num']):
            index_pq = faiss.IndexPQ(d, m, nbits)
            batch_write_ave_one_lib = 0
            np.random.seed(66666)
            xb = np.random.random((10000, d)).astype('float32')
            xb[:, 0] += np.arange(10000) / 1000.
            if not index_pq.is_trained:
                index_pq.train(xb)
            for j in range(batch_write_time):
                np.random.seed(i * batch_write_time + j)
                xb = np.random.random((batch_write_num, d)).astype('float32')
                xb[:, 0] += np.arange(batch_write_num) / 1000.
                begin_time = time.time()
                index_pq.add(xb)
                duration = time.time() - begin_time
                batch_write_ave_one_lib += duration
                batch_write_ave_duration += duration
            # No average exists when no batch was written for this index.
            if batch_write_time:
                print("batch_write_ave_one_lib = ",
                      (batch_write_ave_one_lib / batch_write_time)
                      * 1000 * 1000,
                      " us")
            index_list.append(index_pq)
        # Guard both divisors: no indexes (db_num == 0) or no batches.
        if index_list and batch_write_time:
            print("batch_write_ave_duration = ",
                  (batch_write_ave_duration / len(index_list)
                   / batch_write_time) * 1000 * 1000, " us")

        return index_list

    for i in range(config['db_num']):
        np.random.seed(i)  # make reproducible
        xb = np.random.random((nb, d)).astype('float32')
        xb[:, 0] += np.arange(nb) / 1000.
        begin_time = time.time()

        index_pq = faiss.IndexPQ(d, m, nbits)
        assert not index_pq.is_trained
        index_pq.train(xb)
        assert index_pq.is_trained

        index_pq.add(xb)  # add vectors to the index
        create_ave_duration += time.time() - begin_time
        index_list.append(index_pq)

    # Bail out before computing any average: with no indexes the divisions
    # below would raise ZeroDivisionError.
    if not index_list:
        return index_list

    print("craete ave duration = ", create_ave_duration / len(index_list),
          " s")

    for i, index_pq in enumerate(index_list):
        for j in range(search_repeat):
            # Distinct, reproducible seed per (index, repetition) pair.
            np.random.seed(i * search_repeat + j + config['db_num'])
            xq = np.random.random((nq, d)).astype('float32')
            xq[:, 0] += np.arange(nq) / 1000.
            begin_time = time.time()
            index_pq.search(xq, topk)  # actual search
            search_ave_duration += time.time() - begin_time

    print("search index aver time = ",
          search_ave_duration / len(index_list) / search_repeat, " s")

    return index_list

Exemple #25

0

Afficher le fichier

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import print_function
import faiss
from datasets import load_sift1M, evaluate

# load_sift1M returns the database, query, training and ground-truth arrays,
# in that order.
xb, xq, xt, gt = load_sift1M()
nq, d = xq.shape

# Passed to evaluate() as its last argument.
k = 32

# Sweep the third IndexPQ argument (nbits) with 8 as the second argument.
for nbits in 4, 6, 8, 10, 12:
    index = faiss.IndexPQ(d, 8, nbits)
    index.train(xt)
    index.add(xb)

    # t is printed as ms per query and r[1] as R@1.
    t, r = evaluate(index, xq, gt, k)
    print("\t %7.3f ms per query, R@1 %.4f" % (t, r[1]))
    # Drop the reference to this index before building the next one.
    del index

Exemple #26

0

Afficher le fichier

from recall_data import recall_data

# Basic parameters
d = 300                 # vector dimensionality
data_size = 10000       # database size
k = 50
M = 30
nbits = 8

# Generate test data
numpy.random.seed(13)
data = numpy.random.random(size=(data_size, d)).astype('float32')
test_data = recall_data

# Create the index model and add vectors
# NOTE(review): the original comment on the next line said the index uses the
# dot product, but no metric argument is passed here - confirm which metric
# IndexPQ uses by default.
index = faiss.IndexPQ(d, M, nbits)                 # PQ index with the default metric

# Train on the data
start_time = time.time()
assert not index.is_trained
index.train(data)
assert index.is_trained
print "Train Index Used %.2f sec." % (time.time() - start_time)

# print(index.is_trained)                       # whether the index has been trained
# print(index.ntotal)                           # number of vectors in the index
start_time = time.time()
index.add(data)                                 # add the data to the index
print "Add vector Used %.2f sec." % (time.time() - start_time)

start_time = time.time()

Exemple #27

0

Afficher le fichier

Fichier : bench_polysemous_1m.py Projet : ljeagle/faiss-quickeradc

def evaluate(dataset, poly, width):
    """Benchmark PQ or polysemous search on one dataset and print recalls.

    Python 2 code (print statements).

    Args:
        dataset: Directory containing ``learn.fvecs``, ``base.fvecs``,
            ``query.fvecs`` and ``groundtruth.ivecs``.
        poly: When true, train and search with polysemous codes; otherwise
            use plain PQ search.
        width: Second argument passed to ``faiss.IndexPQ``; also selects the
            list of Hamming thresholds swept in polysemous mode.
    """

    xt = fvecs_read(dataset+"/learn.fvecs")
    xb = fvecs_read(dataset+"/base.fvecs")
    xq = fvecs_read(dataset+"/query.fvecs")
    #xt = xt[:32000]
    #xb = xb[:32000]
    #xq = xb[:320]
    nq, d = xq.shape

    #print xq.shape
    gt = ivecs_read(dataset+"/groundtruth.ivecs")



    # index with `width` subquantizers, 8 bit each
    index = faiss.IndexPQ(d, width, 8)

    if(poly):
        index.search_type = faiss.IndexPQ.ST_polysemous
        index.do_polysemous_training = True
    else:
        index.do_polysemous_training = False
        index.search_type = faiss.IndexPQ.ST_PQ
    index.verbose=False

    
    index.train(xt)


    index.add(xb)

    # Single-threaded timing; nt is the factor applied to the per-query time
    # printed below.
    nt = 1
    faiss.omp_set_num_threads(1)




    # Hamming thresholds to sweep, chosen per code width; a single 0 entry
    # when not in polysemous mode or for any other width.
    if(poly and width == 8):
        htList =[0,17,19,21,23,25]
    elif(poly and width == 16):
        htList =[0,43,45,47,49,51,53,55,57,59]
    elif(poly and width == 32):
        htList =[0,78,81,84,87,90,93,96,99,102,105,108,111,114,117]
    else:
        htList = [0]

    for ht in htList:
        index.polysemous_ht = ht
        t0 = time.time()
        D, I = index.search(xq, 100)
        t1 = time.time()
        # Fraction of queries whose first ground-truth id is the top result.
        recall_at_1 = (I[:, :1] == gt[:, :1]).sum() / float(nq)
        # Fraction of queries whose first ground-truth id appears anywhere in
        # the 100 results (the comparison broadcasts gt's first column).
        recall_at_100 = (I[:, :100] == gt[:, :1]).sum() / float(nq)
        # Trailing commas keep the three prints on one output line.
        if(poly):
             print "Poly ",
        else:
             print "PQ ",
        print width,"x8 tau=", ht," ",      
        print "\t %7.3f ms per query, R@1 %.4f   R@100  %.4f"   % (
                (t1 - t0) * 1000.0 / nq * nt, recall_at_1, recall_at_100)

Exemple #28

0

Afficher le fichier

Fichier : retrieve.py Projet : trungv0/zl_challenge

    logging.info('d = %d, k = %d', d, args.k)

    stn = time.time()
    if args.norm:
        feats_index = feats_index / np.linalg.norm(
            feats_index, axis=1, keepdims=True)
        feats_query = feats_query / np.linalg.norm(
            feats_query, axis=1, keepdims=True)
        logging.info('Feats normalization took %.2f', (time.time() - stn))

    # 8 bits; passed as IndexPQ's third argument below.
    nbits = 2**3

    # NOTE(review): `index` stays unbound when args.index_choice is neither
    # 'flat' nor 'pq', so index.add() below would raise NameError - confirm
    # the argument is validated upstream.
    if args.index_choice == 'flat':
        index = faiss.IndexFlatL2(d)
    elif args.index_choice == 'pq':
        index = faiss.IndexPQ(d, args.M, nbits)
        # Only the PQ index is trained; training time is logged separately.
        stt = time.time()
        index.train(feats_index)
        logging.info('Train took %.2f', (time.time() - stt))

    sti = time.time()
    index.add(feats_index)
    logging.info('Index took %.2f', (time.time() - sti))

    # Search with all queries, or only the first args.limit when it is positive.
    sts = time.time()
    D, I = index.search(
        feats_query[:args.limit] if args.limit > 0 else feats_query, args.k)
    logging.info('Search took %.2f', (time.time() - sts))

    results = np.empty_like(I, dtype='S8')
    for i in range(I.shape[0]):