Esempio n. 1
0
    def test_equiv_rcq_rq(self):
        """Check that two index layouts emit the same standalone codes.

        The codes produced by ``sa_encode`` are compared between
           an IndexRefine built on a ResidualQuantizer
        and
           an IVF index using a ResidualCoarseQuantizer.
        In both cases the code is the centroid id concatenated with the code.
        """
        dataset = SyntheticDataset(16, 400, 100, 0)

        refine_index = faiss.index_factory(dataset.d, "RQ2x3,Refine(Flat)")
        refine_index.train(dataset.get_train())
        rq_index = faiss.downcast_index(refine_index.base_index)
        # align with RCQ, whose default beam factor is 4
        rq_index.rq.max_beam_size = 4

        ivf_index = faiss.index_factory(dataset.d, "IVF64(RCQ2x3),Flat")
        ivf_index.train(dataset.get_train())
        # make the coarse quantizer use the very same residual quantizer
        coarse = faiss.downcast_index(ivf_index.quantizer)
        coarse.rq = rq_index.rq
        ivf_index.is_trained = True

        refine_codes = refine_index.sa_encode(dataset.get_database())
        ivf_codes = ivf_index.sa_encode(dataset.get_database())

        np.testing.assert_array_equal(refine_codes, ivf_codes)
Esempio n. 2
0
    def test_factory_NSG(self):
        """Check the index classes and parameters built from NSG factory strings."""
        dim = 12

        # bare NSG string
        nsg = faiss.index_factory(dim, "NSG64")
        assert isinstance(nsg, faiss.IndexNSGFlat)
        assert nsg.nsg.R == 64

        # same string with an explicit inner-product metric
        nsg_ip = faiss.index_factory(dim, "NSG64", faiss.METRIC_INNER_PRODUCT)
        assert isinstance(nsg_ip, faiss.IndexNSGFlat)
        assert nsg_ip.nsg.R == 64
        assert nsg_ip.metric_type == faiss.METRIC_INNER_PRODUCT

        # explicit Flat storage
        nsg_flat = faiss.index_factory(dim, "NSG64,Flat")
        assert isinstance(nsg_flat, faiss.IndexNSGFlat)
        assert nsg_flat.nsg.R == 64

        # NSG used as the coarse quantizer of an IVF-Flat index
        ivf_flat = faiss.index_factory(dim, "IVF65536_NSG64,Flat")
        coarse = faiss.downcast_index(ivf_flat.quantizer)
        assert isinstance(ivf_flat, faiss.IndexIVFFlat)
        assert isinstance(coarse, faiss.IndexNSGFlat)
        assert ivf_flat.nlist == 65536
        assert coarse.nsg.R == 64

        # NSG used as the coarse quantizer of an IVF-PQ index
        ivf_pq = faiss.index_factory(dim, "IVF65536_NSG64,PQ2x8")
        coarse = faiss.downcast_index(ivf_pq.quantizer)
        assert isinstance(ivf_pq, faiss.IndexIVFPQ)
        assert isinstance(coarse, faiss.IndexNSGFlat)
        assert ivf_pq.nlist == 65536
        assert coarse.nsg.R == 64
        assert ivf_pq.pq.M == 2
        assert ivf_pq.pq.nbits == 8
Esempio n. 3
0
def check_index_recon(embeds_path,
                      index_or_index_path,
                      embeds_format='labeled_numpy',
                      sort=True,
                      **kwargs):
    """Report how far an index's reconstructions are from the embeddings.

    The embeddings are loaded with ``load_embeds``, the first
    ``len(embeddings)`` vectors are reconstructed from the index, and the norm
    of the difference is reported through ``toc``.

    Args:
        embeds_path: Passed to ``load_embeds`` as ``embeds_path``.
        index_or_index_path: An index object, or a ``str`` path that is
            loaded with ``faiss.read_index``.
        embeds_format: Passed to ``load_embeds`` as ``format``.
        sort: Passed to ``load_embeds`` as ``sort``.
        **kwargs: Extra keyword arguments forwarded to ``load_embeds``.
    """
    if isinstance(index_or_index_path, str):
        index = faiss.read_index(index_or_index_path)
    else:
        index = index_or_index_path
    # a direct map is built on the downcast index before reconstructing
    faiss.downcast_index(index).make_direct_map()

    embeds_list, _ = load_embeds(
        embeds_path=embeds_path,
        format=embeds_format,
        sort=sort,
        **kwargs
    )

    tic("Checking embedding reconstruction difference ...")
    all_embeds = np.concatenate(embeds_list)
    reconstructed = index.reconstruct_n(0, len(all_embeds))
    embeds_diff = np.linalg.norm(all_embeds - reconstructed)

    if embeds_diff == 0:
        toc("Passed embedding reconstruction difference check.")
    else:
        toc(f"Embedding reconstruction difference: {embeds_diff}.")
Esempio n. 4
0
def reverse_index_factory(index):
    """Attempt to recover the factory string an index was built with.

    Only a subset of index types is handled: ``IndexFlat``, and
    ``IndexIVFFlat`` / ``IndexIVFScalarQuantizer`` whose coarse quantizer is
    an ``IndexFlat``, a ``MultiIndexQuantizer``, an ``IndexHNSW`` or any other
    index this function can itself describe (rendered as ``IVFn(...)``).

    Args:
        index: The faiss index to describe; it is downcast first.

    Returns:
        The factory string, e.g. ``"Flat"`` or ``"IVF<nlist>,Flat"``.

    Raises:
        NotImplementedError: If the index, or a nested coarse quantizer, is
            of a type that is not handled here.
    """
    index = faiss.downcast_index(index)
    if isinstance(index, faiss.IndexFlat):
        return "Flat"
    if isinstance(index, faiss.IndexIVF):
        quantizer = faiss.downcast_index(index.quantizer)

        if isinstance(quantizer, faiss.IndexFlat):
            prefix = "IVF%d" % index.nlist
        elif isinstance(quantizer, faiss.MultiIndexQuantizer):
            # the ProductQuantizer field is `nbits`; `nbit` does not exist
            # and raised AttributeError on every IMI index
            prefix = "IMI%dx%d" % (quantizer.pq.M, quantizer.pq.nbits)
        elif isinstance(quantizer, faiss.IndexHNSW):
            prefix = "IVF%d_HNSW%d" % (index.nlist, quantizer.hnsw.M)
        else:
            # describe an arbitrary coarse quantizer recursively
            prefix = "IVF%d(%s)" % (index.nlist, reverse_index_factory(quantizer))

        if isinstance(index, faiss.IndexIVFFlat):
            return prefix + ",Flat"
        if isinstance(index, faiss.IndexIVFScalarQuantizer):
            # NOTE(review): always reported as SQ8, whatever index.sq.qtype is
            return prefix + ",SQ8"

    raise NotImplementedError()
Esempio n. 5
0
    def test_equiv_sh(self):
        """Compare IVFSpectralHash codes with those of the RQ + LSH index.

        ``sa_encode`` of an IndexIVFSpectralHash must give the same result as
        ``sa_encode`` of the concatenated RQ + LSH index.
        """
        dataset = SyntheticDataset(32, 500, 100, 0)
        reference = faiss.index_factory(dataset.d, "RQ1x4,Refine(ITQ16,LSH)")
        reference.train(dataset.get_train())

        # rebuild the same thing as an IndexIVFSpectralHash: the first RQ
        # codebook provides the centroids of a flat coarse quantizer
        residual_q = faiss.downcast_index(reference.base_index).rq
        flat_coarse = faiss.IndexFlat(dataset.d)
        flat_coarse.add(get_additive_quantizer_codebooks(residual_q)[0])

        lsh_encoder = faiss.downcast_index(reference.refine_index)

        # the period is larger than the magnitude of the vectors, and
        # negative because otherwise the bits are flipped
        period = -100000.0
        nbit = lsh_encoder.sa_code_size() * 8

        spectral = faiss.IndexIVFSpectralHash(
            flat_coarse, dataset.d, flat_coarse.ntotal, nbit, period
        )

        # use the encoder as vector transform; the IndexIVFSpectralHash
        # performs the binarization itself
        spectral.replace_vt(lsh_encoder)

        reference_codes = reference.sa_encode(dataset.get_database())
        spectral_codes = spectral.sa_encode(dataset.get_database())

        np.testing.assert_array_equal(reference_codes, spectral_codes)
Esempio n. 6
0
    def test_nprobe_4(self):
        """nprobe set via ParameterSpace must reach the doubly nested index."""
        outer = faiss.index_factory(32, "PCAR32,IVF32,SQ8,RFlat")
        param_space = faiss.ParameterSpace()
        param_space.set_index_parameter(outer, "nprobe", 5)

        # peel two levels: the base_index of the outer index, then its .index
        inner = faiss.downcast_index(outer.base_index)
        innermost = faiss.downcast_index(inner.index)
        self.assertEqual(innermost.nprobe, 5)
Esempio n. 7
0
def unwind_index_ivf(index):
    """Dig the IVF index, and an optional transform, out of a wrapped index.

    Returns:
        A pair ``(index_ivf, vt)``. ``vt`` is the single transform of an
        enclosing IndexPreTransform, or ``None`` when there is none. The pair
        is ``(None, None)`` when no IndexIVF is found.
    """
    if isinstance(index, faiss.IndexPreTransform):
        # only a chain with exactly one transform is accepted
        assert index.chain.size() == 1
        transform = index.chain.at(0)
        inner_ivf, inner_transform = unwind_index_ivf(
            faiss.downcast_index(index.index))
        assert inner_transform is None
        return inner_ivf, transform

    # IndexRefine is looked up dynamically because faiss may not define it
    refine_cls = getattr(faiss, "IndexRefine", None)
    if refine_cls is not None and isinstance(index, refine_cls):
        return unwind_index_ivf(faiss.downcast_index(index.base_index))

    if not isinstance(index, faiss.IndexIVF):
        return None, None
    return index, None
Esempio n. 8
0
def search_single_scan(index, xq, k, bs=128):
    """Search an IVF index, visiting its inverted lists block by block.

    The search is organised so that the inverted lists are accessed
    sequentially by blocks of size ``bs``.

    Returns:
        The ``(D, I)`` arrays of the result heap after all blocks were
        searched.
    """
    # an IndexPreTransform is unwrapped: transform the queries and search
    # the wrapped index
    if isinstance(index, faiss.IndexPreTransform):
        xq = index.apply_py(xq)
        index = faiss.downcast_index(index.index)

    nlist = index.nlist
    nq = len(xq)

    # coarse assignment of every query to its nprobe lists
    coarse_dis, assign = index.quantizer.search(xq, min(index.nprobe, nlist))
    block_of_list = assign // bs

    heap = faiss.ResultHeap(nq, k)
    # presumably lets successive search_preassigned calls accumulate into the
    # same heap, as the flag name suggests; confirm against faiss
    index.parallel_mode |= index.PARALLEL_MODE_NO_HEAP_INIT

    n_blocks = (nlist + bs - 1) // bs
    for bucket_no in range(n_blocks):
        # keep only the assignments that fall in the current block
        block_assign = assign.copy()
        block_assign[block_of_list != bucket_no] = -1

        index.search_preassigned(
            nq, faiss.swig_ptr(xq), k,
            faiss.swig_ptr(block_assign),
            faiss.swig_ptr(coarse_dis),
            faiss.swig_ptr(heap.D), faiss.swig_ptr(heap.I),
            False, None
        )

    heap.finalize()

    return heap.D, heap.I
Esempio n. 9
0
    def test_nsg_sq(self):
        """Test IndexNSGSQ"""
        dim = self.xq.shape[1]
        R = 32
        nsg_index = faiss.index_factory(dim, f"NSG{R}_SQ8")
        assert isinstance(nsg_index, faiss.IndexNSGSQ)
        storage = faiss.downcast_index(nsg_index.storage)
        assert nsg_index.nsg.R == R
        assert storage.sq.qtype == faiss.ScalarQuantizer.QT_8bit

        # a flat index over the same data provides the reference result
        exact = faiss.IndexFlat(dim)
        exact.add(self.xb)
        _, Iref = exact.search(self.xq, k=1)

        nsg_index.train(self.xb)
        nsg_index.add(self.xb)
        D, I = nsg_index.search(self.xq, k=1)

        # accuracy: number of results identical to the reference
        hits = (Iref == I).sum()
        print("IndexNSGSQ", hits)
        self.assertGreaterEqual(hits, 405)  # 411

        # I/O and clone
        self.subtest_io_and_clone(nsg_index, D, I)
Esempio n. 10
0
    def test_nsg_pq(self):
        """Test IndexNSGPQ"""
        dim = self.xq.shape[1]
        R = 32
        pq_M = 4
        nsg_index = faiss.index_factory(dim, f"NSG{R}_PQ{pq_M}")
        assert isinstance(nsg_index, faiss.IndexNSGPQ)
        storage = faiss.downcast_index(nsg_index.storage)
        assert nsg_index.nsg.R == R
        assert storage.pq.M == pq_M

        # a flat index over the same data provides the reference result
        exact = faiss.IndexFlat(dim)
        exact.add(self.xb)
        _, Iref = exact.search(self.xq, k=1)

        nsg_index.GK = 32
        nsg_index.train(self.xb)
        nsg_index.add(self.xb)
        D, I = nsg_index.search(self.xq, k=1)

        # accuracy: number of results identical to the reference
        hits = (Iref == I).sum()
        print("IndexNSGPQ", hits)
        self.assertGreaterEqual(hits, 190)  # 193

        # I/O and clone
        self.subtest_io_and_clone(nsg_index, D, I)
Esempio n. 11
0
 def query(self, vecs, topk, param):
     """Search the index after applying ``param["nprobe"]`` to each sub-index.

     Every sub-index returned by ``self.index.at(i)`` is downcast and gets
     its ``nprobe`` set before the search.

     Returns:
         The ids from ``self.index.search``; the distances are discarded.
     """
     n_sub_indexes = self.index.count()
     for sub_no in range(n_sub_indexes):
         sub_index = faiss.downcast_index(self.index.at(sub_no))
         sub_index.nprobe = param["nprobe"]
     _distances, ids = self.index.search(x=vecs, k=topk)
     return ids
Esempio n. 12
0
def train_ivf_index_with_2level(index, xt, **args):
    """
    Applies 2-level clustering to an index_ivf embedded in an index.

    An IndexPreTransform wrapper is handled first: each transform of its
    chain is trained on, then applied to, the training vectors in turn, and
    the wrapped index is trained recursively on the transformed vectors.

    Args:
        index: An IndexIVF, or an IndexPreTransform wrapping one. The IVF
            index must use the L2 metric (asserted).
        xt: Training vectors.
        **args: Extra keyword arguments forwarded to
            ``two_level_clustering``.
    """
    # handle PreTransforms
    index = faiss.downcast_index(index)
    if isinstance(index, faiss.IndexPreTransform):
        for i in range(index.chain.size()):
            vt = index.chain.at(i)
            vt.train(xt)
            xt = vt.apply(xt)
        # forward the clustering options; they used to be dropped here, so
        # they were silently ignored for wrapped indexes
        train_ivf_index_with_2level(index.index, xt, **args)
        index.is_trained = True
        return
    assert isinstance(index, faiss.IndexIVF)
    assert index.metric_type == faiss.METRIC_L2
    # now do 2-level clustering: about sqrt(nlist) first-level clusters, with
    # the nlist second-level centroids split between them as evenly as
    # integer division allows
    nc1 = int(np.sqrt(index.nlist))
    cc = np.arange(nc1 + 1) * index.nlist // nc1
    all_nc2 = cc[1:] - cc[:-1]
    centroids, _ = two_level_clustering(xt, nc1, all_nc2, **args)
    index.quantizer.train(centroids)
    index.quantizer.add(centroids)
    # finish training
    index.train(xt)
Esempio n. 13
0
def search(img, k=10, nprobe=10):
    """Look up the k nearest neighbours of a face image in the index.

    Arguments:
        img {PIL.Image} -- Face

    Keyword Arguments:
        k {int} -- Number of nearest neighbours to return (default: {10})
        nprobe {int} -- Number of nearest clusters to scan, see the Faiss
            docs (default: {10})

    Returns:
        np.array -- k distances
        np.array -- k indices
    """
    backbone = ResNet_50([112, 112])
    # pretrained backbone weights for ResNet50
    weights_path = os.path.join(settings.BACKBONE_DIR, BACKBONE_FILE)

    # load the index and set nprobe on the index it wraps
    index_path = os.path.join(DATASET_PATH, DATASET_INDEX)
    index = faiss.read_index(index_path)
    wrapped_ivf = faiss.downcast_index(index.index)
    wrapped_ivf.nprobe = nprobe

    embedding = extract_one_embedding(img, backbone, weights_path)
    query = np.array(embedding).astype('float32').reshape(1, -1)

    distances, indices = index.search(query, k)
    return distances[0], indices[0]
Esempio n. 14
0
 def nearest(self, vector=None, n=12, nprobe=16):
     """Return the entries nearest to ``vector`` in the FAISS index.

     ``nprobe`` is set on the wrapped index when the FAISS index is an
     IndexPreTransform, otherwise on the index itself. The vector is made
     2-D and transposed when its last dimension does not match the index
     dimension.

     Args:
         vector: Query vector.
         n: Number of neighbours requested from the index.
         nprobe: Value assigned to the index's ``nprobe``.

     Returns:
         A list of dicts with the keys ``rank``, ``algo``, ``dist``,
         ``indexentries_pk`` and ``offset``, one per non-negative id in the
         first result row.
     """
     logging.info("Index size {} with {} loaded entries in {}".format(
         self.faiss_index.ntotal, len(self.loaded_entries), self.name))
     # isinstance on the public class: `faiss.swigfaiss` is a private
     # submodule that is not always the one faiss loads
     if isinstance(self.faiss_index, faiss.IndexPreTransform):
         index_ivf = faiss.downcast_index(self.faiss_index.index)
         index_ivf.nprobe = nprobe
     else:
         self.faiss_index.nprobe = nprobe
     vector = np.atleast_2d(vector)
     if vector.shape[-1] != self.faiss_index.d:
         vector = vector.T
     results = []
     dist, ids = self.faiss_index.search(vector, n)
     for i, k in enumerate(ids[0]):
         # negative ids are skipped
         if k >= 0:
             # presumably self.tree maps an id to the interval(s) of the
             # entry containing it; verify against the class
             index_entry = sorted(self.tree[k])[0]
             results.append({
                 'rank': i + 1,
                 'algo': self.name,
                 'dist': float(dist[0, i]),
                 'indexentries_pk': index_entry.data,
                 'offset': k - index_entry.begin
             })
     return results
Esempio n. 15
0
def compute_populated_index(preproc):
    """Populate a GPU index batch by batch and gather the vectors on the CPU.

    The trained CPU index is cloned to the GPU(s) with sharding requested and
    the database ``xb`` is added in batches. When ``max_add > 0`` and the GPU
    side holds more than ``max_add`` vectors, its content is copied into the
    CPU index and the GPU side is reset. At the end, whatever is on the GPU
    is copied to the CPU index as well.

    Args:
        preproc: Preprocessing object handed to ``prepare_trained_index``
            and ``dataset_iterator``.

    Returns:
        ``(gpu_index, indexall)``: ``indexall`` is the populated CPU index.
        ``gpu_index`` is the GPU index, or ``None`` when ``max_add > 0``
        because it then does not contain all the vectors.
    """

    indexall = prepare_trained_index(preproc)

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = use_float16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = use_precomputed_tables
    co.indicesOptions = faiss.INDICES_CPU
    co.verbose = True
    co.reserveVecs = max_add if max_add > 0 else xb.shape[0]
    co.shard = True
    assert co.shard_type in (0, 1, 2)
    vres, vdev = make_vres_vdev()
    gpu_index = faiss.index_cpu_to_gpu_multiple(vres, vdev, indexall, co)

    # The clone may be a simple (non-sharded) index, which has no `at`. The
    # flush below must cope with that, just like the final aggregation.
    is_sharded = hasattr(gpu_index, 'at')
    n_sub_indexes = ngpu if is_sharded else 1

    print("add...")
    t0 = time.time()
    nb = xb.shape[0]
    for i0, xs in dataset_iterator(xb, preproc, add_batch_size):
        i1 = i0 + xs.shape[0]
        gpu_index.add_with_ids(xs, np.arange(i0, i1))
        if max_add > 0 and gpu_index.ntotal > max_add:
            print("Flush indexes to CPU")
            for i in range(n_sub_indexes):
                index_src_gpu = faiss.downcast_index(
                    gpu_index.at(i) if is_sharded else gpu_index)
                index_src = faiss.index_gpu_to_cpu(index_src_gpu)
                print("  index %d size %d" % (i, index_src.ntotal))
                index_src.copy_subset_to(indexall, 0, 0, nb)
                index_src_gpu.reset()
                index_src_gpu.reserveMemory(max_add)
            if is_sharded:
                gpu_index.sync_with_shard_indexes()

        print('\r%d/%d (%.3f s)  ' % (i0, nb, time.time() - t0), end=' ')
        sys.stdout.flush()
    print("Add time: %.3f s" % (time.time() - t0))

    print("Aggregate indexes to CPU")
    t0 = time.time()

    if is_sharded:
        # it is a sharded index
        for i in range(ngpu):
            index_src = faiss.index_gpu_to_cpu(gpu_index.at(i))
            print("  index %d size %d" % (i, index_src.ntotal))
            index_src.copy_subset_to(indexall, 0, 0, nb)
    else:
        # simple index
        index_src = faiss.index_gpu_to_cpu(gpu_index)
        index_src.copy_subset_to(indexall, 0, 0, nb)

    print("  done in %.3f s" % (time.time() - t0))

    if max_add > 0:
        # it does not contain all the vectors
        gpu_index = None

    return gpu_index, indexall
Esempio n. 16
0
def _load_ann_index(index_filename: str, device: int) -> faiss.Index:
    """
    Read the ANN index from a file, move it to the GPU and set its nprobe.

    The number of probes is the smaller of half the number of lists (rounded
    up) and ``config.num_probe``.

    Parameters
    ----------
    index_filename : str
        The ANN index filename.
    device : int
        The device passed to ``faiss.index_cpu_to_gpu``.

    Returns
    -------
    faiss.Index
        The Faiss `Index`.
    """
    # https://github.com/facebookresearch/faiss/blob/2cce2e5f59a5047aa9a1729141e773da9bec6b78/benchs/bench_gpu_1bn.py#L608
    index_cpu = faiss.read_index(index_filename)
    gpu_resources = faiss.StandardGpuResources()

    cloner_options = faiss.GpuClonerOptions()
    cloner_options.useFloat16 = True
    cloner_options.useFloat16CoarseQuantizer = False
    cloner_options.indicesOptions = faiss.INDICES_CPU
    cloner_options.reserveVecs = index_cpu.ntotal

    index = faiss.index_cpu_to_gpu(
        gpu_resources, device, index_cpu, cloner_options)

    # an index exposing `at` is made of sub-indexes that are configured one
    # by one; otherwise the index itself is configured
    if hasattr(index, 'at'):
        targets = (faiss.downcast_index(index.at(i))
                   for i in range(index.count()))
    else:
        targets = (index,)
    for target in targets:
        target.nprobe = min(math.ceil(target.nlist / 2), config.num_probe)
    return index
Esempio n. 17
0
    def test_ivfsq(self):
        """Raising nprobe must raise the accuracy of an IVF(RCQ),SQ8 index."""
        ds = datasets.SyntheticDataset(32, 3000, 1000, 100)

        xt = ds.get_train()
        xb = ds.get_database()
        gt = ds.get_groundtruth(1)

        # RQ 2x5 = 10 bits = 1024 centroids
        index = faiss.index_factory(ds.d, "IVF1024(RCQ2x5),SQ8")
        coarse = faiss.downcast_index(index.quantizer)
        coarse.rq.train_type = faiss.ResidualQuantizer.Train_default

        index.train(xt)
        index.add(xb)

        def hit_rate(nprobe):
            # fraction of matches with the ground truth among the top-10
            index.nprobe = nprobe
            _, I = index.search(ds.get_queries(), 10)
            return (I == gt[None, :]).sum() / ds.nq

        r10 = hit_rate(10)
        r40 = hit_rate(40)

        self.assertGreater(r40, r10)
Esempio n. 18
0
    def set_nprobe(self, nprobe) -> int:
        """Set nprobe on the index through a ParameterSpace.

        Args:
            nprobe: The new value for nprobe

        Returns:
            The ``nprobe`` attribute read back from the downcast index.
        """
        param_space = faiss.ParameterSpace()
        param_space.set_index_parameter(self.index, "nprobe", nprobe)
        concrete_index = faiss.downcast_index(self.index)
        return concrete_index.nprobe
 def flushGPUIndex(self):
     """Copy every GPU (sub-)index into the CPU index and empty the GPU side.

     With more than one GPU the sub-indexes of ``self.gpu_index`` are
     handled one by one and the shards are synced at the end; otherwise
     ``self.gpu_index`` itself is handled.
     """
     multi_gpu = self.ngpu > 1
     for gpu_no in range(self.ngpu):
         gpu_part = self.gpu_index.at(gpu_no) if multi_gpu else self.gpu_index
         index_src_gpu = faiss.downcast_index(gpu_part)
         index_src = faiss.index_gpu_to_cpu(index_src_gpu)
         if self.index is None:
             # the CPU index is created lazily from the stored empty index
             self.index = faiss.read_index(self.emptyIndexPath)
         upper_bound = self.totalImagesToIndex * self.feats_per_file
         index_src.copy_subset_to(self.index, 0, 0, upper_bound)
         index_src_gpu.reset()
         index_src.reset()
         index_src_gpu.reserveMemory(self.max_add)
     if multi_gpu:
         self.gpu_index.sync_with_shard_indexes()
Esempio n. 20
0
    def test_factory_3(self):
        """nprobe set via ParameterSpace reaches a bare or a wrapped IVF index."""
        nprobe = 3

        # bare IVF index: nprobe is read from the index itself
        ivf = faiss.index_factory(12, "IVF10,PQ4")
        faiss.ParameterSpace().set_index_parameter(ivf, "nprobe", nprobe)
        assert ivf.nprobe == nprobe

        # index with a PCAR8 prefix: nprobe is read from the index it wraps
        wrapped = faiss.index_factory(12, "PCAR8,IVF10,PQ4")
        faiss.ParameterSpace().set_index_parameter(wrapped, "nprobe", nprobe)
        inner = faiss.downcast_index(wrapped.index)
        assert inner.nprobe == nprobe
Esempio n. 21
0
    def test_factory_3(self):
        """Check that ParameterSpace propagates nprobe to the IVF index."""
        dim, expected_nprobe = 12, 3

        plain_index = faiss.index_factory(dim, "IVF10,PQ4")
        faiss.ParameterSpace().set_index_parameter(
            plain_index, "nprobe", expected_nprobe)
        assert plain_index.nprobe == expected_nprobe

        # with a PCAR8 prefix the value is checked on the wrapped index
        prefixed_index = faiss.index_factory(dim, "PCAR8,IVF10,PQ4")
        faiss.ParameterSpace().set_index_parameter(
            prefixed_index, "nprobe", expected_nprobe)
        wrapped_index = faiss.downcast_index(prefixed_index.index)
        assert wrapped_index.nprobe == expected_nprobe
Esempio n. 22
0
def compute_populated_index(preproc):
    """Populate a GPU index batch by batch and gather the vectors on the CPU.

    The trained CPU index is cloned to the GPU(s) with sharding requested and
    the database ``xb`` is added in batches. When ``max_add > 0`` and the GPU
    side holds more than ``max_add`` vectors, its content is copied into the
    CPU index and the GPU side is reset. At the end, whatever is on the GPU
    is copied to the CPU index as well.

    Args:
        preproc: Preprocessing object handed to ``prepare_trained_index``
            and ``dataset_iterator``.

    Returns:
        ``(gpu_index, indexall)``: ``indexall`` is the populated CPU index.
        ``gpu_index`` is the GPU index, or ``None`` when ``max_add > 0``
        because it then does not contain all the vectors.
    """

    indexall = prepare_trained_index(preproc)

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = use_float16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = use_precomputed_tables
    co.indicesOptions = faiss.INDICES_CPU
    co.verbose = 10
    co.reserveVecs = max_add if max_add > 0 else xb.shape[0]
    co.shard = True

    vres, vdev = make_vres_vdev()
    gpu_index = faiss.index_cpu_to_gpu_multiple(
        vres, vdev, indexall, co)

    # The clone may be a simple (non-sharded) index, which has no `at`.
    is_sharded = hasattr(gpu_index, 'at')
    n_sub_indexes = ngpu if is_sharded else 1

    # Every print below takes one pre-formatted string, so the code is valid
    # and prints the same text under both Python 2 and Python 3.
    print("add...")
    t0 = time.time()
    nb = xb.shape[0]
    for i0, xs in dataset_iterator(xb, preproc, add_batch_size):
        i1 = i0 + xs.shape[0]
        gpu_index.add_with_ids(xs, np.arange(i0, i1))
        if max_add > 0 and gpu_index.ntotal > max_add:
            print("Flush indexes to CPU")
            for i in range(n_sub_indexes):
                index_src_gpu = faiss.downcast_index(
                    gpu_index.at(i) if is_sharded else gpu_index)
                index_src = faiss.index_gpu_to_cpu(index_src_gpu)
                print("  index %d size %d" % (i, index_src.ntotal))
                index_src.copy_subset_to(indexall, 0, 0, nb)
                index_src_gpu.reset()
                index_src_gpu.reserveMemory(max_add)
            if is_sharded:
                gpu_index.sync_with_shard_indexes()

        # progress line without a newline; the trailing space stands in for
        # the separator the old `print ...,` statement produced
        sys.stdout.write('\r%d/%d (%.3f s)  ' % (
            i0, nb, time.time() - t0) + ' ')
        sys.stdout.flush()
    print("Add time: %.3f s" % (time.time() - t0))

    print("Aggregate indexes to CPU")
    t0 = time.time()

    for i in range(n_sub_indexes):
        index_src = faiss.index_gpu_to_cpu(
            gpu_index.at(i) if is_sharded else gpu_index)
        print("  index %d size %d" % (i, index_src.ntotal))
        index_src.copy_subset_to(indexall, 0, 0, nb)

    print("  done in %.3f s" % (time.time() - t0))

    if max_add > 0:
        # it does not contain all the vectors
        gpu_index = None

    return gpu_index, indexall
Esempio n. 23
0
 def test_factory_HNSW_newstyle(self):
     """Check the storage built from "HNSW32,<storage>" factory strings."""
     dim = 12

     hnsw_flat = faiss.index_factory(dim, "HNSW32,Flat")
     assert hnsw_flat.storage.sa_code_size() == dim * 4

     hnsw_sq = faiss.index_factory(
         dim, "HNSW32,SQ8", faiss.METRIC_INNER_PRODUCT)
     assert hnsw_sq.storage.sa_code_size() == 12
     assert hnsw_sq.metric_type == faiss.METRIC_INNER_PRODUCT

     hnsw_pq = faiss.index_factory(dim, "HNSW32,PQ4")
     assert hnsw_pq.storage.sa_code_size() == 4

     # the "np" suffix must leave polysemous training switched off
     hnsw_pq_np = faiss.index_factory(dim, "HNSW32,PQ4np")
     pq_storage = faiss.downcast_index(hnsw_pq_np.storage)
     assert not pq_storage.do_polysemous_training
Esempio n. 24
0
    def test_equiv_rq(self):
        """
        Searching a flat RQ index must be equivalent to searching an IVF
        index made of RCQ + RQ that uses the same codebooks.
        """
        ds = datasets.SyntheticDataset(32, 3000, 1000, 50)

        # flat residual-quantizer index, used as the reference
        flat_rq = faiss.IndexResidualQuantizer(ds.d, 5, 4)
        flat_rq.rq.train_type = faiss.ResidualQuantizer.Train_default
        flat_rq.train(ds.get_train())
        flat_rq.add(ds.get_database())

        # reference search result
        Dref, Iref = flat_rq.search(ds.get_queries(), 10)

        # codebooks and encoded dataset of the reference index
        codebooks = get_additive_quantizer_codebooks(flat_rq.rq)
        codes = faiss.vector_to_array(flat_rq.codes)
        codes = codes.reshape(-1, flat_rq.code_size)

        # an IVF with 2x4 + 3x4 = 5x4 bits
        ivf = faiss.index_factory(ds.d, "IVF256(RCQ2x4),RQ3x4")

        # the first codebooks initialize the coarse quantizer
        rcq = faiss.downcast_index(ivf.quantizer)
        n_coarse = rcq.rq.M
        coarse_tables = np.vstack(codebooks[:n_coarse]).ravel()
        faiss.copy_array_to_vector(coarse_tables, rcq.rq.codebooks)
        rcq.rq.is_trained = True
        # translation of AdditiveCoarseQuantizer::train
        rcq.ntotal = 1 << rcq.rq.tot_bits
        rcq.centroid_norms.resize(rcq.ntotal)
        rcq.rq.compute_centroid_norms(rcq.centroid_norms.data())
        rcq.is_trained = True

        # the remaining codebooks go to the residual quantizer of the IVF
        fine_tables = np.vstack(codebooks[n_coarse:]).ravel()
        faiss.copy_array_to_vector(fine_tables, ivf.rq.codebooks)
        ivf.rq.is_trained = True
        ivf.is_trained = True

        # add the codes (this works because 2x4 is a multiple of 8 bits)
        ivf.add_sa_codes(codes)

        # exhaustive search: probe every inverted list
        ivf.nprobe = ivf.nlist
        Dnew, Inew = ivf.search(ds.get_queries(), 10)

        np.testing.assert_array_equal(Iref, Inew)
        np.testing.assert_array_almost_equal(Dref, Dnew, decimal=5)
Esempio n. 25
0
    def __init__(self, ds, indexfile):
        """Build a dataset whose database is the coarse quantizer of an index.

        Dimension, metric and queries are taken from ``ds``. The database
        vectors are read from the quantizer of the index stored in
        ``indexfile``.
        """
        self.d, self.metric, self.nq = ds.d, ds.metric, ds.nq
        self.xq = ds.get_queries()

        # the xb set is the content of the quantizer, one row per vector
        src_index = faiss.read_index(indexfile)
        coarse = faiss.downcast_index(src_index.quantizer)
        self.xb = faiss.vector_to_array(coarse.xb).reshape(-1, self.d)

        n_vectors = len(self.xb)
        self.nt = n_vectors
        self.nb = n_vectors
Esempio n. 26
0
    def _flush_to_cpu(self, index, nb, offset):
        """Copy each GPU (sub-)index into ``index`` and reset the GPU side.

        ``copy_subset_to`` is called with the bounds ``offset`` and
        ``offset + nb``, presumably the id range to copy (to be confirmed
        against the faiss documentation).
        """
        print("Flush indexes to CPU")

        for gpu_no in range(self.ngpu):
            if self.ngpu == 1:
                gpu_part = self.gpu_index
            else:
                gpu_part = self.gpu_index.at(gpu_no)
            index_src_gpu = faiss.downcast_index(gpu_part)
            index_src = faiss.index_gpu_to_cpu(index_src_gpu)

            # the original code used the bounds (0, nb); they are shifted by
            # offset here
            index_src.copy_subset_to(index, 0, offset, offset + nb)
            index_src_gpu.reset()
            index_src_gpu.reserveMemory(self.max_add)

        if self.ngpu > 1:
            self.gpu_index.sync_with_shard_indexes()
Esempio n. 27
0
    def __init__(self,
                 invlist_fnames,
                 empty_index_fname,
                 masked_index_fname=None):
        """Combine the inverted lists of several index files into one index.

        Args:
            invlist_fnames: Filenames of the indexes whose inverted lists
                are stacked. Every file must exist.
            empty_index_fname: Filename of the index that receives the
                combined inverted lists.
            masked_index_fname: Optional filename of an index whose inverted
                lists are combined with the stacked lists through a
                ``MaskedInvertedLists``.

        Raises:
            AssertionError: If one of ``invlist_fnames`` does not exist.
        """

        self.indexes = indexes = []
        ilv = faiss.InvertedListsPtrVector()

        for fname in invlist_fnames:
            if not os.path.exists(fname):
                # An explicit raise, because a bare `assert False` is
                # stripped under `python -O`, after which a stale or
                # undefined inverted list would be pushed. The exception
                # type is kept for callers.
                raise AssertionError(
                    "inverted list file not found: %s" % fname)
            print('reading', fname, end='\r', flush=True)
            index = faiss.read_index(fname)
            # keep the index referenced: its inverted lists are used below
            indexes.append(index)
            ilv.push_back(faiss.extract_index_ivf(index).invlists)
        print()

        self.big_il = faiss.VStackInvertedLists(ilv.size(), ilv.data())
        if masked_index_fname:
            self.big_il_base = self.big_il
            print('loading', masked_index_fname)
            self.masked_index = faiss.read_index(
                masked_index_fname,
                faiss.IO_FLAG_MMAP | faiss.IO_FLAG_READ_ONLY)
            self.big_il = faiss.MaskedInvertedLists(
                faiss.extract_index_ivf(self.masked_index).invlists,
                self.big_il_base)

        print('loading empty index', empty_index_fname)
        self.index = faiss.read_index(empty_index_fname)
        ntotal = self.big_il.compute_ntotal()

        print('replace invlists')
        index_ivf = faiss.extract_index_ivf(self.index)
        index_ivf.replace_invlists(self.big_il, False)
        index_ivf.ntotal = self.index.ntotal = ntotal
        index_ivf.parallel_mode = 1  # seems reasonable to do this all the time

        # assumes the coarse quantizer exposes `hnsw` (an HNSW index)
        quantizer = faiss.downcast_index(index_ivf.quantizer)
        quantizer.hnsw.efSearch = 1024
Esempio n. 28
0
    def test_rcq_LUT(self):
        """Compare LUT search in the RCQ with search on the exact centroids.

        Also checks that the coarse quantizer gives the same results after a
        serialization round trip.
        """
        ds = datasets.SyntheticDataset(32, 3000, 1000, 100)

        xt = ds.get_train()
        xb = ds.get_database()

        # RQ 2x5 = 10 bits = 1024 centroids
        index = faiss.index_factory(ds.d, "IVF1024(RCQ2x5),SQ8")

        rcq = faiss.downcast_index(index.quantizer)
        rcq.rq.train_type = faiss.ResidualQuantizer.Train_default

        index.train(xt)
        index.add(xb)
        index.nprobe = 10

        # reference: temporarily use a flat quantizer holding the exact
        # centroids as coarse quantizer
        exact_centroids = rcq.reconstruct_n(0, rcq.ntotal)
        flat_quantizer = faiss.IndexFlatL2(32)
        flat_quantizer.add(exact_centroids)
        index.quantizer = flat_quantizer
        Dref, Iref = index.search(ds.get_queries(), 10)
        index.quantizer = rcq

        # search with LUT
        rcq.set_beam_factor(-1)
        Dnew, Inew = index.search(ds.get_queries(), 10)

        np.testing.assert_array_almost_equal(Dref, Dnew, decimal=5)
        np.testing.assert_array_equal(Iref, Inew)

        # check i/o: the deserialized copy is searched twice and the second
        # result is compared
        CDref, CIref = rcq.search(ds.get_queries(), 10)
        restored = faiss.deserialize_index(faiss.serialize_index(rcq))
        restored.search(ds.get_queries(), 10)
        CDnew, CInew = restored.search(ds.get_queries(), 10)
        np.testing.assert_array_almost_equal(CDref, CDnew, decimal=5)
        np.testing.assert_array_equal(CIref, CInew)
Esempio n. 29
0
    def subtest_add2col(self, xb, xq, index, qname):
        """Search a copy of ``index`` widened by 2 dimensions.

        The 2 additional dimensions make the test also take the non-SIMD
        codepath. Nothing is retrained: 2 dims are added to the queries, the
        centroids and the trained ScalarQuantizer.

        Returns:
            The result of searching the widened index for 10 neighbours.
        """
        _, d = xb.shape
        d2 = d + 2

        xb2 = self.add2columns(xb)
        xq2 = self.add2columns(xq)

        # widen the centroids and put them in a new flat quantizer
        nlist = index.nlist
        src_quantizer = faiss.downcast_index(index.quantizer)
        centroids = faiss.vector_to_array(src_quantizer.xb).reshape(nlist, d)
        wide_quantizer = faiss.IndexFlat(d2, index.metric_type)
        wide_quantizer.add(self.add2columns(centroids))

        index2 = faiss.IndexIVFScalarQuantizer(
            wide_quantizer, d2, index.nlist, index.sq.qtype,
            index.metric_type)
        index2.nprobe = 4

        if qname not in ('8bit', '4bit'):
            index2.sq.trained = index.sq.trained
        else:
            # the trained vector holds 2 lines: vmins and vdiffs
            trained = faiss.vector_to_array(index.sq.trained).reshape(2, -1)
            nt = trained.shape[1]
            new_nt = int(nt * d2 / d)
            padding = np.zeros((2, new_nt - nt), dtype='float32')
            trained2 = np.hstack((trained, padding))
            trained2[1, nt:] = 1.0   # set vdiff to 1 to avoid div by 0
            faiss.copy_array_to_vector(trained2.ravel(), index2.sq.trained)

        index2.is_trained = True
        index2.add(xb2)
        return index2.search(xq2, 10)
Esempio n. 30
0
                                                          xq.shape, gt.shape)

# xq and xb are 2-D; the second axis of both is bound to d
# (presumably the query and database sets; their loading is not shown here)
nq, d = xq.shape
nb, d = xb.shape

######################################################
# Make index
######################################################

# Either read an existing index from args.indexfile or build a new one from
# the factory key args.indexkey. In both cases index_ivf is bound to the
# IVF index, unwrapped from an IndexPreTransform when there is one.
if args.indexfile and os.path.exists(args.indexfile):

    print "reading", args.indexfile
    index = faiss.read_index(args.indexfile)

    if isinstance(index, faiss.IndexPreTransform):
        # NOTE(review): vec_transform is not assigned on this branch, unlike
        # the other branches; confirm it is set before use
        index_ivf = faiss.downcast_index(index.index)
    else:
        index_ivf = index
        assert isinstance(index_ivf, faiss.IndexIVF)
        # no pre-transform: the vectors are used as they are
        vec_transform = lambda x: x
    assert isinstance(index_ivf, faiss.IndexIVF)

else:

    print "build index, key=", args.indexkey

    index = faiss.index_factory(d, args.indexkey)

    if isinstance(index, faiss.IndexPreTransform):
        index_ivf = faiss.downcast_index(index.index)
        # only the first transform of the chain is used
        vec_transform = index.chain.at(0).apply_py
Esempio n. 31
0
 def test_rcq(self):
     """An "IVF256(RCQ2x4),..." index has a ResidualCoarseQuantizer quantizer."""
     ivf = faiss.index_factory(12, "IVF256(RCQ2x4),RQ3x4")
     coarse = faiss.downcast_index(ivf.quantizer)
     self.assertEqual(coarse.__class__, faiss.ResidualCoarseQuantizer)
Esempio n. 32
0
 def test_ivf_parent(self):
     """A parenthesized "LSHr" coarse quantizer is built as an IndexLSH."""
     ivf = faiss.index_factory(123, "IVF100(LSHr),Flat")
     coarse_cls = faiss.downcast_index(ivf.quantizer).__class__
     self.assertEqual(coarse_cls, faiss.IndexLSH)