Example 1
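Builds or loads a populated CPU index, then moves it to the GPUs: a single sharded copy when replicas == 1, otherwise several sharded replicas collected in an IndexProxy.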
def get_populated_index(preproc):

    if not index_cachefile or not os.path.exists(index_cachefile):
        if not altadd:
            gpu_index, indexall = compute_populated_index(preproc)
        else:
            gpu_index, indexall = compute_populated_index_2(preproc)
        if index_cachefile:
            print("store", index_cachefile)
            faiss.write_index(indexall, index_cachefile)
    else:
        print("load", index_cachefile)
        indexall = faiss.read_index(index_cachefile)
        gpu_index = None

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = use_float16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = use_precomputed_tables
    co.indicesOptions = 0
    co.verbose = True
    co.shard = True    # the replicas will be made "manually"
    t0 = time.time()
    print("CPU index contains %d vectors, move to GPU" % indexall.ntotal)
    if replicas == 1:

        if not gpu_index:
            print("copying loaded index to GPUs")
            vres, vdev = make_vres_vdev()
            index = faiss.index_cpu_to_gpu_multiple(
                vres, vdev, indexall, co)
        else:
            index = gpu_index

    else:
        del gpu_index # We override the GPU index

        print("Copy CPU index to %d sharded GPU indexes" % replicas)

        index = faiss.IndexProxy()

        for i in range(replicas):
            # integer division so the device bounds stay ints
            gpu0 = ngpu * i // replicas
            gpu1 = ngpu * (i + 1) // replicas
            vres, vdev = make_vres_vdev(gpu0, gpu1)

            print("   dispatch to GPUs %d:%d" % (gpu0, gpu1))

            index1 = faiss.index_cpu_to_gpu_multiple(
                vres, vdev, indexall, co)
            index1.this.disown()
            index.addIndex(index1)
        index.own_fields = True
    del indexall
    print("move to GPU done in %.3f s" % (time.time() - t0))
    return index
Example 2
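Clones a CPU index to a single GPU, or shards it across a list of GPU ids, allocating one StandardGpuResources per device.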
def convert_index_to_gpu(index, faiss_gpu_index, useFloat16=False):
    if isinstance(faiss_gpu_index, list) and len(faiss_gpu_index) == 1:
        faiss_gpu_index = faiss_gpu_index[0]
    if isinstance(faiss_gpu_index, int):
        res = faiss.StandardGpuResources()
        res.setTempMemory(512 * 1024 * 1024)
        co = faiss.GpuClonerOptions()
        co.useFloat16 = useFloat16
        index = faiss.index_cpu_to_gpu(res, faiss_gpu_index, index, co)
    else:
        global gpu_resources
        if len(gpu_resources) == 0:
            import torch
            for i in range(torch.cuda.device_count()):
                res = faiss.StandardGpuResources()
                res.setTempMemory(256 * 1024 * 1024)
                gpu_resources.append(res)

        assert isinstance(faiss_gpu_index, list)
        vres = faiss.GpuResourcesVector()
        vdev = faiss.IntVector()
        co = faiss.GpuMultipleClonerOptions()
        co.shard = True
        co.useFloat16 = useFloat16
        for i in faiss_gpu_index:
            vdev.push_back(i)
            vres.push_back(gpu_resources[i])
        index = faiss.index_cpu_to_gpu_multiple(vres, vdev, index, co)

    return index
Example 3
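Shards a CPU index across every visible GPU, building the resource and device vectors with a local make_vres_vdev helper.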
def get_gpu_index(cpu_index):
    gpu_resources = []
    ngpu = faiss.get_num_gpus()
    tempmem = -1
    for i in range(ngpu):
        res = faiss.StandardGpuResources()
        if tempmem >= 0:
            res.setTempMemory(tempmem)
        gpu_resources.append(res)

    def make_vres_vdev(i0=0, i1=-1):
        " return vectors of device ids and resources useful for gpu_multiple"
        vres = faiss.GpuResourcesVector()
        vdev = faiss.IntVector()
        if i1 == -1:
            i1 = ngpu
        for i in range(i0, i1):
            vdev.push_back(i)
            vres.push_back(gpu_resources[i])
        return vres, vdev

    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    gpu_vector_resources, gpu_devices_vector = make_vres_vdev(0, ngpu)
    gpu_index = faiss.index_cpu_to_gpu_multiple(gpu_vector_resources,
                                                gpu_devices_vector, cpu_index,
                                                co)
    return gpu_index
Example 4
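Adds vectors in batches to a sharded GPU index, flushing shards back to the CPU index whenever they exceed max_add, then aggregates everything on the CPU.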
def compute_populated_index(preproc):
    """Add elements to a sharded index. Return the index and if available
    a sharded gpu_index that contains the same data. """

    indexall = prepare_trained_index(preproc)

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = use_float16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = use_precomputed_tables
    co.indicesOptions = faiss.INDICES_CPU
    co.verbose = True
    co.reserveVecs = max_add if max_add > 0 else xb.shape[0]
    co.shard = True
    assert co.shard_type in (0, 1, 2)
    vres, vdev = make_vres_vdev()
    gpu_index = faiss.index_cpu_to_gpu_multiple(vres, vdev, indexall, co)

    print("add...")
    t0 = time.time()
    nb = xb.shape[0]
    for i0, xs in dataset_iterator(xb, preproc, add_batch_size):
        i1 = i0 + xs.shape[0]
        gpu_index.add_with_ids(xs, np.arange(i0, i1))
        if max_add > 0 and gpu_index.ntotal > max_add:
            print("Flush indexes to CPU")
            for i in range(ngpu):
                index_src_gpu = faiss.downcast_index(gpu_index.at(i))
                index_src = faiss.index_gpu_to_cpu(index_src_gpu)
                print("  index %d size %d" % (i, index_src.ntotal))
                index_src.copy_subset_to(indexall, 0, 0, nb)
                index_src_gpu.reset()
                index_src_gpu.reserveMemory(max_add)
            gpu_index.sync_with_shard_indexes()

        print('\r%d/%d (%.3f s)  ' % (i0, nb, time.time() - t0), end=' ')
        sys.stdout.flush()
    print("Add time: %.3f s" % (time.time() - t0))

    print("Aggregate indexes to CPU")
    t0 = time.time()

    if hasattr(gpu_index, 'at'):
        # it is a sharded index
        for i in range(ngpu):
            index_src = faiss.index_gpu_to_cpu(gpu_index.at(i))
            print("  index %d size %d" % (i, index_src.ntotal))
            index_src.copy_subset_to(indexall, 0, 0, nb)
    else:
        # simple index
        index_src = faiss.index_gpu_to_cpu(gpu_index)
        index_src.copy_subset_to(indexall, 0, 0, nb)

    print("  done in %.3f s" % (time.time() - t0))

    if max_add > 0:
        # it does not contain all the vectors
        gpu_index = None

    return gpu_index, indexall
Example 5
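Computes exact ground-truth neighbors on the GPUs by scanning the database in blocks with a flat L2 index and merging the per-block results into max-heaps.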
def compute_GT():
    print("compute GT")
    t0 = time.time()

    gt_I = np.zeros((nq_gt, gt_sl), dtype='int64')
    gt_D = np.zeros((nq_gt, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k = gt_sl
    heaps.nh = nq_gt
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()
    bs = 10**5

    n, d = xb.shape
    xqs = sanitize(xq[:nq_gt])

    db_gt = faiss.IndexFlatL2(d)
    vres, vdev = make_vres_vdev()
    db_gt_gpu = faiss.index_cpu_to_gpu_multiple(vres, vdev, db_gt)

    # compute ground-truth by blocks of bs, and add to heaps
    for i0, xsl in dataset_iterator(xb, IdentPreproc(d), bs):
        db_gt_gpu.add(xsl)
        D, I = db_gt_gpu.search(xqs, gt_sl)
        I += i0
        heaps.addn_with_ids(gt_sl, faiss.swig_ptr(D), faiss.swig_ptr(I), gt_sl)
        db_gt_gpu.reset()
        print("\r   %d/%d, %.3f s" % (i0, n, time.time() - t0), end=' ')
    print()
    heaps.reorder()

    print("GT time: %.3f s" % (time.time() - t0))
    return gt_I
Example 6
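Trains and populates an "IVF4096,PQ64" index directly on the GPUs, then sets nprobe through the self.ps parameter-space helper.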
    def build_index(self):
        nt, d = self.xt.shape
        index = faiss.index_factory(d, "IVF4096,PQ64")
        self.index = faiss.index_cpu_to_gpu_multiple(self.vres, self.vdev,
                                                     index, self.co)

        self.index.train(self.xt)
        self.index.add(self.xb)

        self.ps.initialize(self.index)
        self.ps.set_index_parameter(self.index, 'nprobe', self.nprobe)

        print("finish building index")
Example 7
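Moves an index to a single GPU or to several, depending on whether a device id or a set of multi-GPU options was configured.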
    def _to_gpu(self, index):
        if self.device > -1:
            self.faiss_res = faiss.StandardGpuResources()
            return faiss.index_cpu_to_gpu(self.faiss_res, self.device, index)
        elif self.faiss_gpu_options is not None:
            return faiss.index_cpu_to_gpu_multiple(
                self.faiss_gpu_options.resource_vec,
                self.faiss_gpu_options.device_vec,
                index,
                self.faiss_gpu_options.cloner_options,
            )
        else:
            return index
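Example 8
Builds sharded cloner options (float16 storage, CPU-side IDs, space reserved for max_add vectors) and clones self.index onto all self.ngpu GPUs.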
    def moveCPUtoGPU(self):
        co = faiss.GpuMultipleClonerOptions()
        co.useFloat16 = self.use_float16
        co.useFloat16CoarseQuantizer = False
        co.usePrecomputed = self.use_precomputed_tables
        co.indicesOptions = faiss.INDICES_CPU
        co.verbose = True
        co.reserveVecs = self.max_add
        co.shard = True
        vres, vdev = indexfunctions.make_vres_vdev(self.gpu_resources,
                                                   ngpu=self.ngpu)
        self.gpu_index = faiss.index_cpu_to_gpu_multiple(
            vres, vdev, self.index, co)
Example 9
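Same ground-truth computation as Example 5, but sets up its own GPU resources with an explicit temporary-memory budget.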
def compute_GT_GPU(xb, xq, gt_sl):
    nq_gt, _ = xq.shape
    print("compute GT GPU")
    t0 = time.time()

    gt_I = np.zeros((nq_gt, gt_sl), dtype='int64')
    gt_D = np.zeros((nq_gt, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k = gt_sl
    heaps.nh = nq_gt
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()
    bs = 10 ** 5
    # Please change this based on your GPU memory size.
    tempmem = 3500*1024*1024

    n, d = xb.shape
    xqs = sanitize(xq[:nq_gt])
 
    ngpu = faiss.get_num_gpus()
    gpu_resources = []

    for i in range(ngpu):
        res = faiss.StandardGpuResources()
        res.setTempMemory(tempmem)
        gpu_resources.append(res)

    vres = faiss.GpuResourcesVector()
    vdev = faiss.IntVector()
    for i in range(0, ngpu):
        vdev.push_back(i)
        vres.push_back(gpu_resources[i])

    db_gt = faiss.IndexFlatL2(d)
    db_gt_gpu = faiss.index_cpu_to_gpu_multiple(
        vres, vdev, db_gt)

    # compute ground-truth by blocks of bs, and add to heaps
    for i0, xsl in dataset_iterator(xb, IdentPreproc(d), bs):
        db_gt_gpu.add(xsl)
        D, I = db_gt_gpu.search(xqs, gt_sl)
        I += i0
        heaps.addn_with_ids(
            gt_sl, faiss.swig_ptr(D), faiss.swig_ptr(I), gt_sl)
        db_gt_gpu.reset()
    heaps.reorder()

    print("GT GPU time: {} s".format(time.time() - t0))
    return gt_I, gt_D
Example 10
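Minimal sharded clone of a CPU index, reserving space for N vectors and keeping the IDs on the CPU.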
def copyToGpu(index_cpu):

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = useFloat16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = usePrecomputed
    co.indicesOptions = faiss.INDICES_CPU
    co.verbose = True
    co.reserveVecs = N
    co.shard = True
    assert co.shard_type in (0, 1, 2)
    vres, vdev = make_vres_vdev()
    index_gpu = faiss.index_cpu_to_gpu_multiple(vres, vdev, index_cpu, co)

    return index_gpu
Example 11
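Trains and populates several IVFPQ indexes on synthetic data, sharding each one across the configured GPUs.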
def IVFPQMultiGpu(config):
    print("IVFPQMultiGpu, ", config)
    d = config['dimension']  # dimension
    nb = config['db_size']  # database size
    nq = config['query_num']  # nb of queries
    k = config['top_k']
    config_gpus = config['gpus']

    ngpus = faiss.get_num_gpus()
    print("number of GPUs:", ngpus, ",running on gpus:", config_gpus)
    gpus = range(config_gpus)
    res = [faiss.StandardGpuResources() for _ in gpus]
    vres = faiss.GpuResourcesVector()
    vdev = faiss.IntVector()
    # iterate with a distinct name so the resource list itself stays alive
    for i, r in zip(gpus, res):
        vdev.push_back(i)
        vres.push_back(r)

    index_list = []

    for i in range(config['db_num']):
        # Using an IVFPQ index
        np.random.seed(i)
        xb = np.random.random((nb, d)).astype('float32')
        xb[:, 0] += np.arange(nb) / 1000.
        nlist = config['nlist']
        m = config['sub_quantizers']
        code = config['bits_per_code']
        # begin_time = time.time()
        quantizer = faiss.IndexFlatL2(d)  # coarse quantizer
        index_ivfpq = faiss.IndexIVFPQ(quantizer, d, nlist, m, code)
        # IndexIVFPQ uses METRIC_L2 by default

        # build the index
        gpu_index_ivfpq = faiss.index_cpu_to_gpu_multiple(
            vres, vdev, index_ivfpq)
        gpu_index_ivfpq.referenced_objects = res

        assert not gpu_index_ivfpq.is_trained
        gpu_index_ivfpq.train(xb)  # train the index
        assert gpu_index_ivfpq.is_trained

        gpu_index_ivfpq.add(xb)  # add vectors to the index
        print(i, ",size = ", gpu_index_ivfpq.ntotal)
        index_list.append(gpu_index_ivfpq)
    return index_list
Example 12
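Runs k-means clustering on preprocessed data, using a flat L2 index spread over the GPUs to accelerate the assignment step.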
def train_coarse_quantizer(x, k, preproc):
    d = preproc.d_out
    clus = faiss.Clustering(d, k)
    clus.verbose = True
    # clus.niter = 2
    clus.max_points_per_centroid = 10000000

    print("apply preproc on shape", x.shape, "k=", k)
    t0 = time.time()
    x = preproc.apply_py(sanitize(x))
    print("   preproc %.3f s output shape %s" % (time.time() - t0, x.shape))

    vres, vdev = make_vres_vdev()
    index = faiss.index_cpu_to_gpu_multiple(vres, vdev, faiss.IndexFlatL2(d))

    clus.train(x, index)
    centroids = faiss.vector_float_to_array(clus.centroids)

    return centroids.reshape(k, d)
Example 13
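Three-stage add pipeline: batches are preprocessed, assigned to their coarse cluster on the GPUs, then inserted into the CPU index via add_core / add_core_o.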
def compute_populated_index_2(preproc):

    indexall = prepare_trained_index(preproc)

    # set up a 3-stage pipeline that does:
    # - stage 1: load + preproc
    # - stage 2: assign on GPU
    # - stage 3: add to index

    stage1 = dataset_iterator(xb, preproc, add_batch_size)

    vres, vdev = make_vres_vdev()
    coarse_quantizer_gpu = faiss.index_cpu_to_gpu_multiple(
        vres, vdev, indexall.quantizer)

    def quantize(args):
        (i0, xs) = args
        _, assign = coarse_quantizer_gpu.search(xs, 1)
        return i0, xs, assign.ravel()

    stage2 = rate_limited_imap(quantize, stage1)

    print("add...")
    t0 = time.time()
    nb = xb.shape[0]

    for i0, xs, assign in stage2:
        i1 = i0 + xs.shape[0]
        if indexall.__class__ == faiss.IndexIVFPQ:
            indexall.add_core_o(i1 - i0, faiss.swig_ptr(xs), None, None,
                                faiss.swig_ptr(assign))
        elif indexall.__class__ == faiss.IndexIVFFlat:
            indexall.add_core(i1 - i0, faiss.swig_ptr(xs), None,
                              faiss.swig_ptr(assign))
        else:
            assert False

        print('\r%d/%d (%.3f s)  ' % (i0, nb, time.time() - t0), end=' ')
        sys.stdout.flush()
    print("Add time: %.3f s" % (time.time() - t0))

    return None, indexall
Example 14
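Prepares sharded cloner options and a GPU clone of a caller-owned index before vectors are added.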
    def adding_initialize(self, index):
        """
        The index should be owned by caller.
        """

        assert self.ngpu > 0
        print_message('Adding initialize...')
        self.co = faiss.GpuMultipleClonerOptions()
        self.co.useFloat16 = True
        self.co.useFloat16CoarseQuantizer = False
        self.co.usePrecomputed = False
        self.co.indicesOptions = faiss.INDICES_CPU
        self.co.verbose = True
        self.co.reserveVecs = self.max_add
        self.co.shard = True
        assert self.co.shard_type in (0, 1, 2)

        self.vres, self.vdev = self._make_vres_vdev()
        self.gpu_index = faiss.index_cpu_to_gpu_multiple(
            self.vres, self.vdev, index, self.co)
Example 15
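Builds a flat L2 index sharded over all GPUs and fills it with the previously loaded database arrays.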
    def __loadIndex(self):
        assert self.dbs != [], "You should load db before load index, use self.loadDB() ..."
        d = self.dbs[0].shape[-1]
        ngpu = faiss.get_num_gpus()
        index = faiss.IndexFlatL2(d)
        vres = faiss.GpuResourcesVector()
        vdev = faiss.IntVector()
        gpu_resources = []

        for i in range(0, ngpu):
            res = faiss.StandardGpuResources()
            gpu_resources.append(res)
            vdev.push_back(i)
            vres.push_back(res)

        co = faiss.GpuMultipleClonerOptions()
        co.shard = True
        self.gpu_index = faiss.index_cpu_to_gpu_multiple(vres, vdev, index, co)
        self.gpu_index.referenced_objects = gpu_resources
        # add each database array in turn; add() expects one float32 matrix
        for db in self.dbs:
            self.gpu_index.add(db)
Example 16
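Loads or builds a flat inner-product index, then moves it to one GPU or shards it over several, reusing a global list of GPU resources.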
def load_index(passage_embeddings, index_path, faiss_gpu_index, use_gpu):
    dim = passage_embeddings.shape[1]
    if index_path is None:
        index = faiss.index_factory(dim, "Flat", faiss.METRIC_INNER_PRODUCT)
        index.add(passage_embeddings)
    else:
        index = faiss.read_index(index_path)
    if faiss_gpu_index and use_gpu:
        if len(faiss_gpu_index) == 1:
            res = faiss.StandardGpuResources()
            res.setTempMemory(1024 * 1024 * 1024)
            co = faiss.GpuClonerOptions()
            co.useFloat16 = bool(index_path)  # float16 only when a prebuilt index was loaded
            # faiss_gpu_index is a one-element list here; index_cpu_to_gpu takes a device id
            index = faiss.index_cpu_to_gpu(res, faiss_gpu_index[0], index, co)
        else:
            assert not index_path  # Only need one GPU for compressed index
            global gpu_resources
            import torch
            for i in range(torch.cuda.device_count()):
                res = faiss.StandardGpuResources()
                res.setTempMemory(128 * 1024 * 1024)
                gpu_resources.append(res)

            assert isinstance(faiss_gpu_index, list)
            vres = faiss.GpuResourcesVector()
            vdev = faiss.IntVector()
            co = faiss.GpuMultipleClonerOptions()
            co.shard = True
            for i in faiss_gpu_index:
                vdev.push_back(i)
                vres.push_back(gpu_resources[i])
            index = faiss.index_cpu_to_gpu_multiple(vres, vdev, index, co)

    return index