def get_populated_index(preproc):

    if not index_cachefile or not os.path.exists(index_cachefile):
        if not altadd:
            gpu_index, indexall = compute_populated_index(preproc)
        else:
            gpu_index, indexall = compute_populated_index_2(preproc)
        if index_cachefile:
            print("store", index_cachefile)
            faiss.write_index(indexall, index_cachefile)
    else:
        print("load", index_cachefile)
        indexall = faiss.read_index(index_cachefile)
        gpu_index = None

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = use_float16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = use_precomputed_tables
    co.indicesOptions = 0
    co.verbose = True
    co.shard = True    # the replicas will be made "manually"
    t0 = time.time()
    print("CPU index contains %d vectors, move to GPU" % indexall.ntotal)
    if replicas == 1:
        if not gpu_index:
            print("copying loaded index to GPUs")
            vres, vdev = make_vres_vdev()
            index = faiss.index_cpu_to_gpu_multiple(
                vres, vdev, indexall, co)
        else:
            index = gpu_index
    else:
        del gpu_index  # we override the GPU index

        print("Copy CPU index to %d sharded GPU indexes" % replicas)
        index = faiss.IndexProxy()
        for i in range(replicas):
            # integer division so gpu0/gpu1 are valid device indices
            gpu0 = ngpu * i // replicas
            gpu1 = ngpu * (i + 1) // replicas
            vres, vdev = make_vres_vdev(gpu0, gpu1)
            print("   dispatch to GPUs %d:%d" % (gpu0, gpu1))
            index1 = faiss.index_cpu_to_gpu_multiple(
                vres, vdev, indexall, co)
            index1.this.disown()
            index.addIndex(index1)
        index.own_fields = True
    del indexall
    print("move to GPU done in %.3f s" % (time.time() - t0))
    return index

def get_populated_index(preproc):

    if not index_cachefile or not os.path.exists(index_cachefile):
        if not altadd:
            gpu_index, indexall = compute_populated_index(preproc)
        else:
            gpu_index, indexall = compute_populated_index_2(preproc)
        if index_cachefile:
            print("store", index_cachefile)
            faiss.write_index(indexall, index_cachefile)
    else:
        print("load", index_cachefile)
        indexall = faiss.read_index(index_cachefile)
        gpu_index = None

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = use_float16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = use_precomputed_tables
    co.indicesOptions = 0
    co.verbose = 10
    co.shard = True    # the replicas will be made "manually"
    t0 = time.time()
    print("CPU index contains %d vectors, move to GPU" % indexall.ntotal)
    if replicas == 1:
        if not gpu_index:
            print("copying loaded index to GPUs")
            vres, vdev = make_vres_vdev()
            index = faiss.index_cpu_to_gpu_multiple(
                vres, vdev, indexall, co)
        else:
            index = gpu_index
    else:
        del gpu_index  # we override the GPU index

        print("Copy CPU index to %d sharded GPU indexes" % replicas)
        index = faiss.IndexProxy()
        for i in range(replicas):
            # integer division so gpu0/gpu1 are valid device indices
            gpu0 = ngpu * i // replicas
            gpu1 = ngpu * (i + 1) // replicas
            vres, vdev = make_vres_vdev(gpu0, gpu1)
            print("   dispatch to GPUs %d:%d" % (gpu0, gpu1))
            index1 = faiss.index_cpu_to_gpu_multiple(
                vres, vdev, indexall, co)
            index1.this.disown()
            index.addIndex(index1)
        index.own_fields = True
    del indexall
    print("move to GPU done in %.3f s" % (time.time() - t0))
    return index

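# Several snippets in this collection call a module-level make_vres_vdev()
# helper. This is a sketch of it, mirroring the inline version defined in
# get_gpu_index() below, and assuming module-level `ngpu` and `gpu_resources`
# have been set up beforehand:

import faiss

ngpu = faiss.get_num_gpus()
gpu_resources = [faiss.StandardGpuResources() for _ in range(ngpu)]

def make_vres_vdev(i0=0, i1=-1):
    "return vectors of device ids and resources useful for gpu_multiple"
    vres = faiss.GpuResourcesVector()
    vdev = faiss.IntVector()
    if i1 == -1:
        i1 = ngpu
    for i in range(i0, i1):
        vdev.push_back(i)
        vres.push_back(gpu_resources[i])
    return vres, vdev
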
def convert_index_to_gpu(index, faiss_gpu_index, useFloat16=False):
    if type(faiss_gpu_index) == list and len(faiss_gpu_index) == 1:
        faiss_gpu_index = faiss_gpu_index[0]

    if isinstance(faiss_gpu_index, int):
        res = faiss.StandardGpuResources()
        res.setTempMemory(512 * 1024 * 1024)
        co = faiss.GpuClonerOptions()
        co.useFloat16 = useFloat16
        index = faiss.index_cpu_to_gpu(res, faiss_gpu_index, index, co)
    else:
        global gpu_resources
        if len(gpu_resources) == 0:
            import torch
            for i in range(torch.cuda.device_count()):
                res = faiss.StandardGpuResources()
                res.setTempMemory(256 * 1024 * 1024)
                gpu_resources.append(res)

        assert isinstance(faiss_gpu_index, list)
        vres = faiss.GpuResourcesVector()
        vdev = faiss.IntVector()
        co = faiss.GpuMultipleClonerOptions()
        co.shard = True
        co.useFloat16 = useFloat16
        for i in faiss_gpu_index:
            vdev.push_back(i)
            vres.push_back(gpu_resources[i])
        index = faiss.index_cpu_to_gpu_multiple(vres, vdev, index, co)

    return index

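# A hypothetical usage sketch for convert_index_to_gpu(). The module-level
# `gpu_resources` list it appends to must exist, torch is assumed to be
# installed, and the data shapes are made up:

import numpy as np
import faiss

gpu_resources = []

d = 128
xb = np.random.rand(100000, d).astype('float32')
cpu_index = faiss.IndexFlatIP(d)
cpu_index.add(xb)

# pass an int for a single GPU, or a list of device ids to shard
index = convert_index_to_gpu(cpu_index, [0, 1], useFloat16=True)
D, I = index.search(xb[:5], 10)
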
def get_gpu_index(cpu_index):
    gpu_resources = []
    ngpu = faiss.get_num_gpus()
    tempmem = -1
    for i in range(ngpu):
        res = faiss.StandardGpuResources()
        if tempmem >= 0:
            res.setTempMemory(tempmem)
        gpu_resources.append(res)

    def make_vres_vdev(i0=0, i1=-1):
        "return vectors of device ids and resources useful for gpu_multiple"
        vres = faiss.GpuResourcesVector()
        vdev = faiss.IntVector()
        if i1 == -1:
            i1 = ngpu
        for i in range(i0, i1):
            vdev.push_back(i)
            vres.push_back(gpu_resources[i])
        return vres, vdev

    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    gpu_vector_resources, gpu_devices_vector = make_vres_vdev(0, ngpu)
    gpu_index = faiss.index_cpu_to_gpu_multiple(gpu_vector_resources,
                                                gpu_devices_vector,
                                                cpu_index, co)
    return gpu_index

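# A minimal usage sketch for get_gpu_index() with toy data (the sizes are
# assumptions). Note that the StandardGpuResources created inside the
# function must outlive the returned index, so in production code it is
# safer to keep a reference to them, as convert_index_to_gpu() above does.

import numpy as np
import faiss

d = 64
xb = np.random.rand(10000, d).astype('float32')
xq = np.random.rand(5, d).astype('float32')

gpu_index = get_gpu_index(faiss.IndexFlatL2(d))
gpu_index.add(xb)
D, I = gpu_index.search(xq, 10)
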
def compute_populated_index(preproc):
    """Add elements to a sharded index. Return the index and if available
    a sharded gpu_index that contains the same data."""

    indexall = prepare_trained_index(preproc)

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = use_float16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = use_precomputed_tables
    co.indicesOptions = faiss.INDICES_CPU
    co.verbose = True
    co.reserveVecs = max_add if max_add > 0 else xb.shape[0]
    co.shard = True
    assert co.shard_type in (0, 1, 2)

    vres, vdev = make_vres_vdev()
    gpu_index = faiss.index_cpu_to_gpu_multiple(vres, vdev, indexall, co)

    print("add...")
    t0 = time.time()
    nb = xb.shape[0]
    for i0, xs in dataset_iterator(xb, preproc, add_batch_size):
        i1 = i0 + xs.shape[0]
        gpu_index.add_with_ids(xs, np.arange(i0, i1))
        if max_add > 0 and gpu_index.ntotal > max_add:
            print("Flush indexes to CPU")
            for i in range(ngpu):
                index_src_gpu = faiss.downcast_index(gpu_index.at(i))
                index_src = faiss.index_gpu_to_cpu(index_src_gpu)
                print("  index %d size %d" % (i, index_src.ntotal))
                index_src.copy_subset_to(indexall, 0, 0, nb)
                index_src_gpu.reset()
                index_src_gpu.reserveMemory(max_add)
            gpu_index.sync_with_shard_indexes()

        print('\r%d/%d (%.3f s)  ' % (i0, nb, time.time() - t0), end=' ')
        sys.stdout.flush()
    print("Add time: %.3f s" % (time.time() - t0))

    print("Aggregate indexes to CPU")
    t0 = time.time()

    if hasattr(gpu_index, 'at'):
        # it is a sharded index
        for i in range(ngpu):
            index_src = faiss.index_gpu_to_cpu(gpu_index.at(i))
            print("  index %d size %d" % (i, index_src.ntotal))
            index_src.copy_subset_to(indexall, 0, 0, nb)
    else:
        # simple index
        index_src = faiss.index_gpu_to_cpu(gpu_index)
        index_src.copy_subset_to(indexall, 0, 0, nb)

    print("  done in %.3f s" % (time.time() - t0))

    if max_add > 0:
        # it does not contain all the vectors
        gpu_index = None

    return gpu_index, indexall

def compute_GT():
    print("compute GT")
    t0 = time.time()

    gt_I = np.zeros((nq_gt, gt_sl), dtype='int64')
    gt_D = np.zeros((nq_gt, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k = gt_sl
    heaps.nh = nq_gt
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()
    bs = 10 ** 5

    n, d = xb.shape
    xqs = sanitize(xq[:nq_gt])

    db_gt = faiss.IndexFlatL2(d)
    vres, vdev = make_vres_vdev()
    db_gt_gpu = faiss.index_cpu_to_gpu_multiple(vres, vdev, db_gt)

    # compute ground-truth by blocks of bs, and add to heaps
    for i0, xsl in dataset_iterator(xb, IdentPreproc(d), bs):
        db_gt_gpu.add(xsl)
        D, I = db_gt_gpu.search(xqs, gt_sl)
        I += i0
        heaps.addn_with_ids(gt_sl, faiss.swig_ptr(D),
                            faiss.swig_ptr(I), gt_sl)
        db_gt_gpu.reset()
        print("\r   %d/%d, %.3f s" % (i0, n, time.time() - t0), end=' ')
    print()
    heaps.reorder()

    print("GT time: %.3f s" % (time.time() - t0))
    return gt_I

def compute_populated_index(preproc):
    """Add elements to a sharded index. Return the index and if available
    a sharded gpu_index that contains the same data."""

    indexall = prepare_trained_index(preproc)

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = use_float16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = use_precomputed_tables
    co.indicesOptions = faiss.INDICES_CPU
    co.verbose = 10
    co.reserveVecs = max_add if max_add > 0 else xb.shape[0]
    co.shard = True

    vres, vdev = make_vres_vdev()
    gpu_index = faiss.index_cpu_to_gpu_multiple(
        vres, vdev, indexall, co)

    print("add...")
    t0 = time.time()
    nb = xb.shape[0]
    for i0, xs in dataset_iterator(xb, preproc, add_batch_size):
        i1 = i0 + xs.shape[0]
        gpu_index.add_with_ids(xs, np.arange(i0, i1))
        if max_add > 0 and gpu_index.ntotal > max_add:
            print("Flush indexes to CPU")
            for i in range(ngpu):
                index_src_gpu = faiss.downcast_index(gpu_index.at(i))
                index_src = faiss.index_gpu_to_cpu(index_src_gpu)
                print("  index %d size %d" % (i, index_src.ntotal))
                index_src.copy_subset_to(indexall, 0, 0, nb)
                index_src_gpu.reset()
                index_src_gpu.reserveMemory(max_add)
            gpu_index.sync_with_shard_indexes()

        print('\r%d/%d (%.3f s)  ' % (i0, nb, time.time() - t0), end=' ')
        sys.stdout.flush()
    print("Add time: %.3f s" % (time.time() - t0))

    print("Aggregate indexes to CPU")
    t0 = time.time()

    for i in range(ngpu):
        index_src = faiss.index_gpu_to_cpu(gpu_index.at(i))
        print("  index %d size %d" % (i, index_src.ntotal))
        index_src.copy_subset_to(indexall, 0, 0, nb)

    print("  done in %.3f s" % (time.time() - t0))

    if max_add > 0:
        # it does not contain all the vectors
        gpu_index = None

    return gpu_index, indexall

def build_index(self):
    nt, d = self.xt.shape
    index = faiss.index_factory(d, "IVF4096,PQ64")
    self.index = faiss.index_cpu_to_gpu_multiple(self.vres, self.vdev,
                                                 index, self.co)
    self.index.train(self.xt)
    self.index.add(self.xb)
    self.ps.initialize(self.index)
    self.ps.set_index_parameter(self.index, 'nprobe', self.nprobe)
    print("finish building index")

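# build_index() above expects several attributes on self; this is a sketch
# of one plausible setup. The attribute names come from the method body,
# everything else (class name, defaults) is an assumption:

import faiss

class IVFPQBench:
    def __init__(self, xt, xb, nprobe=32):
        self.xt, self.xb, self.nprobe = xt, xb, nprobe
        ngpu = faiss.get_num_gpus()
        self.resources = [faiss.StandardGpuResources() for _ in range(ngpu)]
        self.vres = faiss.GpuResourcesVector()
        self.vdev = faiss.IntVector()
        for i, res in enumerate(self.resources):
            self.vdev.push_back(i)
            self.vres.push_back(res)
        self.co = faiss.GpuMultipleClonerOptions()
        self.co.shard = True
        # GpuParameterSpace knows how to set nprobe on GPU indexes
        self.ps = faiss.GpuParameterSpace()

    # reuse the module-level build_index() above as a method
    build_index = build_index
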
def _to_gpu(self, index):
    if self.device > -1:
        self.faiss_res = faiss.StandardGpuResources()
        return faiss.index_cpu_to_gpu(self.faiss_res, self.device, index)
    elif self.faiss_gpu_options is not None:
        return faiss.index_cpu_to_gpu_multiple(
            self.faiss_gpu_options.resource_vec,
            self.faiss_gpu_options.device_vec,
            index,
            self.faiss_gpu_options.cloner_options,
        )
    else:
        return index

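# _to_gpu() only touches three attributes of self.faiss_gpu_options. A
# minimal holder that satisfies it; the class name and constructor are
# assumptions, not a faiss API:

import faiss

class FaissGpuOptions:
    def __init__(self, device_ids, shard=True):
        # keep Python references to the resources so they are not GC'd
        self.resources = [faiss.StandardGpuResources() for _ in device_ids]
        self.resource_vec = faiss.GpuResourcesVector()
        self.device_vec = faiss.IntVector()
        for i, res in zip(device_ids, self.resources):
            self.device_vec.push_back(i)
            self.resource_vec.push_back(res)
        self.cloner_options = faiss.GpuMultipleClonerOptions()
        self.cloner_options.shard = shard
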
def moveCPUtoGPU(self):
    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = self.use_float16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = self.use_precomputed_tables
    co.indicesOptions = faiss.INDICES_CPU
    co.verbose = True
    co.reserveVecs = self.max_add
    co.shard = True
    vres, vdev = indexfunctions.make_vres_vdev(self.gpu_resources,
                                               ngpu=self.ngpu)
    self.gpu_index = faiss.index_cpu_to_gpu_multiple(
        vres, vdev, self.index, co)

def compute_GT_GPU(xb, xq, gt_sl):
    nq_gt, _ = xq.shape
    print("compute GT GPU")
    t0 = time.time()

    gt_I = np.zeros((nq_gt, gt_sl), dtype='int64')
    gt_D = np.zeros((nq_gt, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k = gt_sl
    heaps.nh = nq_gt
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()
    bs = 10 ** 5
    # Please change this based on your GPU memory size.
    tempmem = 3500 * 1024 * 1024

    n, d = xb.shape
    xqs = sanitize(xq[:nq_gt])

    ngpu = faiss.get_num_gpus()
    gpu_resources = []
    for i in range(ngpu):
        res = faiss.StandardGpuResources()
        res.setTempMemory(tempmem)
        gpu_resources.append(res)

    vres = faiss.GpuResourcesVector()
    vdev = faiss.IntVector()
    for i in range(0, ngpu):
        vdev.push_back(i)
        vres.push_back(gpu_resources[i])

    db_gt = faiss.IndexFlatL2(d)
    db_gt_gpu = faiss.index_cpu_to_gpu_multiple(vres, vdev, db_gt)

    # compute ground-truth by blocks of bs, and add to heaps
    for i0, xsl in dataset_iterator(xb, IdentPreproc(d), bs):
        db_gt_gpu.add(xsl)
        D, I = db_gt_gpu.search(xqs, gt_sl)
        I += i0
        heaps.addn_with_ids(
            gt_sl, faiss.swig_ptr(D), faiss.swig_ptr(I), gt_sl)
        db_gt_gpu.reset()
    heaps.reorder()

    print("GT GPU time: {} s".format(time.time() - t0))
    return gt_I, gt_D

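# A hypothetical call to compute_GT_GPU(); sanitize(), dataset_iterator()
# and IdentPreproc come from the same benchmark script, and the array sizes
# are made up:

import numpy as np

xb = np.random.rand(10**6, 96).astype('float32')  # database vectors
xq = np.random.rand(1000, 96).astype('float32')   # query vectors
gt_I, gt_D = compute_GT_GPU(xb, xq, 100)          # top-100 ground truth
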
def copyToGpu(index_cpu):
    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = useFloat16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = usePrecomputed
    co.indicesOptions = faiss.INDICES_CPU
    co.verbose = True
    co.reserveVecs = N
    co.shard = True
    assert co.shard_type in (0, 1, 2)

    vres, vdev = make_vres_vdev()
    index_gpu = faiss.index_cpu_to_gpu_multiple(vres, vdev, index_cpu, co)
    return index_gpu

def IVFPQMultiGpu(config):
    print("IVFPQMultiGpu, ", config)
    d = config['dimension']       # dimension
    nb = config['db_size']        # database size
    nq = config['query_num']      # nb of queries
    k = config['top_k']
    config_gpus = config['gpus']
    ngpus = faiss.get_num_gpus()
    print("number of GPUs:", ngpus, ", running on gpus:", config_gpus)
    gpus = range(config_gpus)
    resources = [faiss.StandardGpuResources() for _ in gpus]
    vres = faiss.GpuResourcesVector()
    vdev = faiss.IntVector()
    for i, res in zip(gpus, resources):
        vdev.push_back(i)
        vres.push_back(res)

    index_list = []
    for i in range(config['db_num']):
        # Using an IVFPQ index
        np.random.seed(i)
        xb = np.random.random((nb, d)).astype('float32')
        xb[:, 0] += np.arange(nb) / 1000.
        nlist = config['nlist']
        m = config['sub_quantizers']
        code = config['bits_per_code']

        quantizer = faiss.IndexFlatL2(d)  # the coarse quantizer
        index_ivfpq = faiss.IndexIVFPQ(quantizer, d, nlist, m, code)

        # build the index on the GPUs
        gpu_index_ivfpq = faiss.index_cpu_to_gpu_multiple(
            vres, vdev, index_ivfpq)
        # keep the GPU resources alive as long as the index lives
        gpu_index_ivfpq.referenced_objects = resources

        assert not gpu_index_ivfpq.is_trained
        gpu_index_ivfpq.train(xb)
        assert gpu_index_ivfpq.is_trained

        gpu_index_ivfpq.add(xb)  # add vectors to the index
        print(i, ", size = ", gpu_index_ivfpq.ntotal)
        index_list.append(gpu_index_ivfpq)

    return index_list

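# A hypothetical config dict for IVFPQMultiGpu(); the keys mirror the
# lookups in the function body, the values are illustrative only:

config = {
    'dimension': 64,        # vector dimensionality
    'db_size': 100000,      # vectors per database
    'query_num': 100,       # number of queries
    'top_k': 10,
    'gpus': 2,              # run on GPUs 0..1
    'db_num': 2,            # build two independent indexes
    'nlist': 1024,          # IVF lists
    'sub_quantizers': 8,    # PQ sub-quantizers (m)
    'bits_per_code': 8,     # bits per PQ code
}
index_list = IVFPQMultiGpu(config)
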
def train_coarse_quantizer(x, k, preproc):
    d = preproc.d_out
    clus = faiss.Clustering(d, k)
    clus.verbose = True
    # clus.niter = 2
    clus.max_points_per_centroid = 10000000

    print("apply preproc on shape", x.shape, 'k=', k)
    t0 = time.time()
    x = preproc.apply_py(sanitize(x))
    print("   preproc %.3f s output shape %s" % (time.time() - t0, x.shape))

    vres, vdev = make_vres_vdev()
    index = faiss.index_cpu_to_gpu_multiple(vres, vdev, faiss.IndexFlatL2(d))
    clus.train(x, index)
    centroids = faiss.vector_float_to_array(clus.centroids)
    return centroids.reshape(k, d)

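# train_coarse_quantizer() receives a preprocessor exposing d_out and
# apply_py(). A sketch of the identity preprocessor used elsewhere in these
# snippets (IdentPreproc in compute_GT / compute_GT_GPU):

class IdentPreproc:
    """Identity preprocessing: output dimension equals input dimension."""

    def __init__(self, d):
        self.d_in = self.d_out = d

    def apply_py(self, x):
        return x
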
def compute_populated_index_2(preproc):

    indexall = prepare_trained_index(preproc)

    # set up a 3-stage pipeline that does:
    # - stage 1: load + preproc
    # - stage 2: assign on GPU
    # - stage 3: add to index

    stage1 = dataset_iterator(xb, preproc, add_batch_size)

    vres, vdev = make_vres_vdev()
    coarse_quantizer_gpu = faiss.index_cpu_to_gpu_multiple(
        vres, vdev, indexall.quantizer)

    def quantize(args):
        (i0, xs) = args
        _, assign = coarse_quantizer_gpu.search(xs, 1)
        return i0, xs, assign.ravel()

    stage2 = rate_limited_imap(quantize, stage1)

    print("add...")
    t0 = time.time()
    nb = xb.shape[0]
    for i0, xs, assign in stage2:
        i1 = i0 + xs.shape[0]
        if indexall.__class__ == faiss.IndexIVFPQ:
            indexall.add_core_o(i1 - i0, faiss.swig_ptr(xs),
                                None, None, faiss.swig_ptr(assign))
        elif indexall.__class__ == faiss.IndexIVFFlat:
            indexall.add_core(i1 - i0, faiss.swig_ptr(xs), None,
                              faiss.swig_ptr(assign))
        else:
            assert False

        print('\r%d/%d (%.3f s)  ' % (i0, nb, time.time() - t0), end=' ')
        sys.stdout.flush()
    print("Add time: %.3f s" % (time.time() - t0))

    return None, indexall

def adding_initialize(self, index):
    """
    The index should be owned by caller.
    """
    assert self.ngpu > 0
    print_message('Adding initialize...')
    self.co = faiss.GpuMultipleClonerOptions()
    self.co.useFloat16 = True
    self.co.useFloat16CoarseQuantizer = False
    self.co.usePrecomputed = False
    self.co.indicesOptions = faiss.INDICES_CPU
    self.co.verbose = True
    self.co.reserveVecs = self.max_add
    self.co.shard = True
    assert self.co.shard_type in (0, 1, 2)

    self.vres, self.vdev = self._make_vres_vdev()
    self.gpu_index = faiss.index_cpu_to_gpu_multiple(
        self.vres, self.vdev, index, self.co)

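# A hypothetical driver for adding_initialize(): clone once, then stream
# batches into the sharded GPU index. `indexer` (an instance of the class
# that owns adding_initialize), `cpu_index` and `xb` are assumptions:

import numpy as np

indexer.adding_initialize(cpu_index)
bs = 65536
for i0 in range(0, xb.shape[0], bs):
    xs = xb[i0:i0 + bs]
    indexer.gpu_index.add_with_ids(xs, np.arange(i0, i0 + xs.shape[0]))
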
def __loadIndex(self):
    assert self.dbs != [], "You should load the db before loading the index; use self.loadDB() ..."
    d = self.dbs[0].shape[-1]
    ngpu = faiss.get_num_gpus()
    index = faiss.IndexFlatL2(d)
    vres = faiss.GpuResourcesVector()
    vdev = faiss.IntVector()
    gpu_resources = []
    for i in range(0, ngpu):
        res = faiss.StandardGpuResources()
        gpu_resources.append(res)
        vdev.push_back(i)
        vres.push_back(res)
    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    self.gpu_index = faiss.index_cpu_to_gpu_multiple(vres, vdev, index, co)
    # keep the resources referenced so they are not garbage-collected
    self.gpu_index.referenced_objects = gpu_resources
    # add each database matrix; faiss expects contiguous float32 arrays,
    # not a Python list of arrays
    for db in self.dbs:
        self.gpu_index.add(db)

def load_index(passage_embeddings, index_path, faiss_gpu_index, use_gpu):
    dim = passage_embeddings.shape[1]
    if index_path is None:
        index = faiss.index_factory(dim, "Flat", faiss.METRIC_INNER_PRODUCT)
        index.add(passage_embeddings)
    else:
        index = faiss.read_index(index_path)

    if faiss_gpu_index and use_gpu:
        if len(faiss_gpu_index) == 1:
            res = faiss.StandardGpuResources()
            res.setTempMemory(1024 * 1024 * 1024)
            co = faiss.GpuClonerOptions()
            co.useFloat16 = bool(index_path)
            # index_cpu_to_gpu expects a single device id, not a list
            index = faiss.index_cpu_to_gpu(res, faiss_gpu_index[0], index, co)
        else:
            assert not index_path  # Only need one GPU for compressed index
            global gpu_resources
            import torch
            for i in range(torch.cuda.device_count()):
                res = faiss.StandardGpuResources()
                res.setTempMemory(128 * 1024 * 1024)
                gpu_resources.append(res)

            assert isinstance(faiss_gpu_index, list)
            vres = faiss.GpuResourcesVector()
            vdev = faiss.IntVector()
            co = faiss.GpuMultipleClonerOptions()
            co.shard = True
            for i in faiss_gpu_index:
                vdev.push_back(i)
                vres.push_back(gpu_resources[i])
            index = faiss.index_cpu_to_gpu_multiple(vres, vdev, index, co)

    return index

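# A hypothetical call to load_index(): build a flat inner-product index over
# in-memory embeddings and shard it over GPUs 0 and 1. The module-level
# `gpu_resources` list and the embedding shape are assumptions:

import numpy as np

gpu_resources = []

passage_embeddings = np.random.rand(100000, 768).astype('float32')
index = load_index(passage_embeddings, None, [0, 1], use_gpu=True)
scores, ids = index.search(passage_embeddings[:4], 10)
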