def convert_index_to_gpu(index, faiss_gpu_index, useFloat16=False):
    """Clone a CPU FAISS index onto a single GPU or onto several GPUs.

    Args:
        index: the CPU index to clone.
        faiss_gpu_index: either a device id (int) or a list of device ids.
            A one-element list is unwrapped and treated as a single device.
        useFloat16: value copied into the cloner options' ``useFloat16``.

    Returns:
        The result of ``faiss.index_cpu_to_gpu`` (int device) or of
        ``faiss.index_cpu_to_gpu_multiple`` (list of devices, with the
        ``shard`` cloner option enabled).
    """
    global gpu_resources

    device_spec = faiss_gpu_index
    if type(device_spec) is list and len(device_spec) == 1:
        device_spec = device_spec[0]

    # Single device: dedicated resource object with a 512 MiB temp buffer.
    if isinstance(device_spec, int):
        single_res = faiss.StandardGpuResources()
        single_res.setTempMemory(512 * 1024 * 1024)
        single_opts = faiss.GpuClonerOptions()
        single_opts.useFloat16 = useFloat16
        return faiss.index_cpu_to_gpu(single_res, device_spec, index, single_opts)

    # Several devices: fill the module-level resource pool once, with one
    # entry per CUDA device reported by torch (256 MiB temp buffer each).
    if len(gpu_resources) < 1:
        import torch
        for _ in range(torch.cuda.device_count()):
            pooled = faiss.StandardGpuResources()
            pooled.setTempMemory(256 * 1024 * 1024)
            gpu_resources.append(pooled)

    assert isinstance(device_spec, list)
    resource_vec = faiss.GpuResourcesVector()
    device_vec = faiss.IntVector()
    multi_opts = faiss.GpuMultipleClonerOptions()
    multi_opts.shard = True
    multi_opts.useFloat16 = useFloat16
    for device_id in device_spec:
        device_vec.push_back(device_id)
        # The pool is indexed by device id, not by position in device_spec.
        resource_vec.push_back(gpu_resources[device_id])
    return faiss.index_cpu_to_gpu_multiple(resource_vec, device_vec, index, multi_opts)
def __init__(self,
             xt_path="/home/wenqingfu/sift1b/bigann_learn.bvecs",
             xb_path="/home/wenqingfu/sift1b/bigann_base.bvecs",
             ngpu=3):
    """Load the learn/base vector files and prepare multi-GPU cloning state.

    Args:
        xt_path: path handed to ``self.mmap_bvecs`` for the training vectors.
        xb_path: path handed to ``self.mmap_bvecs`` for the database vectors.
        ngpu: number of GPU resource objects to create; device ids
            ``0 .. ngpu-1`` are registered.

    Reads ``self.db_start`` / ``self.db_end`` (in units of one million rows)
    and the name ``tempmem`` from the enclosing scope.
    """
    self.xt = self.mmap_bvecs(xt_path)
    self.xb = self.mmap_bvecs(xb_path)

    # Keep the first million training vectors and the configured base slice.
    self.xt = self.sanitize(self.xt[:1000000])
    first_row = self.db_start * 1000 * 1000
    last_row = self.db_end * 1000 * 1000
    self.xb = self.sanitize(self.xb[first_row:last_row])

    # One StandardGpuResources per GPU; a negative tempmem leaves the
    # temporary-memory setting untouched.
    self.gpu_resources = []
    for _ in range(ngpu):
        resource = faiss.StandardGpuResources()
        if tempmem >= 0:
            resource.setTempMemory(tempmem)
            print("set tempemm to %d" % tempmem)
        self.gpu_resources.append(resource)

    # Parallel vectors of device ids and resources for the *_multiple API.
    self.vres = faiss.GpuResourcesVector()
    self.vdev = faiss.IntVector()
    for device_id, resource in enumerate(self.gpu_resources):
        self.vdev.push_back(device_id)
        self.vres.push_back(resource)

    options = faiss.GpuMultipleClonerOptions()
    self.co = options
    options.useFloat16 = True
    options.useFloat16CoarseQuantizer = False
    options.usePrecomputed = False
    options.indicesOptions = 0
    options.verbose = True
    options.shard = True

    self.ps = faiss.GpuParameterSpace()
def make_vres_vdev(i0=0, i1=-1):
    """Build the resource and device-id vectors for the gpu_multiple calls.

    Covers GPUs ``i0 .. i1-1``; ``i1 == -1`` means "up to the module-level
    ``ngpu``". Resources are taken from the module-level ``gpu_resources``.

    Returns:
        ``(vres, vdev)``: a ``faiss.GpuResourcesVector`` and a
        ``faiss.IntVector`` filled in the same order.
    """
    resource_vec = faiss.GpuResourcesVector()
    device_vec = faiss.IntVector()
    stop = ngpu if i1 == -1 else i1
    for gpu_id in range(i0, stop):
        device_vec.push_back(gpu_id)
        resource_vec.push_back(gpu_resources[gpu_id])
    return resource_vec, device_vec
def make_vres_vdev(i0=0, i1=-1):
    """Build the resource and device-id vectors, offset to the assigned GPU.

    Covers logical GPUs ``i0 .. i1-1``; ``i1 == -1`` means "up to the
    module-level ``ngpu``". Each device id pushed is the logical index plus
    the module-level ``startgpu``, while the matching resource is taken from
    ``gpu_resources`` at the un-offset logical index.

    Returns:
        ``(vres, vdev)``: a ``faiss.GpuResourcesVector`` and a
        ``faiss.IntVector`` filled in the same order.
    """
    resource_vec = faiss.GpuResourcesVector()
    device_vec = faiss.IntVector()
    stop = ngpu if i1 == -1 else i1
    # WENQI: start from the assigned GPU rather than from device 0.
    for logical_id in range(i0, stop):
        device_vec.push_back(logical_id + startgpu)
        resource_vec.push_back(gpu_resources[logical_id])
    return resource_vec, device_vec
def compute_GT_GPU(xb, xq, gt_sl):
    """Compute exact L2 nearest neighbours of ``xq`` in ``xb`` on all GPUs.

    The database is streamed in blocks through a flat L2 index cloned onto
    every GPU reported by ``faiss.get_num_gpus()``; per-block results are
    merged into a max-heap array that writes into the returned arrays.

    Args:
        xb: 2-D database array (its second dimension is the vector size).
        xq: 2-D query array.
        gt_sl: number of neighbours kept per query.

    Returns:
        ``(gt_I, gt_D)``: int64 ids and float32 distances, each of shape
        ``(number of queries, gt_sl)``.
    """
    num_queries, _ = xq.shape
    print("compute GT GPU")
    started = time.time()

    # Result buffers; the heap structure below operates on them in place.
    gt_I = np.zeros((num_queries, gt_sl), dtype='int64')
    gt_D = np.zeros((num_queries, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k = gt_sl
    heaps.nh = num_queries
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()

    # Please change these based on your GPU memory size.
    block_size = 10 ** 5
    temp_bytes = 3500*1024*1024

    _, dim = xb.shape
    queries = sanitize(xq[:num_queries])

    gpu_count = faiss.get_num_gpus()
    resources = []
    for _ in range(gpu_count):
        one_res = faiss.StandardGpuResources()
        one_res.setTempMemory(temp_bytes)
        resources.append(one_res)

    resource_vec = faiss.GpuResourcesVector()
    device_vec = faiss.IntVector()
    for device_id, one_res in enumerate(resources):
        device_vec.push_back(device_id)
        resource_vec.push_back(one_res)

    flat_cpu = faiss.IndexFlatL2(dim)
    flat_gpu = faiss.index_cpu_to_gpu_multiple(resource_vec, device_vec, flat_cpu)

    # Compute ground truth block by block and fold each block into the heaps.
    for block_start, block in dataset_iterator(xb, IdentPreproc(dim), block_size):
        flat_gpu.add(block)
        dists, ids = flat_gpu.search(queries, gt_sl)
        # Search ids are relative to the block; shift them to database ids.
        ids += block_start
        heaps.addn_with_ids(
            gt_sl, faiss.swig_ptr(dists), faiss.swig_ptr(ids), gt_sl)
        flat_gpu.reset()
    heaps.reorder()

    print("GT GPU time: {} s".format(time.time() - started))
    return gt_I, gt_D
def _make_vres_vdev(self):
    """Build the resource and device-id vectors for the gpu_multiple calls.

    Uses device ids ``0 .. self.ngpu-1`` paired with the entries of
    ``self.gpu_resources`` at the same positions.

    Returns:
        ``(vres, vdev)``: a ``faiss.GpuResourcesVector`` and a
        ``faiss.IntVector`` filled in the same order.
    """
    assert self.ngpu > 0

    resource_vec = faiss.GpuResourcesVector()
    device_vec = faiss.IntVector()
    for gpu_id in range(self.ngpu):
        device_vec.push_back(gpu_id)
        resource_vec.push_back(self.gpu_resources[gpu_id])
    return resource_vec, device_vec
def make_vres_vdev(gpu_resources, i0=0, i1=-1, ngpu=0):
    """Build the resource and device-id vectors for the gpu_multiple calls.

    Args:
        gpu_resources: indexable collection of GPU resource objects.
        i0: first GPU index (converted with ``int``).
        i1: one past the last GPU index; ``-1`` means "use ``ngpu``".
        ngpu: upper bound substituted when ``i1`` is ``-1``.

    Returns:
        ``(vres, vdev)``: a ``faiss.GpuResourcesVector`` and a
        ``faiss.IntVector`` filled in the same order.
    """
    resource_vec = faiss.GpuResourcesVector()
    device_vec = faiss.IntVector()
    if i1 == -1:
        i1 = ngpu
    first, stop = int(i0), int(i1)
    for gpu_id in range(first, stop):
        # Debug trace, emitted once per GPU with the unconverted bounds.
        print("i0: {!s}i1: {!s}".format(i0, i1))
        device_vec.push_back(gpu_id)
        resource_vec.push_back(gpu_resources[gpu_id])
    return resource_vec, device_vec
def IVFPQMultiGpu(config):
    """Build ``config['db_num']`` IVFPQ indexes over random data on several GPUs.

    Args:
        config: mapping with the keys ``dimension``, ``db_size``,
            ``query_num``, ``top_k``, ``gpus`` (number of GPUs; device ids
            ``0 .. gpus-1`` are used), ``db_num``, ``nlist``,
            ``sub_quantizers`` and ``bits_per_code``.

    Returns:
        A list with one trained and populated multi-GPU index per database.
    """
    print("IVFPQMultiGpu, ", config)
    d = config['dimension']      # dimension
    nb = config['db_size']       # database size
    # Read but not used here; kept so a missing key still fails up front.
    nq = config['query_num']     # nb of queries
    k = config['top_k']
    config_gpus = config['gpus']
    ngpus = faiss.get_num_gpus()
    print("number of GPUs:", ngpus, ",running on gpus:", config_gpus)

    gpus = range(config_gpus)
    # Keep the full list under its own name. The previous code reused `res`
    # as the loop variable, so only the last resource stayed referenced.
    gpu_resources = [faiss.StandardGpuResources() for _ in gpus]
    vres = faiss.GpuResourcesVector()
    vdev = faiss.IntVector()
    for device_id, resource in zip(gpus, gpu_resources):
        vdev.push_back(device_id)
        vres.push_back(resource)

    index_list = []
    for i in range(config['db_num']):
        # Deterministic random database, seeded by the database number.
        np.random.seed(i)
        xb = np.random.random((nb, d)).astype('float32')
        xb[:, 0] += np.arange(nb) / 1000.

        nlist = config['nlist']
        m = config['sub_quantizers']
        code = config['bits_per_code']
        quantizer = faiss.IndexFlatL2(d)  # the coarse quantizer index
        index_ivfpq = faiss.IndexIVFPQ(quantizer, d, nlist, m, code)

        # Clone the (still untrained) CPU index onto the selected GPUs.
        gpu_index_ivfpq = faiss.index_cpu_to_gpu_multiple(
            vres, vdev, index_ivfpq)
        # Pin every resource object to the index so none of them can be
        # garbage-collected while the index is alive.
        gpu_index_ivfpq.referenced_objects = gpu_resources

        assert not gpu_index_ivfpq.is_trained
        gpu_index_ivfpq.train(xb)
        assert gpu_index_ivfpq.is_trained
        gpu_index_ivfpq.add(xb)  # add vectors to the index
        print(i, ",size = ", gpu_index_ivfpq.ntotal)
        index_list.append(gpu_index_ivfpq)
    return index_list
def __loadIndex(self):
    """Create a flat L2 index on every available GPU and add ``self.dbs``.

    Requires ``self.dbs`` to be loaded first; the vector size is taken from
    the last dimension of its first element. The resulting index is stored
    in ``self.gpu_index``.
    """
    assert self.dbs != [], "You should load db before load index, use self.loadDB() ..."

    dim = self.dbs[0].shape[-1]
    gpu_count = faiss.get_num_gpus()
    cpu_index = faiss.IndexFlatL2(dim)

    resource_vec = faiss.GpuResourcesVector()
    device_vec = faiss.IntVector()
    held_resources = []
    for device_id in range(gpu_count):
        resource = faiss.StandardGpuResources()
        held_resources.append(resource)
        device_vec.push_back(device_id)
        resource_vec.push_back(resource)

    options = faiss.GpuMultipleClonerOptions()
    options.shard = True
    self.gpu_index = faiss.index_cpu_to_gpu_multiple(
        resource_vec, device_vec, cpu_index, options)
    # Attach the resource objects to the index so they stay referenced.
    self.gpu_index.referenced_objects = held_resources
    self.gpu_index.add(self.dbs)
def load_index(passage_embeddings, index_path, faiss_gpu_index, use_gpu):
    """Build or read a FAISS index and optionally move it to GPU(s).

    Args:
        passage_embeddings: 2-D embedding array; its second dimension is the
            index dimension. Added to a new flat inner-product index when
            ``index_path`` is ``None``.
        index_path: path of a saved index to read, or ``None`` to build one.
        faiss_gpu_index: list of GPU device ids. One id selects the
            single-GPU path, several ids the sharded multi-GPU path. A falsy
            value keeps the index on CPU.
        use_gpu: when falsy the index stays on CPU.

    Returns:
        The CPU index, or its GPU clone when GPU use was requested.
    """
    dim = passage_embeddings.shape[1]
    if index_path is None:
        index = faiss.index_factory(dim, "Flat", faiss.METRIC_INNER_PRODUCT)
        index.add(passage_embeddings)
    else:
        index = faiss.read_index(index_path)

    if not (faiss_gpu_index and use_gpu):
        return index

    if len(faiss_gpu_index) == 1:
        res = faiss.StandardGpuResources()
        res.setTempMemory(1024 * 1024 * 1024)
        co = faiss.GpuClonerOptions()
        # float16 is enabled only for an index read from disk.
        co.useFloat16 = bool(index_path)
        # index_cpu_to_gpu takes a single device id, so unwrap the
        # one-element list instead of passing the list itself.
        return faiss.index_cpu_to_gpu(res, faiss_gpu_index[0], index, co)

    assert not index_path  # Only need one GPU for compressed index
    global gpu_resources
    # Fill the shared resource pool only once; repeated calls previously
    # appended a fresh set of resources every time.
    if not gpu_resources:
        import torch
        for _ in range(torch.cuda.device_count()):
            res = faiss.StandardGpuResources()
            res.setTempMemory(128 * 1024 * 1024)
            gpu_resources.append(res)

    assert isinstance(faiss_gpu_index, list)
    vres = faiss.GpuResourcesVector()
    vdev = faiss.IntVector()
    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    for device_id in faiss_gpu_index:
        vdev.push_back(device_id)
        # The pool is indexed by device id.
        vres.push_back(gpu_resources[device_id])
    return faiss.index_cpu_to_gpu_multiple(vres, vdev, index, co)