def knnGPU(x, y, k, mem=512*1024*1024):
    ngpus = faiss.get_num_gpus()
    print("number of GPUs:", ngpus)
    dim = x.shape[1]
    batch_size = mem // (dim * 4)
    sim = np.zeros((x.shape[0], k), dtype=np.float32)
    ind = np.zeros((x.shape[0], k), dtype=np.int64)
    for xfrom in range(0, x.shape[0], batch_size):
        xto = min(xfrom + batch_size, x.shape[0])
        bsims, binds = [], []
        for yfrom in range(0, y.shape[0], batch_size):
            yto = min(yfrom + batch_size, y.shape[0])
            # print('{}-{} -> {}-{}'.format(xfrom, xto, yfrom, yto))
            idx = faiss.IndexFlatIP(dim)
            # idx = faiss.GpuIndexIVFFlat(dim)
            idx = faiss.index_cpu_to_all_gpus(idx)
            idx.add(y[yfrom:yto])
            bsim, bind = idx.search(x[xfrom:xto], min(k, yto - yfrom))
            bsims.append(bsim)
            binds.append(bind + yfrom)
            del idx
        bsims = np.concatenate(bsims, axis=1)
        binds = np.concatenate(binds, axis=1)
        aux = np.argsort(-bsims, axis=1)
        for i in range(xfrom, xto):
            for j in range(k):
                sim[i, j] = bsims[i - xfrom, aux[i - xfrom, j]]
                ind[i, j] = binds[i - xfrom, aux[i - xfrom, j]]
    return sim, ind
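# Hypothetical usage sketch for knnGPU above (assumes faiss-gpu and numpy are
# installed and at least one GPU is visible; shapes are illustrative). The
# index is inner-product, so rows are L2-normalized first to get cosine scores.
import numpy as np
import faiss

x = np.random.rand(1000, 64).astype('float32')
y = np.random.rand(5000, 64).astype('float32')
faiss.normalize_L2(x)
faiss.normalize_L2(y)
sim, ind = knnGPU(x, y, k=4)  # sim: (1000, 4) scores, ind: (1000, 4) row ids into y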
def global_level_semantic_sim(embs, k=50, search_batch_sz=50000,
                              index_batch_sz=500000, split=False,
                              norm=True, gpu=True):
    print('FAISS number of GPUs=', faiss.get_num_gpus())
    size = [embs[0].size(0), embs[1].size(0)]
    emb_size = embs[0].size(1)
    if norm:
        embs = apply(norm_process, *embs)
    emb_q, emb_id = apply(lambda x: x.cpu().numpy(), *embs)
    del embs
    gc.collect()
    vals, inds = [], []
    total_size = emb_id.shape[0]
    for i_batch in range(0, total_size, index_batch_sz):
        i_end = min(total_size, i_batch + index_batch_sz)
        val, ind = faiss_search_impl(emb_q, emb_id[i_batch:i_end], emb_size,
                                     i_batch, k, search_batch_sz, gpu)
        vals.append(val)
        inds.append(ind)
    vals, inds = torch.cat(vals, dim=1), torch.cat(inds, dim=1)
    print(vals.size(), inds.size())
    return topk2spmat(vals, inds, size, 0, torch.device('cpu'), split)
def get_knn(reference_embeddings, test_embeddings, k,
            embeddings_come_from_same_source=False):
    """
    Finds the k elements in reference_embeddings that are closest to each
    element of test_embeddings.
    Args:
        reference_embeddings: numpy array of size (num_samples, dimensionality).
        test_embeddings: numpy array of size (num_samples2, dimensionality).
        k: int, number of nearest neighbors to find.
        embeddings_come_from_same_source: if True, then the nearest neighbor of
            each element (which is actually itself) will be ignored.
    """
    d = reference_embeddings.shape[1]
    logging.info("running k-nn with k=%d" % k)
    logging.info("embedding dimensionality is %d" % d)
    index = faiss.IndexFlatL2(d)
    if faiss.get_num_gpus() > 0:
        index = faiss.index_cpu_to_all_gpus(index)
    index.add(reference_embeddings)
    _, indices = index.search(test_embeddings, k + 1)
    if embeddings_come_from_same_source:
        return indices[:, 1:]
    return indices[:, :k]
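# Hypothetical usage sketch for get_knn above; querying a set against itself
# with embeddings_come_from_same_source=True drops the trivial self-match.
import numpy as np

ref = np.random.rand(10000, 128).astype('float32')
neighbors = get_knn(ref, ref, k=5, embeddings_come_from_same_source=True)
print(neighbors.shape)  # (10000, 5)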
def search_gpu(query_path, refer_path, output, topk=100):
    queryfeas, queryconts = loadFeaFromPickle(query_path)
    referfeas, referconts = loadFeaFromPickle(refer_path)
    assert queryfeas.shape[1] == referfeas.shape[1]
    dim = int(queryfeas.shape[1])
    print("=> query feature shape: {}".format(queryfeas.shape), file=sys.stderr)
    print("=> refer feature shape: {}".format(referfeas.shape), file=sys.stderr)
    start = time.time()
    ngpus = faiss.get_num_gpus()
    print("=> search using {} GPUs".format(ngpus), file=sys.stderr)
    cpu_index = faiss.IndexFlat(dim, faiss.METRIC_INNER_PRODUCT)  # build the index
    gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)            # clone it to all GPUs
    gpu_index.add(referfeas)                                      # add vectors to the index
    print("=> building gpu index success, "
          "total index number: {}".format(gpu_index.ntotal), file=sys.stderr)
    distance, ind = gpu_index.search(queryfeas, int(topk))
    assert distance.shape == ind.shape
    end = time.time()
    print("=> searching total use time {}".format(end - start), file=sys.stderr)
    outdic = {}
    for key_id in range(queryfeas.shape[0]):
        querycont = queryconts[key_id]
        searchresult = [(referconts[ind[key_id][i]], distance[key_id][i])
                        for i in range(len(distance[key_id]))]
        outdic[querycont] = searchresult
    print("=> convert search gpu result to output format success")
    pickle.dump(outdic, open(output, "wb"), protocol=2)
def run_kmeans(x, nmb_clusters):
    """
    Args:
        x: data
        nmb_clusters (int): number of clusters
    Returns:
        list: ids of data in each cluster
    """
    x = c_f.to_numpy(x).astype(np.float32)
    n_data, d = x.shape
    logging.info("running k-means clustering with k=%d" % nmb_clusters)
    logging.info("embedding dimensionality is %d" % d)

    # faiss implementation of k-means
    clus = faiss.Clustering(d, nmb_clusters)
    clus.niter = 20
    clus.max_points_per_centroid = 10000000
    index = faiss.IndexFlatL2(d)
    if faiss.get_num_gpus() > 0:
        index = faiss.index_cpu_to_all_gpus(index)

    # perform the training
    clus.train(x, index)
    _, idxs = index.search(x, 1)

    return [int(n[0]) for n in idxs]
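# Hypothetical usage sketch for run_kmeans above (assumes the surrounding
# module's c_f.to_numpy helper accepts a plain ndarray and that logging
# is configured).
import numpy as np

x = np.random.rand(20000, 64).astype(np.float32)
cluster_ids = run_kmeans(x, nmb_clusters=100)  # one cluster id per row of x
print(len(cluster_ids))  # 20000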
def get_gpu_index(cpu_index):
    gpu_resources = []
    ngpu = faiss.get_num_gpus()
    tempmem = -1
    for i in range(ngpu):
        res = faiss.StandardGpuResources()
        if tempmem >= 0:
            res.setTempMemory(tempmem)
        gpu_resources.append(res)

    def make_vres_vdev(i0=0, i1=-1):
        "return vectors of device ids and resources useful for gpu_multiple"
        vres = faiss.GpuResourcesVector()
        vdev = faiss.IntVector()
        if i1 == -1:
            i1 = ngpu
        for i in range(i0, i1):
            vdev.push_back(i)
            vres.push_back(gpu_resources[i])
        return vres, vdev

    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    gpu_vector_resources, gpu_devices_vector = make_vres_vdev(0, ngpu)
    gpu_index = faiss.index_cpu_to_gpu_multiple(gpu_vector_resources,
                                                gpu_devices_vector,
                                                cpu_index, co)
    return gpu_index
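# Hypothetical usage sketch for get_gpu_index above: shard a flat CPU index
# across all visible GPUs (co.shard = True splits the vectors rather than
# replicating them). Sizes are illustrative.
import numpy as np
import faiss

xb = np.random.rand(100000, 256).astype('float32')
gpu_index = get_gpu_index(faiss.IndexFlatL2(256))
gpu_index.add(xb)
D, I = gpu_index.search(xb[:5], 10)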
def clean_faiss_gpu():
    ngpu = faiss.get_num_gpus()
    tempmem = 0
    for i in range(ngpu):
        res = faiss.StandardGpuResources()
        if tempmem >= 0:
            res.setTempMemory(tempmem)
def load_index(path_index, mode="cpu"):
    index = faiss.read_index(path_index)
    if mode == "gpu":
        ngpus = faiss.get_num_gpus()
        if ngpus > 0:
            index = faiss.index_cpu_to_all_gpus(index)
    return index
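# Hypothetical usage sketch for load_index above; 'my.index' is a placeholder
# for a file previously written with faiss.write_index. Note the function
# silently stays on CPU when mode="gpu" but no GPU is found.
import numpy as np

index = load_index("my.index", mode="gpu")
queries = np.random.rand(4, index.d).astype('float32')
D, I = index.search(queries, 10)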
def build_faiss_index(nd_feats_array, mode):
    """
    build index on CPU, a single GPU, or multiple GPUs
    :param nd_feats_array: float32 feature matrix, one row per gallery item
    :param mode: 0: CPU; 1: GPU; 2: Multi-GPU
    :return: the populated index
    """
    d = nd_feats_array.shape[1]
    cpu_index = faiss.IndexFlatL2(d)  # build the index on CPU
    if mode == 0:
        print("[INFO] Is trained? >> {}".format(cpu_index.is_trained))
        cpu_index.add(nd_feats_array)  # add vectors to the index
        print("[INFO] Capacity of gallery: {}".format(cpu_index.ntotal))
        return cpu_index
    elif mode == 1:
        ngpus = faiss.get_num_gpus()
        print("[INFO] number of GPUs:", ngpus)
        res = faiss.StandardGpuResources()  # use a single GPU
        gpu_index = faiss.index_cpu_to_gpu(res, 0, cpu_index)
        gpu_index.add(nd_feats_array)  # add vectors to the index
        print("[INFO] Capacity of gallery: {}".format(gpu_index.ntotal))
        return gpu_index
    elif mode == 2:
        multi_gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)  # build the index on multi GPUs
        multi_gpu_index.add(nd_feats_array)  # add vectors to the index
        print("[INFO] Capacity of gallery: {}".format(multi_gpu_index.ntotal))
        return multi_gpu_index
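# Hypothetical usage sketch for build_faiss_index above; feats is an
# illustrative random gallery and the mode is picked from the GPU count.
import numpy as np
import faiss

feats = np.random.rand(50000, 512).astype('float32')
ngpus = faiss.get_num_gpus()
mode = 2 if ngpus > 1 else (1 if ngpus == 1 else 0)
index = build_faiss_index(feats, mode)
D, I = index.search(feats[:3], 5)  # each query's top hit is itself, distance ~0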
def get_k(bases, xb, params, k_data, dates):
    if params['kLineFirst']:
        dim = len(params['pickedStockKLine']['values'])
    else:
        dim = len(params['pickedStockTicks']['values'])
    # query: normalize OHLC by the first open price, volume by the first volume
    kLine = params['pickedStockKLine']['values']
    open_ = list(map(lambda x: x[0], kLine))  # renamed from `open` to avoid shadowing the builtin
    close = list(map(lambda x: x[1], kLine))
    low = list(map(lambda x: x[2], kLine))
    high = list(map(lambda x: x[3], kLine))
    volume = params['pickedStockKLine']['volumes']
    query = (list(map(lambda x: x / open_[0], open_)) +
             list(map(lambda x: x / open_[0], close)) +
             list(map(lambda x: x / open_[0], low)) +
             list(map(lambda x: x / open_[0], high)) +
             list(map(lambda x: x[1] / volume[0][1], volume)))
    xq = np.array([np.array(query)]).astype('float32')
    ngpus = faiss.get_num_gpus()
    # build index
    start = time.perf_counter()  # time.clock() was removed in Python 3.8
    cpu_index = faiss.IndexFlatL2(dim * 5)
    gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)
    gpu_index.add(xb)
    D, I = gpu_index.search(xq, 10)  # have not done
    end = time.perf_counter()
    print(end - start)
    results = list(map(lambda x: bases[x], I[0]))
    print(results)
    return jsonify(back_and_front(results, k_data, dates))  # not yet
def __init__(self, target, nprobe=128, index_factory_str=None,
             verbose=False, mode='proxy', using_gpu=True):
    self._res_list = []

    num_gpu = faiss.get_num_gpus()
    print('[faiss gpu] #GPU: {}'.format(num_gpu))

    size, dim = target.shape
    assert size > 0, "size: {}".format(size)
    index_factory_str = "IVF{},PQ{}".format(
        min(8192, 16 * round(np.sqrt(size))),
        32) if index_factory_str is None else index_factory_str
    cpu_index = faiss.index_factory(dim, index_factory_str)
    cpu_index.nprobe = nprobe

    if mode == 'proxy':
        co = faiss.GpuClonerOptions()
        co.useFloat16 = True
        co.usePrecomputed = False

        index = faiss.IndexProxy()  # note: renamed IndexReplicas in recent FAISS releases
        for i in range(num_gpu):
            res = faiss.StandardGpuResources()
            self._res_list.append(res)
            sub_index = faiss.index_cpu_to_gpu(
                res, i, cpu_index, co) if using_gpu else cpu_index
            index.addIndex(sub_index)
    elif mode == 'shard':
        co = faiss.GpuMultipleClonerOptions()
        co.useFloat16 = True
        co.usePrecomputed = False
        co.shard = True
        index = faiss.index_cpu_to_all_gpus(cpu_index, co, ngpu=num_gpu)
    else:
        raise KeyError("Unknown index mode")

    index = faiss.IndexIDMap(index)
    index.verbose = verbose

    # parse nlist out of the factory string to decide how many samples to train on
    nlist = int(float([item for item in index_factory_str.split(",")
                       if 'IVF' in item][0].replace("IVF", "")))

    # training
    if not index.is_trained:
        indexes_sample_for_train = np.random.randint(0, size, nlist * 256)
        index.train(target[indexes_sample_for_train])

    # add with ids
    target_ids = np.arange(0, size)
    index.add_with_ids(target, target_ids)
    self.index = index
def init_index(self):
    d = 128
    ngpus = faiss.get_num_gpus()
    print("number of GPUs:", ngpus)
    # note: despite printing the GPU count, this variant keeps the index on CPU
    self.cpu_index = faiss.IndexFlatL2(d)
    self.cpu_index.add(self.known_encoding_faces2)
def range_ground_truth(xq, db_iterator, threshold,
                       metric_type=faiss.METRIC_L2, shard=False, ngpu=-1):
    """Computes the range-search results for a dataset that possibly
    does not fit in RAM but for which we have an iterator that
    returns it block by block.
    """
    nq, d = xq.shape
    t0 = time.time()
    xq = np.ascontiguousarray(xq, dtype='float32')

    index = faiss.IndexFlat(d, metric_type)
    if ngpu == -1:
        ngpu = faiss.get_num_gpus()
    if ngpu:
        LOG.info('running on %d GPUs' % ngpu)
        co = faiss.GpuMultipleClonerOptions()
        co.shard = shard
        index_gpu = faiss.index_cpu_to_all_gpus(index, co=co, ngpu=ngpu)

    # compute ground-truth by blocks
    i0 = 0
    D = [[] for _i in range(nq)]
    I = [[] for _i in range(nq)]
    all_lims = []
    for xbi in db_iterator:
        ni = xbi.shape[0]
        if ngpu > 0:
            index_gpu.add(xbi)
            lims_i, Di, Ii = range_search_gpu(xq, threshold, index_gpu, xbi)
            index_gpu.reset()
        else:
            index.add(xbi)
            lims_i, Di, Ii = index.range_search(xq, threshold)
            index.reset()
        Ii += i0
        for j in range(nq):
            l0, l1 = lims_i[j], lims_i[j + 1]
            if l1 > l0:
                D[j].append(Di[l0:l1])
                I[j].append(Ii[l0:l1])
        i0 += ni
        LOG.info("%d db elements, %.3f s" % (i0, time.time() - t0))

    empty_I = np.zeros(0, dtype='int64')
    empty_D = np.zeros(0, dtype='float32')
    D = [(np.hstack(i) if i != [] else empty_D) for i in D]
    I = [(np.hstack(i) if i != [] else empty_I) for i in I]
    sizes = [len(i) for i in I]
    assert len(sizes) == nq
    lims = np.zeros(nq + 1, dtype="uint64")
    lims[1:] = np.cumsum(sizes)
    return lims, np.hstack(D), np.hstack(I)
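# Hypothetical usage sketch for range_ground_truth above (assumes the
# module-level LOG logger and, when GPUs are present, the range_search_gpu
# helper are defined, as in the surrounding benchmark code; sizes are
# illustrative).
import numpy as np

xq = np.random.rand(100, 32).astype('float32')
db = np.random.rand(500000, 32).astype('float32')

def blocks(bs=100000):
    for i0 in range(0, db.shape[0], bs):
        yield db[i0:i0 + bs]

lims, D, I = range_ground_truth(xq, blocks(), threshold=4.0)
# hits for query j are D[lims[j]:lims[j+1]] / I[lims[j]:lims[j+1]]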
def do_cpu_to_gpu(self, index_key):
    ts = []
    ts.append(time.time())
    (xt, xb, xq) = self.get_dataset(small_one=True)
    nb, d = xb.shape

    index = faiss.index_factory(d, index_key)
    if index.__class__ == faiss.IndexIVFPQ:
        # speed up test
        index.pq.cp.niter = 2
        index.do_polysemous_training = False
    ts.append(time.time())

    index.train(xt)
    ts.append(time.time())

    # adding some ids because there was a bug in this case
    index.add_with_ids(xb, np.arange(nb).astype(np.int64) * 3 + 12345)
    ts.append(time.time())

    index.nprobe = 4
    D, Iref = index.search(xq, 10)
    ts.append(time.time())

    res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
    ts.append(time.time())

    gpu_index.setNumProbes(4)
    D, Inew = gpu_index.search(xq, 10)
    ts.append(time.time())
    print('times:', [t - ts[0] for t in ts])

    self.assertGreaterEqual((Iref == Inew).sum(), Iref.size)

    if faiss.get_num_gpus() == 1:
        return

    for shard in False, True:
        # test on just 2 GPUs
        res = [faiss.StandardGpuResources() for i in range(2)]
        co = faiss.GpuMultipleClonerOptions()
        co.shard = shard

        gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)

        faiss.GpuParameterSpace().set_index_parameter(
            gpu_index, 'nprobe', 4)

        D, Inew = gpu_index.search(xq, 10)

        # 0.99: allow some tolerance in results otherwise test
        # fails occasionally (not reproducible)
        self.assertGreaterEqual((Iref == Inew).sum(), Iref.size * 0.99)
def to_gpu(self):
    if faiss.get_num_gpus() == 1:
        res = faiss.StandardGpuResources()
        self.index = faiss.index_cpu_to_gpu(res, 0, self.index)
    else:
        cloner_options = faiss.GpuMultipleClonerOptions()
        cloner_options.shard = True
        self.index = faiss.index_cpu_to_all_gpus(self.index,
                                                 co=cloner_options)
    return self.index
def do_cpu_to_gpu(self, index_key):
    ts = []
    ts.append(time.time())
    (xt, xb, xq) = self.get_dataset(small_one=True)
    nb, d = xb.shape

    index = faiss.index_factory(d, index_key)
    if index.__class__ == faiss.IndexIVFPQ:
        # speed up test
        index.pq.cp.niter = 2
        index.do_polysemous_training = False
    ts.append(time.time())

    index.train(xt)
    ts.append(time.time())

    # adding some ids because there was a bug in this case
    index.add_with_ids(xb, np.arange(nb) * 3 + 12345)
    ts.append(time.time())

    index.nprobe = 4
    D, Iref = index.search(xq, 10)
    ts.append(time.time())

    res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
    ts.append(time.time())

    gpu_index.setNumProbes(4)
    D, Inew = gpu_index.search(xq, 10)
    ts.append(time.time())
    print('times:', [t - ts[0] for t in ts])

    self.assertGreaterEqual((Iref == Inew).sum(), Iref.size)

    if faiss.get_num_gpus() == 1:
        return

    for shard in False, True:
        # test on just 2 GPUs
        res = [faiss.StandardGpuResources() for i in range(2)]
        co = faiss.GpuMultipleClonerOptions()
        co.shard = shard

        gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)

        faiss.GpuParameterSpace().set_index_parameter(
            gpu_index, 'nprobe', 4)

        D, Inew = gpu_index.search(xq, 10)

        self.assertGreaterEqual((Iref == Inew).sum(), Iref.size)
def load(self, path: str, device: Optional[str] = None) -> None:
    r"""Load the index and meta data from ``path`` directory.

    Args:
        path (str): A path to the directory to load the index from.
        device (optional str): Device to load the index into. If None,
            value will be picked from hyperparameters.
    """
    if not os.path.exists(path):
        raise ValueError(
            f"Failed to load the index. {path} "
            f"does not exist.")

    cpu_index = faiss.read_index(f"{path}/index.faiss")

    if device is None:
        device = self._config.device

    if device.lower().startswith("gpu"):
        gpu_resource = faiss.StandardGpuResources()
        gpu_id = int(device[3:])
        # device ids are 0-based: a valid id must be < faiss.get_num_gpus()
        if gpu_id >= faiss.get_num_gpus():
            gpu_id = 0
            logging.warning(
                "Cannot create the index on device %s. "
                "Total number of GPUs on this machine is "
                "%s. Using gpu0 for the index.",
                device, faiss.get_num_gpus())
        self._index = faiss.index_cpu_to_gpu(
            gpu_resource, gpu_id, cpu_index)
    else:
        self._index = cpu_index

    with open(f"{path}/index.meta_data", "rb") as f:
        self._meta_data = pickle.load(f)
def __init__(self):
    self.ngpu = faiss.get_num_gpus()
    if self.ngpu == 0:
        return

    self.tempmem = 1 << 33  # 8 GiB of scratch memory per GPU
    self.max_add_per_gpu = 1 << 25
    self.max_add = self.max_add_per_gpu * self.ngpu
    self.add_batch_size = 65536

    self.gpu_resources = self._prepare_gpu_resources()
def __loadIndex(self):
    assert self.dbs != [], "You should load db before load index, use self.loadDB() ..."
    d = self.dbs[0].shape[-1]
    ngpu = faiss.get_num_gpus()
    index = faiss.IndexFlatL2(d)
    res = faiss.StandardGpuResources()
    for i, db in enumerate(self.dbs):
        # clone an empty flat index to GPU i and fill it with the i-th shard
        # (assumes len(self.dbs) <= ngpu)
        gpu_index = faiss.index_cpu_to_gpu(res, i, index)
        gpu_index.add(db)
        self.gpu_index.append(gpu_index)
def __init__(self, config: Optional[Union[Dict, Config]] = None):
    super().__init__()
    self._config = Config(hparams=config,
                          default_hparams=self.default_configs())
    self._meta_data: Dict[int, str] = {}

    index_type = self._config.index_type
    device = self._config.device
    dim = self._config.dim

    if device.lower().startswith("gpu"):
        if isinstance(index_type, str) and not index_type.startswith("Gpu"):
            index_type = "Gpu" + index_type

        index_class = utils.get_class(index_type, module_paths=["faiss"])
        gpu_resource = faiss.StandardGpuResources()
        gpu_id = int(device[3:])
        # device ids are 0-based: a valid id must be < faiss.get_num_gpus()
        if gpu_id >= faiss.get_num_gpus():
            gpu_id = 0
            logging.warning(
                "Cannot create the index on device %s. "
                "Total number of GPUs on this machine is "
                "%s. Using gpu0 for the index.",
                self._config.device, faiss.get_num_gpus())
        config_class_name = self.INDEX_TYPE_TO_CONFIG.get(
            index_class.__name__)
        config = utils.get_class(config_class_name,
                                 module_paths=["faiss"])()
        config.device = gpu_id
        self._index = index_class(gpu_resource, dim, config)
    else:
        index_class = utils.get_class(index_type, module_paths=["faiss"])
        self._index = index_class(dim)
def to_all_gpus(
        cpu_index: faiss.Index,
        co: Optional['faiss.GpuMultipleClonerOptions'] = None) -> faiss.Index:
    """Clone a CPU index onto every visible GPU.

    Args:
        cpu_index: the index to clone.
        co: optional cloner options (e.g. sharding, float16 storage).

    Returns:
        The GPU-resident index.
    """
    n_gpus = faiss.get_num_gpus()
    assert n_gpus != 0, 'Attempting to move index to GPU without any GPUs'
    gpu_index = faiss.index_cpu_to_all_gpus(cpu_index, co=co)
    return gpu_index
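# Hypothetical usage sketch for to_all_gpus above: shard a flat index across
# all visible GPUs with float16 storage to roughly halve GPU memory use.
import faiss

co = faiss.GpuMultipleClonerOptions()
co.shard = True
co.useFloat16 = True
gpu_index = to_all_gpus(faiss.IndexFlatL2(128), co=co)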
def upgrade_indices(self, new_index_type="IDMap,IVF100,PQ8"):
    for column_name, index in self.indices.items():
        if faiss.get_num_gpus() > 0:
            index = faiss.index_cpu_to_gpu(faiss.StandardGpuResources(),
                                           self.rank, index)
        vectors = index.reconstruct_n(0, index.ntotal)
        ids = np.array([index.id_map.at(i)
                        for i in range(index.id_map.size())])
        assert len(vectors) == len(ids)
        new_index = faiss.index_factory(vectors.shape[1], new_index_type)
        if faiss.get_num_gpus() > 0:
            new_index = faiss.index_cpu_to_gpu(faiss.StandardGpuResources(),
                                               self.rank, new_index)
        if not new_index.is_trained:
            new_index.train(vectors)
        new_index.add_with_ids(vectors, ids)
        if faiss.get_num_gpus() > 0:
            new_index = faiss.index_gpu_to_cpu(new_index)
        faiss.write_index(new_index,
                          f"{self.directory}_new/{column_name}.index")
def IndexLoad(idx_path, nprobe=0, gpu=False):
    print('Reading FAISS index', file=sys.stderr)
    print(' - index: {:s}'.format(idx_path), file=sys.stderr)
    index = faiss.read_index(idx_path)
    print(' - found {:d} sentences of dim {:d}'.format(index.ntotal, index.d),
          file=sys.stderr)
    print(' - setting nprobe to {:d}'.format(nprobe), file=sys.stderr)
    if gpu:
        print(' - transfer index to %d GPUs ' % faiss.get_num_gpus(),
              file=sys.stderr)
        index = faiss.index_cpu_to_all_gpus(index)  # co=co
        faiss.GpuParameterSpace().set_index_parameter(index, 'nprobe', nprobe)
    return index
def IndexLoad(idx_name, nprobe, gpu=False):
    print('Reading FAISS index')
    print(' - index: {:s}'.format(idx_name))
    index = faiss.read_index(idx_name)
    print(' - found {:d} sentences of dim {:d}'.format(index.ntotal, index.d))
    print(' - setting nprobe to {:d}'.format(nprobe))
    if gpu:
        print(' - transfer index to %d GPUs ' % faiss.get_num_gpus())
        # co = faiss.GpuMultipleClonerOptions()
        # co.shard = True
        index = faiss.index_cpu_to_all_gpus(index)  # co=co
        faiss.GpuParameterSpace().set_index_parameter(index, 'nprobe', nprobe)
    return index
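# Hypothetical usage sketch for the IndexLoad variants above; 'big.ivf.index'
# is a placeholder path to an IVF index written by faiss.write_index.
import faiss

index = IndexLoad('big.ivf.index', nprobe=16, gpu=faiss.get_num_gpus() > 0)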
def read_faiss_index_gpu(self, index_filepath):
    """
    Load a FAISS index. If we're on GPU, then convert it to GPU index
    :param index_filepath:
    :return:
    """
    print("read_faiss_index start.")
    index = faiss.read_index(index_filepath)
    if faiss.get_num_gpus():
        print("read_faiss_index: Converting FAISS index from CPU to GPU.")
        index = faiss.index_cpu_to_gpu(faiss.StandardGpuResources(), 0, index)
    return index
def make_index(sx, preproc=ident):
    N, p = sx.shape
    ngpu = faiss.get_num_gpus()

    # pick an index type by database size (thresholds must be increasing,
    # otherwise the IVF branch is unreachable)
    if N < 1000:
        indextype = 'Flat'
    elif N < 100000:
        indextype = 'GPUFlat'
    elif N < 10**6:
        indextype = 'GPUIVFFlat'
    else:
        indextype = 'GPUIVFFlatShards'

    if indextype in ('IVFFlat', 'GPUIVFFlat', 'GPUIVFFlatShards'):
        ncentroids = int(4 * np.floor(np.sqrt(N)))
        nprobe = 256
        print("using IndexIVFFlat with %d/%d centroids" %
              (nprobe, ncentroids))
        q = faiss.IndexFlatL2(p)
        index = faiss.IndexIVFFlat(q, p, ncentroids,
                                   faiss.METRIC_INNER_PRODUCT)
        if nprobe >= ncentroids * 3 / 4:
            nprobe = int(ncentroids * 3 / 4)
            print("   forcing nprobe to %d" % nprobe)
        index.nprobe = nprobe
        index.quantizer_no_dealloc = q  # keep the quantizer alive
        if indextype.startswith('GPU') and ngpu > 0:
            index = move_index_to_gpu(index, indextype == 'GPUIVFFlatShards')
        ntrain = min(ncentroids * 100, N)
        print("prepare train set, size=%d" % ntrain)
        trainset = sx[:ntrain]
        trainset.max()  # force move to RAM
        print("train")
        index.train(trainset)
    elif indextype == 'GPUFlat' or indextype == 'Flat':
        index = faiss.IndexFlatIP(p)
        if indextype.startswith('GPU') and ngpu > 0:
            co = faiss.GpuMultipleClonerOptions()
            co.useFloat16 = True
            index = faiss.index_cpu_to_all_gpus(index, co)
    else:
        assert False

    bs = 16384
    for i0, i1, block in dataset_iterator(sx, preproc, bs):
        print("   add %d:%d / %d\r" % (i0, i1, N), end=' ')
        sys.stdout.flush()
        index.add(block)

    return index
def compute_GT_GPU(xb, xq, gt_sl):
    nq_gt, _ = xq.shape
    print("compute GT GPU")
    t0 = time.time()

    gt_I = np.zeros((nq_gt, gt_sl), dtype='int64')
    gt_D = np.zeros((nq_gt, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k = gt_sl
    heaps.nh = nq_gt
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()
    bs = 10 ** 5
    # Please change this based on your GPU memory size.
    tempmem = 3500 * 1024 * 1024

    n, d = xb.shape
    xqs = sanitize(xq[:nq_gt])

    ngpu = faiss.get_num_gpus()
    gpu_resources = []

    for i in range(ngpu):
        res = faiss.StandardGpuResources()
        res.setTempMemory(tempmem)
        gpu_resources.append(res)

    vres = faiss.GpuResourcesVector()
    vdev = faiss.IntVector()
    for i in range(0, ngpu):
        vdev.push_back(i)
        vres.push_back(gpu_resources[i])

    db_gt = faiss.IndexFlatL2(d)
    db_gt_gpu = faiss.index_cpu_to_gpu_multiple(vres, vdev, db_gt)

    # compute ground-truth by blocks of bs, and add to heaps
    for i0, xsl in dataset_iterator(xb, IdentPreproc(d), bs):
        db_gt_gpu.add(xsl)
        D, I = db_gt_gpu.search(xqs, gt_sl)
        I += i0
        heaps.addn_with_ids(gt_sl, faiss.swig_ptr(D), faiss.swig_ptr(I), gt_sl)
        db_gt_gpu.reset()
    heaps.reorder()

    print("GT GPU time: {} s".format(time.time() - t0))
    return gt_I, gt_D
def init_index(self):
    d = 128
    ngpus = faiss.get_num_gpus()
    print("number of GPUs:", ngpus)
    cpu_index = faiss.IndexFlatL2(d)
    self.gpu_index = faiss.index_cpu_to_all_gpus(  # build the index
        cpu_index)
    self.gpu_index.add(self.known_encoding_faces2)  # add vectors to the index
    print('index', self.gpu_index.ntotal)
def build(self, use_gpu=False):
    self.vectors = np.array(self.vectors)
    faiss.normalize_L2(self.vectors)
    logging.info('Indexing {} vectors'.format(self.vectors.shape[0]))

    if self.vectors.shape[0] > 50000:
        num_centroids = 8 * int(
            math.sqrt(math.pow(2, int(math.log(self.vectors.shape[0], 2)))))
        logging.info('Using {} centroids'.format(num_centroids))
        self.index = faiss.index_factory(
            self.d, "IVF{}_HNSW32,Flat".format(num_centroids))

        ngpu = faiss.get_num_gpus()
        if ngpu > 0 and use_gpu:
            logging.info('Using {} GPUs'.format(ngpu))
            # run the coarse quantizer's k-means on GPU for faster training
            index_ivf = faiss.extract_index_ivf(self.index)
            clustering_index = faiss.index_cpu_to_all_gpus(
                faiss.IndexFlatL2(self.d))
            index_ivf.clustering_index = clustering_index

        logging.info('Training index...')
        self.index.train(self.vectors)
    else:
        self.index = faiss.IndexFlatL2(self.d)
        if faiss.get_num_gpus() > 0 and use_gpu:
            self.index = faiss.index_cpu_to_all_gpus(self.index)

    logging.info('Adding vectors to index...')
    self.index.add(self.vectors)
def move_index_to_gpu(index, shard=False):
    ngpu = faiss.get_num_gpus()
    gpu_resources = [faiss.StandardGpuResources() for i in range(ngpu)]

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = True
    co.shard = shard
    co.shard_type = 1

    print("   moving to %d GPUs" % ngpu)
    t0 = time.time()
    index = faiss.index_cpu_to_gpu_multiple_py(gpu_resources, index, co)
    index.dont_dealloc_me = gpu_resources  # keep the resources alive
    print("   done in %.3f s" % (time.time() - t0))
    return index
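# Hypothetical usage sketch for move_index_to_gpu above: replicate a small
# flat index on every GPU; pass shard=True to split a large database instead.
import faiss

index = faiss.IndexFlatIP(96)
index = move_index_to_gpu(index, shard=False)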
def init_index(known_encoding_faces2):
    known_encoding_faces2 = known_encoding_faces2.astype(np.float32)
    d = 512
    ngpus = faiss.get_num_gpus()
    print("number of GPUs:", ngpus)
    cpu_index = faiss.IndexFlatL2(d)
    gpu_index = faiss.index_cpu_to_all_gpus(  # build the index
        cpu_index)
    gpu_index.add(known_encoding_faces2)  # add vectors to the index
    print('index', gpu_index.ntotal)
    return gpu_index
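# Hypothetical usage sketch for init_index above; the 512-d rows stand in for
# face embeddings. init_index casts them to float32 itself.
import numpy as np

faces = np.random.rand(1000, 512)
gpu_index = init_index(faces)
D, I = gpu_index.search(faces[:1].astype(np.float32), 4)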
from __future__ import print_function
import numpy as np

d = 64                           # dimension
nb = 100000                      # database size
nq = 10000                       # nb of queries
np.random.seed(1234)             # make reproducible
xb = np.random.random((nb, d)).astype('float32')
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.

import faiss                     # make faiss available

print("number of GPUs:", faiss.get_num_gpus())

index = faiss.IndexFlatL2(d)     # build the index
res = faiss.StandardGpuResources()
index = faiss.index_cpu_to_gpu(res, 0, index)
index.add(xb)                    # add vectors to the index
print(index.ntotal)

k = 4                            # we want to see 4 nearest neighbors
D, I = index.search(xq, k)       # actual search
print(I[:5])                     # neighbors of the 5 first queries
print(I[-5:])                    # neighbors of the 5 last queries
-knngraph           instead of the standard setup for the dataset,
                    compute a k-nn graph with nnn neighbors per element
-oI xx%d.npy        output the search result indices to this numpy file,
                    %d will be replaced with the nprobe
-oD xx%d.npy        output the search result distances to this file
"""
    sys.exit(1)


# default values

dbname = None
index_key = None

ngpu = faiss.get_num_gpus()

replicas = 1                     # nb of replicas of sharded dataset
add_batch_size = 32768
query_batch_size = 16384
nprobes = [1 << l for l in range(9)]
knngraph = False
use_precomputed_tables = True
tempmem = -1                     # if -1, use system default
max_add = -1
use_float16 = False
use_cache = True
nnn = 10
altadd = False
I_fname = None
D_fname = None