def __init__(self, xt_path="/home/wenqingfu/sift1b/bigann_learn.bvecs",
             xb_path="/home/wenqingfu/sift1b/bigann_base.bvecs", ngpu=3):
    self.xt = self.mmap_bvecs(xt_path)
    self.xb = self.mmap_bvecs(xb_path)
    self.xt = self.sanitize(self.xt[:1000000])
    self.xb = self.sanitize(
        self.xb[self.db_start * 1000 * 1000:self.db_end * 1000 * 1000])

    self.gpu_resources = []
    for i in range(0, ngpu):
        res = faiss.StandardGpuResources()
        if tempmem >= 0:
            res.setTempMemory(tempmem)
            print("set tempmem to %d" % tempmem)
        self.gpu_resources.append(res)

    self.vres = faiss.GpuResourcesVector()
    self.vdev = faiss.IntVector()
    for i in range(0, ngpu):
        self.vdev.push_back(i)
        self.vres.push_back(self.gpu_resources[i])

    self.co = faiss.GpuMultipleClonerOptions()
    self.co.useFloat16 = True
    self.co.useFloat16CoarseQuantizer = False
    self.co.usePrecomputed = False
    self.co.indicesOptions = 0
    self.co.verbose = True
    self.co.shard = True

    self.ps = faiss.GpuParameterSpace()
def queryFeatures(self, featureResource, numberOfResultsToRetrieve):
    numberOfResultsToRetrieve = int(numberOfResultsToRetrieve)
    ps = None
    if self.ngpu > 0:
        ps = faiss.GpuParameterSpace()
        ps.initialize(self.index)
        # ps.set_index_parameter(self.index, 'nprobe', self.nprobe)
    features = self.deserializeFeatures(featureResource)
    pfeatures = self.preproc.apply_py(indexfunctions.sanitize(features))
    D, I = self.index.search(pfeatures, numberOfResultsToRetrieve)
    sortedIDs, sortedVotes, maxvoteval = indexfunctions.tallyVotes(
        D, I, numcores=1)
    # print('number of ids: ', len(self.IDToImage))
    # voteScores = 1.0 * sortedVotes / (1.0 * np.max(sortedVotes))
    voteScores = 1.0 * sortedVotes / maxvoteval
    resultScores = filteringResults()
    resultScores.D = D
    resultScores.I = I
    # print(list(self.IDToImage.keys())[0] + '\n')
    for i in range(0, min(len(sortedIDs), numberOfResultsToRetrieve)):
        id = sortedIDs[i]
        id_str = str(id)
        # print(id_str)
        if id_str in self.IDToImage:
            imname = self.IDToImage[id_str]
            score = voteScores[i]
            resultScores.addScore(imname, score, ID=id)
    resultScores.pairDownResults(numberOfResultsToRetrieve)
    return resultScores
def _set_index_nprobe(self) -> bool:
    """
    Try to set the currently configured nprobe value to the current faiss
    index.

    :returns: True if nprobe was actually set and False if it wasn't (not
        an appropriate index type).
    """
    with self._model_lock:
        idx = self._faiss_index
        idx_name = idx.__class__.__name__
        try:
            # Attempting to use GpuParameterSpace doesn't error and seems
            # to function even when there is no GPU available, so the usual
            # pythonic EAFP doesn't cause an exception to catch when doing
            # the "improper" thing first.
            if self._use_gpu and isinstance(idx, faiss.GpuIndex):
                ps = faiss.GpuParameterSpace()
            else:
                ps = faiss.ParameterSpace()
            ps.set_index_parameter(idx, 'nprobe', self._ivf_nprobe)
            LOG.debug(f"Set nprobe={self._ivf_nprobe} to index, instance "
                      f"of {idx_name}")
            return True
        except RuntimeError as sip_ex:
            s_ex = str(sip_ex)
            if "could not set parameter nprobe" in s_ex:
                # OK, index does not support nprobe parameter
                LOG.debug(f"Current index ({idx_name}) does not "
                          f"support the nprobe parameter.")
                return False
            # Otherwise re-raise
            raise
def IndexLoad(idx_name, nprobe):
    print(' - loading FAISS index', idx_name)
    index = faiss.read_index(idx_name)
    print(' - found {:d} sentences of dim {:d}'.format(index.ntotal, index.d))
    print(' - setting nprobe to {:d}'.format(nprobe))
    faiss.GpuParameterSpace().set_index_parameter(index, 'nprobe', nprobe)
    return index
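# Hedged usage sketch for the loader above (not part of the original snippet):
# builds and saves a tiny IVF index first so IndexLoad has something to read.
# The file name "tmp_sentences.index" is hypothetical; a GPU build of faiss is
# assumed since IndexLoad calls faiss.GpuParameterSpace().
import numpy as np
import faiss

d = 32
xb = np.random.rand(5000, d).astype('float32')
tmp_index = faiss.index_factory(d, "IVF64,Flat")
tmp_index.train(xb)
tmp_index.add(xb)
faiss.write_index(tmp_index, "tmp_sentences.index")

index = IndexLoad("tmp_sentences.index", nprobe=8)
D, I = index.search(xb[:3], 5)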
def eval_dataset(index, preproc):
    ps = faiss.GpuParameterSpace()
    ps.initialize(index)
    nq_gt = gt_I.shape[0]
    print "search..."
    sl = query_batch_size
    nq = xq.shape[0]
    for nprobe in nprobes:
        ps.set_index_parameter(index, 'nprobe', nprobe)
        t0 = time.time()

        if sl == 0:
            D, I = index.search(preproc.apply_py(sanitize(xq)), nnn)
        else:
            I = np.empty((nq, nnn), dtype='int32')
            D = np.empty((nq, nnn), dtype='float32')
            inter_res = ''

            for i0, xs in dataset_iterator(xq, preproc, sl):
                print '\r%d/%d (%.3f s%s)   ' % (
                    i0, nq, time.time() - t0, inter_res),
                sys.stdout.flush()

                i1 = i0 + xs.shape[0]
                Di, Ii = index.search(xs, nnn)

                I[i0:i1] = Ii
                D[i0:i1] = Di

                if knngraph and not inter_res and i1 >= nq_gt:
                    ires = eval_intersection_measure(
                        gt_I[:, :nnn], I[:nq_gt])
                    inter_res = ', %.4f' % ires

        t1 = time.time()
        if knngraph:
            ires = eval_intersection_measure(gt_I[:, :nnn], I[:nq_gt])
            print " probe=%-3d: %.3f s rank-%d intersection results: %.4f" % (
                nprobe, t1 - t0, nnn, ires)
        else:
            print " probe=%-3d: %.3f s" % (nprobe, t1 - t0),
            gtc = gt_I[:, :1]
            nq = xq.shape[0]
            for rank in 1, 10, 100:
                if rank > nnn:
                    continue
                nok = (I[:, :rank] == gtc).sum()
                print "1-R@%d: %.4f" % (rank, nok / float(nq)),
            print

        if I_fname:
            I_fname_i = I_fname % I
            print "storing", I_fname_i
            np.save(I_fname_i, I)
        if D_fname:
            D_fname_i = D_fname % I
            print "storing", D_fname_i
            np.save(D_fname_i, D)
def set_search_hyperparameters(index: faiss.Index,
                               param_str: str,
                               use_gpu: bool = False) -> None:
    """ set hyperparameters to an index """
    # depends on installed faiss version
    # pylint: disable=no-member
    params = faiss.ParameterSpace() if not use_gpu else faiss.GpuParameterSpace()
    params.set_index_parameters(index, param_str)
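# Hedged usage sketch for set_search_hyperparameters() above (not part of the
# original snippet): the dataset and factory string are made up for
# illustration. ParameterSpace.set_index_parameters parses comma-separated
# "name=value" pairs such as "nprobe=32".
import numpy as np
import faiss

d = 64
xb = np.random.rand(10000, d).astype('float32')
index = faiss.index_factory(d, "IVF256,Flat")
index.train(xb)
index.add(xb)
set_search_hyperparameters(index, "nprobe=32", use_gpu=False)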
def do_cpu_to_gpu(self, index_key):
    ts = []
    ts.append(time.time())
    (xt, xb, xq) = self.get_dataset(small_one=True)
    nb, d = xb.shape

    index = faiss.index_factory(d, index_key)
    if index.__class__ == faiss.IndexIVFPQ:
        # speed up test
        index.pq.cp.niter = 2
        index.do_polysemous_training = False
    ts.append(time.time())

    index.train(xt)
    ts.append(time.time())

    # adding some ids because there was a bug in this case
    index.add_with_ids(xb, np.arange(nb).astype(np.int64) * 3 + 12345)
    ts.append(time.time())

    index.nprobe = 4
    D, Iref = index.search(xq, 10)
    ts.append(time.time())

    res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
    ts.append(time.time())

    gpu_index.setNumProbes(4)

    D, Inew = gpu_index.search(xq, 10)
    ts.append(time.time())
    print('times:', [t - ts[0] for t in ts])

    self.assertGreaterEqual((Iref == Inew).sum(), Iref.size)

    if faiss.get_num_gpus() == 1:
        return

    for shard in False, True:
        # test on just 2 GPUs
        res = [faiss.StandardGpuResources() for i in range(2)]
        co = faiss.GpuMultipleClonerOptions()
        co.shard = shard

        gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)

        faiss.GpuParameterSpace().set_index_parameter(
            gpu_index, 'nprobe', 4)

        D, Inew = gpu_index.search(xq, 10)

        # 0.99: allow some tolerance in results otherwise test
        # fails occasionally (not reproducible)
        self.assertGreaterEqual((Iref == Inew).sum(), Iref.size * 0.99)
def IndexLoad(idx_name, nprobe, gpu=False):
    print('Reading FAISS index')
    print(' - index: {:s}'.format(idx_name))
    index = faiss.read_index(idx_name)
    print(' - found {:d} sentences of dim {:d}'.format(index.ntotal, index.d))
    print(' - setting nprobe to {:d}'.format(nprobe))
    if gpu:
        print(' - transfer index to %d GPUs ' % faiss.get_num_gpus())
        # co = faiss.GpuMultipleClonerOptions()
        # co.shard = True
        index = faiss.index_cpu_to_all_gpus(index)  # co=co
        faiss.GpuParameterSpace().set_index_parameter(index, 'nprobe', nprobe)
    return index
def IndexLoad(idx_path, nprobe=0, gpu=False):
    print('Reading FAISS index', file=sys.stderr)
    print(' - index: {:s}'.format(idx_path), file=sys.stderr)
    index = faiss.read_index(idx_path)
    print(' - found {:d} sentences of dim {:d}'.format(index.ntotal, index.d),
          file=sys.stderr)
    print(' - setting nprobe to {:d}'.format(nprobe), file=sys.stderr)
    if gpu:
        print(' - transfer index to %d GPUs ' % faiss.get_num_gpus(),
              file=sys.stderr)
        index = faiss.index_cpu_to_all_gpus(index)  # co=co
        faiss.GpuParameterSpace().set_index_parameter(index, 'nprobe', nprobe)
    return index
def compute_index(self, data_points: torch.Tensor,
                  samples_for_pole: int = 10000):
    data_points = data_points.cpu()
    if samples_for_pole == 0:
        samples_for_pole = data_points.size(0)
    perm = torch.randperm(data_points.size(0))
    idx = perm[:min(samples_for_pole, perm.size(0))]
    self.pole = compute_pole(data_points[idx], self.manifold)

    tqdm.write("Creating nns index")
    ivf_size = 2 ** (ceil(4 * sqrt(data_points.size(0)) - 1)).bit_length()
    index_flat = faiss.index_factory(data_points.size(-1), "Flat")
    # f"PCAR16,IVF{ivf_size},SQ4")
    general_config = get_config().general
    _use_gpu: bool = general_config.gpu >= 0

    if _use_gpu:
        res = faiss.StandardGpuResources()
        # make it into a gpu index
        self.index = faiss.index_cpu_to_gpu(res, 0, index_flat)
        params = faiss.GpuParameterSpace()
    else:
        self.index = index_flat
        params = faiss.ParameterSpace()
    # params.set_index_parameter(self.index, 'nprobe', 100)
    params.initialize(self.index)

    num_blocks = 200
    block_size = ceil(data_points.size(0) / num_blocks)
    num_blocks = ceil(data_points.size(0) / block_size)
    self.data_embedding = data_points
    pole_batch = self.pole.unsqueeze(0).expand_as(data_points[:block_size])
    for i in tqdm(range(num_blocks), desc="Euclidean Project",
                  dynamic_ncols=True):
        start_index = i * block_size
        end_index = min((i + 1) * block_size, data_points.size(0))
        self.data_embedding[start_index:end_index] = self.manifold.log(
            pole_batch[0: end_index - start_index],
            data_points[start_index:end_index])

    tqdm.write("Training Index")
    train_size = int(20 * sqrt(data_points.size(0)))
    perm = torch.randperm(data_points.size(0))
    train_points = \
        self.data_embedding.cpu().detach()[perm[:train_size]].numpy()
    self.index.train(train_points)
    tqdm.write("Adding Vectors to Index")
    self.index.add(self.data_embedding.cpu().detach().numpy())
def compute_index(self, data_points: torch.Tensor,
                  samples_for_pole: int = 10000):
    data_points = data_points.cpu()
    if samples_for_pole == 0:
        samples_for_pole = data_points.size(0)
    perm = torch.randperm(data_points.size(0))
    idx = perm[:min(samples_for_pole, perm.size(0))]
    self.pole = compute_pole(data_points[idx], self.manifold)

    print("Creating nns index")
    res = faiss.StandardGpuResources()
    ivf_size = 2 ** (ceil(4 * sqrt(data_points.size(0)) - 1)).bit_length()
    index_flat = faiss.index_factory(data_points.size(-1),
                                     f"PCAR64,IVF{ivf_size},SQ8")
    # make it into a gpu index
    self.index = faiss.index_cpu_to_gpu(res, 0, index_flat)
    params = faiss.GpuParameterSpace()
    params.set_index_parameter(self.index, 'nprobe', 100)
    params.initialize(self.index)

    num_blocks = 50
    block_size = ceil(data_points.size(0) / num_blocks)
    num_blocks = ceil(data_points.size(0) / block_size)
    self.data_embedding = data_points
    pole_batch = self.pole.unsqueeze(0).expand_as(data_points[:block_size])
    print("Projecting to Euclidean space for nns:")
    for i in tqdm(range(num_blocks)):
        start_index = i * block_size
        end_index = min((i + 1) * block_size, data_points.size(0))
        self.data_embedding[start_index:end_index] = self.manifold.log(
            pole_batch[0:end_index - start_index],
            data_points[start_index:end_index])
    print("Training Index")
    self.index.train(self.data_embedding.cpu().detach().numpy())
    print("Adding Vectors to Index")
    self.index.add(self.data_embedding.cpu().detach().numpy())
def test_set_gpu_param(self):
    index = faiss.index_factory(12, "PCAR8,IVF10,PQ4")
    res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
    faiss.GpuParameterSpace().set_index_parameter(gpu_index, "nprobe", 3)
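# For comparison, a minimal CPU-side sketch of the same parameter-setting call
# (not part of the original test): faiss.ParameterSpace descends through the
# IndexPreTransform wrapper produced by the "PCAR8,IVF10,PQ4" factory string,
# so nprobe can be set without a GPU or any training.
def cpu_set_param_sketch():
    index = faiss.index_factory(12, "PCAR8,IVF10,PQ4")
    faiss.ParameterSpace().set_index_parameter(index, "nprobe", 3)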
op_per_key = []

# keep track of optimal operating points seen so far
op = faiss.OperatingPoints()

for index_key in keys_to_test:

    print "============ key", index_key

    # make the index described by the key
    index = faiss.index_factory(d, index_key)

    if use_gpu:
        # transfer to GPU (may be partial)
        index = faiss.index_cpu_to_gpu(res, dev_no, index)
        params = faiss.GpuParameterSpace()
    else:
        params = faiss.ParameterSpace()

    params.initialize(index)

    print "[%.3f s] train & add" % (time.time() - t0)

    index.train(xt)
    index.add(xb)

    print "[%.3f s] explore op points" % (time.time() - t0)

    # find operating points for this index
    opi = params.explore(index, xq, crit)
def eval_dataset(index, preproc):
    ps = faiss.GpuParameterSpace()
    ps.initialize(index)
    nq_gt = gt_I.shape[0]
    print("search...")
    sl = query_batch_size
    nq = xq.shape[0]
    print(nq)
    for nprobe in nprobes:
        ps.set_index_parameter(index, 'nprobe', nprobe)
        t0 = time.time()

        if sl == 0:
            D, I = index.search(preproc.apply_py(sanitize(xq)), nnn)
        else:
            I = np.empty((nq, nnn), dtype='int32')
            D = np.empty((nq, nnn), dtype='float32')
            inter_res = ''

            for i0, xs in dataset_iterator(xq, preproc, sl):
                # print('\r%d/%d (%.3f s%s)   ' % (
                #     i0, nq, time.time() - t0, inter_res), end=' ')
                # sys.stdout.flush()

                i1 = i0 + xs.shape[0]
                # Wenqi: debugging memory overflow
                # print(xs.shape)
                Di, Ii = index.search(xs, nnn)

                I[i0:i1] = Ii
                D[i0:i1] = Di

                if knngraph and not inter_res and i1 >= nq_gt:
                    ires = eval_intersection_measure(gt_I[:, :nnn], I[:nq_gt])
                    inter_res = ', %.4f' % ires

        t1 = time.time()
        if knngraph:
            ires = eval_intersection_measure(gt_I[:, :nnn], I[:nq_gt])
            print(" probe=%-3d: %.3f s rank-%d intersection results: %.4f" %
                  (nprobe, t1 - t0, nnn, ires))
        else:
            print(" probe=%-3d: %.3f s" % (nprobe, t1 - t0), end=' ')
            gtc = gt_I[:, :1]
            nq = xq.shape[0]
            # WENQI modified: when only using 1000 queries, comment out the
            # loop below because the ground-truth verification has problems
            # with the shape
            for rank in 1, 10, 100:
                if rank > nnn:
                    continue
                nok = (I[:, :rank] == gtc).sum()
                print("1-R@%d: %.4f" % (rank, nok / float(nq)), end=' ')
            print()

        if I_fname:
            I_fname_i = I_fname % I
            print("storing", I_fname_i)
            np.save(I_fname_i, I)
        if D_fname:
            D_fname_i = D_fname % I
            print("storing", D_fname_i)
            np.save(D_fname_i, D)
result = result.transpose(0, 1)
result = pad(result)
# result = result.to('cpu')
testfeatures.append(result)
testlabels.append(key)
torch.cuda.empty_cache()

print("building test mfcc took : ", time.time() - start, " seconds")

#########################################################################################################
## IF GPU MEMORY IS A PROBLEM for test/use files
## combine this with the read above and make liberal use of empty cache

start = time.time()
accuracy = []
faiss.GpuParameterSpace().set_index_parameter(gpu_index, "nprobe", 2)
for l, m in tqdm(zip(testlabels, testfeatures), total=len(testlabels)):
    D, I = search_index_pytorch(gpu_index, m, 5)
    res.syncDefaultStreamCurrentDevice()
    r = torch.flatten(I).cpu().numpy()
    commons = Counter(r).most_common()
    most_likely = commons[0][0]
    accuracy.append(int(most_likely == l))
print("Running ", len(testlabels), " queries took ", time.time() - start, " seconds")
print("Accuracy: ", np.mean(accuracy))
def do_cpu_to_gpu(self, index_key):
    ts = []
    ts.append(time.time())
    (xt, xb, xq) = self.get_dataset(small_one=True)
    nb, d = xb.shape

    index = faiss.index_factory(d, index_key)
    if index.__class__ == faiss.IndexIVFPQ:
        # speed up test
        index.pq.cp.niter = 2
        index.do_polysemous_training = False
    ts.append(time.time())

    index.train(xt)
    ts.append(time.time())

    # adding some ids because there was a bug in this case;
    # those need to be cast to idx_t(= int64_t), because
    # on windows the numpy int default is int32
    ids = (np.arange(nb) * 3 + 12345).astype('int64')
    index.add_with_ids(xb, ids)
    ts.append(time.time())

    index.nprobe = 4
    Dref, Iref = index.search(xq, 10)
    ts.append(time.time())

    res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
    ts.append(time.time())

    # Validate the layout of the memory info
    mem_info = res.getMemoryInfo()

    assert type(mem_info) == dict
    assert type(mem_info[0]['FlatData']) == tuple
    assert type(mem_info[0]['FlatData'][0]) == int
    assert type(mem_info[0]['FlatData'][1]) == int

    gpu_index.setNumProbes(4)

    Dnew, Inew = gpu_index.search(xq, 10)
    ts.append(time.time())
    print('times:', [t - ts[0] for t in ts])

    # Give us some margin of error
    self.assertGreaterEqual((Iref == Inew).sum(), Iref.size - 50)

    if faiss.get_num_gpus() == 1:
        return

    for shard in False, True:
        # test on just 2 GPUs
        res = [faiss.StandardGpuResources() for i in range(2)]
        co = faiss.GpuMultipleClonerOptions()
        co.shard = shard

        gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)

        faiss.GpuParameterSpace().set_index_parameter(
            gpu_index, 'nprobe', 4)

        Dnew, Inew = gpu_index.search(xq, 10)

        # 0.99: allow some tolerance in results otherwise test
        # fails occasionally (not reproducible)
        self.assertGreaterEqual((Iref == Inew).sum(), Iref.size * 0.99)
def build_faiss(args, log2kstar, cacheroot, coarse_dir, split, N_, D,
                index_key, is_cached, query_, train=None, base=None):
    # set global variables
    name1_to_metric = {
        "dot_product": faiss.METRIC_INNER_PRODUCT,
        "squared_l2": faiss.METRIC_L2
    }
    global fmetric
    fmetric = name1_to_metric[args.metric]
    global xt
    if is_cached == False:
        xt = sanitize(train)
    global xb
    if is_cached == False:
        xb = base
    global dbname
    dbname = args.dataset
    global dim
    dim = D
    global gpu_resources
    global ngpu
    global usePrecomputed
    global useFloat16
    global query
    query = sanitize(query_)
    global N
    N = N_

    usePrecomputed = False
    useFloat16 = True
    print("usefloat16? ", useFloat16)
    replicas = 1
    addBatchSize = 32768
    ngpu = faiss.get_num_gpus()
    tempmem = -1
    if ngpu == 0 and args.is_gpu == True:
        assert False, "Cannot detect gpu in this machine"

    # process index_key
    preproc_str, ivf_str, pqflat_str = process_index_key(index_key)
    ncentroid = int(ivf_str[3:])

    # check cache files
    if not os.path.isdir(cacheroot):
        print("%s does not exist, creating it" % cacheroot)
        os.makedirs(cacheroot, exist_ok=True)

    print("cachefiles:")
    if preproc_str:
        preproc_cachefile = '%s%s_preproc_%s_%s.vectrans' % (
            cacheroot, args.metric, dbname, preproc_str[:-1])
        print(preproc_cachefile)
    else:
        preproc_str = ''
        preproc_cachefile = None

    cent_cachefile = '%s%s_cent_%s_%s%s_%s.npy' % (
        coarse_dir, args.metric, dbname, preproc_str, ivf_str, D)
    index_cachefile = '%s%s_%s_%s_%s_%s%s,%s.index' % (
        cacheroot, args.metric, dbname, split, args.num_split,
        preproc_str, ivf_str, pqflat_str)
    first_index_cachefile = '%s%s_%s_0_%s_%s%s,%s.index' % (
        cacheroot, args.metric, dbname, args.num_split,
        preproc_str, ivf_str, pqflat_str)

    if log2kstar == 4 and args.is_gpu == False:
        if preproc_str:
            preproc_cachefile = preproc_cachefile + "fs"
        cent_cachefile = cent_cachefile + "fs.npy"
        index_cachefile = index_cachefile + "fs"
        first_index_cachefile = first_index_cachefile + "fs"

    print(preproc_cachefile)
    print(cent_cachefile)
    print(index_cachefile)
    print(first_index_cachefile)

    # GPU resources
    if args.is_gpu:
        gpu_resources = []
        for i in range(ngpu):
            res = faiss.StandardGpuResources()
            if tempmem >= 0:
                res.setTempMemory(tempmem)
            gpu_resources.append(res)

    # pre-processing
    preproc = get_preprocessor(preproc_str, preproc_cachefile)

    # build index
    if not index_cachefile or not os.path.exists(index_cachefile):
        # train index
        coarse_quantizer = prepare_coarse_quantizer(preproc, cent_cachefile,
                                                    ncentroid, args.is_gpu)
        if split == 0:
            index_trained = prepare_trained_index(preproc, coarse_quantizer,
                                                  ncentroid, pqflat_str,
                                                  args.is_gpu)
        else:
            index_trained = faiss.read_index(first_index_cachefile)
            index_trained.ntotal = 0
            index_trained.invlists.reset()
            # centroids = faiss.vector_to_array(index_trained.pq.centroids).reshape(index_trained.pq.M, index_trained.pq.ksub, index_trained.pq.dsub)
            # print("index_load: ", centroids.shape)
            # print("index_load: ", centroids)
        index_all, index_gpu = add_vectors(index_trained, preproc,
                                           args.is_gpu, addBatchSize)
        if index_cachefile:
            print("store", index_cachefile)
            faiss.write_index(index_all, index_cachefile)
        if args.is_gpu:
            index = index_gpu
        else:
            index = index_all
    else:
        print("load", index_cachefile)
        index_load = faiss.read_index(index_cachefile)
        # move to GPU
        if args.is_gpu:
            index = copyToGpu(index_load)
            del index_load
        else:
            index = index_load

    global ps
    index.use_precomputed_table = usePrecomputed
    if args.is_gpu:
        ps = faiss.GpuParameterSpace()
        ps.initialize(index)
        # ps.set_index_parameter(index, 'nprobe', w)
    else:
        # faiss.omp_set_num_threads(faiss.omp_get_max_threads())
        faiss.omp_set_num_threads(args.batch)
        # index.nprobe = w

    return index, preproc