def faiss_search(index, preproc, args, reorder, w, csize):
    """Search the query set in batches and accumulate the time spent searching.

    Besides its parameters, this function reads module-level names: ``query``
    (the query matrix), ``xb`` (database vectors, only used when re-ranking)
    and, on the GPU path, ``ps`` (used to set ``nprobe`` on the index).

    Args:
        index: Index to search. Its ``search`` method is called with the
            extended argument list ``(queries, k, None, None, w, csize)``.
        preproc: Forwarded unchanged to ``dataset_iterator`` on the GPU path.
        args: Options object; ``is_gpu``, ``topk`` and ``batch`` are read.
        reorder: Number of candidates to re-rank with a flat refine index,
            or ``-1`` to disable re-ranking. Ignored when ``args.is_gpu``.
        w: Value assigned to the index ``nprobe``; also forwarded to
            ``search``.
        csize: Number of queries per ``search`` call on the CPU path; also
            forwarded to ``search`` on both paths.

    Returns:
        Tuple ``(I, D, total_latency)``: ``I`` is an int32 array and ``D`` a
        float32 array, both of shape ``(nq, args.topk)``, and
        ``total_latency`` is the summed wall-clock search time in
        milliseconds. Rows that were never searched hold ``-1`` in ``I`` and
        NaN in ``D``.
    """
    if args.is_gpu:
        # NOTE(review): `ps` is not created in this function (no
        # GpuParameterSpace is set up here); presumably it is a module-level
        # object initialised elsewhere -- confirm.
        ps.set_index_parameter(index, 'nprobe', w)
    else:
        index.nprobe = w

    # Optional re-ranking stage (CPU only): wrap the index in a flat refine
    # index that re-scores `reorder` candidates per query.
    if reorder != -1 and not args.is_gpu:
        index_refine = faiss.IndexRefineFlat(index, faiss.swig_ptr(xb))
        index_refine.k_factor = reorder / args.topk
        index_ready = index_refine
    else:
        index_ready = index

    print("Batch size: ", args.batch)
    nq = query.shape[0]
    # Pre-fill with sentinels instead of np.empty: rows that are never
    # searched (see the CPU path) would otherwise contain arbitrary memory.
    I = np.full((nq, args.topk), -1, dtype='int32')
    D = np.full((nq, args.topk), np.nan, dtype='float32')
    total_latency = 0.0

    if args.is_gpu:
        print("Sending ", args.batch, " queries at once")
        for i0, xs in dataset_iterator(query, preproc, args.batch):
            i1 = i0 + xs.shape[0]
            start = time.time()
            Di, Ii = index_ready.search(xs, args.topk, None, None, w, csize)
            total_latency += 1000 * (time.time() - start)
            I[i0:i1] = Ii
            D[i0:i1] = Di
    else:
        # Only whole batches of `csize` queries are sent.
        cstep = nq // csize
        print("cstep =", str(cstep), ", csize =", csize)
        leftover = nq - cstep * csize
        if leftover:
            print("Warning: the last", leftover,
                  "queries do not fill a batch and are not searched")
        for step in range(cstep):
            # Slice with the same `csize` that sizes the batch count and is
            # handed to search(), so offsets and batch size always agree.
            i0 = step * csize
            i1 = i0 + csize
            input_queries = query[i0:i1, :]
            if step == 0:
                print("Sending ", input_queries.shape[0], " queries at once")
            start = time.time()
            Di, Ii = index_ready.search(
                input_queries, args.topk, None, None, w, csize)
            total_latency += 1000 * (time.time() - start)
            I[i0:i1] = Ii
            D[i0:i1] = Di

    return I, D, total_latency
def test_downcast_Refine(self):
    """A refine index must deserialize as a ``faiss.IndexRefineFlat``.

    Builds an ``IndexRefineFlat`` over an 8-bit scalar-quantizer index of
    dimension 10, round-trips it through serialize/deserialize, and checks
    the Python type of the result.
    """
    base = faiss.IndexScalarQuantizer(10, faiss.ScalarQuantizer.QT_8bit)
    original = faiss.IndexRefineFlat(base)

    # Round trip through the in-memory serializer.
    blob = faiss.serialize_index(original)
    restored = faiss.deserialize_index(blob)

    assert isinstance(restored, faiss.IndexRefineFlat)
def test_IndexPQ_refined(self):
    """Check that adding a flat refine stage never lowers the ``[10]`` score.

    Evaluates a plain PQ index, then the same index wrapped in an
    ``IndexRefineFlat``, then the wrapper again with ``k_factor = 4``; entry
    ``[10]`` of each evaluation result must be non-decreasing along that
    sequence.
    """
    pq_index = faiss.IndexPQ(d, M, nbits_per_index)
    plain_scores = ev.evalres(ev.launch('PQ non-refined', pq_index))

    # Reset the PQ index before it is wrapped and evaluated again.
    pq_index.reset()
    refined = faiss.IndexRefineFlat(pq_index)
    refined_scores = ev.evalres(ev.launch('PQ refined', refined))
    assert refined_scores[10] >= plain_scores[10]

    refined.k_factor = 4
    boosted_scores = ev.evalres(ev.launch('PQ refined*4', refined))
    assert boosted_scores[10] >= refined_scores[10]
def fit(self, X):
    """Train and populate the index for ``X`` and attach a flat refine index.

    The base index is created from the factory string
    ``IVF<n_list>,PQ<d//2>x4fs`` where ``d`` is the number of columns of
    ``X``. The metric is inner product when ``self._metric`` is
    ``'angular'`` and L2 otherwise.

    Args:
        X: 2-D array of vectors. Converted to float32 if it has another
            dtype. For the angular metric the float32 array is normalised
            without rebinding, so an ``X`` that already is float32 is
            modified for the caller as well.

    Side effects:
        Sets ``self.base_index`` and ``self.refine_index``.
    """
    vectors = X if X.dtype == numpy.float32 else X.astype(numpy.float32)

    is_angular = self._metric == 'angular'
    if is_angular:
        # The return value is not used: the call is relied on to normalise
        # `vectors` in place.
        faiss.normalize_L2(vectors)

    dim = vectors.shape[1]
    if is_angular:
        metric = faiss.METRIC_INNER_PRODUCT
    else:
        metric = faiss.METRIC_L2

    spec = f"IVF{self._n_list},PQ{dim // 2}x4fs"
    base = faiss.index_factory(dim, spec, metric)
    base.train(vectors)
    base.add(vectors)

    # The refine index is built from the same vectors that were just added.
    refine = faiss.IndexRefineFlat(base, faiss.swig_ptr(vectors))

    self.base_index = base
    self.refine_index = refine
def fit(self, X):
    """Build the index described by ``IVF<n_list>,PQ<d//2>x4fs,RFlat`` for ``X``.

    Args:
        X: 2-D array of vectors. L2-normalised row-wise (into a new array)
            when ``self._metric`` is ``'angular'``, then converted to
            float32 if needed.

    Side effects:
        Prints the factory string, and sets ``self.index`` and
        ``self.index_refine``.
    """
    data = X
    if self._metric == 'angular':
        data = sklearn.preprocessing.normalize(data, axis=1, norm='l2')
    if data.dtype != numpy.float32:
        data = data.astype(numpy.float32)

    dim = data.shape[1]
    n_subquantizers = dim // 2
    factory_spec = f"IVF{self._n_list},PQ{n_subquantizers}x4fs,RFlat"
    print(f"index_build_str={factory_spec}")

    # NOTE(review): the metric is inner product regardless of self._metric
    # -- confirm this class is only used with the angular metric.
    built = faiss.index_factory(dim, factory_spec, faiss.METRIC_INNER_PRODUCT)

    # Training only looks at the first 250000 rows; all rows are added.
    built.train(data[:250000])
    built.add(data)

    # NOTE(review): the factory string already ends in "RFlat", so `built`
    # presumably is a refine index itself and this wraps it a second time
    # -- confirm that is intended.
    wrapped = faiss.IndexRefineFlat(built, faiss.swig_ptr(data))

    self.index = built
    self.index_refine = wrapped
def faiss_eval_search(index, xq, xb, nprobe_tab, pre_reorder_k_tab, k, gt, nrun, measure):
    """Time searches over a grid of ``nprobe`` / re-ranking settings and report them.

    Args:
        index: Index to benchmark; ``use_precomputed_table`` and
            ``by_residual`` are printed and ``nprobe`` is assigned.
        xq: Query vectors passed to ``search``.
        xb: Database vectors used to build the flat refine index.
        nprobe_tab: Iterable of ``nprobe`` values to try.
        pre_reorder_k_tab: Iterable of re-ranking candidate counts; ``0``
            means search ``index`` directly without the refine stage.
        k: Number of neighbours requested per query.
        gt: Ground truth, forwarded to the evaluation function.
        nrun: Number of timed repetitions per setting.
        measure: ``"1-recall"`` reports through ``eval_recalls``; any other
            value reports through ``eval_inters``.

    Only the labels of the last repetition of each setting are evaluated,
    together with the list of all timings for that setting.
    """
    import faiss

    print("use precomputed table=", index.use_precomputed_table,
          "by residual=", index.by_residual)

    print("adding a refine index")
    refiner = faiss.IndexRefineFlat(index, faiss.swig_ptr(xb))

    print("set single thread")
    faiss.omp_set_num_threads(1)

    print("warmup")
    for _ in range(5):
        index.search(xq, k)

    print("run timing")
    for nprobe in nprobe_tab:
        for pre_reorder_k in pre_reorder_k_tab:
            index.nprobe = nprobe
            times = []
            for _ in range(nrun):
                # Pick the object to query; configuring the refine stage
                # happens before the clock starts.
                if pre_reorder_k == 0:
                    searcher = index
                else:
                    refiner.k_factor = pre_reorder_k / k
                    searcher = refiner
                t0 = time.time()
                D, I = searcher.search(xq, k)
                t1 = time.time()
                times.append(t1 - t0)

            header = "Faiss nprobe=%4d reo=%4d" % (nprobe, pre_reorder_k)
            report = eval_recalls if measure == "1-recall" else eval_inters
            report(header, I, gt, times)