def knn_ground_truth(xq, db_iterator, k, metric_type=faiss.METRIC_L2):
    """Computes the exact KNN search results for a dataset that possibly
    does not fit in RAM but for which we have an iterator that
    returns it block by block.

    Args:
        xq: 2D array of query vectors, one row per query (its shape is
            unpacked as ``(nq, d)``).
        db_iterator: iterable yielding database blocks; each block is added
            to a flat index of dimension ``d`` and its ``shape[0]`` is used
            as the number of vectors in the block.
        k: number of neighbors to keep per query.
        metric_type: Faiss metric used to build the flat index.

    Returns:
        The ``(D, I)`` pair of the merged result heap: distances and
        database ids, where ids are offsets into the concatenation of all
        blocks in iteration order.
    """
    LOG.info("knn_ground_truth queries size %s k=%d" % (xq.shape, k))
    t0 = time.time()
    nq, d = xq.shape

    # For inner product, larger scores are better, so the merge heap must
    # keep the maximum values; for the other metrics it keeps the minimum.
    keep_max = metric_type == faiss.METRIC_INNER_PRODUCT
    rh = faiss.ResultHeap(nq, k, keep_max=keep_max)

    index = faiss.IndexFlat(d, metric_type)

    ngpu = faiss.get_num_gpus()
    if ngpu:
        LOG.info('running on %d GPUs' % ngpu)
        index = faiss.index_cpu_to_all_gpus(index)

    # compute ground-truth by blocks, and add to heaps
    i0 = 0
    for xbi in db_iterator:
        ni = xbi.shape[0]
        index.add(xbi)
        D, I = index.search(xq, k)
        # ids returned by the search are local to the block: shift them to
        # positions in the whole dataset
        I += i0
        rh.add_result(D, I)
        # drop the block so that only one block is indexed at a time
        index.reset()
        i0 += ni
        LOG.info("%d db elements, %.3f s" % (i0, time.time() - t0))

    rh.finalize()
    LOG.info("GT time: %.3f s (%d vectors)" % (time.time() - t0, i0))

    return rh.D, rh.I
def search_single_scan(index, xq, k, bs=128):
    """Performs a search so that the inverted lists are accessed
    sequentially by blocks of size bs.

    Args:
        index: an index exposing ``nprobe``, ``nlist``, ``quantizer``,
            ``parallel_mode`` and ``search_preassigned``, optionally wrapped
            in a ``faiss.IndexPreTransform``.
        xq: query vectors, one row per query. They are handed to Faiss via
            ``faiss.swig_ptr``, so presumably must be a contiguous float32
            array (confirm against callers).
        k: number of results per query.
        bs: number of consecutive inverted lists visited per pass.

    Returns:
        The ``(D, I)`` arrays of the result heap after all passes.
    """
    # handle pretransform
    if isinstance(index, faiss.IndexPreTransform):
        xq = index.apply_py(xq)
        index = faiss.downcast_index(index.index)

    # coarse assignment
    nprobe = min(index.nprobe, index.nlist)
    coarse_dis, assign = index.quantizer.search(xq, nprobe)
    nlist = index.nlist
    assign_buckets = assign // bs
    nq = len(xq)

    # The heap is filled in place by the index over several passes, so its
    # orientation has to match the index metric (max kept for inner product).
    keep_max = index.metric_type == faiss.METRIC_INNER_PRODUCT
    rh = faiss.ResultHeap(nq, k, keep_max=keep_max)

    # Ask the index not to (re)initialize the result heaps on each call, so
    # that results accumulate across passes. The flag is restored afterwards
    # so the caller's index is left as it was found.
    saved_parallel_mode = index.parallel_mode
    index.parallel_mode |= index.PARALLEL_MODE_NO_HEAP_INIT
    try:
        for l0 in range(0, nlist, bs):
            bucket_no = l0 // bs
            # keep only the probes that fall in the current block of lists;
            # -1 marks the others as "do not visit"
            sub_assign = assign.copy()
            sub_assign[assign_buckets != bucket_no] = -1

            index.search_preassigned(
                nq, faiss.swig_ptr(xq), k,
                faiss.swig_ptr(sub_assign), faiss.swig_ptr(coarse_dis),
                faiss.swig_ptr(rh.D), faiss.swig_ptr(rh.I),
                False, None
            )
    finally:
        index.parallel_mode = saved_parallel_mode

    rh.finalize()

    return rh.D, rh.I
def search(self, x, k: int):
    """Search every sub-index for ``x`` and merge the results.

    Each element of ``self.sub_indexes`` is searched through
    ``self.pool.imap`` and its ``(D, I)`` output is pushed into a single
    ``faiss.ResultHeap`` holding ``k`` entries for each of the
    ``x.shape[0]`` queries.

    Returns:
        The ``(D, I)`` arrays of the finalized heap.
    """
    def _search_one(sub_index):
        return sub_index.search(x, k)

    merged = faiss.ResultHeap(x.shape[0], k)
    partial_results = self.pool.imap(_search_one, self.sub_indexes)
    for distances, labels in partial_results:
        merged.add_result(distances, labels)
    merged.finalize()
    return merged.D, merged.I
def run_test(self, keep_max):
    """Check that feeding a ResultHeap in one or in three chunks agrees.

    A random (100, 1000) table of values and a matching table of ids are
    pushed into a ``faiss.ResultHeap`` of size 10, first in a single call
    and then split column-wise into three calls. After finalizing, each
    heap's rows must be sorted (non-increasing when ``keep_max`` is true,
    non-decreasing otherwise), and both heaps must hold identical ``D``
    and ``I``.
    """
    n_queries, n_candidates, heap_size = 100, 1000, 10
    values = faiss.rand((n_queries, n_candidates), 123)
    labels = faiss.randint((n_queries, n_candidates), 1324, 10000)

    heaps = {}
    for n_chunks in (1, 3):
        heap = faiss.ResultHeap(n_queries, heap_size, keep_max=keep_max)

        # column boundaries of the chunks: n_chunks + 1 cut points
        cuts = [c * n_candidates // n_chunks for c in range(n_chunks + 1)]
        for lo, hi in zip(cuts[:-1], cuts[1:]):
            chunk_values = values[:, lo:hi].copy()
            chunk_labels = labels[:, lo:hi].copy()
            heap.add_result(chunk_values, chunk_labels)
        heap.finalize()

        # each row must come out sorted in the direction of the heap
        left, right = heap.D[:, :-1], heap.D[:, 1:]
        if keep_max:
            assert np.all(left >= right)
        else:
            assert np.all(left <= right)

        heaps[n_chunks] = heap

    single, chunked = heaps[1], heaps[3]
    np.testing.assert_equal(single.D, chunked.D)
    np.testing.assert_equal(single.I, chunked.I)