def compute_GT():
    """Compute the ground-truth nearest neighbours with a multi-GPU flat L2 index.

    Takes no arguments: it reads the module-level names ``xb`` (database),
    ``xq`` (queries), ``nq_gt`` (number of queries to use) and ``gt_sl``
    (number of neighbours kept per query).

    The database is processed in blocks of ``bs`` vectors. Each block is added
    to the GPU index, searched, and its results are merged into a per-query
    heap; the index is then emptied before the next block.

    Returns:
        The ``(nq_gt, gt_sl)`` int64 array of neighbour ids after
        ``heaps.reorder()``.
    """
    # Local import: the progress line is written through sys.stdout so that it
    # works on both Python 2 and Python 3 (the original used the Python 2-only
    # ``print ...,`` statement form).
    import sys

    print("compute GT")
    t0 = time.time()

    # Result buffers; the heap structure below writes into them via raw
    # pointers, so both arrays must stay alive until heaps.reorder() is done.
    gt_I = np.zeros((nq_gt, gt_sl), dtype='int64')
    gt_D = np.zeros((nq_gt, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k = gt_sl
    heaps.nh = nq_gt
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()
    bs = 10 ** 5

    n, d = xb.shape
    xqs = sanitize(xq[:nq_gt])

    db_gt = faiss.IndexFlatL2(d)
    vres, vdev = make_vres_vdev()
    db_gt_gpu = faiss.index_cpu_to_gpu_multiple(vres, vdev, db_gt)

    # compute ground-truth by blocks of bs, and add to heaps
    for i0, xsl in dataset_iterator(xb, IdentPreproc(d), bs):
        db_gt_gpu.add(xsl)
        D, I = db_gt_gpu.search(xqs, gt_sl)
        # Shift the ids returned for this block by i0 (presumably the offset
        # of the block inside xb; dataset_iterator is defined elsewhere).
        I += i0
        heaps.addn_with_ids(
            gt_sl, faiss.swig_ptr(D), faiss.swig_ptr(I), gt_sl)
        db_gt_gpu.reset()
        # "\r" rewrites the same terminal line on every block.
        sys.stdout.write("\r %d/%d, %.3f s" % (i0, n, time.time() - t0))
        sys.stdout.flush()
    # Terminate the progress line.
    sys.stdout.write("\n")
    heaps.reorder()

    print("GT time: %.3f s" % (time.time() - t0))
    return gt_I
def search_knn(xq, xb, k, distance_type=faiss.METRIC_L2):
    """Wrapper around the faiss knn functions without index.

    Args:
        xq: query vectors, 2-D array of shape ``(nq, d)``.
        xb: database vectors, 2-D array of shape ``(nb, d)``.
        k: number of results to return per query.
        distance_type: ``faiss.METRIC_L2`` (default) or
            ``faiss.METRIC_INNER_PRODUCT``.

    Returns:
        ``(D, I)``: a float32 array and an int64 array, both of shape
        ``(nq, k)``, filled in by the faiss knn routine.

    Raises:
        AssertionError: if ``xq`` and ``xb`` have different dimensionality.
        ValueError: if ``distance_type`` is not one of the two supported
            metrics (previously uninitialized buffers were returned).
    """
    # swig_ptr hands a raw pointer to the C++ routine, which takes float*;
    # make sure the buffers really are contiguous float32. This is a no-op
    # (no copy) when the inputs already have that layout.
    xq = np.ascontiguousarray(xq, dtype='float32')
    xb = np.ascontiguousarray(xb, dtype='float32')

    nq, d = xq.shape
    nb, d2 = xb.shape
    assert d == d2

    # Select the heap flavour and the knn routine for the metric up front, so
    # that an unknown metric fails loudly before any output is allocated.
    if distance_type == faiss.METRIC_L2:
        heaps = faiss.float_maxheap_array_t()
        knn_function = faiss.knn_L2sqr
    elif distance_type == faiss.METRIC_INNER_PRODUCT:
        heaps = faiss.float_minheap_array_t()
        knn_function = faiss.knn_inner_product
    else:
        raise ValueError(
            "unsupported distance_type %r: expected faiss.METRIC_L2 or "
            "faiss.METRIC_INNER_PRODUCT" % (distance_type,))

    I = np.empty((nq, k), dtype='int64')
    D = np.empty((nq, k), dtype='float32')

    # The heap array writes its results straight into D and I.
    heaps.k = k
    heaps.nh = nq
    heaps.val = faiss.swig_ptr(D)
    heaps.ids = faiss.swig_ptr(I)

    knn_function(
        faiss.swig_ptr(xq), faiss.swig_ptr(xb),
        d, nq, nb, heaps)
    return D, I
def compute_GT_CPU(xb, xq, gt_sl):
    """Compute ground-truth neighbours of ``xq`` in ``xb`` with a CPU flat L2 index.

    The database is consumed block by block through ``dataset_iterator``; each
    block is indexed, searched and merged into a per-query heap, after which
    the index is emptied again.

    Args:
        xb: database array; its shape is unpacked as ``(n, d)``.
        xq: query array; its shape is unpacked as ``(nq_gt, _)``.
        gt_sl: number of neighbours kept per query.

    Returns:
        ``(gt_I, gt_D)``: the int64 id array and the float32 distance array,
        both of shape ``(nq_gt, gt_sl)``, after the heaps were reordered.
    """
    num_queries, _ = xq.shape

    print("compute GT CPU")
    started = time.time()

    # Output buffers; the heap array below writes into them through pointers.
    best_ids = np.zeros((num_queries, gt_sl), dtype='int64')
    best_dists = np.zeros((num_queries, gt_sl), dtype='float32')

    merged = faiss.float_maxheap_array_t()
    merged.k, merged.nh = gt_sl, num_queries
    merged.val = faiss.swig_ptr(best_dists)
    merged.ids = faiss.swig_ptr(best_ids)
    merged.heapify()

    block_size = 10 ** 5
    _, dim = xb.shape
    queries = sanitize(xq[:num_queries])
    flat_index = faiss.IndexFlatL2(dim)

    # One pass over the database, one block at a time.
    for offset, block in dataset_iterator(xb, IdentPreproc(dim), block_size):
        flat_index.add(block)
        block_dists, block_ids = flat_index.search(queries, gt_sl)
        # Shift the ids by the value the iterator yielded with this block
        # (presumably the block's starting position in xb).
        block_ids += offset
        merged.addn_with_ids(
            gt_sl,
            faiss.swig_ptr(block_dists),
            faiss.swig_ptr(block_ids),
            gt_sl,
        )
        flat_index.reset()

    merged.reorder()

    elapsed = time.time() - started
    print("GT CPU time: {} s".format(elapsed))
    return best_ids, best_dists
def __init__(self, nq, k):
    """Allocate zeroed result arrays and a max-heap array bound to them.

    nq: number of query vectors, k: number of results per query
    """
    shape = (nq, k)
    self.I = np.zeros(shape, dtype='int64')
    self.D = np.zeros(shape, dtype='float32')
    self.nq = nq
    self.k = k

    # The heap array stores pointers into self.D / self.I, so whatever it
    # writes lands directly in those two attributes.
    heap_array = faiss.float_maxheap_array_t()
    heap_array.k, heap_array.nh = k, nq
    heap_array.val = faiss.swig_ptr(self.D)
    heap_array.ids = faiss.swig_ptr(self.I)
    heap_array.heapify()
    self.heaps = heap_array
def compute_GT_GPU(xb, xq, gt_sl, bs=10 ** 5, tempmem=3500 * 1024 * 1024):
    """Compute ground-truth neighbours of ``xq`` in ``xb`` on all visible GPUs.

    A flat L2 index is cloned to every GPU reported by
    ``faiss.get_num_gpus()``. The database is consumed block by block through
    ``dataset_iterator``; each block is indexed, searched and merged into a
    per-query heap, after which the index is emptied again.

    Args:
        xb: database array; its shape is unpacked as ``(n, d)``.
        xq: query array; its shape is unpacked as ``(nq_gt, _)``.
        gt_sl: number of neighbours kept per query.
        bs: number of database vectors per block handed to
            ``dataset_iterator``. Defaults to the previously hard-coded
            ``10 ** 5``.
        tempmem: value passed to ``StandardGpuResources.setTempMemory`` for
            every GPU. Defaults to the previously hard-coded
            ``3500*1024*1024``; adjust it to your GPU memory size.

    Returns:
        ``(gt_I, gt_D)``: the int64 id array and the float32 distance array,
        both of shape ``(nq_gt, gt_sl)``, after the heaps were reordered.

    Raises:
        RuntimeError: if faiss reports no GPU.
    """
    nq_gt, _ = xq.shape
    print("compute GT GPU")
    t0 = time.time()

    # Output buffers; the heap array below writes into them through pointers.
    gt_I = np.zeros((nq_gt, gt_sl), dtype='int64')
    gt_D = np.zeros((nq_gt, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k = gt_sl
    heaps.nh = nq_gt
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()

    _, d = xb.shape
    xqs = sanitize(xq[:nq_gt])

    ngpu = faiss.get_num_gpus()
    if ngpu <= 0:
        # Fail with a readable message instead of an obscure error from the
        # multi-GPU cloner being called with empty resource/device vectors.
        raise RuntimeError("compute_GT_GPU requires at least one GPU")

    # One resources object per device. They are also kept in a Python list,
    # as in the original code, presumably so the wrappers stay referenced for
    # as long as vres is in use.
    gpu_resources = []
    vres = faiss.GpuResourcesVector()
    vdev = faiss.IntVector()
    for i in range(ngpu):
        res = faiss.StandardGpuResources()
        res.setTempMemory(tempmem)
        gpu_resources.append(res)
        vdev.push_back(i)
        vres.push_back(res)

    db_gt = faiss.IndexFlatL2(d)
    db_gt_gpu = faiss.index_cpu_to_gpu_multiple(vres, vdev, db_gt)

    # compute ground-truth by blocks of bs, and add to heaps
    for i0, xsl in dataset_iterator(xb, IdentPreproc(d), bs):
        db_gt_gpu.add(xsl)
        D, I = db_gt_gpu.search(xqs, gt_sl)
        # Shift the ids by the value the iterator yielded with this block
        # (presumably the block's starting position in xb).
        I += i0
        heaps.addn_with_ids(
            gt_sl, faiss.swig_ptr(D), faiss.swig_ptr(I), gt_sl)
        db_gt_gpu.reset()

    heaps.reorder()
    print("GT GPU time: {} s".format(time.time() - t0))
    return gt_I, gt_D
def compute_GT():
    """Compute the ground-truth nearest neighbours with a multi-GPU flat L2 index.

    Takes no arguments: it reads the module-level names ``xb`` (database),
    ``xq`` (queries), ``nq_gt`` (number of queries to use) and ``gt_sl``
    (number of neighbours kept per query).

    The database is processed in blocks of ``bs`` vectors. Each block is added
    to the GPU index, searched, and its results are merged into a per-query
    heap; the index is then emptied before the next block.

    Returns:
        The ``(nq_gt, gt_sl)`` int64 array of neighbour ids after
        ``heaps.reorder()``.
    """
    # Local import: the progress line is written through sys.stdout so that it
    # works on both Python 2 and Python 3 (the original used the Python 2-only
    # ``print ...,`` statement form).
    import sys

    print("compute GT")
    t0 = time.time()

    # Result buffers; the heap structure below writes into them via raw
    # pointers, so both arrays must stay alive until heaps.reorder() is done.
    gt_I = np.zeros((nq_gt, gt_sl), dtype='int64')
    gt_D = np.zeros((nq_gt, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k = gt_sl
    heaps.nh = nq_gt
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()
    bs = 10 ** 5

    n, d = xb.shape
    xqs = sanitize(xq[:nq_gt])

    db_gt = faiss.IndexFlatL2(d)
    vres, vdev = make_vres_vdev()
    db_gt_gpu = faiss.index_cpu_to_gpu_multiple(
        vres, vdev, db_gt)

    # compute ground-truth by blocks of bs, and add to heaps
    for i0, xsl in dataset_iterator(xb, IdentPreproc(d), bs):
        db_gt_gpu.add(xsl)
        D, I = db_gt_gpu.search(xqs, gt_sl)
        # Shift the ids returned for this block by i0 (presumably the offset
        # of the block inside xb; dataset_iterator is defined elsewhere).
        I += i0
        heaps.addn_with_ids(
            gt_sl, faiss.swig_ptr(D), faiss.swig_ptr(I), gt_sl)
        db_gt_gpu.reset()
        # "\r" rewrites the same terminal line on every block.
        sys.stdout.write("\r %d/%d, %.3f s" % (i0, n, time.time() - t0))
        sys.stdout.flush()
    # Terminate the progress line.
    sys.stdout.write("\n")
    heaps.reorder()

    print("GT time: %.3f s" % (time.time() - t0))
    return gt_I
def _knn_search(queries, data, k, return_neighbours=False, res=None):
    """Brute-force L2 k-nearest-neighbour search of ``queries`` in ``data``.

    Args:
        queries: 2-D array/tensor; its shape is unpacked as
            ``(num_queries, dim)``.
        data: the vectors that are searched; ``data.shape[0]`` is passed as
            the number of database vectors.
        k: number of neighbours per query.
        return_neighbours: when true, the neighbour vectors gathered from
            ``data`` are returned as a third value.
        res: when ``None`` the numpy / ``faiss.knn_L2sqr`` path is used;
            otherwise ``res`` is forwarded to ``faiss.bruteForceKnn`` and the
            outputs are torch tensors on ``queries.device``.

    Returns:
        ``(dists, idxs)`` or ``(dists, idxs, neighbours)``; ``dists`` is
        float32 and ``idxs`` int64, both ``(num_queries, k)``, and
        ``neighbours`` is reshaped to ``(-1, k, dim)``.
    """
    num_queries, dim = queries.shape

    if res is not None:
        def _address(tensor, itemsize):
            # Address of the tensor's first element: storage base pointer plus
            # the storage offset converted to bytes.
            return tensor.storage().data_ptr() + tensor.storage_offset() * itemsize

        dists = torch.empty(
            num_queries, k, dtype=torch.float32, device=queries.device)
        idxs = torch.empty(
            num_queries, k, dtype=torch.int64, device=queries.device)

        # NOTE(review): the item sizes are hard-coded (4 bytes for the float
        # pointers, 8 for the index pointer), so data and queries are assumed
        # to be float32 tensors -- confirm against callers.
        data_ptr = faiss.cast_integer_to_float_ptr(_address(data, 4))
        query_ptr = faiss.cast_integer_to_float_ptr(_address(queries, 4))
        dist_ptr = faiss.cast_integer_to_float_ptr(_address(dists, 4))
        idx_ptr = faiss.cast_integer_to_long_ptr(_address(idxs, 8))

        faiss.bruteForceKnn(
            res,
            faiss.METRIC_L2,
            data_ptr,
            data.is_contiguous(),
            data.shape[0],
            query_ptr,
            queries.is_contiguous(),
            num_queries,
            dim,
            k,
            dist_ptr,
            idx_ptr,
        )
    else:
        dists = np.empty((num_queries, k), dtype=np.float32)
        idxs = np.empty((num_queries, k), dtype=np.int64)

        # The heap array writes its results straight into dists / idxs.
        heaps = faiss.float_maxheap_array_t()
        heaps.k = k
        heaps.nh = num_queries
        heaps.val = faiss.swig_ptr(dists)
        heaps.ids = faiss.swig_ptr(idxs)
        faiss.knn_L2sqr(
            faiss.swig_ptr(queries),
            faiss.swig_ptr(data),
            dim,
            num_queries,
            data.shape[0],
            heaps,
        )

    if not return_neighbours:
        return dists, idxs

    # Gather the neighbour vectors and regroup them per query.
    flat_ids = idxs.reshape(-1)
    neighbours = data[flat_ids].reshape(-1, k, dim)
    return dists, idxs, neighbours
def test_doxygen_comments(self):
    """The docstring of float_maxheap_array_t contains the expected description."""
    maxheap_array = faiss.float_maxheap_array_t()
    # assertIn (rather than assertTrue(x in y)) shows both the expected
    # snippet and the actual docstring when the check fails.
    self.assertIn(
        "a template structure for a set of [min|max]-heaps",
        maxheap_array.__doc__)