def train_kmeans(x, k, ngpu):
    "Runs kmeans on one or several GPUs"
    d = x.shape[1]
    clus = faiss.Clustering(d, k)
    clus.verbose = True
    clus.niter = 20

    # otherwise the kmeans implementation sub-samples the training set
    clus.max_points_per_centroid = 10000000

    res = [faiss.StandardGpuResources() for i in range(ngpu)]

    flat_config = []
    for i in range(ngpu):
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = i
        flat_config.append(cfg)

    if ngpu == 1:
        index = faiss.GpuIndexFlatIP(res[-1], d, flat_config[0])
    else:
        indexes = [faiss.GpuIndexFlatIP(res[i], d, flat_config[i])
                   for i in range(ngpu)]
        index = faiss.IndexProxy()
        for sub_index in indexes:
            index.addIndex(sub_index)

    # perform the training
    clus.train(x, index)
    centroids = faiss.vector_float_to_array(clus.centroids)

    obj = faiss.vector_float_to_array(clus.obj)
    print("final objective: %.4g" % obj[-1])

    return centroids.reshape(k, d)
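
# Hedged usage sketch (not part of the original source): runs train_kmeans on
# random data. Assumes faiss-gpu and numpy are installed and at least one GPU
# is visible; the shapes and k are illustrative.
def _demo_train_kmeans():
    import numpy as np
    x = np.random.RandomState(0).rand(10000, 64).astype('float32')
    centroids = train_kmeans(x, k=256, ngpu=1)
    assert centroids.shape == (256, 64)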
def get_faiss_nearest_neighbours(emb_src, emb_wrt, k, use_gpu=True, gpu_device=0):
    """
    Gets source points'/embeddings' nearest neighbours with respect to a set
    of target embeddings.

    inputs:
        :param emb_src (torch.Tensor) : the source embedding matrix
        :param emb_wrt (torch.Tensor) : the embedding matrix in which nearest
                                        neighbours are to be found
        :param k (int)                : the number of nearest neighbours to find
        :param use_gpu (bool)         : true if the gpu is to be used
        :param gpu_device (int)       : the GPU to be used

    outputs:
        :returns distances (torch.Tensor) : [len(emb_src), k] matrix of distances
                                            of each source point to each of its
                                            k nearest neighbours
        :returns indices (torch.Tensor)   : [len(emb_src), k] matrix of indices
                                            of the k nearest neighbours of each
                                            source point
    """
    if use_gpu:
        res = faiss.StandardGpuResources()
        cfg = faiss.GpuIndexFlatConfig()
        cfg.device = gpu_device
        index = faiss.GpuIndexFlatIP(res, emb_wrt.shape[1], cfg)
    else:
        index = faiss.IndexFlatIP(emb_wrt.shape[1])

    print('Building Faiss index')
    index.add(emb_wrt.cpu().detach().numpy())
    print('... Done!')

    # index.search returns a (distances, indices) pair; return the two parts
    # as tensors rather than stacking them into a single tensor
    distances, indices = index.search(emb_src.cpu().detach().numpy(), k)
    return torch.from_numpy(distances).cuda(), torch.from_numpy(indices).cuda()
def _main(a):
    # NOTE: legacy Python 2 code: itertools.izip, the 'string-escape' codec and
    # torch.utils.serialization.load_lua do not exist in Python 3 / recent torch.
    print('Loading embeddings...')
    sys.stdout.flush()
    t = torch.utils.serialization.load_lua(a.embeddings)

    print('Loading sentences...')
    sys.stdout.flush()
    sent = _open(a.sentences, 'rb').readlines()

    res = faiss.StandardGpuResources()
    index = faiss.GpuIndexFlatIP(res, t.size()[1])

    print('Building index on GPU...')
    sys.stdout.flush()
    index.add(x=t.numpy())

    print('Searching index...')
    sys.stdout.flush()
    distances, indices = index.search(x=t[a.sent_index:a.sent_index + 1].numpy(), k=a.k)

    print('Searched for: "{}"'.format(sent[a.sent_index].encode('string-escape')))
    print('Nearest neighbors (distance \t index \t sentence):')
    for d, i in itertools.izip(distances[0], indices[0]):
        j = i + a.offset
        c_sentence = sent[j].encode('string-escape')
        print('{dist:.2f}\t{index}\t"{sentence}"'.format(dist=d, index=j, sentence=c_sentence))
    sys.stdout.flush()

    print('Done')
    sys.stdout.flush()
def get_faiss_nearest_neighbours(emb_src, emb_wrt, k, use_gpu=True, gpu_device=0):
    """
    Gets source points'/embeddings' nearest neighbours with respect to a set
    of target embeddings.

    inputs:
        :param emb_src (np.ndarray) : the source embedding matrix
        :param emb_wrt (np.ndarray) : the embedding matrix in which nearest
                                      neighbours are to be found
        :param k (int)              : the number of nearest neighbours to find
        :param use_gpu (bool)       : true if the gpu is to be used
        :param gpu_device (int)     : the GPU to be used

    outputs:
        :returns distances (np.ndarray) : [len(emb_src), k] matrix of distances
                                          of each source point to each of its
                                          k nearest neighbours
        :returns indices (np.ndarray)   : [len(emb_src), k] matrix of indices
                                          of the k nearest neighbours of each
                                          source point
    """
    if use_gpu:
        res = faiss.StandardGpuResources()
        cfg = faiss.GpuIndexFlatConfig()
        # expects an integer device id, per the docstring (the original sliced
        # the last character off a 'cuda:N'-style string)
        cfg.device = gpu_device
        index = faiss.GpuIndexFlatIP(res, emb_wrt.shape[1], cfg)
    else:
        index = faiss.IndexFlatIP(emb_wrt.shape[1])

    index.add(emb_wrt.astype('float32'))
    return index.search(emb_src.astype('float32'), k)
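
# Hedged usage sketch (added): exact inner-product search with the numpy-based
# helper above. Assumes faiss and numpy; the data is random and unnormalized,
# so the scores are raw dot products, not cosine similarities.
def _demo_get_faiss_nearest_neighbours():
    import numpy as np
    rng = np.random.RandomState(0)
    emb_wrt = rng.rand(1000, 128).astype('float32')
    emb_src = rng.rand(10, 128).astype('float32')
    distances, indices = get_faiss_nearest_neighbours(emb_src, emb_wrt, k=5,
                                                      use_gpu=False)
    print(distances.shape, indices.shape)  # (10, 5) (10, 5)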
def test_interop(self):
    d = 128
    nq = 100
    nb = 1000
    k = 10

    xq = faiss.randn(nq * d, 1234).reshape(nq, d)
    xb = faiss.randn(nb * d, 1235).reshape(nb, d)

    res = faiss.StandardGpuResources()

    # Let's run on a non-default stream
    s = torch.cuda.Stream()

    # Torch will run on this stream
    with torch.cuda.stream(s):
        # query is pytorch tensor (CPU and GPU)
        xq_torch_cpu = torch.FloatTensor(xq)
        xq_torch_gpu = xq_torch_cpu.cuda()

        index = faiss.GpuIndexFlatIP(res, d)
        index.add(xb)

        # Query with GPU tensor (this will be done on the current pytorch stream)
        D2, I2 = search_index_pytorch(res, index, xq_torch_gpu, k)
        Dref, Iref = index.search(xq, k)
        assert np.all(Iref == I2.cpu().numpy())

        # Query with CPU tensor
        D3, I3 = search_index_pytorch(res, index, xq_torch_cpu, k)
        assert np.all(Iref == I3.numpy())
def _retrieve_knn_faiss_gpu_inner_product(query_embeddings, db_embeddings, k, gpu_id=0):
    """
    Retrieve the k nearest neighbors based on inner product.

    Args:
        query_embeddings:       numpy array of size [NUM_QUERY_IMAGES x EMBED_SIZE]
        db_embeddings:          numpy array of size [NUM_DB_IMAGES x EMBED_SIZE]
        k:                      number of nn results to retrieve, excluding the query
        gpu_id:                 gpu device id to use for the nearest-neighbor search
                                (if possible for the `metric` chosen)

    Returns:
        dists:                  numpy array of size [NUM_QUERY_IMAGES x k],
                                distances of the k nearest neighbors for each query
        retrieved_db_indices:   numpy array of size [NUM_QUERY_IMAGES x k],
                                indices of the k nearest neighbors for each query
    """
    import faiss

    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = gpu_id

    # Evaluate with inner product
    index = faiss.GpuIndexFlatIP(res, db_embeddings.shape[1], flat_config)
    index.add(db_embeddings)

    # retrieve k+1 results in case the query images are also in the db
    dists, retrieved_result_indices = index.search(query_embeddings, k + 1)
    return dists, retrieved_result_indices
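
# Hedged usage sketch (added): retrieval where the queries are themselves part
# of the database, which is why the helper searches k+1 and the caller drops
# the first column (the self-match). Rows are normalized so that the inner
# product behaves like cosine similarity and the self-match ranks first.
# Assumes faiss-gpu; shapes are illustrative.
def _demo_retrieve_knn_inner_product():
    import numpy as np
    rng = np.random.RandomState(1)
    db = rng.rand(500, 64).astype('float32')
    db /= np.linalg.norm(db, axis=1, keepdims=True)  # unit norm: IP == cosine
    dists, inds = _retrieve_knn_faiss_gpu_inner_product(db[:8], db, k=10, gpu_id=0)
    inds = inds[:, 1:]  # drop the self-match, keep the k true neighbours
    print(inds.shape)   # (8, 10)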
def test_interop(self):
    d = 16
    nq = 5
    nb = 20

    xq = faiss.randn(nq * d, 1234).reshape(nq, d)
    xb = faiss.randn(nb * d, 1235).reshape(nb, d)

    res = faiss.StandardGpuResources()
    index = faiss.GpuIndexFlatIP(res, d)
    index.add(xb)

    # reference CPU result
    Dref, Iref = index.search(xq, 5)

    # query is pytorch tensor (CPU)
    xq_torch = torch.FloatTensor(xq)
    D2, I2 = search_index_pytorch(index, xq_torch, 5)
    assert np.all(Iref == I2.numpy())

    # query is pytorch tensor (GPU)
    xq_torch = xq_torch.cuda()
    # no need for a sync here
    D3, I3 = search_index_pytorch(index, xq_torch, 5)

    # D3 and I3 are torch tensors on GPU as well.
    # this does a sync, which is useful because faiss and
    # pytorch use different Cuda streams.
    res.syncDefaultStreamCurrentDevice()
    assert np.all(Iref == I3.cpu().numpy())
def generate_nns(embeddings, id2word, k, outfile):
    # NOTE: assumes a GPU build of faiss; `res` and `config` are only defined
    # inside the hasattr guard but are used unconditionally below
    if hasattr(faiss, 'StandardGpuResources'):
        # gpu mode
        res = faiss.StandardGpuResources()
        config = faiss.GpuIndexFlatConfig()
        config.device = 0

    dim = embeddings.shape[1]
    emb = normalize(embeddings)
    nbrs = faiss.GpuIndexFlatIP(res, dim, config)
    emb_est = np.ascontiguousarray(emb.astype(np.float32))
    nbrs.add(emb_est)
    _, indices = nbrs.search(np.ascontiguousarray(emb.astype(np.float32)), k=k + 1)
    indices = indices[:, 1:]  # drop each point's self-match

    lines = []
    assert len(id2word) == len(indices)
    for i in range(len(id2word)):
        word_vec = [str(int(x)) for x in indices[i]]
        lines.append(id2word[i] + ' ' + ' '.join(word_vec))

    # write into file
    if os.path.exists(outfile):
        os.remove(outfile)
    with open(outfile, 'w') as out_f:
        out_f.write('\n'.join(lines))
    return indices
def get_nn_avg_dist(emb, query, knn):
    """
    Compute the average distance of the `knn` nearest neighbors
    for a given set of embeddings and queries.
    Use Faiss if available.
    """
    if FAISS_AVAILABLE:
        emb = emb.cpu().numpy()
        query = query.cpu().numpy()
        if hasattr(faiss, 'StandardGpuResources'):
            # gpu mode
            res = faiss.StandardGpuResources()
            config = faiss.GpuIndexFlatConfig()
            config.device = 0
            index = faiss.GpuIndexFlatIP(res, emb.shape[1], config)
        else:
            # cpu mode
            index = faiss.IndexFlatIP(emb.shape[1])
        index.add(emb)
        distances, _ = index.search(query, knn)
        return distances.mean(1)
    else:
        bs = 1024
        all_distances = []
        emb = emb.transpose(0, 1).contiguous()
        for i in range(0, query.shape[0], bs):
            distances = query[i:i + bs].mm(emb)
            best_distances, _ = distances.topk(knn, dim=1, largest=True, sorted=True)
            all_distances.append(best_distances.mean(1).cpu())
        all_distances = torch.cat(all_distances)
        return all_distances.numpy()
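
# Hedged usage sketch (added): the CSLS-style average similarity to the knn
# nearest neighbours, as used in MUSE-like alignment code. Assumes torch and
# that FAISS_AVAILABLE is set by the surrounding module; falls back to the
# torch path when faiss is unavailable.
def _demo_get_nn_avg_dist():
    import torch
    emb = torch.nn.functional.normalize(torch.randn(1000, 300), dim=1)
    query = torch.nn.functional.normalize(torch.randn(50, 300), dim=1)
    avg = get_nn_avg_dist(emb, query, knn=10)
    print(avg.shape)  # (50,)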
def __init__(self, feats, k, index_path='', index_key='', nprobe=128,
             omp_num_threads=None, rebuild_index=True, verbose=True, **kwargs):
    import faiss
    if omp_num_threads is not None:
        faiss.omp_set_num_threads(omp_num_threads)
    self.verbose = verbose

    with Timer('[my faiss gpu] build index', verbose):
        if index_path != '' and not rebuild_index and os.path.exists(index_path):
            print('[my faiss gpu] read index from {}'.format(index_path))
            index = faiss.read_index(index_path)
        else:
            feats = feats.astype('float32')
            size, dim = feats.shape
            res = faiss.StandardGpuResources()
            index = faiss.GpuIndexFlatIP(res, dim)
            if index_key != '':
                assert index_key.find('HNSW') < 0, 'HNSW returns distances instead of sims'
                metric = faiss.METRIC_INNER_PRODUCT
                nlist = min(4096, 8 * round(math.sqrt(size)))
                if index_key == 'IVF':
                    quantizer = index
                    index = faiss.IndexIVFFlat(quantizer, dim, nlist, metric)
                else:
                    index = faiss.index_factory(dim, index_key, metric)
                if index_key.find('Flat') < 0:
                    assert not index.is_trained
                index.train(feats)
                index.nprobe = min(nprobe, nlist)
                assert index.is_trained
                print('nlist: {}, nprobe: {}'.format(nlist, nprobe))
            index.add(feats)
            if index_path != '':
                print('[my faiss gpu] save index to {}'.format(index_path))
                mkdir_if_no_exists(index_path)
                index_cpu = faiss.index_gpu_to_cpu(index)
                faiss.write_index(index_cpu, index_path)

    with Timer('[my faiss gpu] query topk {}'.format(k), verbose):
        knn_ofn = index_path + '.npz'
        if os.path.exists(knn_ofn):
            print('[my faiss gpu] read knns from {}'.format(knn_ofn))
            self.knns = np.load(knn_ofn)['data']
        else:
            sims, nbrs = index.search(feats, k=k)
            # convert inner-product similarities to (1 - sim) distances
            self.knns = [(np.array(nbr, dtype=np.int32),
                          1 - np.array(sim, dtype=np.float32))
                         for nbr, sim in zip(nbrs, sims)]
def get_index(vector_size, gpu=True):
    if gpu and hasattr(faiss, 'StandardGpuResources'):
        res = faiss.StandardGpuResources()
        config = faiss.GpuIndexFlatConfig()
        config.device = 0
        index = faiss.GpuIndexFlatIP(res, vector_size, config)
        # objects other than index are needed to prevent error due to GC
        return index, res, config
    else:
        index = faiss.IndexFlatIP(vector_size)
        return index, None, None
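
# Hedged usage sketch (added): the gpu branch returns `res` and `config`
# alongside the index so the caller can keep those references alive; keep all
# three names in scope for as long as the index is used.
def _demo_get_index():
    import numpy as np
    index, res, config = get_index(vector_size=32, gpu=False)
    index.add(np.random.rand(100, 32).astype('float32'))
    D, I = index.search(np.random.rand(3, 32).astype('float32'), 4)
    print(I.shape)  # (3, 4)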
def _build_index(self, xb):
    d = xb.size(-1)
    self.res = faiss.StandardGpuResources()
    index = faiss.GpuIndexFlatIP(self.res, d)  # brute-force inner-product index
    index.add(xb.detach().cpu().numpy())
    return index
def _init_faiss(self, ngpu, feat_len):
    self.flat_config = []
    for i in range(ngpu):
        self.cfg = faiss.GpuIndexFlatConfig()
        self.cfg.useFloat16 = False
        self.cfg.device = i
        self.flat_config.append(self.cfg)
    self.res = [faiss.StandardGpuResources() for i in range(ngpu)]
    self.indexes = [faiss.GpuIndexFlatIP(self.res[i], feat_len, self.flat_config[i])
                    for i in range(ngpu)]
    # replicate the flat index over all GPUs; queries are dispatched to the replicas
    self.index = faiss.IndexProxy()
    for sub_index in self.indexes:
        self.index.addIndex(sub_index)
def cluster_dbscan_gpu(X, eps, min_samples):
    d = X.shape[-1]
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 0

    print("Building faiss index, d=", d)
    index = faiss.GpuIndexFlatIP(res, d, flat_config)
    index.add(X)

    print("Running dbscan.")
    gd = GpuDbscan(X, gpu_index=index)
    # pass the caller's parameters through (the original hard-coded (1, 10),
    # leaving eps and min_samples unused)
    cluster_ids, core_point_flag, visited_flag = gd.gpu_dbscan(eps, min_samples)
    return cluster_ids
def exact_ann_index(trained_model):
    dim = trained_model.hparams.embedding_dim
    item_vectors = np.array(trained_model.item_embeddings.weight.data)

    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 0

    exact_index = faiss.GpuIndexFlatIP(res, dim, flat_config)
    exact_index.add(item_vectors)
    return exact_index
def _get_knn_indices(k, xb, xq):
    xb = xb.cpu().numpy()
    xq = xq.cpu().numpy()
    d = xq.shape[1]

    if hasattr(faiss, 'StandardGpuResources'):
        res = faiss.StandardGpuResources()
        config = faiss.GpuIndexFlatConfig()
        config.device = 0
        index = faiss.GpuIndexFlatIP(res, d, config)
    else:
        index = faiss.IndexFlatIP(d)

    index.add(xb)
    distances, knn_indices = index.search(xq, k)
    return distances, knn_indices
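
# Hedged usage sketch (added): torch tensors in, numpy arrays out. Works on a
# CPU-only faiss build via the hasattr fallback above.
def _demo_get_knn_indices():
    import torch
    xb = torch.randn(200, 64)  # default dtype float32, as faiss requires
    xq = torch.randn(5, 64)
    distances, knn_indices = _get_knn_indices(k=3, xb=xb, xq=xq)
    print(distances.shape, knn_indices.shape)  # (5, 3) (5, 3)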
def test_stress(self):
    # a mixture of the above, from issue #631
    target = np.random.rand(50, 16).astype('float32')

    index = faiss.IndexReplicas()
    size, dim = target.shape
    num_gpu = 4
    for _i in range(num_gpu):
        config = faiss.GpuIndexFlatConfig()
        config.device = 0   # simulate on a single GPU
        sub_index = faiss.GpuIndexFlatIP(faiss.StandardGpuResources(), dim, config)
        index.addIndex(sub_index)

    index = faiss.IndexIDMap(index)
    ids = np.arange(size)
    index.add_with_ids(target, ids)
def knn_faiss(X, Q, k, gpu_id=0):
    D = X.shape[1]

    # CPU search if gpu_id == -1, GPU search otherwise.
    if gpu_id == -1:
        index = faiss.IndexFlatIP(D)
    else:
        res = faiss.StandardGpuResources()
        flat_config = faiss.GpuIndexFlatConfig()
        flat_config.device = gpu_id
        index = faiss.GpuIndexFlatIP(res, D, flat_config)

    index.add(X)
    sim, knn = index.search(Q, min(k, X.shape[0]))
    index.reset()
    del index
    return knn, sim
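
# Hedged usage sketch (added): knn_faiss returns (indices, similarities) and
# caps k at the database size; gpu_id=-1 selects the CPU path, so this runs
# without a GPU.
def _demo_knn_faiss():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.rand(50, 16).astype('float32')
    Q = rng.rand(4, 16).astype('float32')
    knn, sim = knn_faiss(X, Q, k=100, gpu_id=-1)  # k is clipped to 50
    print(knn.shape, sim.shape)  # (4, 50) (4, 50)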
def _faiss_knn(X, k, mode='mut', inner_prod=False):
    # kNN search for the graph
    X = np.ascontiguousarray(X)
    print("Number of GPUs detected by FAISS: {}".format(faiss.get_num_gpus()))
    d = X.shape[1]

    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.useFloat16 = False
    flat_config.device = 0

    # build the index
    c = time.time()
    if inner_prod:
        faiss.normalize_L2(X)
        index = faiss.GpuIndexFlatIP(res, d, flat_config)
    else:
        index = faiss.GpuIndexFlatL2(res, d, flat_config)
    elapsed = time.time() - c
    LOG.info(f'kNN Index built in {elapsed:.3f} seconds', LOG.ll.UTILS)

    index.add(X)
    N = X.shape[0]

    c = time.time()
    D, I = index.search(X, k + 1)
    elapsed = time.time() - c
    LOG.info(f'kNN Search done in {elapsed:.3f} seconds', LOG.ll.UTILS)

    # Create the graph: drop the self-match in column 0
    D = np.sqrt(D[:, 1:])
    I = I[:, 1:]
    row_idx = np.arange(N)
    row_idx_rep = np.tile(row_idx, (k, 1)).T
    W = scipy.sparse.csr_matrix(
        (D.flatten('F'), (row_idx_rep.flatten('F'), I.flatten('F'))),
        shape=(N, N))
    W = __symmetrize_KNN(W, mode=mode)
    return W
def get_nearest_neighbors(queries, index_set, k):
    dimension = queries.shape[1]
    queries = queries.cpu().numpy().astype('float32')
    index_set = index_set.cpu().numpy().astype('float32')

    resource = faiss.StandardGpuResources()

    # spot-check that the inputs are (approximately) L2-normalized: the inner
    # product only corresponds to cosine similarity for unit-norm vectors
    queries_number = queries.shape[0]
    index_to_test = np.random.randint(low=0, high=queries_number)
    print('queries[index_to_test] ', queries[index_to_test])
    assert np.abs(np.linalg.norm(queries[index_to_test]) - 1.0) <= 10e-1, \
        'Cosine similarity nearest neighbors search should work with normalized data! ' \
        'But np.linalg.norm(queries[%d]) = %.10f' % (index_to_test, np.linalg.norm(queries[index_to_test]))

    index = faiss.GpuIndexFlatIP(resource, dimension)
    index.add(index_set)
    s, i = index.search(queries, k)
    return s, i
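
# Hedged usage sketch (added): the assertion above expects roughly unit-norm
# rows, so normalize before calling. Assumes torch and a GPU build of faiss.
def _demo_get_nearest_neighbors():
    import torch
    queries = torch.nn.functional.normalize(torch.randn(8, 32), dim=1)
    index_set = torch.nn.functional.normalize(torch.randn(100, 32), dim=1)
    sims, ids = get_nearest_neighbors(queries, index_set, k=5)
    print(sims.shape, ids.shape)  # (8, 5) (8, 5)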
def _init(self):
    import faiss
    self.dim = self.source_data.shape[-1]
    if self.use_gpu:
        self.gpu_res = faiss.StandardGpuResources()
        if self.metric == 'IP':
            self.index = faiss.GpuIndexFlatIP(self.gpu_res, self.dim)
        elif self.metric == 'L2':
            self.index = faiss.GpuIndexFlatL2(self.gpu_res, self.dim)
    else:
        if self.metric == 'IP':
            self.index = faiss.IndexFlatIP(self.dim)
        elif self.metric == 'L2':
            self.index = faiss.IndexFlatL2(self.dim)
    self.index.add(self.source_data)
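
# Hedged sketch (added): a minimal host class showing the attributes `_init`
# expects (`source_data`, `use_gpu`, `metric`). The class name and constructor
# are assumptions for illustration, not from the original source.
class _FlatSearcher:
    def __init__(self, source_data, use_gpu=False, metric='IP'):
        self.source_data = source_data.astype('float32')
        self.use_gpu = use_gpu
        self.metric = metric
        self._init()

    _init = _init  # bind the module-level helper above as a method

# e.g.:
#   searcher = _FlatSearcher(np.random.rand(100, 16), use_gpu=False)
#   D, I = searcher.index.search(np.random.rand(2, 16).astype('float32'), 5)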
def _build_index(embed_paths, use_gpu=True):
    # this is a good place to decide whether to build the index on cpu or gpu
    gpu_res = faiss.StandardGpuResources()

    # this should be retrieved from the embeddings file; we should also check
    # that all embeddings have the same size
    embed_size = 1024
    index = faiss.GpuIndexFlatIP(gpu_res, embed_size)

    for path in embed_paths:
        _logger.info('loading embeddings from %s', path)
        t = torch.utils.serialization.load_lua(path)
        _logger.info('adding embeddings to the index')
        index.add(t.numpy())
        # del t ?
    return index
def get_nn_avg_dist(src, tgt, knn):
    """
    Compute the average distance of the `knn` nearest neighbors
    for a given set of embeddings and queries.
    Use Faiss if available.
    """
    if hasattr(faiss, 'StandardGpuResources'):
        # gpu mode
        res = faiss.StandardGpuResources()
        config = faiss.GpuIndexFlatConfig()
        config.device = 0
        index = faiss.GpuIndexFlatIP(res, tgt.shape[1], config)
        logger.info("faiss gpu mode!")
    else:
        # cpu mode
        index = faiss.IndexFlatIP(tgt.shape[1])
    # index `tgt` and search with `src` (the original added and searched `src`,
    # which conflicted with sizing the index by tgt.shape[1])
    index.add(tgt)
    distances, _ = index.search(src, knn)
    return distances.mean(1)
def main():
    dirname = os.path.dirname(__file__)
    output_dir = os.path.join(dirname, 'features')

    train_ims = load_h5('train_ims', os.path.join(output_dir, 'trainIms.h5'))
    train_classes = load_h5('train_classes', os.path.join(output_dir, 'trainClasses.h5'))
    train_feats = load_h5('train_feats', os.path.join(output_dir, 'trainFeats.h5'))

    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 3  # specify which GPU to use

    gpu_index = faiss.GpuIndexFlatIP(res, train_feats.shape[1], flat_config)
    # adds row by row; a single gpu_index.add(train_feats) would also work
    for feat in train_feats:
        gpu_index.add(np.expand_dims(feat, 0))

    csv_dir = os.path.join(dirname, 'csv_output')
    if not os.path.exists(csv_dir):
        os.makedirs(csv_dir)

    occlusion_levels = [
        'unoccluded', 'low_occlusions', 'medium_occlusions', 'high_occlusions'
    ]
    for occlusion in occlusion_levels:
        # NOTE: 'wb' with str lines implies Python 2; use 'w' under Python 3
        with open(os.path.join(csv_dir, occlusion + '.csv'), 'wb') as csv_file:
            test_output_dir = os.path.join(output_dir, occlusion)
            test_ims = load_h5('test_ims', os.path.join(test_output_dir, 'testIms.h5'))
            test_feats = load_h5('test_feats', os.path.join(test_output_dir, 'testFeats.h5'))
            for imId, ft in zip(test_ims, test_feats):
                result_dists, result_inds = gpu_index.search(
                    np.expand_dims(ft, 0).astype('float32'), 100)
                result_im_inds = train_ims[result_inds[0]]
                csv_line = str(imId) + ',' + ','.join(
                    [str(r) for r in result_im_inds]) + '\n'
                csv_file.writelines(csv_line)
def get_knn(inst_embeddings, label_embeddings, accelerator, top_k=100, bsz=65536):
    accelerator.print("FAISS")
    # build the GPU index on this process's device
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.useFloat16 = False
    flat_config.device = accelerator.local_process_index
    indexer = faiss.GpuIndexFlatIP(res, inst_embeddings.shape[1], flat_config)
    indexer.add(label_embeddings)

    # search in batches of `bsz` queries to bound GPU memory usage
    num_inst = inst_embeddings.shape[0]
    nr_batch = int(math.ceil(num_inst / bsz))
    D_list, I_list = [], []
    accelerator.print("index")
    for bidx in tqdm(range(nr_batch)):
        sidx = bidx * bsz
        eidx = min((bidx + 1) * bsz, num_inst)
        D, I = indexer.search(inst_embeddings[sidx:eidx], top_k)
        D_list.append(D)
        I_list.append(I)
    D = np.concatenate(D_list)
    I = np.concatenate(I_list)
    return D, I
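
# Hedged usage sketch (added): a tiny stand-in accelerator (a hypothetical
# helper, not from the original source) so get_knn can be exercised without
# the real distributed runtime. Assumes faiss-gpu, numpy, math and tqdm.
def _demo_get_knn():
    import numpy as np

    class _FakeAccelerator:
        local_process_index = 0

        def print(self, *args):
            print(*args)

    rng = np.random.RandomState(0)
    inst = rng.rand(1000, 64).astype('float32')
    labels = rng.rand(5000, 64).astype('float32')
    D, I = get_knn(inst, labels, _FakeAccelerator(), top_k=10, bsz=256)
    print(D.shape, I.shape)  # (1000, 10) (1000, 10)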
def _faiss_knn(X, k, symm=True, inner_prod=False):
    # kNN search for the graph
    X = np.ascontiguousarray(X)
    d = X.shape[1]

    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 0

    # build the index
    if inner_prod:
        faiss.normalize_L2(X)
        index = faiss.GpuIndexFlatIP(res, d, flat_config)
    else:
        index = faiss.GpuIndexFlatL2(res, d, flat_config)

    index.add(X)
    N = X.shape[0]

    c = time.time()
    D, I = index.search(X, k + 1)
    elapsed = time.time() - c
    # the original mixed %-style and str.format, logging a literal '%d'
    LOG.info('kNN Search done in {:.3f} seconds'.format(elapsed), LOG.ll.UTILS)

    # Create the graph: drop the self-match in column 0
    D = np.sqrt(D[:, 1:])
    I = I[:, 1:]
    row_idx = np.arange(N)
    row_idx_rep = np.tile(row_idx, (k, 1)).T
    W = scipy.sparse.csr_matrix(
        (D.flatten('F'), (row_idx_rep.flatten('F'), I.flatten('F'))),
        shape=(N, N))
    if symm:
        W = W.minimum(W.T)
    return W
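
# Hedged usage sketch (added): builds a symmetric kNN graph. With symm=True the
# elementwise minimum keeps an edge only if both endpoints selected each other
# (mutual kNN). Assumes faiss-gpu, scipy and the module's LOG object.
def _demo_faiss_knn_graph():
    import numpy as np
    X = np.random.RandomState(0).rand(300, 8).astype('float32')
    W = _faiss_knn(X, k=10, symm=True)
    print(W.shape, W.nnz)  # (300, 300), at most 300 * 10 nonzeros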
def search_and_on(idx, base, query, now_list):
    # NOTE: `d` (feature dimension) and `k` (number of neighbours) are
    # module-level globals in the original code
    base = base.astype(np.float32)
    query = query.astype(np.float32)

    # we need only one StandardGpuResources per GPU
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 0
    index = faiss.GpuIndexFlatIP(res, d, flat_config)
    index.add(base)

    D, I = index.search(query, k)
    I = I.tolist()[0]  # only the first query row is used

    # map back to original indices: `similar` holds the indices of images
    # similar to `idx`; `left_base`/`left_list` are the remaining candidates
    # for the next round of queries
    similar = []
    for i in range(len(I)):
        idx_this = now_list[I[i]]
        similar.append(idx_this)
    similar.append(idx)

    left_list = [x for x in now_list if x not in similar]
    left_base = np.delete(base, I, axis=0)
    return similar, left_base, left_list
def __init__(self, feats, k, index_path='', knn_method='faiss-cpu', verbose=True):
    import faiss

    with Timer('[{}] build index {}'.format(knn_method, k), verbose):
        knn_ofn = index_path + '.npz'
        if os.path.exists(knn_ofn):
            print('[{}] read knns from {}'.format(knn_method, knn_ofn))
            self.knns = np.load(knn_ofn)['data']
        else:
            feats = feats.astype('float32')
            size, dim = feats.shape
            if knn_method == 'faiss-gpu':
                import math
                # reserve roughly 4 GB of temp memory per extra million vectors
                i = math.ceil(size / 1000000)
                if i > 1:
                    i = (i - 1) * 4
                res = faiss.StandardGpuResources()
                res.setTempMemory(i * 1024 * 1024 * 1024)
                index = faiss.GpuIndexFlatIP(res, dim)
            else:
                index = faiss.IndexFlatIP(dim)
            index.add(feats)

    with Timer('[{}] query topk {}'.format(knn_method, k), verbose):
        knn_ofn = index_path + '.npz'
        if os.path.exists(knn_ofn):
            pass
        else:
            sims, nbrs = index.search(feats, k=k)
            # convert inner-product similarities to (1 - sim) distances
            self.knns = [(np.array(nbr, dtype=np.int32),
                          1 - np.array(sim, dtype=np.float32))
                         for nbr, sim in zip(nbrs, sims)]
    h5_feats.create_dataset(data_description, data=data, dtype=data_type)
    h5_feats.close()


def load_h5(data_description, path):
    with h5py.File(path, 'r') as hf:
        data = hf[data_description][:]
    return data


# NOTE: Python 2 script fragment (print statement); it begins mid-function above
res = faiss.StandardGpuResources()
flat_config = faiss.GpuIndexFlatConfig()
flat_config.device = whichGPU

train_feats = load_h5('train_feats', os.path.join(output_dir, 'trainFeats.h5'))
train_classes = load_h5('train_classes', os.path.join(output_dir, 'trainClasses.h5'))
train_ims = load_h5('train_ims', os.path.join(output_dir, 'trainIms.h5'))

gpu_index = faiss.GpuIndexFlatIP(res, train_feats.shape[1], flat_config)
for feat in train_feats:
    gpu_index.add(np.expand_dims(feat, 0))

test_datasets = ['./input/test_by_hotel.txt',
                 './input/occluded_test/by_hotel/0.txt',
                 './input/occluded_test/by_hotel/1.txt',
                 './input/occluded_test/by_hotel/2.txt',
                 './input/occluded_test/by_hotel/3.txt']
test_names = ['by_hotel', 'occluded0', 'occluded1', 'occluded2', 'occluded3']

for test_dataset, test_name in zip(test_datasets, test_names):
    test_output_dir = os.path.join(output_dir, test_name)
    if not os.path.exists(os.path.join(test_output_dir, 'top_k.h5')):
        test_feats = load_h5('test_feats', os.path.join(test_output_dir, 'testFeats.h5'))
        test_ims = load_h5('test_ims', os.path.join(test_output_dir, 'testIms.h5'))
        test_classes = load_h5('test_classes', os.path.join(test_output_dir, 'testClasses.h5'))
        top_ims = np.zeros((test_feats.shape[0], 100))
        for ind, feat, cls in zip(range(test_feats.shape[0]), test_feats, test_classes):
            print ind, ' out of ', test_feats.shape[0]
            result_dists, result_inds = gpu_index.search(
                np.expand_dims(feat, 0).astype('float32'), 100)
def main(args):
    paddle.seed(12345)
    # load config
    config = load_yaml(args.config_yaml)
    config["config_abs_dir"] = args.abs_dir
    # load dygraph model class
    dy_model_class = load_dy_model_class(config)

    use_gpu = config.get("runner.use_gpu", True)
    test_data_dir = config.get("runner.test_data_dir", None)
    print_interval = config.get("runner.print_interval", None)
    model_load_path = config.get("runner.infer_load_path", "model_output")
    start_epoch = config.get("runner.infer_start_epoch", 0)
    end_epoch = config.get("runner.infer_end_epoch", 10)
    batch_size = config.get("runner.infer_batch_size", None)
    os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))

    logger.info("**************common.configs**********")
    logger.info(
        "use_gpu: {}, test_data_dir: {}, start_epoch: {}, end_epoch: {}, "
        "print_interval: {}, model_load_path: {}".format(
            use_gpu, test_data_dir, start_epoch, end_epoch, print_interval,
            model_load_path))
    logger.info("**************common.configs**********")

    place = paddle.set_device('gpu' if use_gpu else 'cpu')
    dy_model = dy_model_class.create_model(config)
    test_dataloader = create_data_loader(config=config, place=place, mode="test")

    logger.info("read data")
    epoch_begin = time.time()
    interval_begin = time.time()

    for epoch_id in range(start_epoch, end_epoch):
        logger.info("load model epoch {}".format(epoch_id))
        model_path = os.path.join(model_load_path, str(epoch_id))
        load_model(model_path, dy_model)
        b = dy_model.item_emb.weight.numpy()

        import faiss
        if use_gpu:
            res = faiss.StandardGpuResources()
            flat_config = faiss.GpuIndexFlatConfig()
            flat_config.device = 0
            faiss_index = faiss.GpuIndexFlatIP(res, b.shape[-1], flat_config)
            faiss_index.add(b)
        else:
            faiss_index = faiss.IndexFlatIP(b.shape[-1])
            faiss_index.add(b)

        total = 1
        total_recall = 0.0
        total_ndcg = 0.0
        total_hitrate = 0
        for batch_id, batch_data in enumerate(test_dataloader()):
            user_embs, _ = dy_model_class.infer_forward(dy_model, None,
                                                        batch_data, config)
            user_embs = user_embs.numpy()
            target_items = np.squeeze(batch_data[-1].numpy(), axis=1)

            if len(user_embs.shape) == 2:
                # single interest vector per user
                D, I = faiss_index.search(user_embs, args.top_n)
                for i, iid_list in enumerate(target_items):
                    recall = 0
                    dcg = 0.0
                    item_list = set(I[i])
                    iid_list = list(filter(lambda x: x != 0, list(iid_list)))
                    for no, iid in enumerate(iid_list):
                        if iid in item_list:
                            recall += 1
                            dcg += 1.0 / math.log(no + 2, 2)
                    idcg = 0.0
                    for no in range(recall):
                        idcg += 1.0 / math.log(no + 2, 2)
                    total_recall += recall * 1.0 / len(iid_list)
                    if recall > 0:
                        total_ndcg += dcg / idcg
                        total_hitrate += 1
            else:
                # multiple interest vectors per user: merge their top-n lists
                ni = user_embs.shape[1]
                user_embs = np.reshape(user_embs, [-1, user_embs.shape[-1]])
                D, I = faiss_index.search(user_embs, args.top_n)
                for i, iid_list in enumerate(target_items):
                    recall = 0
                    dcg = 0.0
                    item_list_set = set()
                    item_list = list(
                        zip(
                            np.reshape(I[i * ni:(i + 1) * ni], -1),
                            np.reshape(D[i * ni:(i + 1) * ni], -1)))
                    item_list.sort(key=lambda x: x[1], reverse=True)
                    for j in range(len(item_list)):
                        if item_list[j][0] not in item_list_set and item_list[j][0] != 0:
                            item_list_set.add(item_list[j][0])
                            if len(item_list_set) >= args.top_n:
                                break
                    iid_list = list(filter(lambda x: x != 0, list(iid_list)))
                    for no, iid in enumerate(iid_list):
                        if iid == 0:
                            break
                        if iid in item_list_set:
                            recall += 1
                            dcg += 1.0 / math.log(no + 2, 2)
                    idcg = 0.0
                    for no in range(recall):
                        idcg += 1.0 / math.log(no + 2, 2)
                    total_recall += recall * 1.0 / len(iid_list)
                    if recall > 0:
                        total_ndcg += dcg / idcg
                        total_hitrate += 1

            total += target_items.shape[0]
            if batch_id % print_interval == 0:
                recall = total_recall / total
                ndcg = total_ndcg / total
                hitrate = total_hitrate * 1.0 / total
                metric_str = ""
                metric_str += "recall@%d: %.5f, " % (args.top_n, recall)
                metric_str += "ndcg@%d: %.5f, " % (args.top_n, ndcg)
                metric_str += "hitrate@%d: %.5f, " % (args.top_n, hitrate)
                logger.info("epoch: {}, batch_id: {}, ".format(epoch_id, batch_id) +
                            metric_str + "speed: {:.2f} ins/s".format(
                                print_interval * batch_size /
                                (time.time() - interval_begin)))

        recall = total_recall / total
        ndcg = total_ndcg / total
        hitrate = total_hitrate * 1.0 / total
        metric_str = ""
        metric_str += "recall@%d: %.5f, " % (args.top_n, recall)
        metric_str += "ndcg@%d: %.5f, " % (args.top_n, ndcg)
        metric_str += "hitrate@%d: %.5f, " % (args.top_n, hitrate)
        logger.info("epoch: {} done, ".format(epoch_id) + metric_str +
                    "epoch time: {:.2f} s".format(time.time() - epoch_begin))