def test_ranking(model, test_batches):
    """Print batch-averaged ranking metrics of ``model`` over ``test_batches``.

    Every batch is turned into ``(queries, docs, click_labels)`` by
    ``helper.batch_to_tensor`` using ``model.dictionary``, moved to the GPU
    when ``model.config.cuda`` is truthy, and scored by calling ``model``.
    MAP, MRR and NDCG@{1,3,5,10} are computed per batch, summed, and divided
    by the number of batches before being printed.

    Raises:
        ZeroDivisionError: if ``test_batches`` is empty.
    """
    num_batches = len(test_batches)
    ndcg_cutoffs = (1, 3, 5, 10)
    # Running sums in print order: MAP, MRR, NDCG@1, NDCG@3, NDCG@5, NDCG@10.
    totals = [0, 0, 0, 0, 0, 0]

    for index in range(num_batches):
        queries, docs, click_labels = helper.batch_to_tensor(
            test_batches[index], model.dictionary)
        if model.config.cuda:
            queries, docs, click_labels = (
                queries.cuda(), docs.cuda(), click_labels.cuda())

        score = model(queries, docs)
        totals[0] += mean_average_precision(score, click_labels)
        totals[1] += MRR(score, click_labels)
        for slot, cutoff in enumerate(ndcg_cutoffs, start=2):
            totals[slot] += NDCG(score, click_labels, cutoff)

    # All averages are computed before anything is printed.
    averages = [total / num_batches for total in totals]
    labels = ('MAP - ', 'MRR - ', 'NDCG@1 - ', 'NDCG@3 - ', 'NDCG@5 - ',
              'NDCG@10 - ')
    for label, value in zip(labels, averages):
        print(label, value)
def test_ranking(model, test_batches):
    """Print batch-averaged ranking metrics of ``model`` over ``test_batches``.

    Each batch is converted by ``helper.batch_to_tensor`` with
    ``model.dictionary`` and the ``max_query_length`` / ``max_doc_length``
    values from ``model.config``. Tensors are moved to the GPU when
    ``model.config.cuda`` is truthy. The model output is compared against the
    labels with MAP, MRR and NDCG@{1,3,5,10}; each metric is summed over
    batches and divided by the batch count before printing.

    Raises:
        ZeroDivisionError: if ``test_batches`` is empty.
    """
    batch_count = len(test_batches)
    ndcg_cutoffs = (1, 3, 5, 10)
    map_total, mrr_total = 0, 0
    ndcg_totals = [0, 0, 0, 0]

    for position in range(batch_count):
        queries, docs, labels = helper.batch_to_tensor(
            test_batches[position], model.dictionary,
            model.config.max_query_length, model.config.max_doc_length)
        if model.config.cuda:
            queries, docs, labels = queries.cuda(), docs.cuda(), labels.cuda()

        probabilities = model(queries, docs)
        map_total += mean_average_precision(probabilities, labels)
        mrr_total += MRR(probabilities, labels)
        for slot, cutoff in enumerate(ndcg_cutoffs):
            ndcg_totals[slot] += NDCG(probabilities, labels, cutoff)

    # Average every metric first so nothing is printed if the division fails.
    map_mean = map_total / batch_count
    mrr_mean = mrr_total / batch_count
    ndcg_means = [total / batch_count for total in ndcg_totals]

    print('MAP - ', map_mean)
    print('MRR - ', mrr_mean)
    print('NDCG@1 - ', ndcg_means[0])
    print('NDCG@3 - ', ndcg_means[1])
    print('NDCG@5 - ', ndcg_means[2])
    print('NDCG@10 - ', ndcg_means[3])
def retrival_metrics_test2train(sims_file, test_labels_file, train_labels_file):
    """Compute mean average precision for test queries ranked against train items.

    Loads three ``.npy`` arrays. For each row ``i`` of the ``sims`` array the
    train labels are reordered by ``np.argsort`` of that row (ascending, so
    the smallest values come first), and position ``j`` of the relevance
    matrix is set to 1 when the label at rank ``j`` equals the ``i``-th test
    label. The relevance matrix is printed and then passed, as nested lists,
    to ``mean_average_precision``.

    NOTE(review): ascending order ranks the lowest ``sims`` values first,
    which is right for distances but not for similarities. Confirm which one
    the file holds.
    """
    sims = np.load(sims_file)
    labels_test = np.load(test_labels_file)
    labels_train = np.load(train_labels_file)

    relevance = np.zeros(sims.shape)
    for query in range(sims.shape[0]):
        target = labels_test[query]
        ranked_labels = labels_train[np.argsort(sims[query])]
        for rank, label in enumerate(ranked_labels):
            if label == target:
                relevance[query, rank] = 1

    print(relevance)
    return mean_average_precision(relevance.tolist())
def retrival_metrics_all(sims_file, labels_file):
    """Compute mean average precision when every item is ranked against all items.

    Loads a ``sims`` array and a ``labels`` array from ``.npy`` files. For
    each row ``i`` the labels are reordered by ``np.argsort`` of that row
    (ascending), and a binary relevance row marks the ranks whose label
    equals ``labels[i]``. The relevance matrix is handed to
    ``mean_average_precision`` as nested lists.

    NOTE(review): the query item is part of its own ranking, so it always
    counts as a hit. Confirm this is intended.
    """
    sims = np.load(sims_file)
    labels = np.load(labels_file)

    relevance = np.zeros(sims.shape)
    for query in range(sims.shape[0]):
        target = labels[query]
        ranked_labels = labels[np.argsort(sims[query])]
        for rank, label in enumerate(ranked_labels):
            if label == target:
                relevance[query, rank] = 1

    return mean_average_precision(relevance.tolist())
def compute_ranking_performance(model, session_queries, session_query_length,
                                rel_docs, rel_docs_length, doc_labels):
    """Score documents with ``model`` and return six ranking metrics.

    ``session_queries`` is flattened to 2-D (last dimension kept) and the
    first two dimensions of ``rel_docs`` are merged. Both are encoded with
    ``model.encode_query`` / ``model.encode_document`` and scored by
    ``model.document_ranker``. Scores and the 2-D flattened ``doc_labels``
    are converted to NumPy arrays on the CPU before the metrics are computed.

    Returns:
        Tuple ``(MAP, MRR, NDCG@1, NDCG@3, NDCG@5, NDCG@10)``.
    """
    flat_queries = session_queries.view(-1, session_queries.size(-1))
    flat_docs = rel_docs.view(-1, *rel_docs.size()[2:])

    query_repr = model.encode_query(flat_queries, session_query_length)
    doc_repr = model.encode_document(flat_docs, rel_docs_length)
    scores = model.document_ranker(query_repr, doc_repr, flat_queries,
                                   flat_docs)

    scores_np = scores.data.cpu().numpy()
    labels_np = doc_labels.view(-1, doc_labels.size(-1)).data.cpu().numpy()

    mean_ap = mean_average_precision(scores_np, labels_np)
    reciprocal_rank = MRR(scores_np, labels_np)
    ndcg_values = tuple(
        NDCG(scores_np, labels_np, cutoff) for cutoff in (1, 3, 5, 10))
    return (mean_ap, reciprocal_rank) + ndcg_values
def test(model, test_dataset, dictionary, sess):
    """Run ``model`` over ``test_dataset`` and print ranking and BLEU results.

    Batches come from ``helper.get_batches_idx`` using the module-level
    ``args.batch_size``. For each batch the click probabilities and the
    predicted token ids/lengths are fetched with ``sess.run``. MAP, MRR and
    NDCG@{1,3,5,10} are summed per batch and averaged over the number of
    batches. Predicted and target texts are collected across batches and
    passed to ``multi_bleu.print_multi_bleu``.

    Raises:
        ZeroDivisionError: if there are no batches.
    """
    batch_indices = helper.get_batches_idx(len(test_dataset), args.batch_size)
    batch_count = len(batch_indices)
    print('number of test batches = ', batch_count)

    predicts, targets = [], []
    map_total = mrr_total = 0
    ndcg1_total = ndcg3_total = ndcg5_total = ndcg10_total = 0

    for step in range(batch_count):
        batch_data = [test_dataset.dataset[i] for i in batch_indices[step]]

        # Convert the batch into the model's input format.
        (hist_query_input, hist_doc_input, session_num, hist_query_num,
         hist_query_len, hist_click_num, hist_doc_len, cur_query_input,
         cur_doc_input, cur_query_num, cur_query_len, cur_click_num,
         cur_doc_len, query, q_len, doc, d_len, y, _next_q, _next_q_len,
         maximum_iterations) = helper.batch_to_tensor(
             batch_data, args.max_query_len, args.max_doc_len)
        indices, slots_num = model.get_memory_input(session_num)

        feed_dict = {
            # Session history.
            model.hist_query_input: hist_query_input,
            model.hist_doc_input: hist_doc_input,
            model.session_num: session_num,
            model.hist_query_num: hist_query_num,
            model.hist_query_len: hist_query_len,
            model.hist_click_num: hist_click_num,
            model.hist_doc_len: hist_doc_len,
            # Current session.
            model.cur_query_input: cur_query_input,
            model.cur_doc_input: cur_doc_input,
            model.cur_query_num: cur_query_num,
            model.cur_query_len: cur_query_len,
            model.cur_click_num: cur_click_num,
            model.cur_doc_len: cur_doc_len,
            # Query / document pair being scored.
            model.q: query,
            model.q_len: q_len,
            model.d: doc,
            model.d_len: d_len,
            # Memory addressing and decoding limit.
            model.indices: indices,
            model.slots_num: slots_num,
            model.maximum_iterations: maximum_iterations,
        }
        fetches = [model.click_prob, model.predicting_ids,
                   model.predicting_len]
        click_prob_, predicting_ids_, predicting_len_ = sess.run(
            fetches, feed_dict=feed_dict)

        map_total += mean_average_precision(click_prob_, y)
        mrr_total += MRR(click_prob_, y)
        ndcg1_total += NDCG(click_prob_, y, 1)
        ndcg3_total += NDCG(click_prob_, y, 3)
        ndcg5_total += NDCG(click_prob_, y, 5)
        ndcg10_total += NDCG(click_prob_, y, 10)

        predicted_text = helper.generate_predicting_text(
            predicting_ids_, predicting_len_, dictionary)
        target_text, _query_text = helper.generate_target_text(
            batch_data, dictionary, args.max_query_len)
        predicts.extend(predicted_text)
        targets.extend(target_text)

    # Average every metric before printing any of them.
    map_mean = map_total / batch_count
    mrr_mean = mrr_total / batch_count
    ndcg1_mean = ndcg1_total / batch_count
    ndcg3_mean = ndcg3_total / batch_count
    ndcg5_mean = ndcg5_total / batch_count
    ndcg10_mean = ndcg10_total / batch_count

    print('MAP - ', map_mean)
    print('MRR - ', mrr_mean)
    print('NDCG@1 - ', ndcg1_mean)
    print('NDCG@3 - ', ndcg3_mean)
    print('NDCG@5 - ', ndcg5_mean)
    print('NDCG@10 - ', ndcg10_mean)
    print("targets size = ", len(targets))
    print("predicts size = ", len(predicts))
    multi_bleu.print_multi_bleu(predicts, targets)
# In[36]:
import sys

import numpy as np
import rank_metrics

# Command-line arguments: sys.argv[1] is a base directory, sys.argv[2] a file
# suffix. The relevance matrix is read from <base>/rv/relevanceVector_<suffix>
# and the metrics are written to <base>/em/evalMetrics_<suffix>.
relevance_path = sys.argv[1] + "/rv/relevanceVector_" + sys.argv[2]
metrics_path = sys.argv[1] + '/em/evalMetrics_' + sys.argv[2]

# Open the input inside a context manager so the handle is always closed;
# the original passed a bare open() to loadtxt and never closed it.
with open(relevance_path) as relevance_file:
    relevanceVector = np.loadtxt(relevance_file, delimiter=" ")

# The context manager also closes the output file if a metric call raises.
with open(metrics_path, 'w') as f:
    for k in range(1, 16):
        total_precision_k = 0
        total_dcg_k = 0
        total_ndcg_k = 0
        for row in relevanceVector:
            total_precision_k = total_precision_k + rank_metrics.precision_at_k(row, k)
            total_dcg_k = total_dcg_k + rank_metrics.dcg_at_k(row, k, 0)
            total_ndcg_k = total_ndcg_k + rank_metrics.ndcg_at_k(row, k, 0)
        # NOTE(review): these are sums over all rows, not per-row means,
        # although the labels read like averaged metrics. Confirm intent.
        f.write("precision@" + str(k) + ": " + str(total_precision_k) + "\n")
        f.write("dcg@" + str(k) + ": " + str(total_dcg_k) + "\n")
        f.write("ndcg@" + str(k) + ": " + str(total_ndcg_k) + "\n")

    mrr = rank_metrics.mean_reciprocal_rank(relevanceVector)
    f.write("Mean Reciprocal Rank: " + str(mrr) + "\n")
    maP = rank_metrics.mean_average_precision(relevanceVector)
    f.write("Mean Average Precision: " + str(maP) + "\n")
def compute_map(label_q, label_d, d):
    """Return the mean average precision for the given labels and ``d``.

    The three arguments are forwarded unchanged to ``convert_rank_gt``, and
    its result is passed to ``mean_average_precision``.
    """
    return mean_average_precision(convert_rank_gt(label_q, label_d, d))
def validate(self, dev_dataset):
    """Evaluate ``self.model`` on ``dev_dataset`` and return a combined score.

    Batches come from ``helper.get_batches_idx`` using
    ``self.args.batch_size``. For every batch the click probabilities and the
    predicted token ids/lengths are fetched through ``self.sess.run``. MAP,
    MRR and NDCG@{1,3,5,10} are summed per batch and averaged over the number
    of batches. The collected predicted and target texts are scored with
    ``multi_bleu.multi_bleu``.

    Returns:
        The sum of the six averaged ranking metrics plus 0.01 times the sum
        of the first four BLEU precisions.

    Raises:
        ZeroDivisionError: if there are no batches.
    """
    model = self.model
    batch_indices = helper.get_batches_idx(len(dev_dataset),
                                           self.args.batch_size)
    batch_count = len(batch_indices)
    print('number of dev batches = ', batch_count)

    predicts, targets = [], []
    map_total = mrr_total = 0
    ndcg1_total = ndcg3_total = ndcg5_total = ndcg10_total = 0

    for step in range(batch_count):
        batch_data = [dev_dataset.dataset[i] for i in batch_indices[step]]

        # Convert the batch into the model's input format.
        (hist_query_input, hist_doc_input, session_num, hist_query_num,
         hist_query_len, hist_click_num, hist_doc_len, cur_query_input,
         cur_doc_input, cur_query_num, cur_query_len, cur_click_num,
         cur_doc_len, query, q_len, doc, d_len, y, _next_q, _next_q_len,
         maximum_iterations) = helper.batch_to_tensor(
             batch_data, self.args.max_query_len, self.args.max_doc_len)
        indices, slots_num = self.model.get_memory_input(session_num)

        feed_dict = {
            # Session history.
            model.hist_query_input: hist_query_input,
            model.hist_doc_input: hist_doc_input,
            model.session_num: session_num,
            model.hist_query_num: hist_query_num,
            model.hist_query_len: hist_query_len,
            model.hist_click_num: hist_click_num,
            model.hist_doc_len: hist_doc_len,
            # Current session.
            model.cur_query_input: cur_query_input,
            model.cur_doc_input: cur_doc_input,
            model.cur_query_num: cur_query_num,
            model.cur_query_len: cur_query_len,
            model.cur_click_num: cur_click_num,
            model.cur_doc_len: cur_doc_len,
            # Query / document pair being scored.
            model.q: query,
            model.q_len: q_len,
            model.d: doc,
            model.d_len: d_len,
            # Memory addressing and decoding limit.
            model.indices: indices,
            model.slots_num: slots_num,
            model.maximum_iterations: maximum_iterations,
        }
        fetches = [model.click_prob, model.predicting_ids,
                   model.predicting_len]
        click_prob_, predicting_ids_, predicting_len_ = self.sess.run(
            fetches, feed_dict=feed_dict)

        map_total += mean_average_precision(click_prob_, y)
        mrr_total += MRR(click_prob_, y)
        ndcg1_total += NDCG(click_prob_, y, 1)
        ndcg3_total += NDCG(click_prob_, y, 3)
        ndcg5_total += NDCG(click_prob_, y, 5)
        ndcg10_total += NDCG(click_prob_, y, 10)

        predicted_text = helper.generate_predicting_text(
            predicting_ids_, predicting_len_, self.dictionary)
        target_text, _query_text = helper.generate_target_text(
            batch_data, self.dictionary, self.args.max_query_len)
        predicts.extend(predicted_text)
        targets.extend(target_text)

    # Averages are computed before BLEU, as in the original statement order.
    map_mean = map_total / batch_count
    mrr_mean = mrr_total / batch_count
    ndcg1_mean = ndcg1_total / batch_count
    ndcg3_mean = ndcg3_total / batch_count
    ndcg5_mean = ndcg5_total / batch_count
    ndcg10_mean = ndcg10_total / batch_count

    (_score, precisions, _brevity_penalty, _cand_tot_length,
     _ref_closest_length) = multi_bleu.multi_bleu(predicts, targets)

    # Addition order is kept exactly so the floating-point result is unchanged.
    metrics_sum = (
        map_mean + mrr_mean + ndcg1_mean + ndcg3_mean + ndcg5_mean +
        ndcg10_mean +
        (precisions[0] + precisions[1] + precisions[2] + precisions[3]) * 0.01)

    print('validation metrics: ')
    print('MAP = %.4f' % map_mean)
    print('MRR = %.4f' % mrr_mean)
    print("NDCG = {:.4f}/{:.4f}/{:.4f}/{:.4f}".format(
        ndcg1_mean, ndcg3_mean, ndcg5_mean, ndcg10_mean))
    print("BLEU = {:.1f}/{:.1f}/{:.1f}/{:.1f}".format(
        precisions[0], precisions[1], precisions[2], precisions[3]))
    return metrics_sum
def compute_ranking_performance(model, session_queries, session_query_length,
                                rel_docs, rel_docs_length, doc_labels):
    """Return ranking metrics averaged over the query positions of a session.

    Queries and documents are embedded, encoded and pooled with the model's
    sub-modules, then reshaped back to the leading dimensions of their
    inputs. For each query position the encoded query is concatenated with
    the previous session-encoder output, projected, and scored against that
    position's documents by an element-wise product summed over dimension 2.
    The session encoder is advanced with the current query afterwards.

    Returns:
        Tuple ``(MAP, MRR, NDCG@1, NDCG@3, NDCG@5, NDCG@10)``, each the mean
        over ``encoded_queries.size(1)`` query positions.

    Raises:
        ZeroDivisionError: if there are no query positions.
    """
    config = model.config

    # Query encoding.
    query_embeddings = model.embedding(
        session_queries.view(-1, session_queries.size(-1)))
    query_lengths = session_query_length.view(-1).data.cpu().numpy()
    encoded_queries = model.query_encoder(query_embeddings, query_lengths)
    encoded_queries = model.apply_pooling(encoded_queries, config.pool_type)
    # encoded_queries: batch_size x session_length x (nhid_query * self.num_directions)
    encoded_queries = encoded_queries.view(*session_queries.size()[:-1], -1)

    # Document encoding.
    doc_embeddings = model.embedding(rel_docs.view(-1, rel_docs.size(-1)))
    doc_lengths = rel_docs_length.view(-1).data.cpu().numpy()
    encoded_docs = model.document_encoder(doc_embeddings, doc_lengths)
    encoded_docs = model.apply_pooling(encoded_docs, config.pool_type)
    # encoded_docs: batch_size x session_length x num_rel_docs_per_query x (nhid_doc * self.num_directions)
    encoded_docs = encoded_docs.view(*rel_docs.size()[:-1], -1)

    # Session-level encoder state, starting from a zero output.
    session_hidden = model.session_query_encoder.init_weights(
        encoded_queries.size(0))
    session_out = Variable(
        torch.zeros(session_queries.size(0), 1, config.nhid_session))
    if config.cuda:
        session_out = session_out.cuda()

    num_queries = encoded_queries.size(1)
    map_total = mrr_total = 0
    ndcg1_total = ndcg3_total = ndcg5_total = ndcg10_total = 0

    # Loop over all the queries in a session.
    for step in range(num_queries):
        step_query = encoded_queries[:, step, :]
        step_docs = encoded_docs[:, step, :, :]

        combined = torch.cat((step_query, session_out.squeeze(1)), 1)
        combined = model.projection(combined)
        combined = combined.unsqueeze(1).expand(*step_docs.size())
        click_score = torch.sum(torch.mul(combined, step_docs), 2)

        scores_np = click_score.data.cpu().numpy()
        labels_np = doc_labels[:, step, :].data.cpu().numpy()

        map_total += mean_average_precision(scores_np, labels_np)
        mrr_total += MRR(scores_np, labels_np)
        ndcg1_total += NDCG(scores_np, labels_np, 1)
        ndcg3_total += NDCG(scores_np, labels_np, 3)
        ndcg5_total += NDCG(scores_np, labels_np, 5)
        ndcg10_total += NDCG(scores_np, labels_np, 10)

        # Update session-level query encoder state using query representations.
        session_out, session_hidden = model.session_query_encoder(
            step_query.unsqueeze(1), session_hidden)

    return (map_total / num_queries,
            mrr_total / num_queries,
            ndcg1_total / num_queries,
            ndcg3_total / num_queries,
            ndcg5_total / num_queries,
            ndcg10_total / num_queries)