def test_ranking(model, test_batches):
    """Print batch-averaged ranking metrics of ``model`` over ``test_batches``.

    Every batch is converted with ``helper.batch_to_tensor`` (using
    ``model.dictionary``), moved to the GPU when ``model.config.cuda`` is
    truthy, and scored by calling ``model(queries, docs)``. MAP, MRR and NDCG
    at cutoffs 1, 3, 5 and 10 are summed over the batches, divided by the
    number of batches and printed. Nothing is returned.
    """
    num_batches = len(test_batches)
    ndcg_cutoffs = (1, 3, 5, 10)
    map_total, mrr_total = 0, 0
    ndcg_totals = [0] * len(ndcg_cutoffs)

    for batch in test_batches:
        queries, docs, click_labels = helper.batch_to_tensor(
            batch, model.dictionary)
        if model.config.cuda:
            queries, docs, click_labels = (queries.cuda(), docs.cuda(),
                                           click_labels.cuda())

        score = model(queries, docs)
        map_total += mean_average_precision(score, click_labels)
        mrr_total += MRR(score, click_labels)
        for slot, cutoff in enumerate(ndcg_cutoffs):
            ndcg_totals[slot] += NDCG(score, click_labels, cutoff)

    # Each batch contributes with equal weight, whatever its size.
    map_avg = map_total / num_batches
    mrr_avg = mrr_total / num_batches
    ndcg_avgs = [total / num_batches for total in ndcg_totals]

    print('MAP - ', map_avg)
    print('MRR - ', mrr_avg)
    captions = ('NDCG@1 - ', 'NDCG@3 - ', 'NDCG@5 - ', 'NDCG@10 - ')
    for caption, value in zip(captions, ndcg_avgs):
        print(caption, value)
Exemple #2
0
def test_ranking(model, test_batches):
    """Print batch-averaged ranking metrics of ``model`` over ``test_batches``.

    Every batch is converted with ``helper.batch_to_tensor`` using
    ``model.dictionary`` and the ``max_query_length`` / ``max_doc_length``
    values from ``model.config``. The tensors are moved to the GPU when
    ``model.config.cuda`` is truthy, then passed to ``model``. MAP, MRR and
    NDCG at cutoffs 1, 3, 5 and 10 are summed over the batches, divided by the
    number of batches and printed. Nothing is returned.
    """
    num_batches = len(test_batches)
    ndcg_cutoffs = (1, 3, 5, 10)
    # Named *_total rather than ``map`` so the builtin is not shadowed.
    map_total, mrr_total = 0, 0
    ndcg_totals = [0] * len(ndcg_cutoffs)

    for batch in test_batches:
        test_queries, test_docs, test_labels = helper.batch_to_tensor(
            batch, model.dictionary, model.config.max_query_length,
            model.config.max_doc_length)
        if model.config.cuda:
            test_queries, test_docs, test_labels = (test_queries.cuda(),
                                                    test_docs.cuda(),
                                                    test_labels.cuda())

        softmax_prob = model(test_queries, test_docs)
        map_total += mean_average_precision(softmax_prob, test_labels)
        mrr_total += MRR(softmax_prob, test_labels)
        for slot, cutoff in enumerate(ndcg_cutoffs):
            ndcg_totals[slot] += NDCG(softmax_prob, test_labels, cutoff)

    # Each batch contributes with equal weight, whatever its size.
    map_avg = map_total / num_batches
    mrr_avg = mrr_total / num_batches
    ndcg_avgs = [total / num_batches for total in ndcg_totals]

    print('MAP - ', map_avg)
    print('MRR - ', mrr_avg)
    captions = ('NDCG@1 - ', 'NDCG@3 - ', 'NDCG@5 - ', 'NDCG@10 - ')
    for caption, value in zip(captions, ndcg_avgs):
        print(caption, value)
def retrival_metrics_test2train(sims_file, test_labels_file,
                                train_labels_file):
    """Compute mean average precision for test-to-train retrieval.

    Three arrays are loaded with ``np.load``: ``sims`` plus the test and
    train label arrays. For each row ``i`` of ``sims`` the train labels are
    reordered by the ascending ``np.argsort`` of that row, and every position
    whose label equals ``labels_test[i]`` is marked with 1 in a relevance
    matrix of the same shape as ``sims``. The matrix is printed and then
    passed, as nested lists, to ``mean_average_precision``.

    NOTE(review): the ascending sort ranks the smallest ``sims`` value first,
    which suggests the file holds distances rather than similarities; confirm
    against whatever produces it.
    """
    sims = np.load(sims_file)
    labels_test = np.load(test_labels_file)
    labels_train = np.load(train_labels_file)

    mAP_input = np.zeros(sims.shape)
    for row_idx, sims_row in enumerate(sims):
        query_label = labels_test[row_idx]
        ranked_labels = labels_train[np.argsort(sims_row)]
        # 1 marks a retrieved item carrying the query's label, 0 otherwise.
        mAP_input[row_idx] = [
            1 if label == query_label else 0 for label in ranked_labels
        ]
    print(mAP_input)
    return mean_average_precision(mAP_input.tolist())
def retrival_metrics_all(sims_file, labels_file):
    """Compute mean average precision when queries and gallery share one set.

    ``sims`` and ``labels`` are loaded with ``np.load``. For each row ``i`` of
    ``sims`` the labels are reordered by the ascending ``np.argsort`` of that
    row, and every position whose label equals ``labels[i]`` is marked with 1
    in a relevance matrix of the same shape as ``sims``. The matrix is passed,
    as nested lists, to ``mean_average_precision``.

    NOTE(review): the same ``labels`` array serves as both query and gallery,
    so when ``sims`` is square each query presumably also ranks itself;
    confirm this is intended.
    """
    sims = np.load(sims_file)
    labels = np.load(labels_file)
    # Small fixture previously used for manual checks:
    #   sims = np.array([[0, 0.5, 1], [0.5, 0, 2], [1, 2, 0]])
    #   labels = np.array([0, 1, 0])

    mAP_input = np.zeros(sims.shape)
    for row_idx, sims_row in enumerate(sims):
        query_label = labels[row_idx]
        ranked_labels = labels[np.argsort(sims_row)]
        # 1 marks a retrieved item carrying the query's label, 0 otherwise.
        mAP_input[row_idx] = [
            1 if label == query_label else 0 for label in ranked_labels
        ]
    return mean_average_precision(mAP_input.tolist())
def compute_ranking_performance(model, session_queries, session_query_length,
                                rel_docs, rel_docs_length, doc_labels):
    """Score the documents of every query and return six ranking metrics.

    The queries are flattened to two dimensions (the last axis is kept), and
    the first two axes of ``rel_docs`` are merged. Both are encoded with
    ``model.encode_query`` / ``model.encode_document`` and scored by
    ``model.document_ranker``. Scores and the flattened ``doc_labels`` are
    converted to NumPy arrays before the metrics are computed.

    Returns:
        Tuple ``(MAP, MRR, NDCG@1, NDCG@3, NDCG@5, NDCG@10)``.
    """
    flat_queries = session_queries.view(-1, session_queries.size(-1))
    flat_docs = rel_docs.view(-1, *rel_docs.size()[2:])

    query_repr = model.encode_query(flat_queries, session_query_length)
    doc_repr = model.encode_document(flat_docs, rel_docs_length)
    score = model.document_ranker(query_repr, doc_repr, flat_queries,
                                  flat_docs)

    # The metric helpers receive NumPy arrays, so detach and move to the CPU.
    scores_np = score.data.cpu().numpy()
    labels_np = doc_labels.view(-1, doc_labels.size(-1)).data.cpu().numpy()

    results = [
        mean_average_precision(scores_np, labels_np),
        MRR(scores_np, labels_np),
    ]
    for cutoff in (1, 3, 5, 10):
        results.append(NDCG(scores_np, labels_np, cutoff))
    return tuple(results)
Exemple #6
0
def test(model, test_dataset, dictionary, sess):
    """Evaluate ``model`` on ``test_dataset`` and print ranking and BLEU results.

    The dataset is split into index batches with ``helper.get_batches_idx``
    (batch size from the module-level ``args``). For each batch the inputs are
    built by ``helper.batch_to_tensor`` and fed through ``sess.run`` to obtain
    click probabilities and predicted token ids. MAP, MRR and NDCG at cutoffs
    1, 3, 5 and 10 are averaged over batches and printed. The predicted and
    target texts of all batches are then handed to
    ``multi_bleu.print_multi_bleu``. Nothing is returned.
    """
    batches_idx = helper.get_batches_idx(len(test_dataset), args.batch_size)
    print('number of test batches = ', len(batches_idx))

    num_batches = len(batches_idx)
    predicts, targets = [], []
    ndcg_cutoffs = (1, 3, 5, 10)
    # Named *_total rather than ``map`` so the builtin is not shadowed.
    map_total, mrr_total = 0, 0
    ndcg_totals = [0] * len(ndcg_cutoffs)

    for batch_idx in batches_idx:
        batch_data = [test_dataset.dataset[i] for i in batch_idx]

        # Convert the batch of data into the model's input format.
        (hist_query_input, hist_doc_input, session_num, hist_query_num,
         hist_query_len, hist_click_num, hist_doc_len, cur_query_input,
         cur_doc_input, cur_query_num, cur_query_len, cur_click_num,
         cur_doc_len, query, q_len, doc, d_len, labels, _next_q,
         _next_q_len, maximum_iterations) = helper.batch_to_tensor(
             batch_data, args.max_query_len, args.max_doc_len)

        indices, slots_num = model.get_memory_input(session_num)

        feed_dict = {
            model.hist_query_input: hist_query_input,
            model.hist_doc_input: hist_doc_input,
            model.session_num: session_num,
            model.hist_query_num: hist_query_num,
            model.hist_query_len: hist_query_len,
            model.hist_click_num: hist_click_num,
            model.hist_doc_len: hist_doc_len,
            model.cur_query_input: cur_query_input,
            model.cur_doc_input: cur_doc_input,
            model.cur_query_num: cur_query_num,
            model.cur_query_len: cur_query_len,
            model.cur_click_num: cur_click_num,
            model.cur_doc_len: cur_doc_len,
            model.q: query,
            model.q_len: q_len,
            model.d: doc,
            model.d_len: d_len,
            model.indices: indices,
            model.slots_num: slots_num,
            model.maximum_iterations: maximum_iterations,
        }

        fetches = [
            model.click_prob, model.predicting_ids, model.predicting_len
        ]
        click_prob_, predicting_ids_, predicting_len_ = sess.run(
            fetches, feed_dict=feed_dict)

        map_total += mean_average_precision(click_prob_, labels)
        mrr_total += MRR(click_prob_, labels)
        for slot, cutoff in enumerate(ndcg_cutoffs):
            ndcg_totals[slot] += NDCG(click_prob_, labels, cutoff)

        batch_predicting_text = helper.generate_predicting_text(
            predicting_ids_, predicting_len_, dictionary)
        # The second return value (the query text) is not used here.
        batch_target_text, _ = helper.generate_target_text(
            batch_data, dictionary, args.max_query_len)
        predicts.extend(batch_predicting_text)
        targets.extend(batch_target_text)

    # Each batch contributes with equal weight, whatever its size.
    map_avg = map_total / num_batches
    mrr_avg = mrr_total / num_batches
    ndcg_avgs = [total / num_batches for total in ndcg_totals]

    print('MAP - ', map_avg)
    print('MRR - ', mrr_avg)
    captions = ('NDCG@1 - ', 'NDCG@3 - ', 'NDCG@5 - ', 'NDCG@10 - ')
    for caption, value in zip(captions, ndcg_avgs):
        print(caption, value)

    print("targets size = ", len(targets))
    print("predicts size = ", len(predicts))

    multi_bleu.print_multi_bleu(predicts, targets)
Exemple #7
0
# In[36]:

import numpy as np
import rank_metrics
import sys
# Usage: <script> BASE_DIR SUFFIX
# Reads  BASE_DIR/rv/relevanceVector_SUFFIX (space-delimited numeric rows) and
# writes BASE_DIR/em/evalMetrics_SUFFIX with precision/DCG/NDCG totals for
# k = 1..15, followed by the mean reciprocal rank and mean average precision.
with open(sys.argv[1] + "/rv/relevanceVector_" + sys.argv[2]) as rv_file:
    # ndmin=2 keeps a one-row file as a (1, N) matrix; without it loadtxt
    # returns a 1-D array and the row loop below would iterate over scalars.
    relevanceVector = np.loadtxt(rv_file, delimiter=" ", ndmin=2)

# The with-block closes the output file even if a metric call raises.
with open(sys.argv[1] + '/em/evalMetrics_' + sys.argv[2], 'w') as f:
    for k in range(1, 16):
        # These are sums over all rows, not averages.
        total_precision_k = 0
        total_dcg_k = 0
        total_ndcg_k = 0
        for row in relevanceVector:
            precision_k = rank_metrics.precision_at_k(row, k)
            dcg_k = rank_metrics.dcg_at_k(row, k, 0)
            ndcg_k = rank_metrics.ndcg_at_k(row, k, 0)
            total_precision_k += precision_k
            total_dcg_k += dcg_k
            total_ndcg_k += ndcg_k
        f.write("precision@" + str(k) + ": " + str(total_precision_k) + "\n")
        f.write("dcg@" + str(k) + ": " + str(total_dcg_k) + "\n")
        f.write("ndcg@" + str(k) + ": " + str(total_ndcg_k) + "\n")

    mrr = rank_metrics.mean_reciprocal_rank(relevanceVector)
    f.write("Mean Reciprocal Rank: " + str(mrr) + "\n")
    maP = rank_metrics.mean_average_precision(relevanceVector)
    f.write("Mean Average Precision: " + str(maP) + "\n")
Exemple #8
0
def compute_map(label_q, label_d, d):
    """Return the mean average precision for the given labels and ``d``.

    The three arguments are forwarded unchanged to ``convert_rank_gt`` and its
    result is passed to ``mean_average_precision``.

    NOTE(review): presumably ``label_q`` / ``label_d`` are query and database
    labels and ``d`` a distance or score matrix; confirm against
    ``convert_rank_gt``.
    """
    return mean_average_precision(convert_rank_gt(label_q, label_d, d))
Exemple #9
0
    def validate(self, dev_dataset):
        """Evaluate the model on ``dev_dataset`` and return a combined score.

        The dataset is split into index batches with
        ``helper.get_batches_idx``. For each batch the inputs are built by
        ``helper.batch_to_tensor`` and fed through ``self.sess.run`` to obtain
        click probabilities and predicted token ids. MAP, MRR and NDCG at
        cutoffs 1, 3, 5 and 10 are averaged over batches, and
        ``multi_bleu.multi_bleu`` is run on the collected predicted and target
        texts. The metrics are printed.

        Returns:
            The sum of the six averaged ranking metrics plus 0.01 times the
            sum of the first four BLEU precisions.
        """
        model = self.model
        batches_idx = helper.get_batches_idx(len(dev_dataset),
                                             self.args.batch_size)
        print('number of dev batches = ', len(batches_idx))

        num_batches = len(batches_idx)
        predicts, targets = [], []
        ndcg_cutoffs = (1, 3, 5, 10)
        # Named *_total rather than ``map`` so the builtin is not shadowed.
        map_total, mrr_total = 0, 0
        ndcg_totals = [0] * len(ndcg_cutoffs)

        for batch_idx in batches_idx:
            batch_data = [dev_dataset.dataset[i] for i in batch_idx]

            # Convert the batch of data into the model's input format.
            (hist_query_input, hist_doc_input, session_num, hist_query_num,
             hist_query_len, hist_click_num, hist_doc_len, cur_query_input,
             cur_doc_input, cur_query_num, cur_query_len, cur_click_num,
             cur_doc_len, query, q_len, doc, d_len, labels, _next_q,
             _next_q_len, maximum_iterations) = helper.batch_to_tensor(
                 batch_data, self.args.max_query_len, self.args.max_doc_len)

            indices, slots_num = model.get_memory_input(session_num)

            feed_dict = {
                model.hist_query_input: hist_query_input,
                model.hist_doc_input: hist_doc_input,
                model.session_num: session_num,
                model.hist_query_num: hist_query_num,
                model.hist_query_len: hist_query_len,
                model.hist_click_num: hist_click_num,
                model.hist_doc_len: hist_doc_len,
                model.cur_query_input: cur_query_input,
                model.cur_doc_input: cur_doc_input,
                model.cur_query_num: cur_query_num,
                model.cur_query_len: cur_query_len,
                model.cur_click_num: cur_click_num,
                model.cur_doc_len: cur_doc_len,
                model.q: query,
                model.q_len: q_len,
                model.d: doc,
                model.d_len: d_len,
                model.indices: indices,
                model.slots_num: slots_num,
                model.maximum_iterations: maximum_iterations,
            }

            fetches = [
                model.click_prob, model.predicting_ids, model.predicting_len
            ]
            click_prob_, predicting_ids_, predicting_len_ = self.sess.run(
                fetches, feed_dict=feed_dict)

            map_total += mean_average_precision(click_prob_, labels)
            mrr_total += MRR(click_prob_, labels)
            for slot, cutoff in enumerate(ndcg_cutoffs):
                ndcg_totals[slot] += NDCG(click_prob_, labels, cutoff)

            batch_predicting_text = helper.generate_predicting_text(
                predicting_ids_, predicting_len_, self.dictionary)
            # The second return value (the query text) is not used here.
            batch_target_text, _ = helper.generate_target_text(
                batch_data, self.dictionary, self.args.max_query_len)

            predicts.extend(batch_predicting_text)
            targets.extend(batch_target_text)

        # Each batch contributes with equal weight, whatever its size.
        map_avg = map_total / num_batches
        mrr_avg = mrr_total / num_batches
        ndcg_1, ndcg_3, ndcg_5, ndcg_10 = [
            total / num_batches for total in ndcg_totals
        ]

        # Only the n-gram precisions of the BLEU result are used below.
        (_score, precisions, _brevity_penalty, _cand_tot_length,
         _ref_closest_length) = multi_bleu.multi_bleu(predicts, targets)
        bleu = [precisions[n] for n in range(4)]

        # The 0.01 factor scales the summed BLEU precisions before they are
        # added to the ranking metrics.
        metrics_sum = map_avg + mrr_avg + ndcg_1 + ndcg_3 + ndcg_5 + ndcg_10 + (
            bleu[0] + bleu[1] + bleu[2] + bleu[3]) * 0.01

        print('validation metrics: ')
        print('MAP = %.4f' % map_avg)
        print('MRR = %.4f' % mrr_avg)
        print("NDCG = {:.4f}/{:.4f}/{:.4f}/{:.4f}".format(
            ndcg_1, ndcg_3, ndcg_5, ndcg_10))
        print("BLEU = {:.1f}/{:.1f}/{:.1f}/{:.1f}".format(*bleu))

        return metrics_sum
def compute_ranking_performance(model, session_queries, session_query_length,
                                rel_docs, rel_docs_length, doc_labels):
    """Rank documents for each query of a session and average the metrics.

    Queries and documents are embedded, encoded and pooled with the model's
    sub-modules. The code then walks along axis 1 of the encoded queries. At
    each step the query encoding is concatenated with the current session
    encoder output and projected, and each document is scored by its dot
    product with that projection. MAP, MRR and NDCG at cutoffs 1, 3, 5 and 10
    are computed from those scores and ``doc_labels``. The session encoder
    state is then advanced with the current query.

    Returns:
        Tuple ``(MAP, MRR, NDCG@1, NDCG@3, NDCG@5, NDCG@10)``, each averaged
        over the number of steps along axis 1.
    """
    # --- query encoding ---
    flat_queries = session_queries.view(-1, session_queries.size(-1))
    query_lengths = session_query_length.view(-1).data.cpu().numpy()
    encoded_queries = model.query_encoder(model.embedding(flat_queries),
                                          query_lengths)
    encoded_queries = model.apply_pooling(encoded_queries,
                                          model.config.pool_type)
    # encoded_queries: batch_size x session_length x (nhid_query * num_directions)
    encoded_queries = encoded_queries.view(*session_queries.size()[:-1], -1)

    # --- document encoding ---
    flat_docs = rel_docs.view(-1, rel_docs.size(-1))
    doc_lengths = rel_docs_length.view(-1).data.cpu().numpy()
    encoded_docs = model.document_encoder(model.embedding(flat_docs),
                                          doc_lengths)
    encoded_docs = model.apply_pooling(encoded_docs, model.config.pool_type)
    # encoded_docs: batch_size x session_length x num_rel_docs_per_query x (nhid_doc * num_directions)
    encoded_docs = encoded_docs.view(*rel_docs.size()[:-1], -1)

    # --- session-level encoder: initial hidden state and all-zero output ---
    sess_q_hidden = model.session_query_encoder.init_weights(
        encoded_queries.size(0))
    sess_q_out = Variable(
        torch.zeros(session_queries.size(0), 1, model.config.nhid_session))
    if model.config.cuda:
        sess_q_out = sess_q_out.cuda()

    ndcg_cutoffs = (1, 3, 5, 10)
    # Named *_total rather than ``map`` so the builtin is not shadowed.
    map_total, mrr_total = 0, 0
    ndcg_totals = [0] * len(ndcg_cutoffs)

    session_length = encoded_queries.size(1)
    for idx in range(session_length):
        step_query = encoded_queries[:, idx, :]
        step_docs = encoded_docs[:, idx, :, :]

        # Combine the query with the session context, then broadcast the
        # projection over the documents so it can be multiplied element-wise.
        combined_rep = model.projection(
            torch.cat((step_query, sess_q_out.squeeze(1)), 1))
        combined_rep = combined_rep.unsqueeze(1).expand(*step_docs.size())
        click_score = torch.sum(torch.mul(combined_rep, step_docs), 2)

        numpy_score = click_score.data.cpu().numpy()
        numpy_labels = doc_labels[:, idx, :].data.cpu().numpy()

        map_total += mean_average_precision(numpy_score, numpy_labels)
        mrr_total += MRR(numpy_score, numpy_labels)
        for slot, cutoff in enumerate(ndcg_cutoffs):
            ndcg_totals[slot] += NDCG(numpy_score, numpy_labels, cutoff)

        # Advance the session-level encoder with the current query.
        sess_q_out, sess_q_hidden = model.session_query_encoder(
            step_query.unsqueeze(1), sess_q_hidden)

    map_avg = map_total / session_length
    mrr_avg = mrr_total / session_length
    NDCG_at_1, NDCG_at_3, NDCG_at_5, NDCG_at_10 = [
        total / session_length for total in ndcg_totals
    ]

    return map_avg, mrr_avg, NDCG_at_1, NDCG_at_3, NDCG_at_5, NDCG_at_10