Example #1
def test_ndcg():
    x = []
    k = 4
    assert_eq(ndcg.dcg(x, k), 0)

    x = [1, 1, 1]
    k = 5
    assert_eq(ndcg.dcg(x, k), 1 * (1 + 1 / np.log2(3) + 1 / np.log2(4)))
    k = 1
    assert_eq(ndcg.dcg(x, k), 1)

    x = [[1, 1, 1], [1, 2, 3]]
    k = 3
    res = ndcg.dcg(x, k)
    assert_eq(res[0], 1 * (1 + 1 / np.log2(3) + 1 / np.log2(4)))
    assert_eq(res[1], 1 + 3 / np.log2(3) + 7 / np.log2(4))

    x = [1, 2, 3]
    opt_x = [3, 2, 1]
    k = 3
    assert_eq(ndcg.ndcg(x, k), ndcg.dcg(x, k) / ndcg.dcg(opt_x, k))

    x = [1, 1, 1]
    assert_eq(ndcg.ndcg(x, k), 1)

    x = []
    assert_eq(ndcg.ndcg(x, k), 0)
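
The assertions above pin down the scoring scheme: a gain of 2**rel - 1 at each rank, a log2(rank + 1) discount, and NDCG defined as DCG divided by the DCG of the ideal (descending) ordering. A minimal sketch consistent with these tests, assuming that scheme (the actual ndcg module is not shown here):

import numpy as np

def dcg(rels, k):
    # DCG@k with exponential gain (2**rel - 1) and log2 position discount,
    # matching the values asserted in Example #1; accepts 1-D or 2-D input.
    rels = np.atleast_2d(np.asarray(rels, dtype=float))[:, :k]
    if rels.size == 0:
        return 0.0
    discounts = np.log2(np.arange(2, rels.shape[1] + 2))
    scores = ((2.0 ** rels - 1.0) / discounts).sum(axis=1)
    return scores if scores.shape[0] > 1 else float(scores[0])

def ndcg(rels, k):
    # NDCG@k: DCG normalized by the ideal DCG; an empty or all-zero list
    # yields 0.0 instead of dividing by zero.
    ideal = dcg(sorted(rels, reverse=True), k)
    return dcg(rels, k) / ideal if ideal > 0 else 0.0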
Example #2
def predict(estimator, test_path, best_dir, target_dir):
    input_fn_for_test = lambda: input_fn(test_path, 0)
    output_results = estimator.predict(
        input_fn_for_test,
        checkpoint_path=tf.train.latest_checkpoint(best_dir))
    predict_result_dict = {}
    true_result_dict = {}
    for output_result in output_results:
        for i in range(output_result['ans_num'][0]):
            question = output_result['que_ans'][i].decode('utf8')
            answer = output_result['que_ans'][i + FLAGS.neg_num +
                                              1].decode('utf8')
            if question not in true_result_dict:
                true_result_dict[question] = {}
            if i == 0:
                true_result_dict[question][answer] = 1
            else:
                true_result_dict[question][answer] = 0

            if question not in predict_result_dict:
                predict_result_dict[question] = {}
            predict_result_dict[question][answer] = float(
                output_result['output_rank'][i])

    logger.info(best_dir)

    total_item_result = []
    for master_key in predict_result_dict:
        complete_string = master_key + '\x01'
        temp_string = ''
        # sort by predicted score, descending
        sorted_final_sim_dict = sorted(predict_result_dict[master_key].items(),
                                       reverse=True,
                                       key=lambda x: x[1])
        for item in sorted_final_sim_dict:
            if item[0] in complete_string:  # skip the item itself among its similar items
                continue
            temp_string += item[0] + '_' + str(item[1]) + ','
        if temp_string != '':
            complete_string += temp_string.rstrip(',')
            total_item_result.append(complete_string)

    write_content(total_item_result, target_dir)  # write the DSSM similarity results

    total_ndcg = 0
    for key in predict_result_dict:
        label_list = []
        sorted_answer_score = sorted(predict_result_dict[key].items(),
                                     reverse=True,
                                     key=lambda x: x[1])[:5]  # keep the top 5 by predicted score
        for item in sorted_answer_score:
            assert item[0] in true_result_dict[key]
            label_list.append(true_result_dict[key][item[0]])
        total_ndcg += ndcg(label_list, top_n=5)
    logger.info('Average ndcg@5 is {}'.format(total_ndcg /
                                              len(predict_result_dict)))
    return predict_result_dict
Example #3
def measure_correlations(responsiveness, pyramid_scores, predictions):
    """
	Computes the correlations:
	- between predictions and responsiveness for the 3 metrics
	- between predictions and pyramid scores for the 3 metrics
	return the results in a dictionary.
	"""

    r_to_pyramid = pearsonr(pyramid_scores, predictions)[0]
    rho_to_pyramid = spearmanr(pyramid_scores, predictions)[0]
    ndcg_to_pyramid = ndcg(predictions, pyramid_scores, 10)

    r_to_responsiveness = pearsonr(responsiveness, predictions)[0]
    rho_to_responsiveness = spearmanr(responsiveness, predictions)[0]
    ndcg_to_responsiveness = ndcg(predictions, responsiveness, 10)

    return {
        'r_to_pyramid': r_to_pyramid,
        'rho_to_pyramid': rho_to_pyramid,
        'ndcg_to_pyramid': ndcg_to_pyramid,
        'r_to_responsiveness': r_to_responsiveness,
        'rho_to_responsiveness': rho_to_responsiveness,
        'ndcg_to_responsiveness': ndcg_to_responsiveness
    }
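
measure_correlations assumes pearsonr and spearmanr from scipy.stats (and an ndcg helper) are already in scope. A hedged usage sketch with made-up numbers:

from scipy.stats import pearsonr, spearmanr

# Toy inputs (hypothetical); in practice these are per-summary scores.
responsiveness = [0.2, 0.5, 0.9, 0.4]
pyramid_scores = [0.1, 0.6, 0.8, 0.3]
predictions = [0.15, 0.55, 0.85, 0.35]

results = measure_correlations(responsiveness, pyramid_scores, predictions)
print(results['r_to_pyramid'], results['rho_to_pyramid'])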
Example #4
    def test_param_stomping(self):
        testval_init = np.asarray([3, 2, 3, 0])
        testval = np.asarray([3, 2, 3, 0])

        x = ndcg.ndcg(testval, 4, False)
        self.assertEqual(testval_init.tolist(), testval.tolist())

        x = ndcg.idcg(testval, False)
        self.assertEqual(testval_init.tolist(), testval.tolist())

        x = ndcg.dcg(testval, False)
        self.assertEqual(testval_init.tolist(), testval.tolist())

        x = ndcg.cum_gain(testval)
        self.assertEqual(testval_init.tolist(), testval.tolist())
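
These assertions guard against the metric functions sorting their input array in place. The distinction being tested, in a short sketch:

import numpy as np

a = np.asarray([3, 2, 3, 0])
b = np.sort(a)[::-1]  # returns a descending copy; `a` is left untouched
a.sort()              # sorts `a` in place -- the mutation the test would catch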
Example #5
def py_eval_func(target_scores, predict_scores, qids):
    target_scores = list(target_scores.numpy())
    predict_scores = list(predict_scores.numpy())
    qids = list(qids.numpy())
    x = list(zip(target_scores, predict_scores, qids))

    groups = {}
    for target, prediction, qid in x:
        groups.setdefault(qid, []).append((target, prediction))

    cum_ndcg = []
    for _, g in groups.items():
        sorted_g = sorted(g, key=lambda x: x[1], reverse=True)
        pos = range(1, len(sorted_g) + 1)
        rel = [e[0] for e in sorted_g]
        partial_ndcg = ndcg(pos, rel)
        if partial_ndcg > 0.:
            # print(partial_ndcg)
            cum_ndcg.append(partial_ndcg)

    return np.asarray(cum_ndcg, dtype=np.float32)
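
Given the .numpy() calls, py_eval_func is presumably run eagerly from graph code via tf.py_function. A hypothetical wiring (the tensor names are assumptions):

import tensorflow as tf

per_query_ndcg = tf.py_function(
    func=py_eval_func,
    inp=[target_scores, predict_scores, qids],  # hypothetical input tensors
    Tout=tf.float32)
mean_ndcg = tf.reduce_mean(per_query_ndcg)  # average NDCG over queries with signal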
Example #6
 def test_ndcg_nranks(self):
     self.assertAlmostEqual(0.9491769, ndcg.ndcg([3, 2, 3, 0], 4, False))
     self.assertAlmostEqual(0.9491769, ndcg.ndcg([3, 2, 3], 4, False))
Example #7
 def test_ndcg(self):
     # from wikipedia
     self.assertAlmostEqual(0.9315085, ndcg.ndcg([3, 2, 3, 0, 1, 2], 6, False))
Example #8
 def test_ndcg_zeros(self):
     self.assertAlmostEqual(0.0, ndcg.ndcg([0, 0, 0, 0], 6, False))
Example #9
 def test_ndcg_none(self):
     # from wikipedia
     self.assertAlmostEqual(0.0, ndcg.ndcg([], 0, False))
     self.assertAlmostEqual(0.0, ndcg.ndcg(np.asarray([]), 0, False))
     self.assertAlmostEqual(0.0, ndcg.ndcg(None, 0, False))
Example #10
def main():
    scores = parse()
    print('DCG: %f' % dcg(scores))
    print('NDCG: %f' % ndcg(scores))
    print('pFound: %f' % pfound(scores))    
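
dcg and ndcg appear in the earlier examples, but pfound is not shown. For reference, a sketch of the usual Yandex pFound cascade model; this is an assumed definition, not necessarily this script's implementation:

def pfound(rels, p_break=0.15):
    # Cascade model: the user scans results top-down, is satisfied at each
    # rank with probability rels[i] (a relevance probability in [0, 1]), and
    # otherwise abandons with probability p_break before the next result.
    p_look, found = 1.0, 0.0
    for rel in rels:
        found += p_look * rel
        p_look *= (1.0 - rel) * (1.0 - p_break)
    return found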
Example #11
    assert (len(score) == len(ret_products))
    assert (len(nz) == len(ret_products))

    for p, v, s in zip(ret_products, nz, score):
        p['rel'] = v[1]
        p['score'] = s

    #for p in ret_products:
    #    if p.get('score') is None:
    #        print(p)
    sorted_products = sorted(ret_products,
                             key=lambda x: x.get('score'),
                             reverse=True)

    ret_ndcg = ndcg(range(1,
                          len(sorted_products) + 1),
                    [x['rel'] for x in sorted_products])

    if ret_ndcg > 0.:
        if ret_ndcg > 0.7:
            f1.write(query + "\n")
        if ret_ndcg <= 0.7 and ret_ndcg > 0.6:
            f2.write(query + "\n")
        if ret_ndcg <= 0.6:
            f3.write(query + "\n")
        count += 1
        cum_ndcg += ret_ndcg
        if count % 200 == 0:
            print("Processed %d queries, ndcg: %0.4f" %
                  (count, cum_ndcg / count))
            f1.flush()
Example #12
import json
import sys

import numpy as np

# pr_rec, perfect_reranking and ndcg are helpers defined elsewhere in this module.
def evaluate(rel_file, retr_file, write_flag):
    with open(rel_file) as f:
        data_rel = json.load(f)

    with open(retr_file) as f:
        data_retr = json.load(f)

    total_relevant = 0
    rel_found = 0
    total_retrieved = 0
    macro_precision = 0
    macro_recall = 0
    t_ndcg = np.zeros((1, 3))

    sumAIP = 0
    sumIP = np.zeros((1, 11))
    sumAP = 0

    sum_prfAIP = 0
    sum_prfIP = np.zeros((1, 11))

    n_questions = len(data_retr["questions"])
    np.set_printoptions(threshold=sys.maxsize)  # modern NumPy rejects threshold=np.nan
    for i in range(n_questions):
        data_rel_id = data_rel["questions"][i]["id"]
        data_retr_id = data_retr["questions"][i]["id"]
        assert data_rel_id == data_retr_id

        relevant_docs = data_rel["questions"][i]["documents"]
        rel_docs = [doc.split('/')[4] for doc in relevant_docs]
        retr_docs = [str(x).strip() for x in data_retr["questions"][i]["retrieved"]]
        n_rel_docs = len(data_rel["questions"][i]["documents"])
        n_retr_docs = len(data_retr["questions"][i]["retrieved"])

        total_relevant += n_rel_docs
        total_retrieved += n_retr_docs
        common = set(rel_docs) & set(retr_docs)
        n_common = len(common)
        rel_found += n_common
        if n_retr_docs != 0:
            macro_precision += (n_common/n_retr_docs)
        if n_rel_docs != 0:
            macro_recall += (n_common/n_rel_docs)
        curve, avg_precision, int_curve = pr_rec(rel_docs, retr_docs)
        perfect_curve, perfect_int_curve = perfect_reranking(rel_docs, retr_docs, n_common)
        t_ndcg = np.add(t_ndcg, ndcg(rel_docs, retr_docs))

        sumAIP += np.sum(int_curve) / 11
        sumIP = np.add(sumIP, int_curve)

        sum_prfAIP += np.sum(perfect_int_curve, 1) / 11
        sum_prfIP = np.add(sum_prfIP, perfect_int_curve)
        sumAP += avg_precision

    mean_ndcg = np.divide(t_ndcg, n_questions)
    micro_precision = rel_found / total_retrieved
    macro_precision = macro_precision / n_questions

    micro_recall = rel_found / total_relevant
    macro_recall = macro_recall / n_questions

    MAP = sumAP / n_questions
    MAIP = sumAIP / n_questions
    MIP = np.divide(sumIP, n_questions)

    prf_MAIP = sum_prfAIP / n_questions
    prf_MIP = np.divide(sum_prfIP, n_questions)

    print()
    print('Micro-Average Precision: {0:.4f}'.format(micro_precision))
    print('Macro-Average Precision: {0:.4f}'.format(macro_precision))
    print('Micro-Average Recall:    {0:.4f}'.format(micro_recall))
    print('Macro-Average Recall:    {0:.4f}'.format(macro_recall))
    print('MAP:                     {0:.4f}'.format(MAP))
    print('MAIP:                    {0:.4f}'.format(MAIP))
    print('MIP:                     {}'.format(['{0:.4f}'.format(v) for v in MIP[0]]))
    print('nDCG@20:                 {0:.4f}'.format(mean_ndcg[0][0]))
    print('nDCG@100:                {0:.4f}\n'.format(mean_ndcg[0][1]))
    print('Perfect Reranking:')
    print('MAIP:                    {0:.4f}'.format(prf_MAIP[0]))
    print('MIP:                     {}'.format(['{0:.4f}'.format(v) for v in prf_MIP[0]]))


    if write_flag != '-n':
        with open('evaluation.txt', 'a+') as outfile:
            outfile.write(retr_file)
            outfile.write(' %.4f' % (micro_precision))
            outfile.write(' %.4f' % (macro_precision))
            outfile.write(' %.4f' % (micro_recall))
            outfile.write(' %.4f' % (macro_recall))
            outfile.write(' %.4f' % (MAP))
            outfile.write(' %.4f' % (MAIP))
            outfile.write(' ')
            np.savetxt(outfile, MIP, delimiter=' ', fmt='%.4f', newline=' ')
            outfile.write('%.4f ' % (prf_MAIP[0]))
            np.savetxt(outfile, prf_MIP, delimiter=' ', fmt='%.4f', newline=' ')
            np.savetxt(outfile, mean_ndcg, delimiter=' ', fmt='%.4f')
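
A hedged invocation sketch (file names are hypothetical; both JSON files follow the {"questions": [...]} layout the function indexes into):

if __name__ == '__main__':
    # Pass '-n' as the third argument to skip appending to evaluation.txt.
    evaluate('relevant.json', 'retrieved.json', '-n')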