# Assumes `import numpy as np` and that rank_metrics-style helpers
# (average_precision, mean_reciprocal_rank, ndcg_at_k, dcg_at_k, precision_at_k)
# are in scope.
def generate_reward(gold_index_list, answer_index_list):
    reward = 0
    ap = 0
    reciprocal_rank = 0
    answer_list = list(answer_index_list)
    size = len(answer_index_list)
    true = sum(gold_index_list > 0)
    inp = np.zeros(size)
    for rank, val in enumerate(gold_index_list):
        if val and rank in answer_list:
            inp[answer_list.index(rank)] = val
    maxk = sum(inp > 0)
    if true:
        ap = average_precision(inp) * (maxk / true)
    reciprocal_rank = mean_reciprocal_rank([inp])
    ndcg = ndcg_at_k(inp, min(10, size))
    dcg_five = dcg_at_k(inp, 5)
    reward = (ap + reciprocal_rank + ndcg + dcg_five) / 4
    ranks = [1, 3, 5, 10]
    reward_tuple = [reward, ap, reciprocal_rank, ndcg, dcg_five]
    for r in ranks:
        reward_tuple.append(precision_at_k(inp, min(r, len(inp))))
    for r in ranks:
        reward_tuple.append(ndcg_at_k(inp, min(r, len(inp))))
    return reward_tuple
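A minimal usage sketch for the function above, with made-up inputs; the gold vector and the ranked answer indices are illustrative only.

import numpy as np

# Toy inputs: per-candidate gold relevance, and the candidate indices in the
# order the model ranked them (both values are hypothetical).
gold = np.array([0, 1, 0, 1, 0])   # candidates 1 and 3 are relevant
ranked = [3, 0, 1, 4, 2]           # the model places candidate 3 first

scores = generate_reward(gold, ranked)
reward, ap, rr, ndcg, dcg5 = scores[:5]   # followed by P@{1,3,5,10} and nDCG@{1,3,5,10}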
Example 2
# Assumes numpy, sklearn (MinMaxScaler, r2_score, mean_absolute_error,
# roc_auc_score), the rank_metrics module, and the project-local helpers
# (mmr_sorted, prec_at_n, rec_at_n, meanNovelty_at_n, user_novelty_at_n,
# ild_at_n) are in scope.
def evalResults(results, trueRelevance, noveltyList, trainModelIDs, rev_dict,
                uid, alg, params, rec, outFile, diversity, novelty):
    params = [str(i) for i in params]
    #calculate rating precision
    mmScaler = MinMaxScaler(copy=True)
    results = mmScaler.fit_transform(results.reshape(-1, 1))
    results = results.reshape((-1, ))
    r2Sc = r2_score(trueRelevance, results)
    mae = mean_absolute_error(trueRelevance, results)

    #calculate ranking scores
    idx = (-results).argsort()

    if diversity == "yes":
        reranked = mmr_sorted(range(len(results)), 0.8, results, rev_dict, 10)
        idx1 = [k for k, v in reranked.items()]
        idx2 = [i for i in idx if i not in idx1]
        idx1.extend(idx2)
        idx = idx1

    rankedRelevance = trueRelevance[idx]
    rankedNovelty = noveltyList[idx]

    #print(rankedRelevance)

    map = rank_metrics.average_precision(rankedRelevance)
    aucSc = roc_auc_score(trueRelevance, results)
    nDCG10 = rank_metrics.ndcg_at_k(rankedRelevance, 10)
    nDCG100 = rank_metrics.ndcg_at_k(rankedRelevance, 100)
    nDCG = rank_metrics.ndcg_at_k(rankedRelevance, len(rankedRelevance))

    p5 = prec_at_n(rankedRelevance, 5)
    r5 = rec_at_n(rankedRelevance, 5)
    n5 = meanNovelty_at_n(rankedNovelty, 5)
    un5 = user_novelty_at_n(idx, trainModelIDs, 5)
    ild5 = ild_at_n(idx, rev_dict, 5)
    p10 = prec_at_n(rankedRelevance, 10)
    r10 = rec_at_n(rankedRelevance, 10)
    n10 = meanNovelty_at_n(rankedNovelty, 10)
    ild10 = ild_at_n(idx, rev_dict, 10)
    un10 = user_novelty_at_n(idx, trainModelIDs, 10)

    mrr = rank_metrics.mean_reciprocal_rank([rankedRelevance])

    #print((uid, alg, ",".join(params), rec, r2Sc, mae, map, aucSc, mrr, p5, p10, r5, r10, nDCG10, nDCG100, nDCG))

    txt = "%s;%s;%s;%s;%s;%s;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f;%.6f\n" % (
        uid, alg, ",".join(params), rec, diversity, novelty, r2Sc, mae, map,
        aucSc, mrr, p5, p10, r5, r10, nDCG10, nDCG100, nDCG, n5, n10, un5,
        un10, ild5, ild10)
    outFile.write(txt)
    return (r2Sc, mae, map, aucSc, mrr, p5, p10, r5, r10, nDCG10, nDCG100,
            nDCG, n5, n10, ild5, ild10)
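The diversity == "yes" branch keeps the head of the ranking chosen by mmr_sorted and appends the remaining indices in their original score order. A standalone sketch of that merge step, with a made-up reranked dict standing in for mmr_sorted's output:

import numpy as np

scores = np.array([0.9, 0.1, 0.8, 0.4, 0.7])
idx = (-scores).argsort()                  # score-ordered indices: [0, 2, 4, 3, 1]

# Pretend MMR selected items 2 and 4 for the diversified head (the keys of the
# dict returned by mmr_sorted in the function above).
reranked = {2: 0.95, 4: 0.60}

head = list(reranked.keys())               # diversified top of the list
tail = [i for i in idx if i not in head]   # everything else, still by score
final_idx = head + tail                    # [2, 4, 0, 3, 1]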
Example 3
def generate_reward(gold_index_list, answer_index_list, reward_type):
    reward = 0
    ap = 0.
    reciprocal_rank = 0
    answer_list = list(answer_index_list)
    size = len(answer_index_list)
    true = sum(gold_index_list > 0)
    inp = np.zeros(size)
    for rank, val in enumerate(gold_index_list):
        if val and rank in answer_list:
            inp[answer_list.index(rank)] = val
    maxk = sum(inp > 0)
    if true:
        ap = average_precision(inp) * (maxk / true)
    reciprocal_rank = mean_reciprocal_rank([inp])
    ndcg = ndcg_at_k(inp, min(10, size))
    dcg_five = dcg_at_k(inp, 5)
    reward = rewards[reward_type - 1](inp, ap, reciprocal_rank, ndcg, dcg_five)
    return reward, ap, reciprocal_rank, ndcg, dcg_five
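The rewards[reward_type - 1](...) call above relies on a module-level list of reward functions that is not included in this snippet. A plausible sketch, assuming each entry accepts the five arguments in the order they are passed; the exact definitions are unknown.

# Hypothetical reconstruction of the reward combinators referenced above.
rewards = [
    lambda inp, ap, rr, ndcg, dcg5: (ap + rr) / 2,         # reward_type == 1
    lambda inp, ap, rr, ndcg, dcg5: (ap + rr + ndcg) / 3,  # reward_type == 2
    lambda inp, ap, rr, ndcg, dcg5: dcg5,                  # reward_type == 3
]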
Example 4
def generate_reward(gold_index_list, answer_index_list, reward_type=1):
    reward = 0
    ap = 0
    reciprocal_rank = 0
    answer_list = list(deepcopy(answer_index_list))
    size = len(answer_index_list)
    true = sum(gold_index_list)
    inp = np.zeros(size)
    for rank, val in enumerate(gold_index_list):
        if val and rank in answer_list:
            inp[answer_list.index(rank)] = 2
    if true:
        ap = average_precision(inp) * (sum(inp > 0) / true)
    reciprocal_rank = mean_reciprocal_rank([inp])
    #ndcg = ndcg_at_k(inp,size)
    #if reward_type==1:
    #    reward = (ap+reciprocal_rank)/2
    #elif reward_type ==2 :
    #    reward = dcg_at_k(inp,size)
    rewards = [(ap + reciprocal_rank) / 2, dcg_at_k(inp, size)]
    return rewards[reward_type - 1], ap, reciprocal_rank, (inp[0] > 0)
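A small usage sketch for this variant, with made-up inputs; it assumes numpy, copy.deepcopy, and the rank_metrics helpers used above are already imported where the function is defined.

import numpy as np

# Hypothetical binary gold labels and a ranked list of candidate indices.
gold = np.array([1, 0, 1, 0])
ranked = [2, 1, 0, 3]

combined, ap, rr, hit_at_1 = generate_reward(gold, ranked, reward_type=1)  # (AP + MRR) / 2
dcg_reward, _, _, _ = generate_reward(gold, ranked, reward_type=2)         # DCG over the full list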
Example 5
def compute_metrics(ranked_judgements, pr_atk, threshold_grade):
    """
    Given the ranked judgements compute the metrics for a query.
    :param ranked_judgements: list(int); graded or binary relevances in rank order.
    :param pr_atk: int; the @K value to use for computing precision and recall.
    :param threshold_grade: int; Assuming 0-3 graded relevances, threshold at some point
        and convert graded to binary relevance.
    :return:
    """
    graded_judgements = ranked_judgements
    ranked_judgements = [
        1 if rel >= threshold_grade else 0 for rel in graded_judgements
    ]
    # Use the full set of candidates, not the pr_atk.
    ndcg = rm.ndcg_at_k(graded_judgements, len(ranked_judgements))
    ndcg_pr = rm.ndcg_at_k(graded_judgements,
                           int(0.20 * len(ranked_judgements)))
    ndcg_20 = rm.ndcg_at_k(graded_judgements, 20)
    max_total_relevant = sum(ranked_judgements)
    recall = recall_at_k(ranked_rel=ranked_judgements,
                         atk=pr_atk,
                         max_total_relevant=max_total_relevant)
    precision = rm.precision_at_k(r=ranked_judgements, k=pr_atk)
    r_precision = rm.r_precision(r=ranked_judgements)
    f1 = 2 * precision * recall / (precision + recall) if (precision +
                                                           recall) > 0 else 0.0
    av_precision = rm.average_precision(r=ranked_judgements)
    reciprocal_rank = rm.mean_reciprocal_rank(rs=[ranked_judgements])
    metrics = {
        'recall': float(recall),
        'precision': float(precision),
        'f1': float(f1),
        'r_precision': float(r_precision),
        'av_precision': float(av_precision),
        'reciprocal_rank': float(reciprocal_rank),
        'ndcg': ndcg,
        'ndcg@20': ndcg_20,
        'ndcg%20': ndcg_pr
    }
    return metrics
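A usage sketch, assuming the rank_metrics module is imported as rm and a recall_at_k helper with the signature used above is in scope; the graded judgements are made up.

# Hypothetical 0-3 graded judgements in rank order for one query.
judgements = [3, 0, 2, 1, 0, 3, 0, 0, 1, 0]

metrics = compute_metrics(ranked_judgements=judgements, pr_atk=5, threshold_grade=2)
print(metrics['precision'], metrics['recall'], metrics['ndcg@20'])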
Example 6

import numpy as np
import rank_metrics
import sys
relevanceVector = np.loadtxt(open(sys.argv[1] + "/rv/relevanceVector_" +
                                  sys.argv[2]),
                             delimiter=" ")
f = open(sys.argv[1] + '/em/evalMetrics_' + sys.argv[2], 'w')
for k in range(1, 16):
    # Accumulate per-query metrics; note these are sums over all rows
    # (queries), not averages.
    total_precision_k = 0
    total_dcg_k = 0
    total_ndcg_k = 0
    for row in relevanceVector:
        precision_k = rank_metrics.precision_at_k(row, k)
        dcg_k = rank_metrics.dcg_at_k(row, k, 0)
        ndcg_k = rank_metrics.ndcg_at_k(row, k, 0)
        total_precision_k = total_precision_k + precision_k
        total_dcg_k = total_dcg_k + dcg_k
        total_ndcg_k = total_ndcg_k + ndcg_k
    f.write("precision@" + str(k) + ": " + str(total_precision_k) + "\n")
    f.write("dcg@" + str(k) + ": " + str(total_dcg_k) + "\n")
    f.write("ndcg@" + str(k) + ": " + str(total_ndcg_k) + "\n")

mrr = rank_metrics.mean_reciprocal_rank(relevanceVector)
f.write("Mean Reciprocal Rank: " + str(mrr) + "\n")
maP = rank_metrics.mean_average_precision(relevanceVector)
f.write("Mean Average Precision: " + str(maP) + "\n")
f.close()
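Note that the per-k values written by this script are sums over all rows of relevanceVector rather than per-query means. If averages are preferred, a small hypothetical helper along these lines could replace the running totals:

import numpy as np
import rank_metrics

def mean_precision_at_k(relevance_rows, k):
    """Average precision@k over all queries (rows) instead of summing it."""
    return np.mean([rank_metrics.precision_at_k(row, k) for row in relevance_rows])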