Example #1
from trectools import TrecQrel, TrecEval, procedures


def eval(topx):
    qrels = TrecQrel("./relevance-args-v2.qrels")

    # Evaluate every run file found in the runs directory
    # (a P@10 plotting sketch follows this example)
    path_to_runs = "./runs/"
    runs = procedures.list_of_runs_from_path(path_to_runs, "*")

    for run in runs:
        print(run.get_filename())
        te = TrecEval(run, qrels)

        # Rank-biased precision and its residual (an upper bound on how much
        # unjudged documents could still change the score)
        rbp, residuals = te.get_rbp()

        coverage = run.get_mean_coverage(qrels, topX=topx)
        print(
            "Average number of documents judged among top %d: %.2f, "
            "that's about %.2f percent." % (topx, coverage, coverage / topx * 100))
        # precision = te.get_precision(depth=topx)
        # print("precision (p" + str(topx) + "): ", precision)

        ndcg = te.get_ndcg(depth=topx, removeUnjudged=False)
        print("nDCG (n=" + str(topx) + " removeUnjudged=False): " + str(ndcg))

        ndcg = te.get_ndcg(depth=topx, removeUnjudged=True)
        print("nDCG (n=" + str(topx) + " removeUnjudged=True): " + str(ndcg))

        print("--------\n")
Example #2
import pandas as pd
import trectools


def eval_rank_df(e: trectools.TrecEval, per_query=True) -> pd.Series:
    return pd.Series({
        "nDCG@5": e.get_ndcg(depth=5, per_query=per_query),
        "nDCG@10": e.get_ndcg(depth=10, per_query=per_query),
        "nDCG@20": e.get_ndcg(depth=20, per_query=per_query),
        "RR": e.get_reciprocal_rank(per_query=per_query)
    })
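A hedged usage sketch (the file names are hypothetical). With per_query=False every entry of the Series is a single float, so it prints as a compact summary; with per_query=True each entry is itself a per-query DataFrame:

from trectools import TrecQrel, TrecRun, TrecEval

run = TrecRun("my_system.run")       # hypothetical run file
qrels = TrecQrel("my_topics.qrels")  # hypothetical qrels file

summary = eval_rank_df(TrecEval(run, qrels), per_query=False)
print(summary)  # one scalar per metric: nDCG@5, nDCG@10, nDCG@20, RR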
Example #3
import os

from trectools import TrecRun, TrecEval


def evaluate(qrels, runs_file, topics, model):
    runs = TrecRun(runs_file)
    ev = TrecEval(runs, qrels)

    path_to_csv = os.path.join("eval", model, "results.csv")

    n_topics = len(topics)

    # Calculate the standard metrics for each query from the run/qrels provided
    print("Calculating metrics...")
    res = ev.evaluate_all(per_query=True)

    # Write the evaluation results to a csv file
    res.printresults(path_to_csv, "csv", perquery=True)

    # evaluate_all() does not include NDCG@100, so calculate it separately
    # and append it as a new column of the csv file.
    ndcgs = ev.get_ndcg(depth=100, per_query=True)
    values = [row["NDCG@100"] for _, row in ndcgs.iterrows()]

    with open(path_to_csv, "r") as f:
        lines = [line.rstrip("\n") for line in f]

    lines[0] += ",ndcg@100"  # add the new column to the header
    for i in range(1, n_topics + 1):  # lines 1..n hold the n per-query rows
        lines[i] += "," + str(values[i - 1])  # line i stores value i-1: lists start at 0

    global_ndcg = ev.get_ndcg(depth=100, per_query=False)  # aggregate NDCG
    lines[n_topics + 1] += "," + str(global_ndcg)  # the last line is the summary row

    with open(path_to_csv, "w") as f:
        f.write("\n".join(lines) + "\n")  # overwrite the csv with the new content
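The manual line splicing above is brittle; a sketch of the same column append done with pandas, reusing ev and path_to_csv from the function body, and assuming (as the code above already does) that the csv holds one row per query followed by a single aggregate row, in the same query order as get_ndcg's output:

import pandas as pd

# Hypothetical pandas version of the column append above
df = pd.read_csv(path_to_csv)
per_query = ev.get_ndcg(depth=100, per_query=True)
df["ndcg@100"] = list(per_query["NDCG@100"]) + [ev.get_ndcg(depth=100, per_query=False)]
df.to_csv(path_to_csv, index=False)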
Example #4
from trectools import TrecQrel, TrecRun, TrecEval


# A method of a class (the enclosing class is not shown in the snippet).
def map(self):
    qrels = TrecQrel("./Data/qrel.txt")  # a TrecQrel object, not a file path
    run = TrecRun("./Data/run.txt")      # a TrecRun object, not a path to runs
    te = TrecEval(run, qrels)
    return {"map": te.get_map(), "ndcg": te.get_ndcg()}
Example #5
import pandas as pd
import trectools


def eval_rank_df(e: trectools.TrecEval) -> pd.Series:
    return pd.Series({
        "nDCG@5": e.get_ndcg(depth=5, per_query=False),
        "nDCG@10": e.get_ndcg(depth=10, per_query=False),
        "nDCG@20": e.get_ndcg(depth=20, per_query=False),
    })