Example #1
from trectools import TrecRun, TrecQrel, TrecEval


def eval(qrel_file_path, run_file_path):
    """[summary]
    
    Arguments:
        qrel_file_path {[string]} -- [path of the qrel file usually located at the source language folder]
        run_file_path {[string]} -- [path of the run file usually located at the results folder of a language]
    
    Returns:
        [type] -- [precision@10, precision@20, precision@30, mAP rounded up to four digits]
    """

    r1 = TrecRun(run_file_path)
    qrels = TrecQrel(qrel_file_path)

    te = TrecEval(r1, qrels)
    p5 = te.get_precision(depth=5)
    p10 = te.get_precision(depth=10)
    p20 = te.get_precision(depth=20)
    mean_ap = te.get_map()
    rprec = te.get_rprec()
    # per-query results; computed here but not returned by this function
    run_object = r1.evaluate_run(qrels, per_query=True)

    return (round(p5, 4), round(p10, 4), round(p20, 4),
            round(mean_ap, 4), round(rprec, 4))
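
A minimal usage sketch for the function above; the file paths are hypothetical placeholders rather than paths from the original project:

# Hypothetical paths for illustration only.
p5, p10, p20, mean_ap, rprec = eval("../qrels/task1.qrels",
                                    "../results/en/run.txt")
print("P@5={} P@10={} P@20={} MAP={} Rprec={}".format(
    p5, p10, p20, mean_ap, rprec))
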
Example #2

import os

import numpy as np
from trectools import TrecQrel, TrecRun

# _getDirectoryContent is a project-specific helper that is not part of
# this snippet; a sketch of its assumed behaviour follows the function.


def collect(qrelsFilePath, baseDir):
    qrels = TrecQrel(qrelsFilePath)

    result = {}
    for i, [topicPath, topicNum] in enumerate(
            sorted(_getDirectoryContent(baseDir, directory=True),
                   key=lambda a_b: int(a_b[1]))):
        for modelPath, modelName in _getDirectoryContent(topicPath,
                                                         directory=True):
            modelName = modelName[:-4]  # drop the 4-character file extension
            if modelName not in result:
                result[modelName] = {}

            for filePath, fileName in _getDirectoryContent(modelPath,
                                                           file=True):
                score = 0

                # only evaluate non empty files
                if os.path.getsize(filePath) > 0:
                    run = TrecRun(filePath)
                    runResult = run.evaluate_run(qrels, per_query=True)
                    rs = list(
                        runResult.get_results_for_metric('P_10').values())
                    score = np.mean(rs)

                if fileName not in result[modelName]:
                    result[modelName][fileName] = [score]
                else:
                    result[modelName][fileName].append(score)
            print("Finished processing model {} of topic {}".format(
                modelName, topicNum))
        print("Finished processing topic: ", topicNum)

    # Calculate average over all topics
    for modelName in result:
        for comparisonName in result[modelName]:
            scores = result[modelName][comparisonName]
            result[modelName][comparisonName] = sum(scores) / len(scores)

    return result
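
The helper _getDirectoryContent is not shown in the original snippet. A minimal sketch of its assumed behaviour, yielding (path, name) pairs for the subdirectories or files directly under a directory:

def _getDirectoryContent(path, directory=False, file=False):
    # Assumed behaviour (not from the original source): yield
    # (full_path, entry_name) for each subdirectory (directory=True)
    # or file (file=True) directly under path.
    for name in sorted(os.listdir(path)):
        fullPath = os.path.join(path, name)
        if (directory and os.path.isdir(fullPath)) or \
           (file and os.path.isfile(fullPath)):
            yield fullPath, name
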
Example #3

from trectools import TrecRun, TrecQrel
from trectools import procedures
import glob
import os

task1_run_filepath = "../runs_t1/"
qrels_top = "../qrels/task1.qrels"

run_files = glob.glob(os.path.join(task1_run_filepath, "*.txt"))
topqrels = TrecQrel(qrels_top)

results = []

for filename in run_files:
    r = TrecRun(filename)
    res = r.evaluate_run(topqrels)
    results.append(res)

p10 = procedures.get_results(results, "P_10")
procedures.plot_system_rank("task1_p10.jpg", p10, "P@10")

bpref = procedures.get_results(results, "bpref")
procedures.plot_system_rank("task1_bpref.jpg", bpref, "BPREF")

map_ = procedures.get_results(results, "map")
procedures.plot_system_rank("task1_map.jpg", map_, "MAP")
Example #4
    else:
        mypath = "../../data/runs/iterative_lr_ir_run_dir/"

    run_files = [
        join(mypath, f) for f in listdir(mypath)
        if isfile(join(mypath, f)) and f.endswith('.run')
    ]
    print(run_files)
    p_10 = np.zeros(30)
    p_20 = np.zeros(30)
    count = 0

    for run_file in run_files:
        run = TrecRun(run_file)
        # print('run loaded')
        res = run.evaluate_run(myQrel)
        # print('run evaluated')
        keys = [item for item in res.get_results_for_metric("P_20").keys()]
        keys = sorted(keys, key=int)
        values_p20 = [res.get_results_for_metric("P_20")[i] for i in keys]
        values_p20 = np.asarray(values_p20)
        p_20 += values_p20

        keys = [item for item in res.get_results_for_metric("P_10").keys()]
        keys = sorted(keys, key=int)
        values_p10 = [res.get_results_for_metric("P_10")[i] for i in keys]
        values_p10 = np.asarray(values_p10)
        p_10 += values_p10
        count += 1
        # print('processed ' + str(count) + ' files')
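
The fragment ends before the accumulated precision arrays are used. A hedged sketch of a plausible continuation (not in the original source), averaging the per-topic precision over the processed runs:

    # Hypothetical continuation: turn the running sums into per-topic
    # means over all `count` runs that were processed.
    p_10 /= count
    p_20 /= count
    print('mean P@10 per topic:', p_10)
    print('mean P@20 per topic:', p_20)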