def generate_pairs_from_qrels(qrel_file, topk_rank=20, num_random=5000):
    qrels = TrecQrel(qrel_file)
    for topic in tqdm(qrels.topics()):
        qrels_for_topic = [d.to_dict() for _, d in qrels.qrels_data[qrels.qrels_data['query'] == topic].iterrows()]
        for i in range(0, len(qrels_for_topic)):
            for j in range(0, len(qrels_for_topic)):
                if qrels_for_topic[i]['rel'] > qrels_for_topic[j]['rel']:
                    yield __generate_single_qrel_pair(qrels_for_topic, qrels_for_topic[i], qrels_for_topic[j])
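# Hedged usage sketch (not from the original sources): assumes a qrels file named
# "train.qrels" exists and that __generate_single_qrel_pair is defined in the same module.
for pair in generate_pairs_from_qrels("train.qrels"):
    print(pair)  # inspect the first generated preference pair
    break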
def setUp(self):
    run1 = TrecRun("./files/r4.run")
    qrels1 = TrecQrel("./files/qrel1.txt")

    run2 = TrecRun("./files/input.uic0301")
    qrels2 = TrecQrel("./files/robust03_cs_qrels.txt")

    # Contains the first 30 documents for the first 10 topics in input.uic0301
    run3 = TrecRun("./files/input.uic0301_top30")

    self.commontopics = [303, 307, 310, 314, 320, 322, 325, 330, 336, 341]
    self.teval1 = TrecEval(run1, qrels1)
    self.teval2 = TrecEval(run2, qrels2)
    self.teval3 = TrecEval(run3, qrels2)
def eval(topx):
    qrels = TrecQrel("./relevance-args-v2.qrels")

    # Generates a P@10 graph with all the runs in a directory
    path_to_runs = "./runs/"
    runs = procedures.list_of_runs_from_path(path_to_runs, "*")

    for run in runs:
        print(run.get_filename())
        te = TrecEval(run, qrels)
        rbp, residuals = te.get_rbp()
        coverage = run.get_mean_coverage(qrels, topX=topx)
        print("Average number of documents judged among top %.0f: %.2f, that's about %.2f percent." % (topx, coverage, coverage / topx * 100))
        # precision = te.get_precision(depth=topx)
        # print("precision (p" + str(topx) + "): ", precision)
        ndcg = te.get_ndcg(depth=topx, removeUnjudged=False)
        print("nDCG (n=" + str(topx) + " removeUnjudged=False): " + str(ndcg))
        ndcg = te.get_ndcg(depth=topx, removeUnjudged=True)
        print("nDCG (n=" + str(topx) + " removeUnjudged=True): " + str(ndcg))
        print("--------\n")
def eval(qrel_file_path, run_file_path):
    """Evaluate a run file against a qrel file.

    Arguments:
        qrel_file_path {string} -- path of the qrel file, usually located in the source-language folder
        run_file_path {string} -- path of the run file, usually located in the results folder of a language

    Returns:
        tuple -- P@5, P@10, P@20, MAP and Rprec, each rounded to four digits
    """
    r1 = TrecRun(run_file_path)
    qrels = TrecQrel(qrel_file_path)
    te = TrecEval(r1, qrels)
    p5 = te.get_precision(depth=5)
    p10 = te.get_precision(depth=10)
    p20 = te.get_precision(depth=20)
    map = te.get_map()
    rprec = te.get_rprec()
    run_object = r1.evaluate_run(qrels, per_query=True)
    return round(p5, 4), round(p10, 4), round(p20, 4), round(map, 4), round(rprec, 4)
def compute_map(valid_codes, pred, gs_out_path=None):
    """
    Custom function to compute MAP evaluation metric.
    Code adapted from https://github.com/TeMU-BSC/CodiEsp-Evaluation-Script/blob/master/codiespD_P_evaluation.py
    """
    # Input args default values
    if gs_out_path is None:
        gs_out_path = './intermediate_gs_file.txt'

    pred_out_path = './intermediate_predictions_file.txt'

    ###### 2. Format predictions as TrecRun format: ######
    format_predictions(pred, pred_out_path, valid_codes)

    ###### 3. Calculate MAP ######
    # Load GS from qrel file
    qrels = TrecQrel(gs_out_path)

    # Load pred from run file
    run = TrecRun(pred_out_path)

    # Calculate MAP
    te = TrecEval(run, qrels)
    MAP = te.get_map(trec_eval=False)  # With this option False, rank order is taken from the given document order

    ###### 4. Return results ######
    return MAP
def trec_eval_ndcg(run_name, data_path='./data/', depths=[5, 10, 15, 20, 30, 100, 200, 500, 1000]):
    qrel_name = os.path.join(data_path, '2019qrels-pass.txt')
    qrel = TrecQrel(qrel_name)
    res = TrecRun(run_name)
    for depth in depths:
        score = TrecEval(res, qrel).get_ndcg(depth=depth)
        print('ndcg_cut_%d \t all \t %.4f' % (depth, score))
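# Hedged usage sketch (not from the original sources): the run file name is a
# placeholder, and '2019qrels-pass.txt' must exist under data_path.
trec_eval_ndcg("runs/bert_rerank.run", data_path="./data/", depths=[10, 100])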
def trec_eval(file):
    r1 = TrecRun(file)
    qrels = TrecQrel("./dataset/.txt")
    results = TrecEval(r1, qrels)
    p5 = results.get_precision(5)
    p10 = results.get_precision(10)
    p15 = results.get_precision(15)
    print(p5)
    print(p10)
    print(p15)
def main(gs_path, pred_path, codes_path):
    '''
    Load GS, predictions and valid codes; format GS and predictions according
    to TREC specifications; compute MAP and print it.

    Parameters
    ----------
    gs_path : str
        Path to Gold Standard TSV with 2 columns: filename, code
        It has no headers row.
    pred_path : str
        Path to predictions TSV with 2 columns: filename, code
        It has no headers row.
    codes_path : str
        Path to TSV file with valid codes.
        It has no headers row.

    Returns
    -------
    None.
    '''

    ###### 0. Load valid codes lists: ######
    valid_codes = set(pd.read_csv(codes_path, sep='\t', header=None, usecols=[0])[0].tolist())
    valid_codes = set([x.lower() for x in valid_codes])

    ###### 1. Format GS as TrecQrel format: ######
    qid_gs = format_gs(gs_path, './intermediate_gs_file.txt')

    ###### 2. Format predictions as TrecRun format: ######
    format_predictions(pred_path, './intermediate_predictions_file.txt', valid_codes, qid_gs)

    ###### 3. Calculate MAP ######
    # Load GS from qrel file
    qrels = TrecQrel('./intermediate_gs_file.txt')

    # Load pred from run file
    run = TrecRun('./intermediate_predictions_file.txt')

    # Calculate MAP
    te = TrecEval(run, qrels)
    MAP = te.get_map(trec_eval=False)  # With this option False, rank order is taken from the given document order

    ###### 4. Show results ######
    print('\nMAP estimate: {}\n'.format(round(MAP, 3)))
    # print('\n{}'.format(round(MAP, 3)))
    print('{}|{}'.format(pred_path, round(MAP, 3)))
def trec_eval(runs_file_path: Path, qrels_file_path: Path):
    metrics = dict()
    r1 = TrecRun(str(runs_file_path.absolute()))
    qrels = TrecQrel(str(qrels_file_path.absolute()))
    results = TrecEval(r1, qrels)
    metrics["P@5"] = results.get_precision(5)
    metrics["P@10"] = results.get_precision(10)
    metrics["P@15"] = results.get_precision(15)
    metrics["bpref"] = results.get_bpref()
    metrics["map"] = results.get_map()
    metrics = {k: round(v, 4) for k, v in metrics.items()}
    return metrics
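# Hedged usage sketch (not from the original sources): file names are placeholders.
from pathlib import Path

scores = trec_eval(Path("runs/bm25.run"), Path("qrels/test.qrels"))
print(scores)  # e.g. {'P@5': ..., 'P@10': ..., 'P@15': ..., 'bpref': ..., 'map': ...}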
def main(args):
    gold_labels = TrecQrel(args.gold_labels)
    prediction = TrecRun(args.scores)
    results = TrecEval(prediction, gold_labels)

    metrics = extract_metrics(results, args.metrics)
    metrics.loc[:, '@depth'] = metrics.loc[:, '@depth'].astype(str)
    metrics.loc[:, '@depth'] = metrics.loc[:, '@depth'].replace(str(MAX_DEPTH), 'all')

    if args.output:
        metrics.to_csv(args.output, sep="\t", index=False)
        logger.info(f"Saved results to {args.output}")
    else:
        print(metrics.to_string(index=False))
def main(args):
    format_check_passed = run_checks(args.scores)
    if not format_check_passed:
        return

    gold_labels = TrecQrel(args.gold_labels)
    prediction = TrecRun(args.scores)
    results = TrecEval(prediction, gold_labels)

    metrics = extract_metrics(results, args.metrics, args.depths)
    metrics.loc[:, '@depth'] = metrics.loc[:, '@depth'].astype(str)
    metrics.loc[:, '@depth'] = metrics.loc[:, '@depth'].replace(str(MAX_DEPTH), 'all')

    if args.output:
        metrics.to_csv(args.output, sep='\t', index=False)
        logger.info(f'Saved results to file: {args.output}')
    else:
        print(metrics.to_string(index=False))
def parse_qrels(input_file, labels_to_keep=None):
    ret = TrecQrel(input_file)
    if labels_to_keep is None:
        return ret
    else:
        ret.filename = None
        ret.qrels_data['tmp_delete_me'] = ret.qrels_data['docid'].apply(lambda i: labels_to_keep.keep_doc(i))
        ret.qrels_data = ret.qrels_data[ret.qrels_data['tmp_delete_me']]
        ret.qrels_data = ret.qrels_data.drop(['tmp_delete_me'], axis=1)
        return ret
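# Hedged usage sketch (not from the original sources): PrefixFilter is a minimal
# stand-in for the labels_to_keep object expected above; it only needs a keep_doc method.
class PrefixFilter:
    def keep_doc(self, docid):
        return docid.startswith("clueweb12")

filtered_qrels = parse_qrels("qrels.txt", labels_to_keep=PrefixFilter())
print(len(filtered_qrels.qrels_data))  # number of judgments that survived the filter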
def collect(qrelsFilePath, baseDir):
    qrels = TrecQrel(qrelsFilePath)
    result = {}
    for i, [topicPath, topicNum] in enumerate(sorted(_getDirectoryContent(baseDir, directory=True), key=lambda a_b: int(a_b[1]))):
        for modelPath, modelName in _getDirectoryContent(topicPath, directory=True):
            modelName = modelName[:-4]
            if modelName not in result:
                result[modelName] = {}
            for filePath, fileName in _getDirectoryContent(modelPath, file=True):
                score = 0
                # only evaluate non-empty files
                if os.path.getsize(filePath) > 0:
                    run = TrecRun(filePath)
                    runResult = run.evaluate_run(qrels, True)
                    rs = list(runResult.get_results_for_metric('P_10').values())
                    score = np.mean(rs)
                if fileName not in result[modelName]:
                    result[modelName][fileName] = [score]
                else:
                    result[modelName][fileName].append(score)
            print("Finished processing model {} of topic {}".format(modelName, topicNum))
        print("Finished processing topic: ", topicNum)

    # Calculate average over all topics
    for modelName in result:
        for comparisonName in result[modelName]:
            result[modelName][comparisonName] = sum(result[modelName][comparisonName]) / len(result[modelName][comparisonName])

    return result
        (QREL_DIR + 'qrels.inofficial.duplicate-free.web.' + topics + '.txt', 'cw09'),
        (transferred_prefix + 'transferred-to-cc15.' + topics + '.txt', 'cc15')
    ]

    if topics in ['1-50', '51-100', '101-150', '151-200']:
        ret += [
            (transferred_prefix + 'transferred-to-cw12.' + topics + '.txt', 'cw12'),
            (transferred_prefix + 'transferred-to-cw12wb12.' + topics + '.txt', 'cw12wb12')
        ]

    return ret


if __name__ == '__main__':
    args = parse_args()
    with open(args.outputFile, 'w+') as f:
        for topics in ['1-50', '51-100', '101-150', '151-200', '201-250', '251-300']:
            qrel_files_for_topic = qrel_files(topics)
            for qrel_file, corpus in qrel_files_for_topic:
                trec_qrel = TrecQrel(qrel_file)
                for run_file in list_run_files_of_qrel_file(qrel_files_for_topic[0][0]):
                    print(corpus + ': evaluate ' + run_file)
                    f.write(report_run(trec_qrel, corpus, topics, run_file) + '\n')
from trectools import TrecRun, TrecQrel, TrecRes, misc
import matplotlib.pylab as plt
import collections
from os import listdir
from os.path import isfile, join
import numpy as np

# retrieval_approaches = ['lm', 'prf']
# retrieval_approaches = ['lm', 'prf', 'lr']
retrieval_approaches = ['prf']

myQrel = TrecQrel("../../data/runs/iterative_qrel")
print('qrel description')
# print(myQrel.describe())

# results = []
for retrieval_approach in retrieval_approaches:
    print(retrieval_approach)
    if retrieval_approach == 'lm':
        mypath = "../../data/runs/iterative_lm_run_dir"
    elif retrieval_approach == 'prf':
        mypath = "../../data/runs/iterative_prf_run_dir"
    elif retrieval_approach == 'lr':
        mypath = "../../data/runs/iterative_lr_run_dir/clean"
    else:
        mypath = "../../data/runs/iterative_lr_ir_run_dir/"

    run_files = [
        join(mypath, f) for f in listdir(mypath)
import sys
from trectools import TrecQrel

filename = sys.argv[1]
tqrel = TrecQrel(filename)

df = tqrel.qrels_data.copy()


def multiply_the_bread(x):
    for a in range(1, 7):
        print(str(x["query"]) + "00" + str(a), x["q0"], x["filename"], x["rel"])


df.apply(multiply_the_bread, axis=1)
def map(self):
    qrels_file = TrecQrel("./Data/qrel.txt")
    path_to_runs = TrecRun("./Data/run.txt")
    te = TrecEval(path_to_runs, qrels_file)
    dic = {"map": te.get_map(), "ndcg": te.get_ndcg()}
    return dic
def evaluation(topics, r_test, ix):
    # Recall-precision curves for different output sizes
    # MAP
    # BPREF
    # Cumulative gains
    # Efficiency

    # train_corpus = process_documents(corpus_directory, train=True)  # Stemmed documents
    # test_corpus = process_documents(corpus_directory, train=False)  # Stemmed documents
    # processed_topics = process_topics(topic_directory, stemmed=True)  # Stemmed topics
    # train_rels = extract_relevance(qrels_train_directory)

    print("Executing boolean queries...")
    unranked_results = [boolean_query(topic, k, ix) for topic in topics]
    print("Executing TF-IDF queries...")
    tfidf_results = [ranking(topic, p, ix, "TF-IDF") for topic in topics]
    # print("Executing TF-IDF queries (with classifier input)...")
    # tfidf_results = ranking_with_classifier(train_corpus, test_corpus, train_rels, topic_ids, 500, ix, 1.0, 0.0)
    print("Executing BM25 queries...")
    bm25_results = [ranking(topic, p, ix, "BM25") for topic in topics]
    # print("Executing queries with pagerank input (reusing BM25 directory):")
    # bm25_results = ranking_with_pagerank(test_corpus, processed_topics, docs_to_test, "BM25", ix,
    #                                      threshold=0.4, use_priors=True, weighted=True, alpha1=0.25, alpha2=0.75)

    # Query results are stored in temp/<scoring>/runs.txt, where scoring can either be "boolean", "tfidf" or "bm25"
    # Creating runs files for TrecTools
    print("Writing prep files...")
    boolean_runs = os.path.join("runs", "boolean.txt")
    with open(boolean_runs, "w") as f:
        for i, topic in enumerate(unranked_results):
            for j, r in enumerate(topic):
                f.write(f"{topics[i]} Q0 {r} {j+1} 1 booleanIR\n")

    tfidf_runs = os.path.join("runs", "tfidf.txt")
    with open(tfidf_runs, "w") as f:
        for i, topic in enumerate(tfidf_results):
            for j, r in enumerate(topic):
                f.write(f"{topics[i]} Q0 {r[0]} {j+1} {r[1]} tfidfIR\n")

    bm25_runs = os.path.join("runs", "bm25.txt")
    with open(bm25_runs, "w") as f:
        for i, topic in enumerate(bm25_results):
            for j, r in enumerate(topic):
                f.write(f"{topics[i]} Q0 {r[0]} {j+1} {r[1]} bm25IR\n")

    # Creating qrels file with the right format (at temp/qrelstest.txt)
    qrels_file = os.path.join("runs", "qrelstest.txt")
    with open(qrels_file, "w") as new:
        with open(r_test, "r") as f:
            for line in f:
                topic, doc, relevant = line.split()
                if int(topic[1:]) in topics:
                    new.write(f"{topic[1:]} 0 {doc} {relevant}\n")

    # Judgment
    qrels = TrecQrel(qrels_file)

    # Evaluation files are stored in temp/<scoring>/eval.csv, where scoring can either be "boolean", "tfidf" or "bm25"
    # Unranked evaluation
    print("Beginning evaluation for boolean retrieval.")
    evaluate_boolean(qrels_file, unranked_results, topics)
    print("Done!")

    # TF-IDF evaluation
    print("Beginning evaluation for TF-IDF retrieval.")
    evaluate(qrels, tfidf_runs, topics, "tfidf")
    print("Plotting Precision-Recall curves for each topic...")
    plot_rp_curve(qrels, topics, tfidf_runs, tfidf_results, "tfidf")
    print("Done!")

    # BM25 evaluation
    print("Beginning evaluation for BM25 retrieval.")
    evaluate(qrels, bm25_runs, topics, "bm25")
    print("Plotting Precision-Recall curves for each topic...")
    plot_rp_curve(qrels, topics, bm25_runs, bm25_results, "bm25")
    print("Done!")

    print("All evaluations finished. You can see detailed results in the 'eval' folder.")
        yield json.dumps({
            'corpus': extract_corpus(run_file_name),
            'topic': query,
            'tag': system,
            "bpref": r['Bpref@1000'],
            "pseudoNDCG@10": r['NDCG@10'],
            "pseudoNDCG": r['NDCG@1000']
        })


if __name__ == '__main__':
    args = parse_args()

    with open(args.outputFile + '-per-query-zero-scores-removed.jsonl', 'w+') as f:
        for run_file_dir in RUN_FILE_DIR_TO_QRELS:
            trec_qrel = TrecQrel(RUN_FILE_DIR_TO_QRELS[run_file_dir])
            for run_file in glob.glob(args.inputDir + run_file_dir + '/final-rankings/*.txt', recursive=True):
                print('Evaluate ' + run_file)
                for report_line in report_run_per_query(trec_qrel, run_file, remove_docs_with_zero_score=True):
                    f.write(report_line + '\n')

    with open(args.outputFile + '-zero-scores-removed.jsonl', 'w+') as f:
        for run_file_dir in RUN_FILE_DIR_TO_QRELS:
            trec_qrel = TrecQrel(RUN_FILE_DIR_TO_QRELS[run_file_dir])
            for run_file in glob.glob(args.inputDir + run_file_dir + '/final-rankings/*.txt', recursive=True):
                print('Evaluate ' + run_file)
import json
import pandas as pd
import multiprocessing as mp
from trectools import TrecQrel
from os import listdir

THRESHOLD = 0.82
DIR = '/mnt/ceph/storage/data-in-progress/data-research/web-search/SIGIR-21/sigir21-deduplicate-trec-run-files/'
qrels = None
TRACK_TO_QRELS = {
    '18': TrecQrel(DIR + 'qrel-files/qrels-web-2009.txt'),
    '19': TrecQrel(DIR + 'qrel-files/qrels-web-2010.txt'),
    '20': TrecQrel(DIR + 'qrel-files/qrels-web-2011.txt'),
    '21': TrecQrel(DIR + 'qrel-files/qrels-web-2012.txt'),
    '22': TrecQrel(DIR + 'qrel-files/qrels-web-2013.txt'),
    '23': TrecQrel(DIR + 'qrel-files/qrels-web-2014.txt'),
}


def analyze_line(line):
    dedup_data = json.loads(line)
    topic = dedup_data['topic']
    judged_docs = set(qrels.qrels_data[(qrels.qrels_data['query'] == int(topic))]['docid'])
    irrelevant_docs = set(qrels.qrels_data[(qrels.qrels_data['query'] == int(topic)) & (qrels.qrels_data['rel'] <= 0)]['docid'])

    ret = []
    for sim in dedup_data['similarities']:
        is_judged = sim['firstId'] in judged_docs or sim[
def evaluation(topics, r_test, ix):
    # Recall-precision curves for different output sizes
    # MAP
    # BPREF
    # Cumulative gains
    # Efficiency

    print("Executing boolean queries...")
    unranked_results = [boolean_query(topic, k, ix) for topic in topics]
    print("Executing TF-IDF queries...")
    tfidf_results = [ranking(topic, p, ix, "TF-IDF") for topic in topics]
    print("Executing BM25 queries...")
    bm25_results = [ranking(topic, p, ix, "BM25") for topic in topics]

    # Query results are stored in temp/<scoring>/runs.txt, where scoring can either be "boolean", "tfidf" or "bm25"
    # Creating runs files for TrecTools
    print("Writing prep files...")
    boolean_runs = os.path.join("runs", "boolean.txt")
    with open(boolean_runs, "w") as f:
        for i, topic in enumerate(unranked_results):
            for j, r in enumerate(topic):
                f.write(f"{topics[i]} Q0 {r} {j+1} 1 booleanIR\n")

    tfidf_runs = os.path.join("runs", "tfidf.txt")
    with open(tfidf_runs, "w") as f:
        for i, topic in enumerate(tfidf_results):
            for j, r in enumerate(topic):
                f.write(f"{topics[i]} Q0 {r[0]} {j+1} {r[1]} tfidfIR\n")

    bm25_runs = os.path.join("runs", "bm25.txt")
    with open(bm25_runs, "w") as f:
        for i, topic in enumerate(bm25_results):
            for j, r in enumerate(topic):
                f.write(f"{topics[i]} Q0 {r[0]} {j+1} {r[1]} bm25IR\n")

    # Creating qrels file with the right format (at temp/qrelstest.txt)
    qrels_file = os.path.join("runs", "qrelstest.txt")
    with open(qrels_file, "w") as new:
        with open(r_test, "r") as f:
            for line in f:
                topic, doc, relevant = line.split()
                if int(topic[1:]) in topics:
                    new.write(f"{topic[1:]} 0 {doc} {relevant}\n")

    # Judgment
    qrels = TrecQrel(qrels_file)

    # Evaluation files are stored in temp/<scoring>/eval.csv, where scoring can either be "boolean", "tfidf" or "bm25"
    # Unranked evaluation
    print("Beginning evaluation for boolean retrieval.")
    evaluate_boolean(qrels_file, unranked_results, topics)
    print("Done!")

    # TF-IDF evaluation
    print("Beginning evaluation for TF-IDF retrieval.")
    evaluate(qrels, tfidf_runs, topics, "tfidf")
    print("Plotting Precision-Recall curves for each topic...")
    plot_rp_curve(qrels, topics, tfidf_runs, tfidf_results, "tfidf")
    print("Done!")

    # BM25 evaluation
    print("Beginning evaluation for BM-25 retrieval.")
    evaluate(qrels, bm25_runs, topics, "bm25")
    print("Plotting Precision-Recall curves for each topic...")
    plot_rp_curve(qrels, topics, bm25_runs, bm25_results, "bm25")
    print("Done!")

    print("All evaluations finished. You can see detailed results in the 'eval' folder.")
porter_stemmer = PorterStemmer()
regex = re.compile('[%s]' % re.escape(string.punctuation))
stopwords = list(set(stopwords.words('english')))  # [line.strip() for line in open("ENstopwords891.txt", 'r', encoding='utf-8').readlines()]
new_punctuation = list(string.punctuation)

documents = [i.get_text() for i in paragraphs]
for no, doc in enumerate(documents):
    documents[no] = " ".join([
        porter_stemmer.stem(i) for i in regex.sub(' ', doc).split()
        if (i != " ") & (i not in stopwords) & (not i.isdigit()) & (i not in new_punctuation)
    ])  # pre-process: stemming

pickle.dump(documents, open('processed_data\processed_paragraph.pkl', 'wb'))

A = TrecQrel("H://dataset//data//test200-train//train.pages.cbor-article.qrels")
article = A.qrels_data
H = TrecQrel("H://dataset//data//test200-train//train.pages.cbor-hierarchical.qrels")
hierarchical = H.qrels_data
T = TrecQrel("H://dataset//data//test200-train//train.pages.cbor-toplevel.qrels")
toplevel = T.qrels_data

# Combine the query files about paragraph retrieval
combine = list(toplevel['query'])
combine.extend(list(hierarchical['query']))
combine.extend(list(article['query']))
combine = np.unique(combine)

flat_query = []
for query in combine:
from trectools import TrecQrel
from elasticsearch import Elasticsearch, NotFoundError
import os
import codecs

es = Elasticsearch(["40.68.209.241:9200"])
qrels = TrecQrel("./data/clef-dynamic-topic-subset-click-data.txt")

nmissing = 0
npages = 0
for topic in qrels.topics():
    if not os.path.exists(os.path.join("data", "topic%d" % (topic))):
        os.makedirs(os.path.join("data", "topic%d" % (topic)))
        os.makedirs(os.path.join("data", "topic%d" % (topic), "pos"))
        os.makedirs(os.path.join("data", "topic%d" % (topic), "neg"))

    for docid in qrels.get_document_names_for_topic(topic):
        npages += 1
        print("Downloading %s" % (docid))
        try:
            r = es.get(index="clueweb12_docs", id=docid)
        except NotFoundError:
            print("Missing: %s" % (docid))
            nmissing += 1
            continue

        if qrels.get_judgement(docid, topic):
            outpath = os.path.join("data", "topic%d" % (topic), "pos", docid)
        else:
from trectools import TrecRun, TrecQrel
from trectools import procedures
import glob
import os

task1_run_filepath = "../runs_t1/"
qrels_top = "../qrels/task1.qrels"

filepath = glob.glob(os.path.join(task1_run_filepath, "*.txt"))
topqrels = TrecQrel(qrels_top)

results = []
for filename in filepath:
    r = TrecRun(filename)
    res = r.evaluate_run(topqrels)
    results.append(res)

p10 = procedures.get_results(results, "P_10")
procedures.plot_system_rank("task1_p10.jpg", p10, "P@10")

bpref = procedures.get_results(results, "bpref")
procedures.plot_system_rank("task1_bpref.jpg", bpref, "BPREF")

map_ = procedures.get_results(results, "map")
procedures.plot_system_rank("task1_map.jpg", map_, "MAP")
    if '.web.251-300' in qrel_file_name:
        return '/mnt/ceph/storage/data-in-progress/trec-system-runs/trec23/web.adhoc/'

    raise ValueError('Could not handle: ' + str(qrel_file_name))


if __name__ == '__main__':
    label_transfers = ['', 'cw12-url', 'wayback-cw12-url']

    for track in ['qrels-web-2009', 'qrels-web-2010', 'qrels-web-2011', 'qrels-web-2012']:
        track_display_name = track.replace('qrels-', '')
        qrels = {
            'orig': TrecQrel('data/' + track),
            'cw12-url': TrecQrel('data/' + track + '-cw12-url'),
            'wayback-cw12-url': TrecQrel('data/' + track + '-wayback-cw12-url'),
        }

        track_eval_data = []

        for run_file in list_run_files_of_qrel_file(track):
            run = TrecRun(run_file)
            run_file_eval = {'run': run.get_runid(), 'track': track_display_name}
import os
from trectools import TrecQrel, procedures

qrels_file = os.path.join('gov', 'qrels', 'gov.qrels')
qrels = TrecQrel(qrels_file)

path_to_runs = 'runs'
runs = procedures.list_of_runs_from_path(path_to_runs, '*.runs')

results = procedures.evaluate_runs(runs, qrels, per_query=False)

metrics = ['map', 'Rprec', 'recip_rank', 'P_5', 'P_10', 'P_15']
for metric in metrics:
    print(f'{metric}: {procedures.extract_metric_from_results(results, metric)[0][1]}')
gs_path, pred_path, codes_path = parse_arguments()

###### 0. Load valid codes lists: ######
valid_codes = set(pd.read_csv(codes_path, sep='\t', header=None, usecols=[0])[0].tolist())
valid_codes = set([x.lower() for x in valid_codes])

###### 1. Format GS as TrecQrel format: ######
format_gs(gs_path, './intermediate_gs_file.txt')

###### 2. Format predictions as TrecRun format: ######
format_predictions(pred_path, './intermediate_predictions_file.txt', valid_codes)

###### 3. Calculate MAP ######
# Load GS from qrel file
qrels = TrecQrel('./intermediate_gs_file.txt')

# Load pred from run file
run = TrecRun('./intermediate_predictions_file.txt')

# Calculate MAP
te = TrecEval(run, qrels)
MAP = te.get_map(trec_eval=False)  # With this option False, rank order is taken from the given document order

###### 4. Show results ######
print('\nMAP estimate: {}\n'.format(round(MAP, 3)))