def report_run_per_query(qrels, run_file_name, remove_docs_with_zero_score=False):
    """Yield one JSON-encoded record of evaluation scores per query of a run.

    The run file is loaded with ``TrecRun`` and evaluated against ``qrels``
    with ``TrecEval``. Bpref, NDCG at depth 10 and NDCG at the default depth
    are requested per query and joined on the ``query`` key.

    Parameters:
        qrels: relevance judgements, passed through unchanged to ``TrecEval``.
        run_file_name: path of the run file; also handed to
            ``extract_corpus`` to fill the ``corpus`` field.
        remove_docs_with_zero_score: when true, only rows whose ``score`` is
            greater than zero are kept before evaluating.

    Yields:
        A JSON string with the keys ``corpus``, ``topic``, ``tag``,
        ``bpref``, ``pseudoNDCG@10`` and ``pseudoNDCG`` for every row of the
        joined per-query table.
    """
    trec_run = TrecRun(run_file_name)
    # The tag is read before the optional filtering below, i.e. from the
    # run data exactly as it was loaded.
    tag = trec_run.run_data['system'][0]

    if remove_docs_with_zero_score:
        has_positive_score = trec_run.run_data['score'] > 0
        trec_run.run_data = trec_run.run_data[has_positive_score]

    evaluator = TrecEval(trec_run, qrels)
    bpref_scores = evaluator.getBpref(per_query=True)
    ndcg_at_10 = evaluator.getNDCG(depth=10, per_query='query')
    ndcg_default_depth = evaluator.getNDCG(per_query='query')

    # Fold the NDCG tables into the Bpref table, one join at a time.
    merged = bpref_scores
    for metric_frame in (ndcg_at_10, ndcg_default_depth):
        merged = merged.join(metric_frame, on='query')

    for topic, scores in merged.iterrows():
        record = {
            'corpus': extract_corpus(run_file_name),
            'topic': topic,
            'tag': tag,
            'bpref': scores['Bpref@1000'],
            'pseudoNDCG@10': scores['NDCG@10'],
            'pseudoNDCG': scores['NDCG@1000'],
        }
        yield json.dumps(record)
def report_run(qrels, run_file_name, remove_docs_with_zero_score=False):
    """Return a JSON string summarising the evaluation scores of one run.

    The run file is loaded with ``TrecRun`` and evaluated against ``qrels``
    with ``TrecEval``; Bpref, NDCG at depth 10 and NDCG at the default depth
    are reported for the run as a whole.

    Parameters:
        qrels: relevance judgements, passed through unchanged to ``TrecEval``.
        run_file_name: path of the run file; also handed to
            ``extract_corpus`` and ``extract_topics`` to fill the ``corpus``
            and ``topics`` fields.
        remove_docs_with_zero_score: when true, only rows whose ``score`` is
            greater than zero are kept before evaluating.

    Returns:
        A JSON string with the keys ``corpus``, ``topics``, ``tag``,
        ``bpref``, ``pseudoNDCG@10`` and ``pseudoNDCG``.
    """
    trec_run = TrecRun(run_file_name)
    # The tag is read before the optional filtering below, i.e. from the
    # run data exactly as it was loaded.
    tag = trec_run.run_data['system'][0]

    if remove_docs_with_zero_score:
        has_positive_score = trec_run.run_data['score'] > 0
        trec_run.run_data = trec_run.run_data[has_positive_score]

    evaluator = TrecEval(trec_run, qrels)

    # Filled key by key so the field order of the emitted JSON is explicit.
    summary = {}
    summary['corpus'] = extract_corpus(run_file_name)
    summary['topics'] = extract_topics(run_file_name)
    summary['tag'] = tag
    summary['bpref'] = evaluator.getBpref()
    summary['pseudoNDCG@10'] = evaluator.getNDCG(depth=10)
    summary['pseudoNDCG'] = evaluator.getNDCG()
    return json.dumps(summary)
def reciprocal_rank_fusion(trec_runs, k=60, max_docs=1000, output=sys.stdout):
    """Fuse several runs into one using reciprocal rank fusion.

    Implements reciprocal rank fusion as defined in ``Reciprocal Rank Fusion
    outperforms Condorcet and individual Rank Learning Methods`` by Cormack,
    Clarke and Buettcher. For every topic, each document receives the sum of
    ``1 / (k + position)`` over the runs in which it appears, where
    ``position`` is its 1-based position in that run's top documents.

    Parameters:
        trec_runs: sequence of run objects. The topics of the FIRST run
            decide which topics are fused; every run must provide
            ``get_top_documents(topic, n=...)``.
        k: term to avoid vanishing importance of lower-ranked documents.
            Default value is 60 (default value used in their paper).
        max_docs: maximum number of fused documents kept per topic.
        output: unused; kept so existing callers that pass it keep working.

    Returns:
        A ``TrecRun`` whose ``run_data`` is a DataFrame with the columns
        ``query``, ``q0``, ``docid``, ``rank``, ``score`` and ``system``.
        When no document is fused the frame is empty but still carries
        these columns.

    Raises:
        IndexError: if ``trec_runs`` is empty.
    """
    columns = ["query", "q0", "docid", "rank", "score", "system"]
    system_tag = "reciprocal_rank_fusion_k=%d" % k

    rows = []
    for topic in sorted(trec_runs[0].topics()):
        doc_scores = {}
        for run in trec_runs:
            # NOTE: the per-run depth is fixed at 1000 regardless of
            # max_docs; max_docs only truncates the fused list below.
            top_docs = run.get_top_documents(topic, n=1000)
            for pos, docid in enumerate(top_docs, start=1):
                doc_scores[docid] = doc_scores.get(docid, 0.0) + 1.0 / (k + pos)

        # Highest fused score first; ties are broken by docid so the
        # output order is deterministic.
        ranked = sorted(doc_scores.items(), key=lambda item: (-item[1], item[0]))
        for rank, (docid, score) in enumerate(ranked[:max_docs], start=1):
            rows.append((topic, "Q0", docid, rank, score, system_tag))

    # Passing the column names to the constructor keeps this valid when
    # `rows` is empty; assigning `df.columns` afterwards would raise a
    # length-mismatch ValueError on an empty frame.
    df = pd.DataFrame(rows, columns=columns)
    # `np.str` was only an alias of the builtin and was removed in
    # NumPy 1.24, so the builtin `str` is used directly.
    df["q0"] = df["q0"].astype(str)

    output_run = TrecRun()
    output_run.run_data = df
    return output_run