def evaluate_run(self, a_trec_qrel, outfile=None, printfile=True, debug=False):
    """
    Evaluate this run against a qrel using the external ``trec_eval`` tool.

    It is necessary to have trec_eval set on your PATH to run this function.

    Params
    -------
    a_trec_qrel: qrel object providing ``get_full_filename_path()``.
    outfile: destination for trec_eval's output. Defaults to the run's
        filename plus ".res". Ignored when printfile is False.
    printfile: if True, keep the result file on disk; if False, use a
        temporary file and delete it after parsing.
    debug: forwarded to ``evaluate_external_script``.

    Returns
    --------
    A TrecRes object with the parsed trec_eval output.
    """
    if printfile:
        if not outfile:
            outfile = self.get_full_filename_path() + ".res"
    else:
        # Fixed temp name in the CWD, kept for backward compatibility.
        outfile = ".tmp_res"

    # NOTE(review): paths are interpolated unquoted into a shell command;
    # filenames containing spaces or shell metacharacters will break this.
    cmd = "trec_eval -q %s %s > %s" % (a_trec_qrel.get_full_filename_path(),
                                       self.get_full_filename_path(), outfile)
    self.evaluate_external_script(cmd, debug)
    res = TrecRes(outfile)

    if not printfile:
        # Remove the temp file directly instead of spawning 'rm -f'.
        try:
            os.remove(outfile)
        except OSError:
            pass
    return res
def evaluate_ubire(self, a_trec_qrel, a_trec_other, p=0.8, stoprank=10, outfile=None, extension="ures", printfile=True, debug=False):
    """
    Evaluate this run with the uBIRE tool (understandability-biased IR eval).

    It is necessary to have ubire.jar in the current working directory to
    run this function.

    Params
    -------
    a_trec_qrel: relevance qrel object providing ``get_full_filename_path()``.
    a_trec_other: readability/understandability qrel object.
    p: RBP persistence parameter passed as --rbp-p.
    stoprank: rank cutoff passed as --stoprank.
    outfile: destination for ubire's output. Defaults to the run's filename
        plus "." + extension. Ignored when printfile is False.
    extension: suffix used when outfile is not given.
    printfile: if True, keep the result file on disk; if False, use a
        temporary file and delete it after parsing.
    debug: forwarded to ``evaluate_external_script``.

    Returns
    --------
    A TrecRes object, or None if ubire.jar is not found.
    """
    if not os.path.isfile(os.path.join(os.getcwd(), "ubire.jar")):
        print("File ubire.jar was not found in the current directory.")
        print("Please move it here (%s) and run this procedure again." % (os.getcwd()))
        return None

    if printfile:
        if not outfile:
            outfile = self.get_full_filename_path() + "." + extension
    else:
        # BUG FIX: the original wrote the output to ".tmp_ures" but then
        # parsed and deleted ".tmp_ubire" — reading a nonexistent file and
        # leaking the real temp file. Use one name consistently.
        outfile = ".tmp_ures"

    cmd = ("java -jar ubire.jar -q --qrels-file=%s --qread-file=%s --readability"
           " --rbp-p=%f --stoprank=%d --ranking-file=%s > %s") % (
        a_trec_qrel.get_full_filename_path(),
        a_trec_other.get_full_filename_path(),
        p, stoprank, self.get_full_filename_path(), outfile)
    self.evaluate_external_script(cmd, debug)
    res = TrecRes(outfile)

    if not printfile:
        # Remove the temp file directly instead of spawning 'rm -f'.
        try:
            os.remove(outfile)
        except OSError:
            pass
    return res
def evaluateAll(self, per_query=False):
    """
    Runs all evaluation metrics as the default trec_eval tool.

    Deprecated camelCase alias kept for backward compatibility. The original
    body was a full duplicate of ``evaluate_all`` (only the accessor naming
    differed: getBpref vs get_bpref, etc.), so it now simply forwards to that
    method to keep the two code paths from drifting apart.

    Params
    -------
    per_query: If True, runs the evaluation per query. Default = False

    Returns
    --------
    A TrecRes object
    """
    return self.evaluate_all(per_query=per_query)
def evaluate_all(self, per_query=False):
    """
    Runs all evaluation metrics as the default trec_eval tool.

    Params
    -------
    per_query: If True, runs the evaluation per query. Default = False

    Returns
    --------
    An TrecRes object
    """
    depths = [5, 10, 15, 20, 30, 100, 200, 500, 1000]
    run_id = self.run.get_runid()

    def _tag(frame, metric, value_col):
        # Normalise a per-query result frame: flatten the index, label the
        # metric, and rename the value column to the common "value" name.
        frame = frame.reset_index()
        frame["metric"] = metric
        frame.rename(columns={value_col: "value"}, inplace=True)
        return frame

    per_query_frames = []
    if per_query:
        per_query_frames.append(_tag(
            self.get_bpref(depth=1000, per_query=True, trec_eval=True),
            "bpref", "Bpref@1000"))
        for d in depths:
            per_query_frames.append(_tag(
                self.get_precision(depth=d, per_query=True, trec_eval=True),
                "P_%d" % (d), "P@%d" % (d)))
        per_query_frames.append(_tag(
            self.get_map(depth=1000, per_query=True, trec_eval=True),
            "map", "MAP@1000"))
        per_query_frames.append(_tag(
            self.get_retrieved_documents(per_query=True),
            "num_ret", "docid"))
        per_query_frames.append(_tag(
            self.get_relevant_documents(per_query=True),
            "num_rel", "relevant_per_query"))
        per_query_frames.append(_tag(
            self.get_relevant_retrieved_documents(per_query=True),
            "num_rel_ret", "rel"))
        per_query_frames.append(_tag(
            self.get_rprec(per_query=True),
            "Rprec", "RPrec@1000"))
        per_query_frames.append(_tag(
            self.get_reciprocal_rank(per_query=True),
            "recip_rank", "recip_rank@1000"))

    precision_at = {
        d: self.get_precision(depth=d, per_query=False, trec_eval=True)
        for d in depths
    }

    # (metric, value) pairs for the aggregated "all" rows, in the same order
    # trec_eval prints them.
    overall = [
        ("runid", run_id),
        ("num_ret", self.get_retrieved_documents(per_query=False)),
        ("num_rel", self.get_relevant_documents(per_query=False)),
        ("num_rel_ret", self.get_relevant_retrieved_documents(per_query=False)),
        ("num_q", len(self.run.topics())),
        ("map", self.get_map(depth=10000, per_query=False, trec_eval=True)),
        ("gm_map", self.get_geometric_map(depth=10000, trec_eval=True)),
        ("bpref", self.get_bpref(depth=1000, per_query=False, trec_eval=True)),
        ("Rprec", self.get_rprec(depth=1000, per_query=False, trec_eval=True)),
        ("recip_rank", self.get_reciprocal_rank(depth=1000, per_query=False, trec_eval=True)),
    ]
    overall.extend(("P_%d" % (d), precision_at[d]) for d in depths)

    # TODO: iprec_at_recall_LEVEL is missing from the default trec_eval metrics
    summary = pd.DataFrame(
        [{"metric": m, "query": "all", "value": v} for m, v in overall])

    if len(per_query_frames) > 0:
        summary = pd.concat((pd.concat(per_query_frames), summary),
                            sort=True).reset_index(drop=True)

    res = TrecRes()
    res.data = summary
    res.runid = run_id
    return res