def __init__(self, **kwargs):
    super(FusionAnalysis, self).__init__(**kwargs)
    self.h_q_info = load_query_info(self.q_info)
    # keep the results of each target eval in its own sub directory
    self.out_dir = os.path.join(
        self.out_dir, ntpath.basename(self.target_eva.split('.')[0]))
    if not os.path.exists(self.out_dir):
        os.makedirs(self.out_dir)
    self.h_q_eva = dict(load_gdeval_res(self.target_eva)[0])
    self.h_base_q_eva = dict(load_gdeval_res(self.base_eva)[0])
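# Editor's sketch: load_gdeval_res and load_query_info come from
# knowledge4ir.utils and are not defined in this file. Judging from the call
# sites here, load_gdeval_res parses a gdeval.pl csv ("runid,topic,ndcg,err",
# with an 'amean' summary row) into ([(qid, (ndcg, err)), ...], mean_ndcg,
# mean_err), returning only the per-query list when with_mean is False.
# A minimal, assumed reimplementation for reference:
def _load_gdeval_res_sketch(eva_in, with_mean=True):
    l_q_eva = []
    ndcg, err = 0.0, 0.0
    for line in open(eva_in):
        cols = line.strip().split(',')
        if len(cols) < 4:
            continue
        try:
            scores = (float(cols[-2]), float(cols[-1]))
        except ValueError:
            continue  # skip the csv header line
        if cols[1] == 'amean':
            ndcg, err = scores  # the summary row holds the mean ndcg/err
        else:
            l_q_eva.append((cols[1], scores))
    if with_mean:
        return l_q_eva, ndcg, err
    return l_q_eva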
def get_rel_ndcg(eva_res, base_eva_res):
    l_q_eva, __, __ = load_gdeval_res(eva_res)
    l_base_q_eva, __, __ = load_gdeval_res(base_eva_res)
    h_q_rel_ndcg = dict()
    h_base_q_eva = dict(l_base_q_eva)
    for q, (ndcg, __) in l_q_eva:
        base_ndcg = h_base_q_eva.get(q, [0, 0])[0]
        rel = ndcg - base_ndcg
        h_q_rel_ndcg[q] = rel
    return h_q_rel_ndcg
def perfect_merge(eva_a_in, eva_b_in):
    l_q_eva_a, ndcg_a, err_a = load_gdeval_res(eva_a_in)
    l_q_eva_b, ndcg_b, err_b = load_gdeval_res(eva_b_in)
    for p in xrange(11):
        prob = p * 0.1
        l_q_best_eva, best_ndcg, best_err = pick_best(l_q_eva_a, l_q_eva_b, prob)
        # print "%.2f,amean,%.6f,%.6f" % (prob, best_ndcg, best_err)
        print '%.2f%%,relative,' % (prob * 100) + \
            "{0:.02f}%".format((best_ndcg / max(ndcg_a, ndcg_b) - 1) * 100) + "," + \
            "{0:.02f}%".format((best_err / max(err_a, err_b) - 1) * 100)
    return
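# Editor's sketch: pick_best is not shown in this file. From perfect_merge it
# appears to simulate an oracle that routes each query to the better of the
# two runs with probability `prob`; falling back to run a when the oracle is
# not consulted is an assumption on my part.
def _pick_best_sketch(l_q_eva_a, l_q_eva_b, prob):
    import random
    h_b = dict(l_q_eva_b)
    l_q_best = []
    for q, eva_a in l_q_eva_a:
        eva_b = h_b.get(q, (0.0, 0.0))
        best = eva_a if eva_a[0] >= eva_b[0] else eva_b
        l_q_best.append((q, best if random.random() < prob else eva_a))
    n = max(len(l_q_best), 1)
    best_ndcg = sum(e[0] for __, e in l_q_best) / n
    best_err = sum(e[1] for __, e in l_q_best) / n
    return l_q_best, best_ndcg, best_err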
def linking_merge(eva_a_in, eva_b_in, q_info_in, q_manual_info_in, out_name):
    l_qid_eva_a, ndcg_a, err_a = load_gdeval_res(eva_a_in)
    l_qid_eva_b, ndcg_b, err_b = load_gdeval_res(eva_b_in)
    h_q_f1 = calc_q_link_accuracy(q_info_in, q_manual_info_in)
    out = open(out_name, 'w')
    for p in xrange(11):
        f1_bar = p * 0.1
        l_q_merge_eva, merge_ndcg, merge_err = pick_via_q_linking_accuracy(
            l_qid_eva_a, l_qid_eva_b, h_q_f1, f1_bar)
        # print "%.2f,amean,%.6f,%.6f" % (prob, best_ndcg, best_err)
        print >> out, '%.2f%%,relative,' % (f1_bar * 100) + \
            "{0:.02f}%".format((merge_ndcg / max(ndcg_a, ndcg_b) - 1) * 100) + "," + \
            "{0:.02f}%".format((merge_err / max(err_a, err_b) - 1) * 100)
    out.close()
    return
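# Editor's sketch: pick_via_q_linking_accuracy is also external. The usage in
# linking_merge suggests it routes a query to run a when that query's entity
# linking f1 clears f1_bar, and to run b otherwise; which of the two runs is
# the entity-based one is an assumption.
def _pick_via_q_linking_accuracy_sketch(l_q_eva_a, l_q_eva_b, h_q_f1, f1_bar):
    h_b = dict(l_q_eva_b)
    l_q_merge = []
    for q, eva_a in l_q_eva_a:
        eva_b = h_b.get(q, (0.0, 0.0))
        l_q_merge.append((q, eva_a if h_q_f1.get(q, 0) >= f1_bar else eva_b))
    n = max(len(l_q_merge), 1)
    return (l_q_merge,
            sum(e[0] for __, e in l_q_merge) / n,
            sum(e[1] for __, e in l_q_merge) / n)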
def __init__(self, **kwargs):
    super(PrettyCompEAtt, self).__init__(**kwargs)
    self.l_h_q_eva = [
        dict(load_gdeval_res(eval_in, False))
        for eval_in in self.l_eval_in
    ]
    self.l_h_qid_e_att = [
        self._load_e_att(att_in)
        for att_in in self.l_e_att_in
    ]
    logging.info('eval res and e att res loaded')
def per_cv_dir_eval(self, cv_dir):
    logging.info('start [%s]', cv_dir)
    collect_cv_results(cv_dir, self.qrel)
    method_base = ntpath.basename(cv_dir.strip('/'))
    this_out_dir = os.path.join(self.out_dir, method_base)
    if not os.path.exists(this_out_dir):
        os.makedirs(this_out_dir)
    # subprocess.check_output(['cp', cv_dir + '/eval', cv_dir + '/trec', this_out_dir])
    shutil.copyfile(cv_dir + '/eval', this_out_dir + '/eval')
    shutil.copyfile(cv_dir + '/trec', this_out_dir + '/trec')
    logging.info('res moved to [%s]', this_out_dir)
    __, ndcg, err = load_gdeval_res(cv_dir + '/eval')
    return method_base, ndcg, err
def process(self, eva_in, out_name):
    l_q_eva = load_gdeval_res(eva_in, with_mean=False)
    l_avg_doc_len = []
    l_ndcg = []
    for q, eva in l_q_eva:
        if q not in self.h_q_meta:
            logging.warn('q [%s] has no meta data', q)
            continue  # skip queries without meta data
        l_ndcg.append(eva[0])
        l_avg_doc_len.append(self.h_q_meta[q]['avg_doc_len'])
    l_bin_res, l_bin_range = bin_score(l_avg_doc_len, l_ndcg, self.nb_bin)
    h_res = {
        'avg_doc_len_bin': l_bin_res,
        'avg_doc_len_bin_range': l_bin_range,
    }
    json.dump(h_res, open(out_name, 'w'), indent=1)
    logging.info('finished, results at [%s]', out_name)
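# Editor's sketch: bin_score is not defined here. From process() it takes the
# per-query x values (avg doc length), the per-query scores (ndcg), and a bin
# count, and returns the mean score per bin plus the bin ranges. Equal-width
# binning is assumed; equal-frequency binning would be equally plausible.
def _bin_score_sketch(l_x, l_score, nb_bin):
    lo, hi = min(l_x), max(l_x)
    width = (hi - lo) / float(nb_bin)
    if not width:
        width = 1.0  # degenerate case: all x identical
    l_bin = [[] for __ in range(nb_bin)]
    for x, score in zip(l_x, l_score):
        idx = min(int((x - lo) / width), nb_bin - 1)
        l_bin[idx].append(score)
    l_bin_res = [sum(b) / len(b) if b else 0.0 for b in l_bin]
    l_bin_range = [[lo + i * width, lo + (i + 1) * width]
                   for i in range(nb_bin)]
    return l_bin_res, l_bin_range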
def _load_trec_eval_results(self, run_dir):
    h_eval_per_q = dict()
    h_eval = dict()
    for depth in self.l_target_depth:
        eva_res_name = os.path.join(run_dir, self.eva_prefix + '%02d' % depth)
        l_q_eva, ndcg, err = load_gdeval_res(eva_res_name)
        l_q_eva.sort(key=lambda item: int(item[0]))
        l_ndcg = [item[1][0] for item in l_q_eva]
        l_err = [item[1][1] for item in l_q_eva]
        for metric in self.l_target_metric:
            name = metric + '@%02d' % depth
            if metric == 'ndcg':
                h_eval_per_q[name] = l_ndcg
                h_eval[name] = ndcg
            elif metric == 'err':
                h_eval_per_q[name] = l_err
                h_eval[name] = err
            else:
                logging.error('[%s] metric not implemented', metric)
                raise NotImplementedError
    return h_eval_per_q, h_eval
def __init__(self, **kwargs):
    super(QLenPerformanceAna, self).__init__(**kwargs)
    self.h_q_info = load_query_info(self.q_info_in)
    self.h_rel_ndcg = get_rel_ndcg(self.eva_in, self.base_eva_in)
    self.h_base_eva = dict(load_gdeval_res(self.base_eva_in, False))
    self.h_eva = dict(load_gdeval_res(self.eva_in, False))
generate query level label based on whether a method performs better than b
input:
    eva of a
    eva of b
output:
    q \t +1/-1
"""

from knowledge4ir.utils import load_gdeval_res
import sys

if 4 != len(sys.argv):
    print "3 para: eva 1 + eva 2 + q level label (1>2 or not)"
    sys.exit(-1)

l_q_eva_a = load_gdeval_res(sys.argv[1])[0]
l_q_eva_b = load_gdeval_res(sys.argv[2])[0]
h_q_eva_b = dict(l_q_eva_b)
out = open(sys.argv[3], 'w')
pos = 0
neg = 0
for q, (ndcg, err) in l_q_eva_a:
    y = 1
    if q in h_q_eva_b:
        if ndcg < h_q_eva_b[q][0]:
            y = -1
    if y > 0:
        pos += 1
    else:
        neg += 1
    # emit the per-query label (q \t +1/-1) promised in the doc string
    print >> out, '%s\t%d' % (q, y)
out.close()
print "%d pos vs %d neg" % (pos, neg)
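# usage (hypothetical file names):
#   python this_script.py run_a.eval run_b.eval q_label.tsv
# q_label.tsv then holds one "qid\t+1/-1" line per query in run a's eval,
# +1 when run a's ndcg is at least run b's on that query.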