Example #1
def __init__(self, **kwargs):
    super(FusionAnalysis, self).__init__(**kwargs)
    self.h_q_info = load_query_info(self.q_info)
    # name the output directory after the target eval file's base name
    self.out_dir = os.path.join(
        self.out_dir, ntpath.basename(self.target_eva.split('.')[0]))
    if not os.path.exists(self.out_dir):
        os.makedirs(self.out_dir)
    # per-query (ndcg, err) pairs for the target run and the baseline run
    self.h_q_eva = dict(load_gdeval_res(self.target_eva)[0])
    self.h_base_q_eva = dict(load_gdeval_res(self.base_eva)[0])
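All of these examples assume the same contract for load_gdeval_res from knowledge4ir.utils: it parses a gdeval output file and returns a list of (qid, (ndcg, err)) pairs together with the two amean scores, and with with_mean=False it returns only the per-query list. A minimal sketch of that assumed behavior (the parsing details are hypothetical, inferred from how the callers unpack the result):

def load_gdeval_res_sketch(eva_path, with_mean=True):
    # gdeval output lines look like: runid,qid,ndcg@k,err@k
    l_q_eva = []
    mean_ndcg, mean_err = 0.0, 0.0
    for line in open(eva_path):
        cols = line.strip().split(',')
        if len(cols) != 4 or cols[1] == 'topic':  # skip the header line
            continue
        __, qid, ndcg, err = cols
        if qid == 'amean':
            mean_ndcg, mean_err = float(ndcg), float(err)
        else:
            l_q_eva.append((qid, (float(ndcg), float(err))))
    if with_mean:
        return l_q_eva, mean_ndcg, mean_err
    return l_q_eva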
Example #2
def get_rel_ndcg(eva_res, base_eva_res):
    # per-query NDCG gain of a run over a baseline run
    l_q_eva, __, __ = load_gdeval_res(eva_res)
    l_base_q_eva, __, __ = load_gdeval_res(base_eva_res)
    h_q_rel_ndcg = dict()
    h_base_q_eva = dict(l_base_q_eva)
    for q, (ndcg, __) in l_q_eva:
        # queries absent from the baseline count as zero
        base_ndcg = h_base_q_eva.get(q, [0, 0])[0]
        h_q_rel_ndcg[q] = ndcg - base_ndcg

    return h_q_rel_ndcg
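A hypothetical call, with the two gdeval file names made up for illustration:

h_gain = get_rel_ndcg('target.eval', 'baseline.eval')
l_hurt = [q for q, rel in h_gain.items() if rel < 0]  # queries the target run hurt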
Example #3
def perfect_merge(eva_a_in, eva_b_in):
    l_q_eva_a, ndcg_a, err_a = load_gdeval_res(eva_a_in)
    l_q_eva_b, ndcg_b, err_b = load_gdeval_res(eva_b_in)
    # sweep the oracle-pick probability from 0% to 100% in 10% steps
    for p in xrange(11):
        prob = p * 0.1
        l_q_best_eva, best_ndcg, best_err = pick_best(l_q_eva_a, l_q_eva_b, prob)
        # report the relative gain over the better single system
        print '%.2f%%,relative,%.2f%%,%.2f%%' % (
            prob * 100,
            (best_ndcg / max(ndcg_a, ndcg_b) - 1) * 100,
            (best_err / max(err_a, err_b) - 1) * 100,
        )
    return
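pick_best is not shown in the source; a plausible reading, given the "perfect merge" name, is an oracle that takes the better system's per-query result with probability prob. A purely hypothetical sketch under that assumption:

import random

def pick_best_sketch(l_q_eva_a, l_q_eva_b, prob):
    # hypothetical stand-in for pick_best: with probability `prob`, pick the
    # per-query winner (by NDCG); otherwise keep system a's result
    h_b = dict(l_q_eva_b)
    l_q_best = []
    for q, (ndcg_a, err_a) in l_q_eva_a:
        ndcg_b, err_b = h_b.get(q, (0.0, 0.0))
        if random.random() < prob and ndcg_b > ndcg_a:
            l_q_best.append((q, (ndcg_b, err_b)))
        else:
            l_q_best.append((q, (ndcg_a, err_a)))
    best_ndcg = sum(item[1][0] for item in l_q_best) / float(max(len(l_q_best), 1))
    best_err = sum(item[1][1] for item in l_q_best) / float(max(len(l_q_best), 1))
    return l_q_best, best_ndcg, best_err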
Example #4
def linking_merge(eva_a_in, eva_b_in, q_info_in, q_manual_info_in, out_name):
    l_qid_eva_a, ndcg_a, err_a = load_gdeval_res(eva_a_in)
    l_qid_eva_b, ndcg_b, err_b = load_gdeval_res(eva_b_in)
    # per-query entity linking F1, used to decide which system to trust
    h_q_f1 = calc_q_link_accuracy(q_info_in, q_manual_info_in)
    out = open(out_name, 'w')
    # sweep the linking-F1 threshold from 0% to 100% in 10% steps
    for p in xrange(11):
        f1_bar = p * 0.1
        l_q_merge_eva, merge_ndcg, merge_err = pick_via_q_linking_accuracy(
            l_qid_eva_a, l_qid_eva_b, h_q_f1, f1_bar)
        # report the relative gain over the better single system
        print >> out, '%.2f%%,relative,%.2f%%,%.2f%%' % (
            f1_bar * 100,
            (merge_ndcg / max(ndcg_a, ndcg_b) - 1) * 100,
            (merge_err / max(err_a, err_b) - 1) * 100,
        )
    out.close()
    return
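A hypothetical invocation, with all file names invented for illustration:

linking_merge('sys_a.eval', 'sys_b.eval', 'q_info.json', 'q_manual_info.json', 'merge_sweep.csv')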
Example #5
def __init__(self, **kwargs):
    super(PrettyCompEAtt, self).__init__(**kwargs)
    # per-query eval results, one dict per input run
    self.l_h_q_eva = [
        dict(load_gdeval_res(eval_in, False)) for eval_in in self.l_eval_in
    ]
    # per-query entity attention results, one dict per input file
    self.l_h_qid_e_att = [
        self._load_e_att(att_in) for att_in in self.l_e_att_in
    ]
    logging.info('eval res and e att res loaded')
Example #6
def per_cv_dir_eval(self, cv_dir):
    logging.info('start [%s]', cv_dir)
    # gather the per-fold rankings and evaluate them against the qrel
    collect_cv_results(cv_dir, self.qrel)
    method_base = ntpath.basename(cv_dir.strip('/'))
    this_out_dir = os.path.join(self.out_dir, method_base)
    if not os.path.exists(this_out_dir):
        os.makedirs(this_out_dir)
    shutil.copyfile(cv_dir + '/eval', this_out_dir + '/eval')
    shutil.copyfile(cv_dir + '/trec', this_out_dir + '/trec')
    logging.info('res moved to [%s]', this_out_dir)
    __, ndcg, err = load_gdeval_res(cv_dir + '/eval')
    return method_base, ndcg, err
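A hypothetical driver loop inside the same class, with l_cv_dir standing in for however the cross-validation directories are collected:

for cv_dir in l_cv_dir:
    method, ndcg, err = self.per_cv_dir_eval(cv_dir)
    logging.info('[%s] ndcg %f err %f', method, ndcg, err)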
Example #7
def process(self, eva_in, out_name):
    l_q_eva = load_gdeval_res(eva_in, with_mean=False)
    l_avg_doc_len = []
    l_ndcg = []
    for q, eva in l_q_eva:
        if q not in self.h_q_meta:
            logging.warning('q [%s] has no meta data', q)
            continue
        l_ndcg.append(eva[0])
        l_avg_doc_len.append(self.h_q_meta[q]['avg_doc_len'])

    # bucket per-query NDCG by the query's average document length
    l_bin_res, l_bin_range = bin_score(l_avg_doc_len, l_ndcg, self.nb_bin)
    h_res = {
        'avg_doc_len_bin': l_bin_res,
        'avg_doc_len_bin_range': l_bin_range,
    }
    json.dump(h_res, open(out_name, 'w'), indent=1)
    logging.info('finished, results at [%s]', out_name)
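bin_score is not shown here; judging from how its outputs are stored, it buckets the x values (average document length) into nb_bin bins and reports a per-bin score summary plus the bin ranges. A hypothetical equal-width version, using numpy for brevity:

import numpy as np

def bin_score_sketch(l_x, l_score, nb_bin):
    # hypothetical stand-in for bin_score: equal-width bins over the x range,
    # mean score per bin; the real binning scheme may differ
    x, score = np.array(l_x), np.array(l_score)
    edges = np.linspace(x.min(), x.max(), nb_bin + 1)
    l_bin_res, l_bin_range = [], []
    for i in range(nb_bin):
        if i + 1 == nb_bin:
            mask = (x >= edges[i]) & (x <= edges[i + 1])
        else:
            mask = (x >= edges[i]) & (x < edges[i + 1])
        l_bin_range.append([float(edges[i]), float(edges[i + 1])])
        l_bin_res.append(float(score[mask].mean()) if mask.any() else 0.0)
    return l_bin_res, l_bin_range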
Example #8
def _load_trec_eval_results(self, run_dir):
    h_eval_per_q = dict()
    h_eval = dict()
    for depth in self.l_target_depth:
        # one gdeval output file per evaluation depth, e.g. <eva_prefix>20
        eva_res_name = os.path.join(run_dir,
                                    self.eva_prefix + '%02d' % depth)
        l_q_eva, ndcg, err = load_gdeval_res(eva_res_name)
        # order the per-query scores by integer qid
        l_q_eva.sort(key=lambda item: int(item[0]))
        l_ndcg = [item[1][0] for item in l_q_eva]
        l_err = [item[1][1] for item in l_q_eva]
        for metric in self.l_target_metric:
            name = metric + '@%02d' % depth
            if metric == 'ndcg':
                h_eval_per_q[name] = l_ndcg
                h_eval[name] = ndcg
            elif metric == 'err':
                h_eval_per_q[name] = l_err
                h_eval[name] = err
            else:
                logging.error('[%s] metric not implemented', metric)
                raise NotImplementedError
    return h_eval_per_q, h_eval
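The returned dictionaries are keyed by metric@depth; a hypothetical read-out (run_dir invented for illustration):

h_eval_per_q, h_eval = self._load_trec_eval_results(run_dir)
l_ndcg_at_20 = h_eval_per_q['ndcg@20']  # per-query scores, sorted by qid
mean_ndcg_at_20 = h_eval['ndcg@20']     # the amean score from gdeval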
Example #9
def __init__(self, **kwargs):
    super(QLenPerformanceAna, self).__init__(**kwargs)
    self.h_q_info = load_query_info(self.q_info_in)
    # per-query NDCG gain over the baseline (see get_rel_ndcg above)
    self.h_rel_ndcg = get_rel_ndcg(self.eva_in, self.base_eva_in)
    self.h_base_eva = dict(load_gdeval_res(self.base_eva_in, False))
    self.h_eva = dict(load_gdeval_res(self.eva_in, False))
Example #10
"""
generate query level labels based on whether method a performs better than method b
input:
    eva of a
    eva of b
output:
    q \t +1/-1
"""

from knowledge4ir.utils import load_gdeval_res
import sys

if 4 != len(sys.argv):
    print "3 args: eva a + eva b + output q level label (+1 if a >= b, else -1)"
    sys.exit(-1)

l_q_eva_a = load_gdeval_res(sys.argv[1])[0]
l_q_eva_b = load_gdeval_res(sys.argv[2])[0]
h_q_eva_b = dict(l_q_eva_b)
out = open(sys.argv[3], 'w')

pos = 0
neg = 0
for q, (ndcg, err) in l_q_eva_a:
    # +1 when run a is at least as good as run b on NDCG, -1 otherwise
    y = 1
    if q in h_q_eva_b:
        if ndcg < h_q_eva_b[q][0]:
            y = -1
    if y > 0:
        pos += 1
    else:
        neg += 1
    # emit the query level label promised in the docstring
    print >> out, '%s\t%d' % (q, y)

out.close()
print 'pos: %d, neg: %d' % (pos, neg)
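A hypothetical command line (the script name is invented here):

python q_pairwise_label.py sys_a.eval sys_b.eval q_label.tsv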