def calc_q_link_accuracy(q_info_in, q_manual_info_in):
    """Score automatically linked query entities against manual labels.

    For every query in ``q_info_in`` the predicted entities are taken from
    the 'tagme' annotations if present, otherwise from 'cmns'; the gold
    entities come from the 'manual' annotations of the same query id in
    ``q_manual_info_in``. Only the first element of each annotation is
    compared (presumably the entity id -- confirm against the data format).

    Args:
        q_info_in: input handed to ``load_query_info`` for the predictions.
        q_manual_info_in: input handed to ``load_query_info`` for the labels.

    Returns:
        dict mapping query id to F1. A query with neither predicted nor
        labelled entities scores 1; a query with no overlap scores 0.

    Side effects:
        Prints the resulting dict as indented JSON.
    """
    h_qid_info = load_query_info(q_info_in)
    h_qid_manual_info = load_query_info(q_manual_info_in)
    h_q_f1 = {}
    for qid, h_info in h_qid_info.items():
        l_e = []
        if 'tagme' in h_info:
            l_e = [ana[0] for ana in h_info['tagme']['query']]
        elif 'cmns' in h_info:
            l_e = [ana[0] for ana in h_info['cmns']['query']]

        l_label_e = []
        if qid in h_qid_manual_info:
            l_label_e = [
                ana[0] for ana in h_qid_manual_info[qid]['manual']['query']
            ]

        # Both sides must be empty for a trivially perfect score. The former
        # `len(l_e) == 0 & len(l_label_e) == 0` bound `&` tighter than `==`
        # and so only tested l_e, giving F1 = 1 to queries whose labelled
        # entities were all missed.
        if not l_e and not l_label_e:
            h_q_f1[qid] = 1
            continue

        s_e = set(l_e)
        s_true = set(l_label_e)
        overlap = float(len(s_e & s_true))
        if not overlap:
            # No shared entity: precision or recall is zero, hence F1 is too.
            f1 = 0
        else:
            # A non-zero overlap implies both sets are non-empty.
            prec = overlap / len(s_e)
            recall = overlap / len(s_true)
            f1 = 2.0 * prec * recall / (prec + recall)
        h_q_f1[qid] = f1

    # Parenthesised single-argument form prints identically under Python 2.
    print(json.dumps(h_q_f1, indent=1))
    return h_q_f1
Beispiel #2
0
def process(q_info_in, out_name):
    """Write the two average lengths computed by ``avg_len`` to a text file.

    Args:
        q_info_in: input handed to ``load_query_info``.
        out_name: path of the output file; it is overwritten.

    The file receives two lines, ``bow_avg_len: <float>`` and
    ``boe_avg_len: <float>``.
    """
    h_q_info = load_query_info(q_info_in)
    bow_len, boe_len = avg_len(h_q_info)
    # The context manager closes the handle even when formatting or writing
    # raises, which the explicit open()/close() pair did not guarantee.
    with open(out_name, 'w') as out:
        out.write('bow_avg_len: %f\nboe_avg_len: %f' % (bow_len, boe_len))
        # Trailing newline that the print statement used to append.
        out.write('\n')
Beispiel #3
0
 def __init__(self, **kwargs):
     """Load query info and evaluation results; prepare the output directory.

     Keyword arguments are forwarded unchanged to the parent initializer.
     NOTE(review): ``q_info``, ``out_dir``, ``target_eva`` and ``base_eva``
     are read from ``self`` right after the parent call, so they are
     presumably configured there -- confirm against the base class.

     ``self.out_dir`` is extended with a sub-directory named after the
     target evaluation file (file name up to its first dot), and that
     directory is created when it does not exist yet.
     """
     super(FusionAnalysis, self).__init__(**kwargs)
     self.h_q_info = load_query_info(self.q_info)

     # Drop the directory part BEFORE cutting at the first dot. Splitting the
     # whole path first truncated it at any dot in a directory component
     # (e.g. './runs/x.eval' or '/data/v1.2/x.eval'), producing an empty or
     # unrelated sub-directory name.
     run_name = ntpath.basename(self.target_eva).split('.')[0]
     self.out_dir = os.path.join(self.out_dir, run_name)
     if not os.path.exists(self.out_dir):
         os.makedirs(self.out_dir)

     # Only the first element returned by load_gdeval_res is kept, as a dict.
     self.h_q_eva = dict(load_gdeval_res(self.target_eva)[0])
     self.h_base_q_eva = dict(load_gdeval_res(self.base_eva)[0])
def get_target_surfaceforms(q_info_in):
    """Collect the lower-cased surface forms of all 'tagme' query annotations.

    Each annotation's second and third elements are used as slice bounds
    into the query text (presumably character offsets -- confirm against
    the annotation format).

    Args:
        q_info_in: input handed to ``load_query_info``.

    Returns:
        dict mapping every distinct lower-cased surface form to a fresh
        empty list.

    Side effects:
        Prints the number of distinct surface forms.
    """
    surface_forms = {}
    for _qid, info in load_query_info(q_info_in).items():
        text = info['query']
        for annotation in info['tagme']['query']:
            start, end = annotation[1], annotation[2]
            surface_forms[text[start:end].lower()] = []
    print("total [%d] sf" % len(surface_forms))
    return surface_forms
    def __init__(self, **kwargs):
        """Set up external resources, rankings and labels for the analysis.

        Keyword arguments are forwarded both to the parent initializer and
        to ``LeToRFeatureExternalInfo``. The output directory is created
        when it does not exist yet.
        NOTE(review): ``q_info_in``, ``trec_with_info_in``, ``qrel_in`` and
        ``out_dir`` are presumably configured by the parent initializer --
        confirm against the base class.
        """
        super(RankComponentAna, self).__init__(**kwargs)

        # Expose the pieces of the external info that are used directly.
        self.external_info = LeToRFeatureExternalInfo(**kwargs)
        ext = self.external_info
        self.embedding = ext.l_embedding[0]  # only the first embedding is kept
        self.h_entity_texts = ext.h_entity_texts
        self.h_field_h_df = ext.h_field_h_df
        self.h_corpus_stat = ext.h_corpus_stat

        # Query info, ranked documents and relevance labels.
        self.h_q_info = load_query_info(self.q_info_in)
        self.ll_qid_ranked_doc = load_trec_ranking_with_info(
            self.trec_with_info_in
        )
        self.h_qrel = load_trec_labels_dict(self.qrel_in)

        target_dir = self.out_dir
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
Beispiel #6
0
    def pipe_extract(self, q_info_in=None, out_name=None):
        """Extract features for every query and dump them with their labels.

        Args:
            q_info_in: input handed to ``load_query_info``; falls back to
                ``self.q_info_in`` when falsy.
            out_name: destination passed to ``_dump_feature_svm``; falls
                back to ``self.out_name`` when falsy.

        Queries are processed in ascending order of their integer-converted
        id. A query missing from ``self.h_label`` gets the label -1.
        """
        q_info_in = q_info_in or self.q_info_in
        out_name = out_name or self.out_name

        # Order by the numeric value of the query id, not its string form.
        ordered = sorted(
            load_query_info(q_info_in).items(),
            key=lambda pair: int(pair[0]),
        )

        qids, labels, features = [], [], []
        for qid, h_info in ordered:
            features.append(self._extract(qid, h_info))
            labels.append(self.h_label[qid] if qid in self.h_label else -1)
            qids.append(qid)

        self._dump_feature_svm(labels, features, qids, out_name)
        logging.info('q att feature extracted to [%s]', out_name)
Beispiel #7
0
 def _load_data(self):
     """Load the reference query info when a reference input is configured.

     Does nothing when ``self.ref_q_info_in`` is falsy.
     """
     ref_in = self.ref_q_info_in
     if not ref_in:
         return
     self.h_ref_q_info = load_query_info(ref_in)
Beispiel #8
0
 def __init__(self, **kwargs):
     """Load the query info and evaluation results used by this analysis.

     Keyword arguments are forwarded unchanged to the parent initializer.
     NOTE(review): ``q_info_in``, ``eva_in`` and ``base_eva_in`` are read
     from ``self`` right after the parent call, so they are presumably
     configured there -- confirm against the base class.
     """
     super(QLenPerformanceAna, self).__init__(**kwargs)
     self.h_q_info = load_query_info(self.q_info_in)
     # get_rel_ndcg is given eva_in first and base_eva_in second.
     self.h_rel_ndcg = get_rel_ndcg(self.eva_in, self.base_eva_in)
     # The result of load_gdeval_res is turned into a dict as a whole here.
     # NOTE(review): the meaning of the second argument (False) is not
     # visible from this block -- check load_gdeval_res.
     self.h_base_eva = dict(load_gdeval_res(self.base_eva_in, False))
     self.h_eva = dict(load_gdeval_res(self.eva_in, False))