コード例 #1
0
 def scorer(lucene_score, query_id) -> NamedNumber:
     """Score a perspective against a claim's log-odds table.

     ``query_id`` is split on "_" into a claim id and a perspective id.
     The perspective text is fetched with ``perspective_getter`` and
     tokenized with ``tokenizer.tokenize_stem``; the score is the sum of
     ``claim_log_odds_dict[claim_id][token]`` over those tokens.

     ``lucene_score`` is accepted but not used here.

     Returns a NamedNumber whose name lists every token with its
     individual value, formatted to two decimals.
     """
     cid, pid = query_id.split("_")
     stems = tokenizer.tokenize_stem(perspective_getter(int(pid)))
     # The claim id is used as-is (a string) to index the log-odds table.
     log_odds = claim_log_odds_dict[cid]

     # Human-readable breakdown: "<token> (<value>)" for each token.
     parts = []
     for tok in stems:
         parts.append("{0} ({1:.2f})".format(tok, log_odds[tok]))
     explanation = " ".join(parts)

     total = sum(log_odds[tok] for tok in stems)
     return NamedNumber(total, explanation)
コード例 #2
0
ファイル: basic_analysis.py プロジェクト: clover3/Chair
    def scorer(lucene_score, query_id) -> NamedNumber:
        """Return 1 if the perspective is in any gold group of the claim, else 0.

        ``query_id`` is split on "_" into a claim id and a perspective id.
        ``gold[int(claim_id)]`` is iterated as a collection of groups, and
        the integer perspective id is tested for membership in each one.

        ``lucene_score`` is accepted but not used here.

        Returns a NamedNumber with an empty name.
        """
        cid, pid = query_id.split("_")
        groups = gold[int(cid)]
        # Every group is checked (no early exit), as a plain loop would do.
        hits = [int(pid) in group for group in groups]
        label = 1 if any(hits) else 0
        return NamedNumber(label, "")
コード例 #3
0
ファイル: msmarco_predictor.py プロジェクト: clover3/Chair
    def scorer(query_p: Passage,
               candidate: List[Passage]) -> List[NamedNumber]:
        """Score each candidate passage against the query's first line.

        Only the first line of ``query_p.text`` is used as the query; it is
        printed, then paired with the text of every candidate. All pairs
        are sent in one ``client.request_multiple`` call, and each returned
        value is wrapped in a NamedNumber with an empty name.
        """
        first_line = query_p.text.splitlines()[0]
        print(first_line)

        pairs = [(first_line, passage.text) for passage in candidate]
        responses = client.request_multiple(pairs)
        return [NamedNumber(value, "") for value in responses]
コード例 #4
0
ファイル: next_sent_predictor.py プロジェクト: clover3/Chair
 def scorer(lucene_score, query_id) -> NamedNumber:
     """Combine a BM25 score with a model prediction for a claim/perspective pair.

     ``query_id`` is split on "_" into a claim id and a perspective id.
     The claim text comes from ``cid_to_text`` and the perspective text
     from ``perspective_getter``. The encoded pair is sent to
     ``proxy.predict``; the first returned value is negated and added,
     scaled by 10, to ``bm25_module.score(claim, perspective)``.

     ``lucene_score`` is accepted but not used here.

     Returns a NamedNumber whose name is the BM25 score's name followed by
     the negated prediction value.
     """
     cid_str, pid_str = query_id.split("_")
     cid = int(cid_str)
     perspective = perspective_getter(int(pid_str))
     claim = cid_to_text[cid]

     # The predictor takes a list of encoded pairs; only one is sent here.
     prediction = proxy.predict([encoder.encode_pair(claim, perspective)])
     ns_score = -float(prediction[0])

     bm25_score = bm25_module.score(claim, perspective)
     combined = bm25_score + ns_score * 10
     return NamedNumber(combined, bm25_score.name + " {}".format(ns_score))
コード例 #5
0
    def scorer(lucene_score, query_id) -> NamedNumber:
        """BM25 score of a claim/perspective pair, plus an optional extra-term score.

        ``query_id`` is split on "_" into a claim id and a perspective id.
        The base score is ``bm25_module.score(claim_text, perspective_text)``.
        If the integer claim id is present in ``rm_info``, the term
        frequencies stored under that id in ``rm_info_c`` are scored against
        the stemmed perspective tokens with ``bm25_module.score_inner`` and
        added to the base score. Otherwise the claim id is recorded in
        ``not_found`` and the base score is returned unchanged.

        ``lucene_score`` is accepted but not used here.
        """
        claim_id, p_id = query_id.split("_")
        nclaim_id = int(claim_id)
        claim_text = cid_to_text[nclaim_id]
        perspective_text = perspective_getter(int(p_id))
        base: NamedNumber = bm25_module.score(claim_text, perspective_text)

        if nclaim_id not in rm_info:
            # NOTE(review): the string form of the id is stored here, while
            # lookups elsewhere use the int form -- confirm this is intended.
            not_found.add(claim_id)
            return base

        # NOTE(review): membership is tested on rm_info but the value is read
        # from rm_info_c -- presumably they share keys; verify.
        extra_qtf = rm_info_c[nclaim_id]
        stems = tokenizer.tokenize_stem(perspective_text)
        extra = bm25_module.score_inner(extra_qtf, Counter(stems))
        merged_name = base.name + "({})".format(extra.name)
        return NamedNumber(base + extra, merged_name)
コード例 #6
0
    def score_inner(self, q_tf, t_tf) -> NamedNumber:
        """Sum per-term ``BM25_verbose`` scores of a query against a document.

        Args:
            q_tf: mapping of query term -> query term frequency.
            t_tf: mapping of document term -> term frequency; it is indexed
                with every query term, so it must tolerate missing terms
                (e.g. a Counter).

        Returns:
            A NamedNumber holding the summed score. Its name starts with an
            "Ideal Score" (the sum obtained when each query term's document
            frequency argument ``f`` is set to its query frequency), followed
            by the per-term contributions above 0.001, largest first.
        """
        doc_len = sum(t_tf.values())

        def term_score(term, term_freq, query_freq):
            # One BM25_verbose call using this scorer's collection statistics.
            return BM25_verbose(f=term_freq,
                                qf=query_freq,
                                df=self.df[term],
                                N=self.N,
                                dl=doc_len,
                                avdl=self.avdl,
                                b=self.b,
                                my_k1=self.k1,
                                my_k2=self.k2)

        total = 0
        contributions = []
        for term, query_freq in q_tf.items():
            value = term_score(term, t_tf[term], query_freq)
            total += value
            contributions.append((term, value))

        # Reference value: score the query terms as if f equalled qf.
        ideal = 0
        for term, query_freq in q_tf.items():
            ideal += term_score(term, query_freq, query_freq)

        ranked = sorted(contributions, key=lambda pair: pair[1], reverse=True)
        pieces = ["Ideal Score={0:.1f} ".format(ideal)]
        pieces.extend("{0}({1:.2f}) ".format(term, value)
                      for term, value in ranked
                      if value > 0.001)
        return NamedNumber(total, "".join(pieces))
コード例 #7
0
 def do_score(candidate_p: Passage) -> NamedNumber:
     """Score a candidate passage against the enclosing query passage.

     A candidate whose text equals the query's text gets a fixed score of
     -99 named "equal". Any other candidate is scored with
     ``bm25_module.score_inner`` on the query term frequencies and the
     candidate's term frequencies from ``basic_tf.get_tf``.
     """
     is_same_text = candidate_p.text == query_p.text
     if not is_same_text:
         candidate_tf = basic_tf.get_tf(candidate_p)
         return bm25_module.score_inner(q_tf, candidate_tf)
     return NamedNumber(-99, "equal")
コード例 #8
0
 def scorer(claim_id: int, p_tokens: List[str]) -> NamedNumber:
     """Sum the claim's log-odds values over the given tokens.

     Looks up ``claim_log_odds_dict[claim_id]`` and adds up its value for
     every token in ``p_tokens``.

     Returns a NamedNumber whose name lists each token with its individual
     value, formatted to two decimals and separated by spaces.
     """
     log_odds = claim_log_odds_dict[claim_id]
     explanation = " ".join(
         "{0} ({1:.2f})".format(tok, log_odds[tok]) for tok in p_tokens
     )
     total = sum(log_odds[tok] for tok in p_tokens)
     return NamedNumber(total, explanation)