def scorer(lucene_score, query_id) -> NamedNumber:
    """Score a claim/perspective pair by summing per-token values of the claim.

    Args:
        lucene_score: Not used by this scorer.
        query_id: String of the form ``"<claim_id>_<perspective_id>"``. It is
            split on ``"_"`` and must yield exactly two parts.

    Returns:
        A ``NamedNumber`` whose value is the sum of the claim's table entries
        for every stemmed token of the perspective text, and whose name lists
        each token followed by its value formatted to two decimals.
    """
    claim_id, perspective_id = query_id.split("_")
    perspective_text = perspective_getter(int(perspective_id))
    stems = tokenizer.tokenize_stem(perspective_text)
    # The table is indexed with the claim id exactly as it appears in
    # query_id (a string). Presumably it holds per-term log-odds -- confirm.
    term_weights = claim_log_odds_dict[claim_id]
    explanation = " ".join(
        "{0} ({1:.2f})".format(stem, term_weights[stem]) for stem in stems
    )
    total = sum(term_weights[stem] for stem in stems)
    return NamedNumber(total, explanation)
def scorer(lucene_score, query_id) -> NamedNumber:
    """Return 1 if the perspective appears in any gold group of the claim.

    Args:
        lucene_score: Not used by this scorer.
        query_id: String of the form ``"<claim_id>_<perspective_id>"``.

    Returns:
        A ``NamedNumber`` with value 1 when the integer perspective id is
        contained in at least one entry of ``gold[int(claim_id)]``, otherwise
        0. The name is always the empty string.
    """
    claim_id, perspective_id = query_id.split("_")
    gold_groups = gold[int(claim_id)]
    # Every group is tested (no early exit), one membership check per group.
    hits = [int(perspective_id) in group for group in gold_groups]
    label = 1 if any(hits) else 0
    return NamedNumber(label, "")
def scorer(query_p: Passage, candidate: List[Passage]) -> List[NamedNumber]:
    """Score every candidate passage against the first line of the query.

    Args:
        query_p: Query passage; only the first line of its ``text`` is used.
            An empty ``text`` raises ``IndexError``.
        candidate: Passages to score.

    Returns:
        One ``NamedNumber`` (with an empty name) per value returned by
        ``client.request_multiple``, in the order the client returned them.
    """
    first_line = query_p.text.splitlines()[0]
    print(first_line)
    # One (query line, candidate text) pair per candidate, sent as one batch.
    pairs = [(first_line, passage.text) for passage in candidate]
    raw_scores = client.request_multiple(pairs)
    return [NamedNumber(value, "") for value in raw_scores]
def scorer(lucene_score, query_id) -> NamedNumber:
    """Combine the BM25 score of a claim/perspective pair with a model output.

    Args:
        lucene_score: Not used by this scorer.
        query_id: String of the form ``"<claim_id>_<perspective_id>"``.

    Returns:
        A ``NamedNumber`` whose value is the BM25 score plus ten times the
        negated first prediction returned by ``proxy.predict``. Its name is
        the BM25 score's name followed by a space and the negated prediction.
    """
    claim_id, perspective_id = query_id.split("_")
    claim_key = int(claim_id)

    perspective_text = perspective_getter(int(perspective_id))
    claim_text = cid_to_text[claim_key]

    # A single encoded pair is sent, so only the first prediction is read.
    batch = [encoder.encode_pair(claim_text, perspective_text)]
    predictions = proxy.predict(batch)
    negated_prediction = -float(predictions[0])

    lexical = bm25_module.score(claim_text, perspective_text)
    return NamedNumber(
        lexical + negated_prediction * 10,
        lexical.name + " {}".format(negated_prediction),
    )
def scorer(lucene_score, query_id) -> NamedNumber:
    """BM25-score a claim/perspective pair, adding an extra-term score if known.

    Args:
        lucene_score: Not used by this scorer.
        query_id: String of the form ``"<claim_id>_<perspective_id>"``.

    Returns:
        The plain BM25 ``NamedNumber`` when the integer claim id is not in
        ``rm_info`` (the string claim id is then recorded in ``not_found``).
        Otherwise a ``NamedNumber`` whose value is the BM25 score plus the
        ``score_inner`` result for the claim's ``rm_info_c`` entry, and whose
        name is the BM25 name followed by the extra score's name in
        parentheses.
    """
    claim_id, perspective_id = query_id.split("_")
    claim_key = int(claim_id)
    claim_text = cid_to_text[claim_key]
    perspective_text = perspective_getter(int(perspective_id))
    base: NamedNumber = bm25_module.score(claim_text, perspective_text)

    if claim_key not in rm_info:
        # NOTE(review): the string id is stored here while every lookup above
        # uses the int id -- confirm this is what consumers of not_found expect.
        not_found.add(claim_id)
        return base

    # NOTE(review): membership is tested on rm_info but the value is read from
    # rm_info_c -- presumably both share the same keys; verify.
    extra_qtf = rm_info_c[claim_key]
    doc_tf = Counter(tokenizer.tokenize_stem(perspective_text))
    extra = bm25_module.score_inner(extra_qtf, doc_tf)
    combined_name = base.name + "({})".format(extra.name)
    return NamedNumber(base + extra, combined_name)
def score_inner(self, q_tf, t_tf) -> NamedNumber:
    """Sum per-term ``BM25_verbose`` values of a query against one text.

    Args:
        q_tf: Mapping from query term to its frequency in the query.
        t_tf: Mapping from term to its frequency in the text. It is indexed
            with every query term, so it must tolerate missing terms
            (callers in this file pass a ``Counter``).

    Returns:
        A ``NamedNumber`` whose value is the sum of the per-term values. Its
        name starts with the "ideal" score (the same sum recomputed with each
        term's text frequency set equal to its query frequency), followed by
        every term whose value exceeds 0.001, largest first.
    """
    doc_len = sum(t_tf.values())

    def term_score(term, text_freq, query_freq):
        # Single place for the collection statistics shared by both passes.
        return BM25_verbose(
            f=text_freq,
            qf=query_freq,
            df=self.df[term],
            N=self.N,
            dl=doc_len,
            avdl=self.avdl,
            b=self.b,
            my_k1=self.k1,
            my_k2=self.k2,
        )

    total = 0
    contributions = []
    for term, query_freq in q_tf.items():
        value = term_score(term, t_tf[term], query_freq)
        total += value
        contributions.append((term, value))

    # The ideal score is only reported in the name; it never alters the value.
    ideal = 0
    for term, query_freq in q_tf.items():
        ideal += term_score(term, query_freq, query_freq)

    header = "Ideal Score={0:.1f} ".format(ideal)
    ranked = sorted(contributions, key=lambda pair: pair[1], reverse=True)
    details = "".join(
        "{0}({1:.2f}) ".format(term, value)
        for term, value in ranked
        if value > 0.001
    )
    return NamedNumber(total, header + details)
def do_score(candidate_p: Passage) -> NamedNumber:
    """Score a candidate passage against the enclosing query passage.

    Args:
        candidate_p: Passage to score.

    Returns:
        ``NamedNumber(-99, "equal")`` when the candidate's text is identical
        to the query passage's text; otherwise the result of
        ``bm25_module.score_inner`` for the query and candidate term
        frequencies.
    """
    is_same_text = candidate_p.text == query_p.text
    if is_same_text:
        # Fixed sentinel score for a candidate identical to the query.
        return NamedNumber(-99, "equal")

    candidate_tf = basic_tf.get_tf(candidate_p)
    result = bm25_module.score_inner(q_tf, candidate_tf)
    return result
def scorer(claim_id: int, p_tokens: List[str]) -> NamedNumber:
    """Sum the claim's per-token table values over the given tokens.

    Args:
        claim_id: Key into ``claim_log_odds_dict``.
        p_tokens: Tokens to score. The sequence is traversed twice, once for
            the explanation and once for the sum.

    Returns:
        A ``NamedNumber`` whose value is the sum of the table entries for the
        tokens and whose name lists each token followed by its value
        formatted to two decimals.
    """
    # Presumably a per-term log-odds table for this claim -- confirm.
    term_weights = claim_log_odds_dict[claim_id]
    explanation = " ".join(
        "{0} ({1:.2f})".format(token, term_weights[token]) for token in p_tokens
    )
    total = sum(term_weights[token] for token in p_tokens)
    return NamedNumber(total, explanation)