def combine_pc_rel_with_cpid(prediction_file, info: Dict) \
        -> Dict[DataID, Tuple[CPIDPair, Logits, Logits]]:
    """Pair each perspective prediction with the most recent claim prediction.

    Entries are read in order from ``prediction_file``. An entry whose info
    record contains ``'cid'`` is remembered as the current claim; an entry
    whose record contains ``'pid'`` (and no ``'cid'``) is combined with that
    remembered claim.

    Args:
        prediction_file: Passed to ``EstimatorPredictionViewer``.
        info: Mapping from data id to a record holding ``'cid'`` or ``'pid'``.

    Returns:
        Mapping from data id to ``(cpid, claim_logits, perspective_logits)``.
        The tuple is stored under the perspective entry's id and also under
        the id of the last claim entry, so a claim id keeps only the tuple of
        the latest perspective that followed it.

    Raises:
        AssertionError: If an info record has neither ``'cid'`` nor ``'pid'``.
    """
    data = EstimatorPredictionViewer(prediction_file)
    print("Num data ", data.data_len)
    out_d: Dict[DataID, Tuple[CPIDPair, Logits, Logits]] = {}
    last_claim = None
    prev_data_id = None
    ticker = TimeEstimator(data.data_len)
    for entry in data:
        ticker.tick()
        logits = entry.get_vector("logits")
        data_id = entry.get_vector("data_id")[0]
        # NOTE(review): the sibling collectors in this file look up
        # info[str(data_id)]; this one uses the raw id — confirm the key type.
        try:
            cur_info = info[data_id]
        except KeyError as e:
            # Entries without an info record are reported and skipped.
            print(e)
            continue

        if 'cid' in cur_info:
            last_claim = cur_info['cid'], logits
            prev_data_id = data_id
        elif 'pid' in cur_info:
            pid = cur_info['pid']
            # NOTE(review): if a perspective entry arrives before any claim
            # entry, last_claim is still None and this unpack raises
            # TypeError — confirm the input ordering guarantees a claim first.
            cid, c_logits = last_claim
            cpid = CPIDPair((cid, pid))
            combined = (cpid, c_logits, logits)
            out_d[data_id] = combined
            out_d[prev_data_id] = combined
        else:
            # Explicit raise instead of `assert False`, which is removed
            # when Python runs with -O.
            raise AssertionError(
                "info record for data_id {} has neither 'cid' nor 'pid'".format(data_id))
    return out_d
def collect_scores(prediction_file, info: Dict, logit_to_score) \
        -> Dict[DataID, Tuple[CPIDPair, float]]:
    """Score every prediction entry and attach its (cid, pid) pair.

    Args:
        prediction_file: Passed to ``EstimatorPredictionViewer``.
        info: Mapping keyed by ``str(data_id)``. A record containing ``'kdp'``
            is first passed to ``parse_info_inner`` with ``qck_convert_map``;
            one containing ``'query'`` (but no ``'kdp'``) is passed with
            ``qc_convert_map``. In both cases the ids are then read from
            ``record['query'].query_id`` and ``record['candidate'].id``.
            Any other record is expected to carry ``'cid'`` and ``'pid'``.
        logit_to_score: Callable applied to each entry's ``"logits"`` vector.

    Returns:
        Mapping from data id to ``(cpid, score)``. Entries whose processing
        raises ``KeyError`` are reported on stdout and left out.
    """
    viewer = EstimatorPredictionViewer(prediction_file)
    print("Num data ", viewer.data_len)
    scores_by_id: Dict[DataID, Tuple[CPIDPair, float]] = {}
    for record in viewer:
        score = logit_to_score(record.get_vector("logits"))
        data_id = record.get_vector("data_id")[0]
        try:
            meta = info[str(data_id)]
            has_kdp = 'kdp' in meta
            if has_kdp or 'query' in meta:
                # Both structured layouts share the same parsing path and
                # differ only in the conversion map.
                convert_map = qck_convert_map if has_kdp else qc_convert_map
                parse_info_inner(meta, convert_map, True)
                cid = int(meta['query'].query_id)
                pid = int(meta['candidate'].id)
            else:
                cid, pid = meta['cid'], meta['pid']
            scores_by_id[data_id] = (CPIDPair((cid, pid)), score)
        except KeyError as e:
            print("Key error", e)
            print("data_id", data_id)
    return scores_by_id
def collect_data_w_cpid(prediction_file, info: Dict, logit_to_score) \
        -> List[Dict]:
    """Return the info records of all predictions, annotated in place.

    Each record found under ``info[str(data_id)]`` gets three extra keys:
    ``'cpid'`` (built from its ``'cid'`` and ``'pid'``), ``'score'`` (from
    ``logit_to_score``) and ``'confidence'`` (from
    ``get_confidence_or_rel_score``). The records inside ``info`` themselves
    are modified; no copies are made.

    Args:
        prediction_file: Passed to ``EstimatorPredictionViewer``.
        info: Mapping keyed by ``str(data_id)``.
        logit_to_score: Callable applied to each entry's ``"logits"`` vector.

    Returns:
        The annotated records in prediction order. Entries that raise
        ``KeyError`` are reported on stdout and skipped.
    """
    viewer = EstimatorPredictionViewer(prediction_file)
    print("Num data ", viewer.data_len)
    collected = []
    for record in viewer:
        score = logit_to_score(record.get_vector("logits"))
        data_id = record.get_vector("data_id")[0]
        confidence = get_confidence_or_rel_score(record)
        try:
            meta = info[str(data_id)]
            pair = CPIDPair((meta['cid'], meta['pid']))
            meta['cpid'] = pair
            meta['score'] = score
            meta['confidence'] = confidence
        except KeyError:
            print("Key error")
            print("data_id", data_id)
        else:
            collected.append(meta)
    return collected
def get_claim_perspective_label_dict() -> Dict[CPIDPair, int]:
    """Build a label lookup with value 1 for every gold (cid, pid) pair.

    The gold data comes from ``get_claim_perspective_id_dict()``; each claim's
    nested perspective-id lists are flattened with ``flatten``.

    Returns:
        A ``defaultdict(int)``: gold pairs map to 1, and looking up any other
        pair yields 0 (inserting that key as a side effect of the lookup).
    """
    gold = get_claim_perspective_id_dict()
    labels = defaultdict(int)
    labels.update(
        (CPIDPair((cid, pid)), 1)
        for cid, pid_groups in gold.items()
        for pid in flatten(pid_groups)
    )
    return labels
def make_decisions(e: Tuple[int, List[Dict]]):
    """Turn scores into binary decisions for one claim's perspectives.

    ``score_d`` is not defined in this function; it is resolved from the
    surrounding scope and is indexed by ``CPIDPair``.

    Args:
        e: ``(cid, p_list)`` where every item of ``p_list`` has a ``'pid'``.

    Returns:
        ``(cid, decisions)`` with one ``(cid, pid, binary)`` tuple per
        perspective. ``binary`` is 1 when the score exceeds 0.5, otherwise 0;
        pairs missing from ``score_d`` are scored 0.
    """
    cid, p_list = e

    def decide(p):
        pid = int(p['pid'])
        key = CPIDPair((cid, pid))
        score = score_d[key] if key in score_d else 0
        return cid, pid, (1 if score > 0.5 else 0)

    return cid, [decide(p) for p in p_list]
def rank(e: Tuple[int, List[Dict]]):
    """Score one claim's perspectives and keep the best ``top_k``.

    ``score_d``, ``suc_count`` and ``top_k`` are not defined in this function;
    they are resolved from the surrounding scope.

    Each perspective dict is modified in place: its ``'score'`` key is set to
    the value found in ``score_d``, or to -2 when the pair is missing.
    ``suc_count.suc()`` / ``suc_count.fail()`` is called once per perspective
    to record whether the lookup hit.

    Args:
        e: ``(cid, p_list)`` where every item of ``p_list`` has a ``'pid'``.

    Returns:
        ``(cid, ranked)`` where ``ranked`` holds at most ``top_k`` perspective
        dicts ordered by descending score.
    """
    cid, p_list = e
    scored: List[Dict] = []
    for p in p_list:
        key = CPIDPair((cid, int(p['pid'])))
        if key not in score_d:
            suc_count.fail()
            p['score'] = -2
        else:
            found = score_d[key]
            suc_count.suc()
            p['score'] = found
        scored.append(p)

    ranked = sorted(scored, key=lambda item: item['score'], reverse=True)
    return cid, ranked[:top_k]
def main():
    """Interactive loop: read ``<cid> <pid>`` from stdin and print that pair.

    Loads the cached ``pc_rel_dev_with_cpid`` pickle, builds a reverse index
    with ``reverse_index`` and then, for each input line, clears the terminal
    and calls ``do_print`` for the requested pair. The loop ends when stdin
    reaches end-of-file.
    """
    # NOTE(review): `info` and `prediction_path` are only consumed by the
    # commented-out rebuild path below; they are kept so that path still works
    # when re-enabled.
    info = load_from_pickle("pc_rel_dev_info_all")
    prediction_path = pjoin(output_path, "pc_rel_dev")
    rel_info: Dict[DataID, Tuple[CPIDPair, Logits, Logits]] = load_from_pickle("pc_rel_dev_with_cpid")
    # To rebuild from raw predictions instead of using the cached pickle:
    # rel_info: Dict[DataID, Tuple[CPIDPair, Logits, Logits]] = combine_pc_rel_with_cpid(prediction_path, info)

    doc_index = reverse_index(rel_info)
    tokenizer = get_tokenizer()
    # 'cls' exists only on Windows; other platforms use 'clear'.
    clear_cmd = 'cls' if os.name == 'nt' else 'clear'
    while True:
        try:
            s = input()
        except EOFError:
            # stdin closed: leave the loop instead of dying with a traceback.
            break
        os.system(clear_cmd)
        try:
            cid_text, pid_text = s.split()
            cid = int(cid_text)
            pid = int(pid_text)
        except ValueError:
            # Wrong number of fields or non-integer input: ask again rather
            # than terminating the session.
            print("Expected two integers: <cid> <pid>")
            continue
        cpid = CPIDPair((cid, pid))
        do_print(cpid, doc_index, tokenizer)
def collect_scores_and_confidence(prediction_file, info: Dict, logit_to_score) \
        -> Dict[DataID, Tuple[CPIDPair, float, float]]:
    """Collect score and confidence for every prediction entry.

    Args:
        prediction_file: Passed to ``EstimatorPredictionViewer``.
        info: Mapping keyed by ``str(data_id)`` whose records carry ``'cid'``
            and ``'pid'``.
        logit_to_score: Callable applied to each entry's ``"logits"`` vector.

    Returns:
        Mapping from data id to ``(cpid, score, confidence)``, where
        ``confidence`` comes from ``get_confidence_or_rel_score(entry)``.
        Entries that raise ``KeyError`` are reported on stdout and skipped.
    """
    viewer = EstimatorPredictionViewer(prediction_file)
    print("Num data ", viewer.data_len)
    result: Dict[DataID, Tuple[CPIDPair, float, float]] = {}
    for record in viewer:
        score = logit_to_score(record.get_vector("logits"))
        data_id = record.get_vector("data_id")[0]
        confidence = get_confidence_or_rel_score(record)
        try:
            meta = info[str(data_id)]
            pair = CPIDPair((meta['cid'], meta['pid']))
        except KeyError:
            print("Key error")
            print("data_id", data_id)
        else:
            result[data_id] = (pair, score, confidence)
    return result
def collect_info(
        prediction_file, info: Dict, logit_to_score
) -> Dict[CPIDPair, List[Tuple[float, float, Dict]]]:
    """Group prediction entries by their (cid, pid) pair.

    Args:
        prediction_file: Passed to ``EstimatorPredictionViewer``.
        info: Mapping keyed by ``str(data_id)`` whose records carry ``'cid'``
            and ``'pid'``.
        logit_to_score: Callable applied to each entry's ``"logits"`` vector.

    Returns:
        A ``defaultdict(list)`` mapping each pair to a list of
        ``(score, rel_score, info_record)`` tuples in prediction order, where
        ``rel_score`` is the first element of the entry's ``"rel_score"``
        vector. Entries that raise ``KeyError`` are reported on stdout and
        skipped.
    """
    viewer = EstimatorPredictionViewer(prediction_file)
    print("Num data ", viewer.data_len)
    grouped: Dict[CPIDPair, List[Tuple[float, float, Dict]]] = defaultdict(list)
    for record in viewer:
        score = logit_to_score(record.get_vector("logits"))
        rel_score = record.get_vector("rel_score")[0]
        data_id = record.get_vector("data_id")[0]
        try:
            meta = info[str(data_id)]
            pair = CPIDPair((meta['cid'], meta['pid']))
        except KeyError:
            print("Key error")
            print("data_id", data_id)
        else:
            grouped[pair].append((score, rel_score, meta))
    return grouped
def CPID_to_CPIDPair(cpid: CPID) -> CPIDPair:
    """Convert a ``"<cid>_<pid>"`` string id into an integer ``CPIDPair``.

    Raises:
        ValueError: If ``cpid`` does not split into exactly two parts on
            ``"_"``, or if either part is not an integer literal.
    """
    claim_part, perspective_part = cpid.split("_")
    pair = (int(claim_part), int(perspective_part))
    return CPIDPair(pair)