import json
import sys
from collections import Counter
from typing import Dict, List, Tuple

# Project-level helpers (load_cppnc_related_data, get_claim_perspective_id_dict,
# doc_score_predictions, doc_value, doc_value_group, get_score_from_entry,
# fetch_score_per_pid, load_cppnc_score, load_baseline, load_doc_score_prediction,
# load_cppnc_score_and_baseline_and_group, CPIDPair, lmap, average, bool_to_yn,
# left, print_table) are assumed importable from the surrounding package; their
# import paths are omitted here.


def collect_score_per_doc(baseline_score_d,
                          get_score_from_entry,
                          gold_pids,
                          pid_entries
                          ) -> Tuple[Dict[int, Tuple[str, int]], List[List[float]], List[bool]]:
    # The number of docs is taken from the first candidate; every candidate is
    # expected to carry the same number of per-doc entries.
    num_docs = len(pid_entries[0][1])
    print("Num_docs", num_docs)
    doc_value_arr: List[List[float]] = [[] for _ in range(num_docs)]
    seen_sig = set()
    doc_info_d: Dict[int, Tuple[str, int]] = {}
    labels = []
    cid = None
    print("Num candidates:", len(pid_entries))
    for pid, entries in pid_entries:
        # A candidate is positive if its pid appears in any gold cluster.
        label = any(pid in pids for pids in gold_pids)
        labels.append(label)
        base_score = baseline_score_d[int(pid)]
        print("num docs for pid {}: {}".format(pid, len(entries)))
        print("Seen sig:", len(seen_sig))
        for doc_idx, entry in enumerate(entries):
            # All entries must belong to the same claim (cid) and candidate (pid).
            cid_ = entry['query'].query_id
            if cid is None:
                cid = cid_
            assert cid == cid_
            assert pid == entry['candidate'].id
            sig = pid, entry['kdp'].doc_id, entry['kdp'].passage_idx
            seen_sig.add(sig)
            if doc_idx < num_docs:
                score = get_score_from_entry(entry)
                value = doc_value(score, base_score, int(label))
                doc_value_arr[doc_idx].append(value)
                doc_info_d[doc_idx] = entry['kdp'].doc_id, entry['kdp'].passage_idx
            else:
                # More entries than expected for this candidate; keep the
                # original behavior of printing a blank line as a marker.
                print()
    return doc_info_d, doc_value_arr, labels
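# `doc_value` is defined elsewhere in the project. A minimal sketch of the
# assumed semantics, for illustration only: the signed change from the
# baseline score, flipped for non-gold candidates, so that a positive value
# always means the document moved the score in the right direction. The
# project's actual definition may differ.
def doc_value(score: float, base_score: float, label: int) -> float:
    diff = score - base_score
    return diff if label else -diff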
# Script: for each claim, print a table of per-doc values against the
# baseline, one row per doc and one column per candidate perspective.
def main():
    baseline_cid_grouped, cid_grouped, claim_d = load_cppnc_related_data()
    gold = get_claim_perspective_id_dict()
    doc_scores = dict(doc_score_predictions())
    for cid, pid_entries in cid_grouped.items():
        # Build pid -> baseline score; each baseline group must hold one entry.
        baseline_pid_entries = baseline_cid_grouped[cid]
        baseline_score_d = {}
        for cpid, a_thing_array in baseline_pid_entries:
            _, pid = cpid
            assert len(a_thing_array) == 1
            baseline_score_d[pid] = a_thing_array[0]['score']
        gold_pids = gold[cid]

        def get_score_per_pid_entry(p_entries: Tuple[CPIDPair, List[Dict]]):
            _, entries = p_entries
            return average(lmap(lambda e: e['score'], entries))

        # Rank candidates by their average per-doc score, best first.
        pid_entries.sort(key=get_score_per_pid_entry, reverse=True)
        print("{} : {}".format(cid, claim_d[cid]))
        num_docs = len(pid_entries[0][1])
        doc_value_arr = [[] for _ in range(num_docs)]
        labels = []
        for cpid, things in pid_entries:
            _, pid = cpid
            label = any(pid in pids for pids in gold_pids)
            labels.append(label)
            base_score = baseline_score_d[pid]
            for doc_idx, per_doc in enumerate(things):
                value = doc_value(per_doc['score'], base_score, int(label))
                doc_value_arr[doc_idx].append(value)
        head = ["avg", "pred"] + lmap(bool_to_yn, labels)
        rows = [head]
        doc_score = doc_scores[cid]
        assert len(doc_value_arr) == len(doc_score)
        for pred_score, doc_values in zip(doc_score, doc_value_arr):
            avg = average(doc_values)
            row_float = [avg, pred_score] + doc_values
            rows.append(lmap(lambda x: "{0}".format(x), row_float))
        print_table(rows)
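# The small functional helpers used above come from the project's util module.
# Their assumed behavior, sketched here for reference only:
def lmap(f, xs):
    # map() that eagerly returns a list
    return list(map(f, xs))


def average(xs):
    return sum(xs) / len(xs)


def bool_to_yn(b: bool) -> str:
    return "Y" if b else "N"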
# Script: count "good"/"bad" per-doc values per document and print the
# documents whose (num_good - num_bad) margin exceeds 2 in either direction.
def main():
    save_name = "qcknc_val"
    cid_grouped: Dict[str, Dict[str, List[Dict]]] = load_cppnc_score(save_name)
    baseline_cid_grouped: Dict[int, List] = load_baseline("train_baseline")
    # baseline_cid_grouped, cid_grouped, claim_d = load_cppnc_related_data()
    gold = get_claim_perspective_id_dict()
    columns = ["cid", "doc_id", "num_good-num_bad"]
    rows = [columns]
    for cid_s, pid_entries in cid_grouped.items():
        cid = int(cid_s)
        baseline_pid_entries = baseline_cid_grouped[cid]
        baseline_score_d: Dict[int, float] = fetch_score_per_pid(baseline_pid_entries)
        gold_pids = gold[cid]
        labels = []
        per_doc_counter = Counter()
        for pid, entries in pid_entries.items():
            label = any(pid in pids for pids in gold_pids)
            labels.append(label)
            base_score = baseline_score_d[int(pid)]
            try:
                for doc_idx, entry in enumerate(entries):
                    doc_id = entry['kdp'].doc_id
                    score = get_score_from_entry(entry)
                    value = doc_value(score, base_score, int(label))
                    value_type = doc_value_group(value)
                    per_doc_counter[doc_id, value_type] += 1
            except KeyError:
                print(cid, doc_idx, "not found")
        doc_ids = set(left(per_doc_counter.keys()))
        for doc_id in doc_ids:
            n_good = per_doc_counter[doc_id, "good"]
            n_bad = per_doc_counter[doc_id, "bad"]
            doc_score = n_good - n_bad
            if abs(doc_score) > 2:
                rows.append([cid, doc_id, doc_score])
    print_table(rows)
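# `doc_value_group` and `left` are project helpers. Sketches of their assumed
# behavior, with a hypothetical threshold `t` for the grouping cutoff; the
# real cutoff lives elsewhere in the project and may differ:
def doc_value_group(value: float, t: float = 0.05) -> str:
    # Bucket a doc value into the categories counted above.
    if value > t:
        return "good"
    if value < -t:
        return "bad"
    return "no effect"


def left(pairs):
    # First element of each pair.
    return [a for a, _ in pairs]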
# Script: driven by a JSON run config (argv[1]); tallies how often documents
# help ("good"), hurt ("bad"), or leave candidate scores unchanged
# ("no effect"), skipping docs whose qknc score is negative.
def main():
    run_config = json.load(open(sys.argv[1], "r"))
    passage_score_path = run_config['passage_score_path']
    payload_name = run_config['payload_name']
    doc_scores: Dict[int, List[float]] = dict(
        load_doc_score_prediction(passage_score_path))
    baseline_cid_grouped, cid_grouped, claim_d = load_cppnc_score_and_baseline_and_group(
        payload_name)
    gold = get_claim_perspective_id_dict()
    g_counter = Counter()
    columns = ["pid doc pair", "good", "bad", "no effect", "no effect pid"]
    rows = [columns]
    record = []
    for cid, pid_entries in cid_grouped.items():
        baseline_pid_entries = baseline_cid_grouped[cid]
        baseline_score_d = {}
        for cpid, a_thing_array in baseline_pid_entries:
            _, pid = cpid
            assert len(a_thing_array) == 1
            baseline_score_d[pid] = a_thing_array[0]['score']
        gold_pids = gold[cid]
        labels = []
        counter = Counter()
        for cpid, things in pid_entries:
            _, pid = cpid
            label = any(pid in pids for pids in gold_pids)
            labels.append(label)
            base_score = baseline_score_d[pid]
            any_effect = False
            try:
                for doc_idx, per_doc in enumerate(things):
                    value = doc_value(per_doc['score'], base_score, int(label))
                    qknc_score = doc_scores[cid][doc_idx]
                    if qknc_score < 0:
                        continue
                    value_type = doc_value_group(value)
                    counter[value_type] += 1
                    if value_type in ["good", "bad"]:
                        record.append((cid, pid, doc_idx, value_type))
                    if value_type != "no effect":
                        any_effect = True
                    counter["pid doc pair"] += 1
                if not any_effect:
                    counter["no effect pid"] += 1
            except KeyError:
                print(cid, doc_idx, "not found")
        rows.append([cid] + [counter[c] for c in columns])
        for key, count in counter.items():
            g_counter[key] += count
    rows.append(["all"] + [g_counter[c] for c in columns])
    rows.append(["rate"] + [g_counter[c] / g_counter["pid doc pair"] for c in columns])
    print_table(rows)
    print_table(record)
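# A hypothetical minimal run config for the main() above, showing only the
# two keys it actually reads; the values are illustrative placeholders:
#
#   {
#       "passage_score_path": "output/qknc/passage_scores.score",
#       "payload_name": "qcknc_val"
#   }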
def get_value_from_entry(entry, base_score: float, label: bool) -> float:
    # In the original fragment, base_score and label were free variables
    # captured from an enclosing scope; they are taken as parameters here so
    # the helper stands alone.
    score = get_score_from_entry(entry)
    return doc_value(score, base_score, int(label))
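# Example call site, mirroring the loops above (variable names hypothetical):
#     for entry in entries:
#         value = get_value_from_entry(entry, baseline_score_d[int(pid)], label)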