def main():
    """Build one averaged language model per query from its positive documents.

    Loads the per-document term counters and the judgements returned by
    ``load_robust_qrel()``. For every query in the judgements, each document
    with a truthy judgement is turned into a normalized term distribution,
    and those distributions are combined with ``average_counters``. The
    result is collected in a local dict keyed by query id; nothing is
    returned.
    """
    tprint("loading counter dict")
    counter_dict: Dict[str, Counter] = load_counter_dict()

    def get_doc_lm(doc_id) -> Counter:
        # Divide every raw term count by the document's total count.
        term_counts = counter_dict[doc_id]
        total = sum(term_counts.values())
        return Counter({term: freq / total for term, freq in term_counts.items()})

    qrel = load_robust_qrel()

    def get_pos_docs(query_id):
        # Lazily yield the ids of documents whose judgement is truthy;
        # yields nothing for a query that has no judgements at all.
        if query_id in qrel:
            for doc_id, score in qrel[query_id].items():
                if score:
                    yield doc_id

    tprint("build query lm dict")
    query_lm_dict = {}
    for query_id in list(qrel.keys()):
        pos_doc_lms: List[Counter] = lmap(get_doc_lm, get_pos_docs(query_id))
        query_lm_dict[query_id] = average_counters(pos_doc_lms)
def show(tf_prediction_data, payload_info, data_id, k=100):
    """Print per-query rankings built from model logits and judge the top 20.

    The flattened logits are consumed in consecutive groups of ``k`` scores,
    one group for each of 50 queries; ``payload_info[i]`` is the document id
    that belongs to score ``i``. Each group is sorted by descending score.
    The queries are the 50 consecutive integer ids starting at
    ``int(data_id)``. For each query the first 20 ranked documents are
    printed with a tag: ``T`` (label 1 or 2), ``F`` (label 0) or ``N`` (no
    judgement for that document). Finally the number of logits and the
    fraction of ``T`` documents among all tagged documents are printed.

    Args:
        tf_prediction_data: batched prediction output; after
            ``flatten_batches`` it must provide a ``"logits"`` entry.
        payload_info: indexable by global score position, giving the
            document id for that score.
        data_id: value convertible to ``int``; id of the first query.
        k: number of scored documents per query.

    Raises:
        AssertionError: if a judgement label is not 0, 1 or 2.
    """
    tf_prediction_data = flatten_batches(tf_prediction_data)
    logits = np.reshape(tf_prediction_data["logits"], [-1])

    n_queries = 50
    g_idx = 0
    pred_list = []
    for _ in range(n_queries):
        ranked_list = []
        for _ in range(k):
            score = logits[g_idx]
            doc_id = payload_info[g_idx]
            g_idx += 1
            print(doc_id, score)
            ranked_list.append((doc_id, score))
        ranked_list.sort(key=lambda x: x[1], reverse=True)
        # `left` presumably keeps the first field (the doc id) of each pair.
        pred_list.append(left(ranked_list))

    qrels = load_robust_qrel()
    st = int(data_id)
    query_ids = [str(i) for i in range(st, st + n_queries)]

    n_relevant = 0
    n_other = 0
    for query_id, pred in zip(query_ids, pred_list):
        gold = qrels.get(query_id, {})
        print(query_id, "-------------")
        for doc_id in pred[:20]:
            if doc_id in gold:
                label = gold[doc_id]
                if label == 1 or label == 2:
                    s = "T"
                elif label == 0:
                    s = "F"
                else:
                    print(label)
                    # Explicit raise so the check is not stripped under -O.
                    raise AssertionError(
                        "unexpected judgement label: {}".format(label)
                    )
            else:
                s = "N"

            if s == "T":
                n_relevant += 1
            else:
                n_other += 1
            print(doc_id, s)

    print("data len", len(logits))
    # Report the share of relevant ("T") documents. The counters used to be
    # swapped, so the printed value was the share of non-relevant documents.
    total = n_relevant + n_other
    accuracy = n_relevant / total if total else 0.0
    print("accuracy : ", accuracy)
def compare():
    """Print, per query, the top-100 ranked documents that have no judgement.

    For every query in the judgements returned by ``load_robust_qrel()``,
    takes the first 100 document ids of that query's entry in
    ``load_2k_rank()``, prints how many ids were taken and how many
    documents are judged for the query, then prints each taken id that does
    not appear among the judged documents.

    Raises:
        KeyError: if a judged query has no entry in the ranked lists
            (assuming the ranked lists are a plain dict).
    """
    qrel = load_robust_qrel()
    ranked_list = load_2k_rank()
    for q_id, judgements in qrel.items():
        galago_ranked_2k = [x[0] for x in ranked_list[q_id]][:100]
        print(len(galago_ranked_2k))
        print(len(judgements))
        for doc_id in galago_ranked_2k:
            # Test membership on the mapping itself: constant time per
            # lookup instead of scanning a list of its keys.
            if doc_id not in judgements:
                print(doc_id)
def main():
    """Measure how often two passage scorers pick the same best passage.

    Loads two score tables keyed by ``(query_id, doc_id, passage_idx)`` from
    the pickles ``robust_score_d`` and ``robust_score_d2``. For every
    (query, document) pair that has more than one passage in the first
    table, the passages are ordered by passage index and the position of the
    highest score is found in each table. Only documents with a truthy
    judgement are counted: ``counter[1]`` when both tables agree on the
    position, ``counter[0]`` otherwise. Prints the counter and the agreement
    ratio.

    Raises:
        KeyError: if a key of the first table is missing from the second.
    """
    score_d: Dict[Tuple[str, str, int], float] = load_from_pickle("robust_score_d")
    score_d2: Dict[Tuple[str, str, int], float] = load_from_pickle("robust_score_d2")
    qrel: Dict[str, Dict[str, int]] = load_robust_qrel()
    query_grouped = group_by(score_d.keys(), get_first)

    counter = Counter()
    for query_id, keys in query_grouped.items():
        doc_id_grouped = group_by(keys, get_second)
        qrel_part = qrel.get(query_id, {})
        for doc_id, cur_keys in doc_id_grouped.items():
            # Unjudged documents are treated as label 0 (not counted below).
            label: int = qrel_part.get(doc_id, 0)
            if len(cur_keys) == 1:
                # A single passage cannot disagree with itself.
                continue

            summary = []
            summary2 = []
            for key in cur_keys:
                query_id2, doc_id2, passage_idx = key
                assert query_id2 == query_id
                assert doc_id2 == doc_id
                summary.append((passage_idx, score_d[key]))
                summary2.append((passage_idx, score_d2[key]))

            # Same ordering in both lists so argmax positions are comparable.
            summary.sort(key=get_first)
            summary2.sort(key=get_first)
            max_idx = int(argmax(right(summary)))
            max_idx2 = int(argmax(right(summary2)))

            if label:
                counter[1 if max_idx == max_idx2 else 0] += 1

    print(counter)
    total = counter[0] + counter[1]
    if total == 0:
        # Nothing was counted; avoid dividing by zero.
        print("accuracy undefined: no positive document with multiple passages")
        return
    print("accuracy {}".format(counter[1] / total))
def all_doc_ids_of_interest() -> List[str]:
    """Return the sorted, de-duplicated ids of judged and top-ranked documents.

    Collects every document id that appears in the judgements returned by
    ``load_robust_qrel()``, plus the first 1000 document ids of each ranked
    list returned by ``load_bm25_best()`` after that list has been sorted in
    place, ascending, by its second field.
    """
    doc_ids = set()
    for judgement in load_robust_qrel().values():
        doc_ids.update(judgement.keys())

    top_k = 1000
    for ranked_list in load_bm25_best().values():
        # NOTE(review): ascending order on field 1 assumes that field is a
        # rank (lower is better) rather than a score -- confirm.
        ranked_list.sort(key=lambda entry: entry[1])
        doc_ids.update(entry[0] for entry in ranked_list[:top_k])

    return sorted(doc_ids)
def main():
    """Print, per query, a table of passage scores for each document.

    Loads a score table keyed by ``(query_id, doc_id, passage_idx)`` from the
    pickle named by the first command-line argument. For each document the
    scores are ordered by passage index and turned into a row: the passage
    index holding the highest score, followed by every score formatted with
    five decimals. Rows of documents with a truthy judgement are printed
    under "Positive"; the first 30 remaining rows under "Negative".
    """
    save_name = sys.argv[1]
    score_d: Dict[Tuple[str, str, int], float] = load_from_pickle(save_name)
    qrel: Dict[str, Dict[str, int]] = load_robust_qrel()

    query_grouped = group_by(score_d.keys(), get_first)
    for query_id, keys in query_grouped.items():
        doc_id_grouped = group_by(keys, get_second)
        qrel_part = qrel.get(query_id, {})

        pos_rows, neg_rows = [], []
        for doc_id, cur_keys in doc_id_grouped.items():
            # Documents without a judgement count as label 0.
            label: int = qrel_part.get(doc_id, 0)

            summary = []
            for key in cur_keys:
                key_query_id, key_doc_id, passage_idx = key
                assert key_query_id == query_id
                assert key_doc_id == doc_id
                summary.append((passage_idx, score_d[key]))
            summary.sort(key=get_first)

            best_pos = int(argmax(right(summary)))
            score_str = list(map("{0:.5f}".format, right(summary)))
            row = [str(summary[best_pos][0]), *score_str]

            target_rows = pos_rows if label else neg_rows
            target_rows.append(row)

        print(query_id)
        print("Positive")
        print_table(pos_rows)
        print("Negative")
        print_table(neg_rows[:30])
def eval(pred_list, data_id):
    """Score predictions for the 50 queries starting at ``data_id``.

    The query ids are the 50 consecutive integers beginning at
    ``int(data_id)``, as strings. For each one the judgement dict is looked
    up (empty when the query has none), and the documents with a label of at
    least 1 form its gold set. The predictions are passed to ``p_at_k`` with
    the gold sets and to ``ndcg_at_k_local`` with the judgement dicts, both
    with a cutoff of 20 (presumably precision@20 and NDCG@20). Both values
    are printed.

    Returns:
        The tuple ``(P20, NDCG20)``.
    """
    first_id = int(data_id)
    query_ids = [str(qid) for qid in range(first_id, first_id + 50)]
    qrels = load_robust_qrel()

    gold_dict_list = [qrels.get(query_id, {}) for query_id in query_ids]
    gold_set_list = [
        {doc_id for doc_id in gold if gold[doc_id] >= 1}
        for gold in gold_dict_list
    ]

    NDCG20 = ndcg_at_k_local(pred_list, gold_dict_list, 20)
    P20 = p_at_k(pred_list, gold_set_list, 20)
    print("P20:", P20)
    print("NDCG20:", NDCG20)
    return P20, NDCG20