def main2():
    counter = load_from_pickle(sys.argv[1])
    l = []
    for key, cnt in counter.items():
        max_idx, num_seg = key
        e = num_seg, max_idx, cnt
        l.append(e)
    grouped = group_by(l, get_first)
    rows = []
    for num_seg in range(1, 20):
        entries = grouped[num_seg]
        cnt_sum = sum([cnt for _, max_idx, cnt in entries])
        # Each (num_seg, max_idx) key is unique, so assignment suffices here.
        local_counter = Counter()
        for _, max_idx, cnt in entries:
            local_counter[max_idx] = cnt
        row = [num_seg, cnt_sum]
        for seg_loc in range(num_seg):
            row.append(local_counter[seg_loc])
        rows.append(row)
    print_table(rows)
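# The scripts in this file lean on a handful of small utilities from the
# surrounding codebase (list_lib / tab_print). Their definitions are not
# shown here; the sketches below are reconstructed from how they are used
# and are assumptions, not the project's actual implementations.
from collections import defaultdict
from typing import Callable, Dict, Iterable, List, TypeVar

A = TypeVar("A")
K = TypeVar("K")


def group_by(items: Iterable[A], key_fn: Callable[[A], K]) -> Dict[K, List[A]]:
    # Bucket items by key_fn, preserving input order within each bucket.
    grouped = defaultdict(list)
    for item in items:
        grouped[key_fn(item)].append(item)
    return dict(grouped)


def get_first(x):
    return x[0]


def get_second(x):
    return x[1]


def lmap(fn, xs):
    return list(map(fn, xs))


def average(xs):
    xs = list(xs)
    return sum(xs) / len(xs)


def foreach(fn, xs):
    for x in xs:
        fn(x)


def bool_to_yn(b: bool) -> str:
    return "Y" if b else "N"


def print_table(rows):
    # Tab-separated dump of a list-of-lists table.
    for row in rows:
        print("\t".join(map(str, row)))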
def bert_eval_all():
    pc_score_d: Dict[CPID, float] = load_from_pickle("pc_bert_baseline_score_d")
    score_d: Dict[CPIDPair, float] = {CPID_to_CPIDPair(k): v for k, v in pc_score_d.items()}
    ap_list, cids = get_ap_list_from_score_d(score_d, "dev")
    print_table(zip(cids, ap_list))
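# CPID is presumably a "{cid}_{pid}" string key while CPIDPair is a
# (cid, pid) tuple (later scripts unpack it as `_, pid = cpid`). A minimal
# conversion consistent with that assumption, not the project's actual
# implementation:
def CPID_to_CPIDPair_sketch(cpid: str):
    cid, pid = cpid.split("_")
    return int(cid), int(pid)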
def get_statistic_for_join(join_result: Iterable[Tuple[MSMarcoDoc, JoinedPassage]]):
    print("get_statistic_for_join()")
    tokenizer = get_tokenizer()

    def size_in_tokens(text):
        return len(tokenizer.tokenize(text))

    intervals = list(range(0, 500, 50)) + list(range(500, 5000, 500))
    last = "5000 <"
    keys = intervals + [last]

    def bin_fn(n):
        for ceil in intervals:
            if n < ceil:
                return ceil
        return last

    bin_doc = BinHistogram(bin_fn)
    bin_loc = BinHistogram(bin_fn)
    bin_passage = BinHistogram(bin_fn)
    match_fail = 0
    for doc, passage in join_result:
        if passage.loc >= 0:
            # Text preceding the matched passage within the document body.
            prev = doc.body[:passage.loc]
            bin_doc.add(size_in_tokens(doc.body))
            bin_loc.add(size_in_tokens(prev))
            bin_passage.add(size_in_tokens(passage.text))
        else:
            match_fail += 1

    print('match fail', match_fail)
    print("doc length")
    bins = [bin_doc, bin_passage, bin_loc]
    head = ['', 'bin_doc', 'bin_passage', 'bin_loc']
    rows = [head]
    for key in keys:
        row = [key]
        for bh in bins:
            row.append(bh.counter[key])
        rows.append(row)
    print_table(rows)
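# BinHistogram is used here and in the score-distribution scripts below as
# a counter keyed through a binning function. A minimal sketch consistent
# with that usage; the project's actual class may differ:
from collections import Counter


class BinHistogram:
    def __init__(self, bin_fn):
        self.bin_fn = bin_fn
        self.counter = Counter()

    def add(self, value):
        self.counter[self.bin_fn(value)] += 1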
def a_relevant():
    d_ids = list(load_train_claim_ids())
    claims: List[Dict] = get_claims_from_ids(d_ids)
    claim_lms = build_gold_lms(claims)
    claim_lms_d = {lm.cid: lm for lm in claim_lms}
    bg_lm = average_counters(lmap(lambda x: x.LM, claim_lms))
    log_bg_lm = get_lm_log(bg_lm)

    claims = claims[:10]
    top_n = 100
    q_res_path = FilePath(
        "/mnt/nfs/work3/youngwookim/data/perspective/train_claim/q_res_100")
    ranked_list: Dict[str, List[SimpleRankedListEntry]] = load_galago_ranked_list(q_res_path)
    preload_docs(ranked_list, claims, top_n)
    stopwords = load_stopwords_for_query()
    alpha = 0.7
    tokenizer = PCTokenizer()

    for c in claims:
        q_res: List[SimpleRankedListEntry] = ranked_list[str(c['cId'])]
        claim_lm = claim_lms_d[c['cId']]
        log_topic_lm = get_lm_log(smooth(claim_lm.LM, bg_lm, alpha))
        log_odd: Counter = subtract(log_topic_lm, log_bg_lm)

        def get_passage_score(p):
            # Average log-odds of the passage tokens, ignoring stopwords.
            def get_score(t):
                if t in stopwords:
                    return 0
                return log_odd[tokenizer.stemmer.stem(t)]
            return sum([get_score(t) for t in p]) / len(p) if len(p) > 0 else 0

        docs = []
        for i in range(top_n):
            try:
                doc = load_doc(q_res[i].doc_id)
                docs.append(doc)
            except KeyError:
                docs.append(None)

        print(c['text'])
        rows = []
        for rank, doc in enumerate(docs):
            if doc is None:
                rows.append((rank, "-", "-"))
                continue
            scores = get_doc_score(doc, get_passage_score)
            avg_score = average(scores)
            max_score = max(scores)
            rows.append((rank, avg_score, max_score))
        print_table(rows)
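# The log-odds scoring above uses three small Counter helpers. Plausible
# sketches, assuming unigram LMs stored as probability Counters and
# Jelinek-Mercer interpolation against the background LM (alpha being the
# weight on the topic LM):
import math
from collections import Counter


def get_lm_log(lm: Counter) -> Counter:
    return Counter({term: math.log(p) for term, p in lm.items() if p > 0})


def smooth(topic_lm: Counter, bg_lm: Counter, alpha: float) -> Counter:
    # P'(t) = alpha * P_topic(t) + (1 - alpha) * P_bg(t)
    return Counter({t: alpha * topic_lm[t] + (1 - alpha) * bg_lm[t] for t in bg_lm})


def subtract(a: Counter, b: Counter) -> Counter:
    return Counter({t: a[t] - b[t] for t in a})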
def main():
    info_dir = sys.argv[1]
    unique_passages: List[KDP] = collect_unique_passage(info_dir)
    rows = []
    for p in unique_passages:
        row = [p.doc_id, p.passage_idx, " ".join(p.tokens)]
        rows.append(row)
    print_table(rows)
def main():
    dvp: List[DocValueParts] = load()

    def get_qid(e: DocValueParts):
        return e.query.query_id

    # Group by query id
    dvp_qid_grouped: Dict[str, List[DocValueParts]] = group_by(dvp, get_qid)

    def get_doc_id_idx(e: DocValueParts):
        return e.kdp.doc_id, e.kdp.passage_idx

    value_types = ["good", "bad", "none"]
    head = ["qid"] + value_types
    rows = [head]
    rows2 = []
    for qid, entries in dvp_qid_grouped.items():
        # Q: How many kdp are useful?
        # Q: Does relevance matter?
        kdp_grouped = group_by(entries, get_doc_id_idx)
        counter = Counter()
        doc_value = Counter()
        for kdp_id, entries2 in kdp_grouped.items():
            doc_id, _ = kdp_id
            value_sum: float = sum([e.value for e in entries2])
            if value_sum > 1:
                counter["good"] += 1
                doc_value[doc_id] += 1
            elif value_sum < -1:
                counter["bad"] += 1
                doc_value[doc_id] -= 1
            else:
                counter["none"] += 1
        row = [qid] + [counter[k] for k in value_types]
        rows.append(row)

        # Top 10 most helpful and most harmful documents per query.
        doc_value_list = list(doc_value.items())
        doc_value_list.sort(key=get_second, reverse=True)
        rows2.append([qid])
        for doc_id, value in doc_value_list[:10]:
            if value > 0:
                rows2.append([doc_id, value])
        doc_value_list.sort(key=get_second)
        for doc_id, value in doc_value_list[:10]:
            if value < 0:
                rows2.append([doc_id, value])
    print_table(rows)
    print_table(rows2)
def main():
    save_name = sys.argv[1]
    out_dir = os.path.join(output_path, "cppnc")
    exist_or_mkdir(out_dir)
    info_file_path = os.path.join(out_dir, save_name + ".info")
    pred_file_path = os.path.join(out_dir, save_name + ".score")
    score_d = summarize_score(info_file_path, pred_file_path)
    split = "dev"
    ap_list, cids = get_ap_list_from_score_d(score_d, split)
    print_table(zip(cids, ap_list))
def main():
    input_path_format = sys.argv[1]
    st = int(sys.argv[2])
    step = int(sys.argv[3])
    ed = int(sys.argv[4])
    rows = []
    for i in range(st, ed, step):
        s = get_ap_from_file_path(input_path_format.format(i))
        rows.append([i, s])
    print_table(rows)
def main():
    baseline_cid_grouped, cid_grouped, claim_d = load_cppnc_related_data()
    gold = get_claim_perspective_id_dict()
    bin_keys = ["< 0.05", "< 0.50", "< 0.95", "< 1"]

    def bin_fn(item: float):
        if item > 0.95:
            return "< 1"
        elif item > 0.5:
            return "< 0.95"
        elif item > 0.05:
            return "< 0.50"
        else:
            return "< 0.05"

    for cid, pid_entries in cid_grouped.items():
        baseline_pid_entries = baseline_cid_grouped[cid]
        baseline_score_d = {}
        for cpid, a_thing_array in baseline_pid_entries:
            _, pid = cpid
            assert len(a_thing_array) == 1
            score = a_thing_array[0]['score']
            baseline_score_d[pid] = score
        gold_pids = gold[cid]

        def get_score_per_pid_entry(p_entries: Tuple[CPIDPair, List[Dict]]):
            cpid, entries = p_entries
            return average(lmap(lambda e: e['score'], entries))

        pid_entries.sort(key=get_score_per_pid_entry, reverse=True)
        print("{} : {}".format(cid, claim_d[cid]))
        head_row = [""] + bin_keys
        rows = [head_row]
        for cpid, things in pid_entries:
            histogram = BinHistogram(bin_fn)
            _, pid = cpid
            label = any([pid in pids for pids in gold_pids])
            label_str = bool_to_yn(label)
            base_score = baseline_score_d[pid]
            base_score_str = "{0:.2f}".format(base_score)
            scores: List[float] = lmap(lambda x: x['score'], things)
            foreach(histogram.add, scores)
            row = [label_str, base_score_str] + [str(histogram.counter[bin_key])
                                                 for bin_key in bin_keys]
            rows.append(row)
        print_table(rows)
def main(config):
    info_dir = config['info_path']
    prediction_file = config['pred_path']
    f_handler = get_format_handler("qck")
    info = load_combine_info_jsons(info_dir, f_handler.get_mapping(), f_handler.drop_kdp())
    data: List[Dict] = join_prediction_with_info(prediction_file, info, ["data_id", "logits"])
    out_entries: List[QCKOutEntry] = lmap(QCKOutEntry.from_dict, data)
    qrel: Dict[str, Dict[str, int]] = load_qrels_structured(config['qrel_path'])

    def get_label(query_id, candi_id):
        if candi_id in qrel[query_id]:
            return qrel[query_id][candi_id]
        else:
            return 0

    def logit_to_score_softmax(logit):
        return scipy.special.softmax(logit)[1]

    grouped: Dict[str, List[QCKOutEntry]] = group_by(out_entries, lambda x: x.query.query_id)
    for query_id, items in grouped.items():
        raw_kdp_list = [(x.kdp.doc_id, x.kdp.passage_idx) for x in items]
        kdp_list = unique_list(raw_kdp_list)
        raw_candi_id_list = [x.candidate.id for x in items]
        candi_id_list = unique_list(raw_candi_id_list)
        logit_d = {(x.candidate.id, (x.kdp.doc_id, x.kdp.passage_idx)): x.logits for x in items}
        labels = [get_label(query_id, candi_id) for candi_id in candi_id_list]
        head_row0 = [" "] + labels
        head_row1 = [" "] + candi_id_list
        rows = [head_row0, head_row1]
        for kdp_sig in kdp_list:
            row = [kdp_sig]
            for candi_id in candi_id_list:
                try:
                    score = logit_to_score_softmax(logit_d[candi_id, kdp_sig])
                    score_str = "{0:.2f}".format(score)
                except KeyError:
                    score_str = "-"
                row.append(score_str)
            rows.append(row)
        print(query_id)
        print_table(rows)
def main():
    rlg_proposed_tfidf = load_ranked_list_grouped(sys.argv[1])
    rlg_proposed_bm25 = load_ranked_list_grouped(sys.argv[2])
    rlg_bert_tfidf = load_ranked_list_grouped(sys.argv[3])
    qrel: QRelsDict = load_qrels_structured(sys.argv[4])
    # TODO
    # Q1) Is the set of documents different?
    # Q2) Say 2 (BM25) is better than 1 (tf-idf); split documents into:
    #   1. X=0, in 2 not in 1
    #   2. X=1, in 2 not in 1
    #   3. X=0, in 1 not in 2 -> FP predictions that BERT (baseline) misses
    #   4. X=1, in 1 not in 2
    cnt = 0
    for q in rlg_proposed_tfidf:
        entries1 = rlg_proposed_tfidf[q]
        entries2 = rlg_proposed_bm25[q]
        entries3 = rlg_bert_tfidf[q]
        e3_d = {e.doc_id: e for e in entries3}

        def get_doc_set(entries):
            return set(map(TrecRankedListEntry.get_doc_id, entries))

        docs1 = get_doc_set(entries1)
        docs2 = get_doc_set(entries2)
        d = qrel[q]
        rows = [[q]]
        rows.append(['doc_id', 'label', 'in_bm25', '1_rank', '1_score', '3_rank', '3_score'])
        for e in entries1:
            label = d[e.doc_id] if e.doc_id in d else 0
            # Originally restricted to case 3 (e.doc_id not in docs2);
            # currently all documents from list 1 are shown.
            try:
                e3 = e3_d[e.doc_id]
                row = [e.doc_id, label, e.doc_id in docs2, e.rank, e.score, e3.rank, e3.score]
                rows.append(row)
            except KeyError:
                # Tolerate at most one document missing from list 3.
                assert cnt == 0
                cnt += 1
        if len(rows) > 2:
            print_table(rows)
def main():
    baseline_cid_grouped, cid_grouped, claim_d = load_cppnc_related_data()
    gold = get_claim_perspective_id_dict()
    doc_scores = dict(doc_score_predictions())
    for cid, pid_entries in cid_grouped.items():
        baseline_pid_entries = baseline_cid_grouped[cid]
        baseline_score_d = {}
        for cpid, a_thing_array in baseline_pid_entries:
            _, pid = cpid
            assert len(a_thing_array) == 1
            baseline_score_d[pid] = a_thing_array[0]['score']
        gold_pids = gold[cid]

        def get_score_per_pid_entry(p_entries: Tuple[CPIDPair, List[Dict]]):
            cpid, entries = p_entries
            return average(lmap(lambda e: e['score'], entries))

        pid_entries.sort(key=get_score_per_pid_entry, reverse=True)
        print("{} : {}".format(cid, claim_d[cid]))
        num_docs = len(pid_entries[0][1])
        doc_value_arr = list([list() for _ in range(num_docs)])
        labels = []
        for cpid, things in pid_entries:
            _, pid = cpid
            label = any([pid in pids for pids in gold_pids])
            labels.append(label)
            base_score = baseline_score_d[pid]
            for doc_idx, per_doc in enumerate(things):
                score = per_doc['score']
                value = doc_value(score, base_score, int(label))
                doc_value_arr[doc_idx].append(value)
        head = ["avg", "pred"] + lmap(bool_to_yn, labels)
        rows = [head]
        doc_score = doc_scores[cid]
        assert len(doc_value_arr) == len(doc_score)
        for pred_score, doc_values in zip(doc_score, doc_value_arr):
            avg = average(doc_values)
            row_float = [avg, pred_score] + doc_values
            rows.append(lmap(lambda x: "{0}".format(x), row_float))
        print_table(rows)
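# doc_value and doc_value_group (also used in the scripts further below)
# quantify how much conditioning on a document moved a perspective's score
# away from the no-document baseline, signed by the gold label. Sketches
# under that assumption; the threshold is illustrative, not the project's:
def doc_value(score: float, base_score: float, label: int) -> float:
    # Positive when the document pushed the score toward the gold label.
    change = score - base_score
    return change if label else -change


def doc_value_group(value: float, threshold: float = 0.1) -> str:
    # Matches the "good" / "bad" / "no effect" keys counted elsewhere.
    if value > threshold:
        return "good"
    if value < -threshold:
        return "bad"
    return "no effect"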
def main(config):
    info = load_combine_info_jsons(config['info_path'])
    predictions: List[Dict] = join_prediction_with_info(
        config['pred_path'], info, ["data_ids", "logits"], True, "data_ids")
    entries: List[OutEntry] = lmap(OutEntry.from_dict, predictions)

    def is_pos(e: OutEntry):
        return logit_to_score_softmax(e.logits) > 0.5

    pos_entries = filter(is_pos, entries)
    rows = []
    for e in pos_entries:
        rows.append([e.cid, e.pid, e.doc_id, e.sent_idx])
    print_table(rows)
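# Several scripts here call logit_to_score_softmax without showing its
# definition; the inline version in the qck table script above suggests it
# is just the softmax probability of the positive class:
import scipy.special


def logit_to_score_softmax(logit):
    return scipy.special.softmax(logit)[1]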
def main(): print("Loading doc score") doc_scores = dict(doc_score_predictions()) print("Loading cppnc scores") save_name = "qcknc_val" cid_grouped: Dict[str, Dict[str, List[Dict]]] = load_cppnc_score(save_name) print(".") gold = get_claim_perspective_id_dict() baseline_cid_grouped: Dict[int, List] = load_baseline("train_baseline") claim_d = load_train_claim_d() for cid, pid_entries_d in cid_grouped.items(): pid_entries_d: Dict[str, List[Dict]] = pid_entries_d baseline_pid_entries = baseline_cid_grouped[int(cid)] baseline_score_d = fetch_score_per_pid(baseline_pid_entries) gold_pids = gold[int(cid)] def get_score_per_pid_entry(p_entries: Tuple[str, List[Dict]]): _, entries = p_entries return average(lmap(get_score_from_entry, entries)) pid_entries: List[Tuple[str, List[Dict]]] = list(pid_entries_d.items()) pid_entries.sort(key=get_score_per_pid_entry, reverse=True) s = "{} : {}".format(cid, claim_d[int(cid)]) print(s) doc_info_d, doc_value_arr, labels = collect_score_per_doc(baseline_score_d, get_score_from_entry, gold_pids, pid_entries) pids = left(pid_entries) head1 = [""] * 4 + pids head2 = ["avg", "doc_id", "passage_idx", "pknc_pred"] + lmap(bool_to_yn, labels) rows = [head1, head2] doc_score = doc_scores[cid] assert len(doc_value_arr) == len(doc_score) for doc_idx, (pred_score, doc_values) in enumerate(zip(doc_score, doc_value_arr)): doc_id, passage_idx = doc_info_d[doc_idx] avg = average(doc_values) row_float = [avg, doc_id, passage_idx, pred_score] + doc_values row = lmap(lambda x: "{0}".format(x), row_float) rows.append(row) print_table(rows)
def main():
    data = EstimatorPredictionViewer(sys.argv[1])
    rows = []
    for e in data:
        label_ids = e.get_vector("label_ids")
        logits = e.get_vector("logits")
        probs = softmax(logits, -1)
        predict_label = 1 if probs[1] > 0.5 else 0
        decision = "Y" if predict_label == label_ids else "N"
        rows.append([label_ids, probs[1], decision])
    rows.sort(key=lambda x: x[1], reverse=True)
    print_table(rows)
def extract_qk_unit(info_path, pred_path):
    info = load_combine_info_jsons(info_path, qk_convert_map, False)
    predictions = join_prediction_with_info(pred_path, info)
    grouped: Dict[str, List[Dict]] = group_by(predictions, lambda x: x['query'].query_id)
    rows = []
    for qid, entries in grouped.items():
        any_entry = entries[0]
        query = any_entry['query']
        rows.append([query.query_id, query.text])
        for entry in entries:
            row = [get_regression_score(entry), entry['kdp'].doc_id, entry['kdp'].passage_idx]
            rows.append(row)
    print_table(rows)
def show_histogram(bh):
    keys = list(bh.counter.keys())
    keys.sort()
    # Normalize by the total count in the histogram (the original referenced
    # an undefined `num_doc`; the total is assumed here).
    num_doc = sum(bh.counter.values())
    portion_acc = 0
    rows = []
    for k in keys:
        portion = bh.counter[k] / num_doc
        portion_acc += portion
        rows.append([k, bh.counter[k], portion, portion_acc])
        # Stop once (almost) all of the mass has been printed.
        if portion_acc > 0.999:
            break
    print_table(rows)
def main(): save_name = "qcknc_val" cid_grouped: Dict[str, Dict[str, List[Dict]]] = load_cppnc_score(save_name) baseline_cid_grouped: Dict[int, List] = load_baseline("train_baseline") # baseline_cid_grouped, cid_grouped, claim_d = load_cppnc_related_data() gold = get_claim_perspective_id_dict() columns = ["cid", "doc_id", "num_good-num_bad"] rows = [columns] for cid_s, pid_entries in cid_grouped.items(): cid = int(cid_s) baseline_pid_entries = baseline_cid_grouped[cid] baseline_score_d: Dict[int, float] = fetch_score_per_pid( baseline_pid_entries) gold_pids = gold[cid] labels = [] per_doc_counter = Counter() for pid, entries in pid_entries.items(): label = any([pid in pids for pids in gold_pids]) labels.append(label) base_score = baseline_score_d[int(pid)] try: for doc_idx, entry in enumerate(entries): doc_id = entry['kdp'].doc_id score = get_score_from_entry(entry) value = doc_value(score, base_score, int(label)) value_type = doc_value_group(value) per_doc_counter[doc_id, value_type] += 1 except KeyError: print(cid, doc_idx, "not found") pass doc_ids = set(left(per_doc_counter.keys())) for doc_id in doc_ids: n_good = per_doc_counter[doc_id, "good"] n_bad = per_doc_counter[doc_id, "bad"] doc_score = n_good - n_bad row = [cid, doc_id, doc_score] if doc_score > 2 or doc_score < -2: rows.append(row) print_table(rows)
def main():
    save_name = sys.argv[1]
    score_d: Dict[Tuple[str, str, int], float] = load_from_pickle(save_name)
    qrel: Dict[str, Dict[str, int]] = load_robust_qrel()
    query_grouped = group_by(score_d.keys(), get_first)
    for query_id in query_grouped:
        keys: List[Tuple[str, str, int]] = query_grouped[query_id]
        doc_id_grouped = group_by(keys, get_second)
        qrel_part = qrel[query_id] if query_id in qrel else {}
        pos_rows = []
        neg_rows = []
        for doc_id in doc_id_grouped:
            label: int = qrel_part[doc_id] if doc_id in qrel_part else 0
            cur_keys: List[Tuple[str, str, int]] = doc_id_grouped[doc_id]
            summary = []
            for key in cur_keys:
                query_id2, doc_id2, passage_idx = key
                assert query_id2 == query_id
                assert doc_id2 == doc_id
                summary.append((passage_idx, score_d[key]))
            summary.sort(key=get_first)
            max_idx = int(argmax(right(summary)))
            score_str = list(["{0:.5f}".format(s) for s in right(summary)])
            max_passage_idx = summary[max_idx][0]
            row = [str(max_passage_idx)] + score_str
            if label:
                pos_rows.append(row)
            else:
                neg_rows.append(row)
        print(query_id)
        print("Positive")
        print_table(pos_rows)
        print("Negative")
        print_table(neg_rows[:30])
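# Pair-list helpers used above (and in the qcknc scripts earlier); again
# sketched from usage rather than taken from the project:
def left(pairs):
    return [p[0] for p in pairs]


def right(pairs):
    return [p[1] for p in pairs]


def argmax(values):
    values = list(values)
    return max(range(len(values)), key=lambda i: values[i])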
def main():
    input_path = sys.argv[1]
    tf_prediction_data = load_pickle_from(input_path)
    tf_prediction_data = flatten_batches(tf_prediction_data)
    logits = tf_prediction_data["logits"]
    label_ids = tf_prediction_data["label_ids"]
    scores = lmap(logit_to_score_softmax, logits)
    assert len(scores) == len(label_ids)
    print("{} data points".format(len(scores)))
    todo = [(get_auc, "auc"), (get_ap, "ap")]
    rows = []
    for metric_fn, metric_name in todo:
        score = metric_fn(label_ids, scores)
        rows.append([metric_name, score])
    print_table(rows)
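# get_auc / get_ap are presumably thin wrappers over sklearn's ranking
# metrics; a sketch under that assumption:
from sklearn.metrics import average_precision_score, roc_auc_score


def get_auc(label_ids, scores):
    return roc_auc_score(label_ids, scores)


def get_ap(label_ids, scores):
    return average_precision_score(label_ids, scores)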
def group_by_docs():
    dvp: List[DocValueParts2] = load()
    # Group by query id
    dvp_qid_grouped: Dict[str, List[DocValueParts2]] = group_by(dvp, get_qid)
    c_all = Counter()
    rows = []
    for qid, entries in dvp_qid_grouped.items():
        # Q: How many kdp are useful?
        # Q: Does relevance matter?
        doc_id_grouped = group_by(entries, get_doc_id)
        rows.append(["qid", qid])
        for doc_id, entries2 in doc_id_grouped.items():
            c = Counter([get_decision_change(e.label, e.init_score, e.score)
                         for e in entries2])
            row = [doc_id, c["decision_change_good"], c["decision_change_bad"], c["no_change"]]
            rows.append(row)
            for k, v in c.items():
                c_all[k] += v
    summary_row = ["summary", c_all["decision_change_good"],
                   c_all["decision_change_bad"], c_all["no_change"]]
    rows = [summary_row] + rows
    print_table(rows)
def show_tp(pred_file_path: str, info_file_path: str, input_type: str,
            score_type: str, qrel_path: str):
    judgments_raw: Dict[str, List[Tuple[str, int]]] = load_qrels_flat(qrel_path)
    judgments = dict_value_map(dict, judgments_raw)
    key_logit = "logits"

    def get_score(entry):
        return get_score_from_logit(score_type, entry[key_logit])

    def get_label(query_id, candidate_id):
        judge_dict = judgments[query_id]
        if candidate_id in judge_dict:
            return judge_dict[candidate_id]
        else:
            return 0

    rows = []
    grouped = load_cache("ck_based_analysis")
    for pair_id, items in grouped.items():
        query_id, kdp_id = pair_id
        if query_id not in judgments:
            continue
        e_list: List[Tuple[str, float]] = []
        n_rel = 0
        for item in items:
            score = get_score(item)
            doc_part_id = item['candidate'].id
            doc_id = get_doc_id(doc_part_id)
            e_list.append((doc_id, score))
            label = bool(get_label(query_id, doc_id))
            if label:
                # Print the confidently-scored true positives.
                if score > 0.1:
                    rows.append([query_id, kdp_id, doc_part_id, score])
                n_rel += 1
        rows.append([len(items), n_rel])
    print_table(rows)
def main():
    text1 = open(sys.argv[1], "r").read()
    text2 = open(sys.argv[2], "r").read()
    tokenizer = get_tokenizer()
    tokens1 = tokenizer.tokenize(text1)
    tokens2 = tokenizer.tokenize(text2)
    rep1 = get_ngrams(tokens1)
    rep2 = get_ngrams(tokens2)
    # rep1/rep2 hold one Counter per n-gram order; print, for each order,
    # the n-grams the two texts share.
    for cnt1, cnt2 in zip(rep1, rep2):
        common: Set[str] = set(cnt1.keys()).intersection(cnt2.keys())
        rows = []
        for ngram in common:
            rows.append([ngram, cnt1[ngram], cnt2[ngram]])
        print_table(rows)
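# get_ngrams is used as if it returns one Counter per n-gram order. A
# sketch under that assumption; the maximum order of 3 is illustrative:
from collections import Counter
from typing import List


def get_ngrams(tokens: List[str], max_n: int = 3) -> List[Counter]:
    reps = []
    for n in range(1, max_n + 1):
        grams = [" ".join(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]
        reps.append(Counter(grams))
    return reps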
def group_by_cids():
    dvp: List[DocValueParts2] = load()
    candidate_d_raw: List[Tuple[int, List[int]]] = get_eval_candidate_as_pids("train")
    candidate_d = {str(k): lmap(str, v) for k, v in candidate_d_raw}
    # Group by query id
    dvp_qid_grouped: Dict[str, List[DocValueParts2]] = group_by(dvp, get_qid)

    def simple(doc_id):
        return doc_id.split("-")[-1]

    rows = []
    for qid, entries in dvp_qid_grouped.items():
        # Q: How many kdp are useful?
        # Q: Does relevance matter?
        candidate_id_grouped = group_by(entries, get_candidate)
        rows.append([qid])
        for candidate_id, entries2 in candidate_id_grouped.items():
            is_initial_candidate = candidate_id in candidate_d[qid]
            avg_score = average(lmap(lambda x: x.score, entries2))
            rows.append(['candidate id:', candidate_id, 'avg_score:', four_digit_float(avg_score)])
            rows.append(['is_initial_candidate', is_initial_candidate])
            rows.append(["doc_id", "score", "gold", "init_pred", "direction", "decision"])
            for e in entries2:
                s = "{}_{}".format(simple(e.kdp.doc_id), e.kdp.passage_idx)
                row = [s, "{0:.2f}".format(e.score), e.label, to_pred(e.init_score),
                       direction(e.score, e.init_score), to_pred(e.score)]
                rows.append(row)
    print_table(rows)
def main(): rlp = "C:\\work\\Code\\Chair\\output\\clue_counter_arg\\ranked_list.txt" html_dir = "C:\\work\\Code\\Chair\\output\\clue_counter_arg\\docs" grouped: Dict[str, List[Tuple[str, str]]] = load_all_docs_cleaned(rlp, html_dir) tids_score_dict = get_f5_tids_score_d_from_svm() def get_score(text): if text in tids_score_dict: return tids_score_dict[text] else: return -10000 class AnalyezedDoc(NamedTuple): doc_id: str text: str score: float max_score_sent: str for query, entries in grouped.items(): ad_list = [] for doc_id, text in entries: all_text_list = [text] + sent_tokenize(text) scores = lmap(get_score, all_text_list) max_idx_ = get_max_idx(scores) max_score = scores[max_idx_] ad = AnalyezedDoc(doc_id, text, max_score, all_text_list[max_idx_]) ad_list.append(ad) ad_list.sort(key=lambda x: x.score, reverse=True) print("QID: ", query) for ad in ad_list[:5]: rows = [['doc_id', ad.doc_id], ['score', ad.score], ['max_sent', ad.max_score_sent], ['fulltext', ad.text]] print("-----") print_table(rows)
def main():
    config = load_run_config()
    info = load_combine_info_jsons(config['info_path'], qck_convert_map, False)
    label_d: Dict[int, List[int]] = get_claim_perspective_id_dict2()
    print("Info length:", len(info))
    predictions: List[Dict] = join_prediction_with_info(config['pred_path'], info)
    print("Prediction length:", len(predictions))
    out_entries: List[QCKOutEntry] = lmap(QCKOutEntry.from_dict, predictions)
    out_entries = out_entries[:10000]
    out_entries.sort(key=lambda x: logit_to_score_softmax(x.logits), reverse=True)

    def get_label(entry: QCKOutEntry):
        return int(entry.candidate.id) in label_d[int(entry.query.query_id)]

    rows = []
    for entry in out_entries[:100]:
        label = get_label(entry)
        score = logit_to_score_softmax(entry.logits)
        print_info(entry, rows, score, label)
    print_table(rows)
def avg_scores():
    dvp: List[DocValueParts2] = load()
    # Group by query id
    dvp_qid_grouped: Dict[str, List[DocValueParts2]] = group_by(dvp, get_qid)
    rows = []
    for qid, entries in dvp_qid_grouped.items():
        # Q: How many kdp are useful?
        # Q: Does relevance matter?
        candidate_id_grouped = group_by(entries, get_candidate)
        c = Counter()
        new_rows = [["candidate id", "label", "value_type", "init_score", "avg_score"]]
        for candidate_id, entries2 in candidate_id_grouped.items():
            label = entries2[0].label
            avg_score = average(lmap(lambda x: x.score, entries2))
            initial_score = entries2[0].init_score
            change = avg_score - initial_score
            value_type = good_or_bad(change, label, 0.01)
            c[value_type] += 1
            row = [candidate_id, label, value_type,
                   four_digit_float(initial_score), four_digit_float(avg_score)]
            new_rows.append(row)
        rows.append([qid, c['good'], c['bad'], c['no change']])
        rows.extend(new_rows)
    print_table(rows)
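# good_or_bad and four_digit_float are used above without definitions.
# Sketches consistent with the "good"/"bad"/"no change" keys and the 0.01
# threshold passed in avg_scores(); the sign convention (a positive change
# helping a positive label) is an assumption:
def good_or_bad(change: float, label: int, min_change: float) -> str:
    if abs(change) < min_change:
        return "no change"
    helped = (change > 0) == bool(label)
    return "good" if helped else "bad"


def four_digit_float(x: float) -> str:
    return "{0:.4f}".format(x)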
def main():
    counter = load_from_pickle(sys.argv[1])
    n_appear = Counter()
    n_max = Counter()
    n_doc = 0
    for key, cnt in counter.items():
        max_idx, num_seg = key
        for i in range(num_seg):
            n_appear[i] += cnt
        n_max[max_idx] += cnt
        n_doc += cnt
    head = ["idx", "n_appear", "n_max", "P(appear)", "P(max)", "P(max|appear)"]
    rows = [head]
    for i in range(20):
        # Guard against segments that never appear (n_appear[i] == 0).
        p_max_given_appear = n_max[i] / n_appear[i] if n_appear[i] else 0
        row = [i, n_appear[i], n_max[i], n_appear[i] / n_doc,
               n_max[i] / n_doc, p_max_given_appear]
        rows.append(row)
    print_table(rows)
def main():
    run_config = json.load(open(sys.argv[1], "r"))
    passage_score_path = run_config['passage_score_path']
    payload_name = run_config['payload_name']
    doc_scores: Dict[int, List[float]] = dict(load_doc_score_prediction(passage_score_path))
    baseline_cid_grouped, cid_grouped, claim_d = load_cppnc_score_and_baseline_and_group(payload_name)
    gold = get_claim_perspective_id_dict()
    g_counter = Counter()
    columns = ["pid doc pair", "good", "bad", "no effect", "no effect pid"]
    rows = [["cid"] + columns]
    record = []
    for cid, pid_entries in cid_grouped.items():
        baseline_pid_entries = baseline_cid_grouped[cid]
        baseline_score_d = {}
        for cpid, a_thing_array in baseline_pid_entries:
            _, pid = cpid
            assert len(a_thing_array) == 1
            baseline_score_d[pid] = a_thing_array[0]['score']
        gold_pids = gold[cid]
        labels = []
        counter = Counter()
        for cpid, things in pid_entries:
            _, pid = cpid
            label = any([pid in pids for pids in gold_pids])
            labels.append(label)
            base_score = baseline_score_d[pid]
            any_effect = False
            try:
                for doc_idx, per_doc in enumerate(things):
                    score = per_doc['score']
                    value = doc_value(score, base_score, int(label))
                    qknc_score = doc_scores[cid][doc_idx]
                    # Only count documents predicted to be relevant.
                    if qknc_score < 0:
                        continue
                    value_type = doc_value_group(value)
                    counter[value_type] += 1
                    if value_type in ["good", "bad"]:
                        record.append((cid, pid, doc_idx, value_type))
                    if value_type != "no effect":
                        any_effect = True
                    counter["pid doc pair"] += 1
                if not any_effect:
                    counter["no effect pid"] += 1
            except KeyError:
                print(cid, doc_idx, "not found")
        rows.append([cid] + list([counter[c] for c in columns]))
        for key, count in counter.items():
            g_counter[key] += count
    rows.append(["all"] + list([g_counter[c] for c in columns]))
    rows.append(["rate"] + list([g_counter[c] / g_counter["pid doc pair"] for c in columns]))
    print_table(rows)
    print_table(record)
from arg.bm25 import BM25
from cache import load_from_pickle
from tab_print import print_table
from tlm.data_gen.msmarco_doc_gen.max_sent_encode import SegScorer, PassageScoreTuner
from tlm.data_gen.msmarco_doc_gen.processed_resource import ProcessedResource10docMulti

if __name__ == "__main__":
    split = "train"
    resource = ProcessedResource10docMulti(split)
    max_seq_length = 512
    job_id = 0
    df = load_from_pickle("mmd_df_10")
    avdl_raw = 1350      # average full-document length in tokens
    avdl_passage = 40    # average passage length in tokens
    rows = []
    k1 = 0.1
    # Sweep the avdl parameter of BM25 and report MRR for each setting.
    for avdl in [10, 40, 100, 200]:
        bm25 = BM25(df, avdl=avdl, num_doc=321384, k1=k1, k2=100, b=0.75)
        scorer = SegScorer(bm25, max_seq_length)
        qids = resource.query_group[job_id]
        tuner = PassageScoreTuner(resource, scorer)
        row = [avdl, tuner.get_mrr(qids)]
        rows.append(row)
    print_table(rows)
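# For reference, the standard Okapi BM25 term weight that the avdl sweep
# above is tuning. arg.bm25.BM25 presumably implements something close to
# this; the query-side (k2) component is omitted from the sketch:
import math


def bm25_term_weight(tf, df, dl, avdl, num_doc, k1=0.1, b=0.75):
    idf = math.log((num_doc - df + 0.5) / (df + 0.5))
    return idf * tf * (k1 + 1) / (tf + k1 * (1 - b + b * dl / avdl))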