import os
import random
import sys
from collections import Counter
from typing import Any, Callable, Dict, List, Tuple

import numpy as np
import scipy.special

# Project-internal helpers (load_stance_verify_annot, binary_kappa, left, right,
# lmap, lfilter, group_by, get_first, get_second, argmax, average,
# load_from_pickle, ...) are assumed to come from the repo's own utility modules.


def combined_agreement(path):
    data = load_stance_verify_annot(path)
    group = {}
    sig2data = {}
    for e in data:
        sig = e['statement'] + e['link']
        sig2data[sig] = e['statement'], e['link']
        if sig not in group:
            group[sig] = []
        group[sig].append((e['support'], e['dispute']))

    # Annotation value encoding.
    NOT_FOUND = 0
    YES = 1
    NOT_SURE = 2

    def get_cont_annot(annot_idx):
        # A statement is "contested" for this annotator if at least one of its
        # links is marked support=YES and at least one is marked dispute=YES.
        statement_group = {}
        cont_annot = []
        for sig in group:
            statement, link = sig2data[sig]
            s, d = group[sig][annot_idx]
            if statement not in statement_group:
                statement_group[statement] = []
            statement_group[statement].append((link, s, d))

        for statement in statement_group:
            s_yes_cnt = 0
            d_yes_cnt = 0
            for link, s, d in statement_group[statement]:
                if s == YES:
                    s_yes_cnt += 1
                if d == YES:
                    d_yes_cnt += 1
            cont = s_yes_cnt > 0 and d_yes_cnt > 0
            cont_annot.append((statement, cont))
        return cont_annot

    annot1 = get_cont_annot(0)
    annot2 = get_cont_annot(1)
    annot1.sort(key=lambda x: x[0])
    annot2.sort(key=lambda x: x[0])
    for e1, e2 in zip(annot1, annot2):
        assert e1[0] == e2[0]

    kappa, p0 = binary_kappa(right(annot1), right(annot2))
    print("kappa", kappa)
    print("p0", p0)

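# The list helpers used throughout this file are not shown in this section.
# Below is a minimal sketch of their apparent behavior, inferred from usage;
# the actual implementations live elsewhere in the repo and may differ.
def left(pairs):
    # First element of each pair.
    return [a for a, b in pairs]


def right(pairs):
    # Second element of each pair.
    return [b for a, b in pairs]


def lmap(fn, items):
    # map() that returns a list.
    return list(map(fn, items))


def lfilter(fn, items):
    # filter() that returns a list.
    return list(filter(fn, items))


def get_first(pair):
    return pair[0]


def get_second(pair):
    return pair[1]


def group_by(items, key_fn):
    # Dict of key -> list of items sharing that key.
    out = {}
    for item in items:
        out.setdefault(key_fn(item), []).append(item)
    return out
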
# Module-level cache for the AAWD train/dev split.
aawd_train_dev_preload = None


def get_aawd_binary_train_dev():
    global aawd_train_dev_preload
    if aawd_train_dev_preload is not None:
        return aawd_train_dev_preload

    train, dev, test = load_aawd_splits_as_binary()
    train_x = left(train)
    train_y = right(train)
    dev_x = left(dev)
    dev_y = right(dev)
    aawd_train_dev_preload = train_x, train_y, dev_x, dev_y
    return aawd_train_dev_preload

def tune_alpha(self, xy):
    # Sweep decision thresholds over the odds scores to maximize train accuracy.
    vectors = []
    for x_i, y_i in xy:
        odd = self.counter_odd(x_i)
        vectors.append((odd, y_i))
    vectors.sort(key=lambda x: x[0], reverse=True)

    total = len(vectors)
    p = sum(right(xy))  # number of positive labels
    fp = 0
    max_acc = 0
    self.opt_alpha = 0
    for idx, (odd, label) in enumerate(vectors):
        # Threshold just below the current score: the first idx+1 items are
        # predicted positive.
        alpha = odd - 1e-8
        if label == 0:
            fp += 1
        tp = (idx + 1) - fp
        fn = p - tp
        tn = total - (idx + 1) - fn
        acc = (tp + tn) / total
        if acc > max_acc:
            self.opt_alpha = alpha
            max_acc = acc
    print("Train acc : {}".format(max_acc))

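# A self-contained sketch of the same one-pass threshold sweep on toy data
# (hypothetical scores and labels, independent of the class above). Sorting by
# score descending lets tp/fp be updated in O(1) per candidate threshold.
def sweep_threshold(scored):
    # scored: list of (score, label) with label in {0, 1}
    scored = sorted(scored, key=lambda x: x[0], reverse=True)
    total = len(scored)
    p = sum(label for _, label in scored)
    fp = 0
    best_acc = 0.0
    best_threshold = 0.0
    for idx, (score, label) in enumerate(scored):
        if label == 0:
            fp += 1
        tp = (idx + 1) - fp
        tn = total - (idx + 1) - (p - tp)
        acc = (tp + tn) / total
        if acc > best_acc:
            best_acc = acc
            best_threshold = score - 1e-8
    return best_threshold, best_acc


# e.g. sweep_threshold([(0.9, 1), (0.8, 1), (0.4, 0), (0.2, 0)])
# -> (just below 0.8, 1.0)
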
def featurize_fn(voca, voca2idx, datapoint):
    rm_list, label = datapoint
    # Back off non-positive scores to a fraction of the smallest positive score.
    nonzero = lfilter(lambda x: x > 0, right(rm_list))
    if nonzero:
        nonzero_min = min(nonzero)
    else:
        nonzero_min = 0
    terms = left(rm_list)
    term_ids = lmap(lambda x: voca2idx[x], terms)
    scores = [s if s > 0 else 0.2 * nonzero_min for s in right(rm_list)]
    v = np.zeros([len(voca)])
    for idx, score in zip(term_ids, scores):
        v[idx] = score
    return v, label

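# Hypothetical usage of featurize_fn (toy vocabulary, made-up scores):
#   voca = ["apple", "pie"]; voca2idx = {"apple": 0, "pie": 1}
#   featurize_fn(voca, voca2idx, ([("apple", 0.5), ("pie", -1.0)], 1))
# The non-positive "pie" score becomes 0.2 * 0.5 = 0.1, so the dense vector is
# [0.5, 0.1] with label 1.
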
def stat():
    data = load_all_aawd_alignment()
    print(len(data))
    y_labels = right(data)
    print(len(y_labels))
    counter = Counter(y_labels)
    print(counter)

def main():
    # Compare whether two scoring runs pick the same best passage for each
    # relevant document that has more than one passage.
    score_d: Dict[Tuple[str, str, int], float] = load_from_pickle("robust_score_d")
    score_d2: Dict[Tuple[str, str, int], float] = load_from_pickle("robust_score_d2")
    qrel: Dict[str, Dict[str, int]] = load_robust_qrel()
    query_grouped = group_by(score_d.keys(), get_first)
    counter = Counter()
    for query_id in query_grouped:
        keys: List[Tuple[str, str, int]] = query_grouped[query_id]
        doc_id_grouped = group_by(keys, get_second)
        qrel_part = qrel[query_id] if query_id in qrel else {}
        for doc_id in doc_id_grouped:
            label: int = qrel_part[doc_id] if doc_id in qrel_part else 0
            cur_keys: List[Tuple[str, str, int]] = doc_id_grouped[doc_id]
            if len(cur_keys) == 1:
                continue
            summary = []
            summary2 = []
            for key in cur_keys:
                query_id2, doc_id2, passage_idx = key
                assert query_id2 == query_id
                assert doc_id2 == doc_id
                score = score_d[key]
                score2 = score_d2[key]
                summary.append((passage_idx, score))
                summary2.append((passage_idx, score2))
            summary.sort(key=get_first)
            summary2.sort(key=get_first)
            max_idx = int(argmax(right(summary)))
            max_idx2 = int(argmax(right(summary2)))
            if label:
                if max_idx == max_idx2:
                    counter[1] += 1
                else:
                    counter[0] += 1
    print(counter)
    accuracy = counter[1] / (counter[0] + counter[1])
    print("accuracy {}".format(accuracy))

# Module-level cache for the argument pointwise data.
argu_pointwise_preload = None


def get_argu_pointwise_data():
    load_data = load_argu_data_from_pickle
    global argu_pointwise_preload
    if argu_pointwise_preload is not None:
        return argu_pointwise_preload

    tprint("get_argu_pointwise_data")
    train_data: List[Tuple[Passage, int]] = load_data("training")
    dev_data = load_data("validation")

    def get_texts(e: Tuple[Passage, int]) -> str:
        return e[0].text.replace("\n", " ")

    train_x: List[str] = lmap(get_texts, train_data)
    train_y: List[int] = right(train_data)
    dev_x: List[str] = lmap(get_texts, dev_data)
    dev_y: List[int] = right(dev_data)
    argu_pointwise_preload = train_x, train_y, dev_x, dev_y
    return argu_pointwise_preload

def sample_kdps(qk_list: List[QKUnit]) -> List[QKUnit]:
    # Keep at most n randomly chosen KDPs per query; queries with fewer than n
    # keep all of theirs (slicing past the end is safe).
    n = 4

    def sample(l: List[KDP]):
        random.shuffle(l)
        return l[:n]

    right_things = lmap(sample, right(qk_list))
    return list(zip(left(qk_list), right_things))

def main():
    save_name = sys.argv[1]
    score_d: Dict[Tuple[str, str, int], float] = load_from_pickle(save_name)
    qrel: Dict[str, Dict[str, int]] = load_robust_qrel()
    query_grouped = group_by(score_d.keys(), get_first)
    for query_id in query_grouped:
        keys: List[Tuple[str, str, int]] = query_grouped[query_id]
        doc_id_grouped = group_by(keys, get_second)
        qrel_part = qrel[query_id] if query_id in qrel else {}
        pos_rows = []
        neg_rows = []
        for doc_id in doc_id_grouped:
            label: int = qrel_part[doc_id] if doc_id in qrel_part else 0
            cur_keys: List[Tuple[str, str, int]] = doc_id_grouped[doc_id]
            summary = []
            for key in cur_keys:
                query_id2, doc_id2, passage_idx = key
                assert query_id2 == query_id
                assert doc_id2 == doc_id
                score = score_d[key]
                summary.append((passage_idx, score))
            summary.sort(key=get_first)
            max_idx = int(argmax(right(summary)))
            score_str = ["{0:.5f}".format(s) for s in right(summary)]
            max_passage_idx = summary[max_idx][0]
            row = [str(max_passage_idx)] + score_str
            if label:
                pos_rows.append(row)
            else:
                neg_rows.append(row)
        print(query_id)
        print("Positive")
        print_table(pos_rows)
        print("Negative")
        print_table(neg_rows[:30])

def get_scores(r: List[Tuple[int, int]]) -> Dict:
    tp = sum([1 if a == b == 1 else 0 for a, b in r])
    tn = sum([1 if a == b == 0 else 0 for a, b in r])
    accuracy = (tp + tn) / len(r)
    pp = sum(left(r))   # predicted positives
    gp = sum(right(r))  # gold positives
    precision = tp / pp if pp != 0 else 0
    recall = tp / gp if gp != 0 else 0
    return {'accuracy': accuracy,
            'precision': precision,
            'recall': recall}

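# Usage sketch: each pair appears to be (predicted_label, gold_label), since the
# precision denominator is sum(left(r)). With made-up pairs:
#   get_scores([(1, 1), (1, 0), (0, 1), (0, 0)])
#   -> {'accuracy': 0.5, 'precision': 0.5, 'recall': 0.5}
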
def combine_ranked_list(ranked_list_list):
    data = []
    for d in ranked_list_list:
        for query, ranked_list in d.items():
            data.append((query, ranked_list))

    new_d = {}
    key_fn = lambda x: x[0]
    for key, sub_data in group_by(data, key_fn).items():
        ranked_list = right(sub_data)
        new_d[key] = merge_ranked_list_list(ranked_list)
    return new_d

def get_scores(option, pred_path: FilePath) -> Tuple[List[str], List[np.ndarray]]:
    raw_predictions: List[Tuple[str, List[np.ndarray]]] = load_prediction(pred_path)
    if option == "avg":
        def reducer(data: List[np.ndarray]) -> np.ndarray:
            np_arr: np.ndarray = np.array(data)
            return np_arr.mean(axis=0)
    else:
        assert False

    keys = left(raw_predictions)
    reduced_scores = lmap(reducer, right(raw_predictions))
    return keys, reduced_scores

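# The "avg" reducer stacks the per-entry prediction vectors and averages them
# elementwise. With hypothetical two-class scores:
#   reducer([np.array([0.2, 0.8]), np.array([0.6, 0.4])]) -> array([0.4, 0.6])
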
def filter_with_ranked_list(
        qk_units: List[QKUnit],
        ranked_list_d: Dict[str, List[TrecRankedListEntry]],
        threshold,
        top_k,
) -> List[QKUnit]:
    out_qk_units = []
    for q, k_list in qk_units:
        try:
            cur_ranked_list = ranked_list_d[q.query_id]
            entries: Dict[str, TrecRankedListEntry] = {
                e.doc_id: e for e in cur_ranked_list
            }
            n_k_list = len(k_list)
            not_found_set = set()

            def get_score(k: KDP):
                key = k.to_str()
                if key in entries:
                    s: TrecRankedListEntry = entries[key]
                    return s.score
                else:
                    not_found_set.add(key)
                    return -1e10

            k_list.sort(key=get_score, reverse=True)

            def higher(k: KDP) -> bool:
                return get_score(k) >= threshold

            if threshold is not None:
                k_list = lfilter(higher, k_list)

            if top_k is None or top_k == -1:
                pass
            else:
                k_list = k_list[:top_k]

            out_qk_units.append((q, k_list))
            if not_found_set:
                print("For query {}, {} of {} do not have score".format(
                    q.query_id, len(not_found_set), n_k_list))
        except KeyError as e:
            # Queries missing from ranked_list_d are dropped from the output.
            print(e, "KeyError", q.query_id)
    print(lmap(len, right(out_qk_units)))
    return out_qk_units

def load_multiple_ranked_list(dir_path, get_key_from_name):
    files = get_dir_files(dir_path)
    data = []
    for file_path in files:
        name = os.path.basename(file_path)
        ranked_list_d = load_galago_ranked_list(file_path)
        for query, ranked_list in ranked_list_d.items():
            data.append((name, ranked_list))

    new_d = {}
    key_fn = lambda x: get_key_from_name(x[0])
    for key, sub_data in group_by(data, key_fn).items():
        ranked_list = right(sub_data)
        new_d[key] = merge_ranked_list_list(ranked_list)
    return new_d

def eval(
        score_pred_file_name: FileName,
        cpid_resolute_file: FileName,
        n_way=3,
):
    topic = "abortion"
    pred_path: FilePath = pjoin(output_path, score_pred_file_name)
    dpid_resolute: Dict[str, DPID] = load_dpid_resolute(cpid_resolute_file)
    score_d: Dict[DPID, np.ndarray] = get_datapoint_score(pred_path, dpid_resolute, "avg")

    def argmax(arr: np.ndarray) -> int:
        return int(arr.argmax())

    pred_d: Dict[DPID, int] = dict_value_map(argmax, score_d)
    dev_labels = get_dev_labels(topic)
    if n_way == 2:
        # Collapse the 3-way labels to binary: 0 stays 0; 1 and 2 both map to 1.
        def merge_label(e):
            dpid, label = e
            return dpid, {0: 0, 1: 1, 2: 1}[label]

        dev_labels = lmap(merge_label, dev_labels)

    def fetch_pred(e: Tuple[DPID, int]):
        dpid, label = e
        return pred_d[dpid]

    gold_list: List[int] = right(dev_labels)
    pred_list: List[int] = lmap(fetch_pred, dev_labels)
    if n_way == 3:
        all_result = eval_3label(gold_list, pred_list)
    elif n_way == 2:
        all_result = eval_2label(gold_list, pred_list)
    else:
        assert False
    print(all_result)
    # Macro-average F1 over the per-label results.
    f1 = sum([result['f1'] for result in all_result]) / n_way
    print("Avg F1 : ", f1)

def summarize_score(info_dir, prediction_file) -> Dict[CPIDPair, float]:
    info = load_combine_info_jsons(info_dir)
    print("Info has {} entries".format(len(info)))

    def logit_to_score_reg(logit):
        # For regression-style output: use the raw first logit.
        return logit[0]

    def logit_to_score_softmax(logit):
        # For 2-way classification output: positive-class probability.
        return scipy.special.softmax(logit)[1]

    scores: Dict[DataID, Tuple[CPIDPair, float]] = collect_score.collect_scores(
        prediction_file, info, logit_to_score_softmax)
    grouped = group_by(scores.values(), lambda x: x[0])
    print("Group size:", len(grouped))
    out_d = {}
    for cpid, items in grouped.items():
        final_score = sum(right(items))
        out_d[cpid] = final_score

    num_items_per_group = average(lmap(len, grouped.values()))
    print("Num items per group : ", num_items_per_group)
    return out_d

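# Quick numeric check of logit_to_score_softmax (a standard softmax identity,
# not project-specific): scipy.special.softmax([0.0, 1.0]) ~= [0.269, 0.731],
# so a logit of [0.0, 1.0] yields a score of about 0.731.
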
def reduce_score(raw_predictions: List[Tuple[str, List[float]]],
                 option) -> Tuple[List[str], List[float]]:
    if option == "avg":
        def avg_fn(l):
            r = average(l)
            cnt = 0
            for t in l:
                if abs(t - r) > 0.5:
                    # Debug print for scores far from their mean.
                    cnt += 1
                    print(l)
            return average(l)

        reducer: Callable[[List[Any]], float] = avg_fn
    elif option == "max":
        reducer: Callable[[List[Any]], float] = max
    else:
        assert False

    keys = left(raw_predictions)
    reduced_scores = lmap(reducer, right(raw_predictions))
    return keys, reduced_scores

def filter_qk_rel(qk_candidate: List[QKUnit],
                  query_lms: Dict[str, Counter],
                  top_n=50) -> List[QKUnit]:
    # Keep the top_n KDPs per query, ranked by query-LM score.
    scorer = LMScorer(query_lms)
    filtered_qk_list: List[QKUnit] = []
    ticker = TimeEstimator(len(qk_candidate))
    for query, k_candidates in qk_candidate:
        def get_kdp_score(kdp: KDP) -> float:
            return scorer.score(query.query_id, kdp.tokens)

        k_candidates.sort(key=get_kdp_score, reverse=True)
        good_kdps: List[KDP] = k_candidates[:top_n]
        filtered_qk_list.append((query, good_kdps))
        ticker.tick()

    n_no_kdp_query = sum(lmap(lambda l: 1 if not l else 0, right(filtered_qk_list)))
    print("{} queries, {} have no kdp".format(len(qk_candidate), n_no_kdp_query))
    return filtered_qk_list

def filter_qk(qk_candidate: List[QKUnit],
              query_lms: Dict[str, Counter],
              alpha=0.5) -> List[QKUnit]:
    # Unlike filter_qk_rel, keep every KDP with a positive LM score rather than
    # a fixed top_n.
    scorer = LMScorer(query_lms, alpha)
    filtered_qk_list: List[QKUnit] = []
    ticker = TimeEstimator(len(qk_candidate))
    for query, k_candidates in qk_candidate:
        def get_kdp_score(kdp: KDP) -> float:
            return scorer.score(query.query_id, kdp.tokens)

        good_kdps: List[KDP] = lfilter(lambda kdp: get_kdp_score(kdp) > 0, k_candidates)
        filtered_qk_list.append((query, good_kdps))
        ticker.tick()

    n_no_kdp_query = sum(lmap(lambda l: 1 if not l else 0, right(filtered_qk_list)))
    print("{} queries, {} have no kdp".format(len(qk_candidate), n_no_kdp_query))
    return filtered_qk_list

def main(): save_name = "alamri_pair" info_entries, output_d = load_from_pickle(save_name) html = HtmlVisualizer("alamri_pairing_deletion.html", use_tooltip=True) initial_text = load_p_h_pair_text( at_output_dir("alamri_pilot", "true_pair_small.csv")) per_group_summary: List[PerGroupSummary] = summarize_pair_deletion_results( info_entries, output_d) def float_arr_to_str_arr(float_arr): return list(map(two_digit_float, float_arr)) def float_arr_to_cell(head, float_arr): return [Cell(head)] + lmap(Cell, map(two_digit_float, float_arr)) def float_arr_to_cell2(head, float_arr): return [Cell(head)] + lmap(Cell, map("{0:.4f}".format, float_arr)) num_data = len(output_d['input_ids']) for data_idx, (p, h) in enumerate(initial_text): group_summary = per_group_summary[data_idx] p_tokens = p.split() h_tokens = h.split() base_score = group_summary.score_d[(-1, -1)] pred_str = make_prediction_summary_str(base_score) html.write_paragraph("Prediction: {}".format(pred_str)) keys = list(group_summary.score_d.keys()) p_idx_max = max(left(keys)) h_idx_max = max(right(keys)) def get_pair_score_by_h(key): p_score, h_score = group_summary.effect_d[key] return h_score def get_pair_score_by_p(key): p_score, h_score = group_summary.effect_d[key] return p_score def get_table(get_pair_score_at): head = [Cell("")] + [Cell(t) for t in p_tokens] rows = [head] for h_idx in range(h_idx_max + 1): row = [Cell(h_tokens[h_idx])] for p_idx in range(p_idx_max + 1): s = get_pair_score_at((p_idx, h_idx)) one_del_score = group_summary.score_d[(p_idx, -1)] two_del_score = group_summary.score_d[(p_idx, h_idx)] tooltip_str = "{} -> {}".format( float_arr_to_str_arr(one_del_score), float_arr_to_str_arr(two_del_score)) row.append( get_tooltip_cell(two_digit_float(s), tooltip_str)) rows.append(row) return rows html.write_table(get_table(get_pair_score_by_p)) html.write_table(get_table(get_pair_score_by_h)) html.write_bar()
def analyze_gradient(data, tokenizer):
    gradients = data['gradients']
    d_input_ids = data['d_input_ids']
    mask_input_ids = data['masked_input_ids']
    masked_lm_positions = data["masked_lm_positions"]

    n_inst, seq_len = mask_input_ids.shape
    n_inst2, def_len = d_input_ids.shape
    assert n_inst == n_inst2
    def_len = 256
    hidden_dim = 768
    reshaped_grad = reshape_gradienet(gradients, n_inst, def_len, hidden_dim)
    print(reshaped_grad.shape)

    n_pred = reshaped_grad.shape[1]
    grad_per_token = np.sum(np.abs(reshaped_grad), axis=3)

    html_writer = HtmlVisualizer("dict_grad.html", dark_mode=False)

    for inst_idx in range(n_inst):
        tokens = tokenizer.convert_ids_to_tokens(mask_input_ids[inst_idx])
        #ans_tokens = tokenizer.convert_ids_to_tokens(input_ids[inst_idx])
        for i in range(len(tokens)):
            if tokens[i] == "[MASK]":
                tokens[i] = "[MASK_{}]".format(i)
            if tokens[i] == "[SEP]":
                tokens[i] = "[SEP]<br>"
        def_tokens = tokenizer.convert_ids_to_tokens(d_input_ids[inst_idx])
        s = tokenizer_wo_tf.pretty_tokens(tokens)

        lines = []
        grad_total_max = 0
        for pred_idx in range(n_pred):
            row = []
            max_val = max(grad_per_token[inst_idx, pred_idx])
            total = sum(grad_per_token[inst_idx, pred_idx])
            mask_pos = masked_lm_positions[inst_idx, pred_idx]
            if total > grad_total_max:
                grad_total_max = total

            row.append(Cell(mask_pos))
            row.append(Cell(int(total)))

            for def_idx in range(def_len):
                term = def_tokens[def_idx]
                # "##" marks a subword glued to its left neighbor; cont_right
                # checks whether the next token continues the current one.
                cont_right = def_idx + 1 < def_len and def_tokens[def_idx + 1][:2] == "##"
                cont_left = term[:2] == "##"
                space_left = " " if not cont_left else ""
                space_right = " " if not cont_right else ""
                if term == "[PAD]":
                    break
                if term == "[unused5]":
                    term = "[\\n]"
                score = grad_per_token[inst_idx, pred_idx, def_idx] / (hidden_dim * 2)
                bg_color = get_color(score)
                row.append(Cell(term, score, not cont_left, not cont_right))
                print("{}({})".format(term, grad_per_token[inst_idx, pred_idx, def_idx]), end=" ")
            lines.append((mask_pos, row))
            print("")

        lines.sort(key=lambda x: x[0])

        s = s.replace("[unused4]", "<b>DictTerm</b>")
        html_writer.write_paragraph(s)

        if grad_total_max > 5000000:
            html_writer.write_headline("HIGH Gradient")

        rows = right(lines)
        html_writer.write_table(rows)

        print("----------")
    html_writer.close()

def normalize_right(pair_list):
    right_scores = normalize(right(pair_list))
    return list(zip(left(pair_list), right_scores))

def doc_lm_scoring():
    gold = get_claim_perspective_id_dict()
    d_ids = list(load_train_claim_ids())
    claims: List[Dict] = get_claims_from_ids(d_ids)
    top_n = 10
    q_res_path = FilePath(
        "/mnt/nfs/work3/youngwookim/data/perspective/train_claim/q_res_100")
    ranked_list: Dict[str, List[SimpleRankedListEntry]] = load_galago_ranked_list(q_res_path)
    preload_docs(ranked_list, claims, top_n)
    claim_lms = build_gold_lms(claims)
    claim_lms_d = {lm.cid: lm for lm in claim_lms}
    bg_lm = average_counters(lmap(lambda x: x.LM, claim_lms))
    log_bg_lm = get_lm_log(bg_lm)
    stopwords = load_stopwords_for_query()
    alpha = 0.5

    html_visualizer = HtmlVisualizer("doc_lm_doc_level.html")
    tokenizer = PCTokenizer()
    random_passages = []
    num_pos_sum = 0
    num_pos_exists = 0
    for c in claims:
        q_res: List[SimpleRankedListEntry] = ranked_list[str(c['cId'])]
        html_visualizer.write_headline("{} : {}".format(c['cId'], c['text']))
        # for cluster in clusters:
        #     html_visualizer.write_paragraph("---")
        #     p_text_list: List[str] = lmap(perspective_getter, cluster)
        #     for text in p_text_list:
        #         html_visualizer.write_paragraph(text)
        #     html_visualizer.write_paragraph("---")
        claim_lm = claim_lms_d[c['cId']]
        topic_lm_prob = smooth(claim_lm.LM, bg_lm, alpha)
        log_topic_lm = get_lm_log(topic_lm_prob)
        log_odd: Counter = subtract(log_topic_lm, log_bg_lm)

        # Threshold: average log-odds of the claim's own (stemmed) tokens.
        claim_text = c['text']
        claim_tokens = tokenizer.tokenize_stem(claim_text)
        scores = []
        for t in claim_tokens:
            if t in log_odd:
                scores.append(log_odd[t])
        threshold = average(scores)

        s = "\t".join(left(log_odd.most_common(30)))
        html_visualizer.write_paragraph("Log odd top: " + s)
        not_found = set()

        def get_log_odd(x):
            x = tokenizer.stemmer.stem(x)
            if x not in log_odd:
                not_found.add(x)
            return log_odd[x]

        def get_probs(x):
            x = tokenizer.stemmer.stem(x)
            if x not in topic_lm_prob:
                not_found.add(x)
            return topic_lm_prob[x]

        def get_passage_score(p):
            # Average per-token log-odds; empty passages score 0.
            return sum([log_odd[tokenizer.stemmer.stem(t)] for t in p]) / len(p) if len(p) > 0 else 0

        passages = iterate_passages(q_res, top_n, get_passage_score)
        passages.sort(key=lambda x: x[1], reverse=True)

        html_visualizer.write_paragraph("Threshold {}".format(threshold))
        top5_scores = right(passages[:5])
        bot5_scores = right(passages[-5:])
        if len(random_passages) > 5:
            random_sel_passages = random.choices(random_passages, k=5)
        else:
            random_sel_passages = []
        random5_scores = lmap(get_passage_score, random_sel_passages)

        def score_line(scores):
            return " ".join(lmap(two_digit_float, scores))

        html_visualizer.write_paragraph("top 5: " + score_line(top5_scores))
        html_visualizer.write_paragraph("bot 5: " + score_line(bot5_scores))
        html_visualizer.write_paragraph("random 5: " + score_line(random5_scores))

        num_pos = len(lfilter(lambda x: x[1] > 0, passages))
        num_pos_sum += num_pos
        if num_pos > 0:
            num_pos_exists += 1

        def print_doc(doc, html_visualizer, score):
            cells = lmap(lambda x: get_cell_from_token(x, get_log_odd(x)), doc)
            html_visualizer.write_headline("score={}".format(score))
            html_visualizer.multirow_print(cells, width=20)

        random_passages.extend(left(passages))
        if threshold < 0:
            continue
        for doc, score in passages:
            if score < 0:
                break
            print_doc(doc, html_visualizer, score)

        html_visualizer.write_headline("Bottom 5")
        for doc, score in passages[-5:]:
            print_doc(doc, html_visualizer, score)

    print("{} claims, {} positive passages over {} claims".format(
        len(claims), num_pos_sum, num_pos_exists))