Beispiel #1
0
def combined_agreement(path):
    # Compute inter-annotator agreement on per-statement "controversial"
    # labels.  Each annotation entry covers a (statement, link) pair with
    # support/dispute judgments; agreement is measured per statement.
    data = load_stance_verify_annot(path)
    group = {}      # signature -> list of (support, dispute), one per annotator
    sig2data = {}   # signature -> (statement, link)
    for e in data:
        # NOTE(review): plain string concatenation as a key assumes
        # statement/link pairs cannot collide when concatenated.
        sig = e['statement'] + e['link']
        sig2data[sig] = e['statement'], e['link']
        if sig not in group:
            group[sig] = []

        group[sig].append((e['support'], e['dispute']))

    # Label codes used in the annotation data (NOT_FOUND/NOT_SURE unused below).
    NOT_FOUND = 0
    YES = 1
    NOT_SURE = 2

    def get_cont_annot(annot_idx):
        # Per-statement controversy flags for annotator annot_idx: a statement
        # is controversial if, across its links, this annotator marked at
        # least one support=YES and at least one dispute=YES.
        statement_group = {}
        cont_annot = []
        for sig in group:
            statement, link = sig2data[sig]
            # Assumes every signature has at least annot_idx+1 annotations.
            s, d = group[sig][annot_idx]
            if statement not in statement_group:
                statement_group[statement] = []
            statement_group[statement].append((link, s, d))

        for statement in statement_group:
            s_yes_cnt = 0
            d_yes_cnt = 0

            for link, s, d in statement_group[statement]:
                if s == YES:
                    s_yes_cnt += 1
                if d == YES:
                    d_yes_cnt += 1
            if s_yes_cnt > 0 and d_yes_cnt > 0:
                cont = True
            else:
                cont = False
            cont_annot.append((statement, cont))
        return cont_annot

    annot1 = get_cont_annot(0)
    annot2 = get_cont_annot(1)

    # Align both annotators' lists by statement before pairing them up.
    annot1.sort(key=lambda x: x[0])
    annot2.sort(key=lambda x: x[0])

    for e1, e2 in zip(annot1, annot2):
        assert e1[0] == e2[0]

    # Cohen's kappa and raw agreement (p0) over the boolean labels.
    kappa, p0 = binary_kappa(right(annot1), right(annot2))
    print("kappa", kappa)
    print("p0", p0)
Beispiel #2
0
def get_aawd_binary_train_dev():
    """Return (train_x, train_y, dev_x, dev_y) for the AAWD binary task.

    The result is computed once and memoized in the module-level
    `aawd_train_dev_preload`; subsequent calls return the cached tuple.
    """
    global aawd_train_dev_preload
    if aawd_train_dev_preload is None:
        train, dev, _test = load_aawd_splits_as_binary()
        aawd_train_dev_preload = (
            left(train),
            right(train),
            left(dev),
            right(dev),
        )
    return aawd_train_dev_preload
Beispiel #3
0
    def tune_alpha(self, xy):
        # Sweep decision thresholds over counter_odd scores and store the one
        # maximizing training accuracy in self.opt_alpha.
        # xy: pairs (x_i, y_i) with binary labels y_i in {0, 1}.
        # NOTE(review): xy is iterated twice (here and via right(xy) below),
        # so it must be a re-iterable sequence, not a generator — confirm.
        vectors = []
        for x_i, y_i in xy:
            odd = self.counter_odd(x_i)
            vectors.append((odd, y_i))
        # Descending by score: the prefix of length idx+1 is the set predicted
        # positive when the threshold sits just below vectors[idx]'s score.
        vectors.sort(key=lambda x: x[0], reverse=True)

        total = len(vectors)
        p = sum(right(xy))  # number of gold-positive examples
        fp = 0
        max_acc = 0
        self.opt_alpha = 0
        for idx, (odd, label) in enumerate(vectors):
            # Threshold slightly below this score so the item is included.
            alpha = odd - 1e-8
            if label == 0:
                fp += 1

            tp = (idx + 1) - fp
            fn = p - tp
            tn = total - (idx + 1) - fn
            acc = (tp + tn) / (total)
            if acc > max_acc:
                self.opt_alpha = alpha
                max_acc = acc

        print("Train acc : {}".format(max_acc))
Beispiel #4
0
def featurize_fn(voca, voca2idx, datapoint):
    """Turn one (rm_list, label) datapoint into a dense score vector.

    rm_list is a list of (term, score) pairs; each term is mapped through
    voca2idx to a position in a len(voca)-sized vector.  Non-positive
    scores are smoothed to 0.2 * (smallest positive score in the list),
    or 0 when no positive score exists.

    Returns (vector, label).

    Changes: removed the redundant list() wrapper around a list
    comprehension and replaced the helper-based pair splitting with plain
    comprehensions; behavior is unchanged.
    """
    rm_list, label = datapoint

    positive_scores = [s for _, s in rm_list if s > 0]
    # Fall back to 0 so the smoothed value is 0 when all scores are <= 0.
    nonzero_min = min(positive_scores) if positive_scores else 0

    term_ids = [voca2idx[t] for t, _ in rm_list]
    scores = [s if s > 0 else 0.2 * nonzero_min for _, s in rm_list]

    v = np.zeros([len(voca)])
    for idx, score in zip(term_ids, scores):
        v[idx] = score
    return v, label
Beispiel #5
0
def stat():
    """Print the size and label distribution of the AAWD alignment data."""
    data = load_all_aawd_alignment()
    print(len(data))
    y_labels = [label for _, label in data]
    print(len(y_labels))
    print(Counter(y_labels))
Beispiel #6
0
def main():
    # Compare two score pickles over robust passages: for each relevant doc
    # with multiple passages, check whether both scorings pick the same best
    # passage, and report the agreement rate.
    score_d: Dict[Tuple[str, str, int],
                  float] = load_from_pickle("robust_score_d")
    score_d2: Dict[Tuple[str, str, int],
                   float] = load_from_pickle("robust_score_d2")

    qrel: Dict[str, Dict[str, int]] = load_robust_qrel()
    # Keys are (query_id, doc_id, passage_idx); group by query first.
    query_grouped = group_by(score_d.keys(), get_first)

    counter = Counter()
    for query_id in query_grouped:
        keys: List[Tuple[str, str, int]] = query_grouped[query_id]

        doc_id_grouped = group_by(keys, get_second)

        qrel_part = qrel[query_id] if query_id in qrel else {}
        for doc_id in doc_id_grouped:
            # Unjudged docs default to non-relevant (label 0).
            label: int = qrel_part[doc_id] if doc_id in qrel_part else 0
            cur_keys: List[Tuple[str, str, int]] = doc_id_grouped[doc_id]
            if len(cur_keys) == 1:
                # Single-passage docs agree trivially; skip them.
                continue
            summary = []
            summary2 = []
            for key in cur_keys:
                query_id2, doc_id2, passage_idx = key
                assert query_id2 == query_id
                assert doc_id2 == doc_id
                # NOTE(review): assumes score_d2 contains every key of score_d.
                score = score_d[key]
                score2 = score_d2[key]
                summary.append((passage_idx, score))
                summary2.append((passage_idx, score2))

            # Sort by passage_idx so the two argmax positions are comparable.
            summary.sort(key=get_first)
            summary2.sort(key=get_first)

            max_idx = int(argmax(right(summary)))
            max_idx2 = int(argmax(right(summary2)))

            if label:
                # Only relevant docs count: bucket 1 = agree on best passage.
                if max_idx == max_idx2:
                    counter[1] += 1
                else:
                    counter[0] += 1

    print(counter)
    # NOTE(review): ZeroDivisionError if no relevant multi-passage doc exists.
    accuracy = counter[1] / (counter[0] + counter[1])
    print("accuracy {}".format(accuracy))
Beispiel #7
0
def get_argu_pointwise_data():
    """Return (train_x, train_y, dev_x, dev_y) for pointwise argument data.

    Computed once and memoized in the module-level `argu_pointwise_preload`.
    """
    load_data = load_argu_data_from_pickle
    global argu_pointwise_preload
    if argu_pointwise_preload is not None:
        return argu_pointwise_preload
    tprint("get_argu_pointwise_data")
    train_data: List[Tuple[Passage, int]] = load_data("training")
    dev_data = load_data("validation")

    def flatten_text(e: Tuple[Passage, int]) -> str:
        # Newlines inside passage text become spaces.
        return e[0].text.replace("\n", " ")

    train_x: List[str] = [flatten_text(e) for e in train_data]
    train_y: List[int] = [label for _, label in train_data]
    dev_x: List[str] = [flatten_text(e) for e in dev_data]
    dev_y: List[int] = [label for _, label in dev_data]
    argu_pointwise_preload = train_x, train_y, dev_x, dev_y
    return argu_pointwise_preload
Beispiel #8
0
def sample_kdps(qk_list: List[QKUnit], n=4) -> List[QKUnit]:
    """Keep at most `n` randomly chosen KDPs per query.

    Generalized: the former hard-coded sample size (4) is now the `n`
    parameter; its default preserves the original behavior.

    NOTE: shuffles each KDP list in place, so callers' lists are reordered.
    """
    def sample(l: List[KDP]):
        random.shuffle(l)
        return l[:n]

    sampled = lmap(sample, right(qk_list))
    return list(zip(left(qk_list), sampled))
Beispiel #9
0
def main():
    # Print, per query, a per-document table of passage scores from a score
    # pickle given on argv[1], split into relevant and non-relevant docs.
    save_name = sys.argv[1]
    score_d: Dict[Tuple[str, str, int], float] = load_from_pickle(save_name)

    qrel: Dict[str, Dict[str, int]] = load_robust_qrel()
    # Keys are (query_id, doc_id, passage_idx); group by query first.
    query_grouped = group_by(score_d.keys(), get_first)

    for query_id in query_grouped:
        keys: List[Tuple[str, str, int]] = query_grouped[query_id]

        doc_id_grouped = group_by(keys, get_second)

        qrel_part = qrel[query_id] if query_id in qrel else {}
        pos_rows = []
        neg_rows = []
        for doc_id in doc_id_grouped:
            # Unjudged docs default to non-relevant (label 0).
            label: int = qrel_part[doc_id] if doc_id in qrel_part else 0
            cur_keys: List[Tuple[str, str, int]] = doc_id_grouped[doc_id]
            summary = []
            for key in cur_keys:
                query_id2, doc_id2, passage_idx = key
                assert query_id2 == query_id
                assert doc_id2 == doc_id
                score = score_d[key]
                summary.append((passage_idx, score))

            # Order by passage index so score columns line up across docs.
            summary.sort(key=get_first)

            max_idx = int(argmax(right(summary)))

            score_str = list(["{0:.5f}".format(s) for s in right(summary)])

            # First column: passage index of the best-scoring passage.
            max_passage_idx = summary[max_idx][0]
            row = [str(max_passage_idx)] + score_str
            if label:
                pos_rows.append(row)
            else:
                neg_rows.append(row)

        print(query_id)
        print("Positive")
        print_table(pos_rows)
        print("Negative")
        print_table(neg_rows[:30])  # cap the (usually long) negative list
Beispiel #10
0
def get_scores(r: List[Tuple[int, int]]) -> Dict:
    """Compute accuracy/precision/recall for paired binary labels.

    r: list of (prediction, gold) pairs with values in {0, 1}.

    Fixes: recall previously divided by the gold-positive count with no
    zero guard (precision had one), and empty input divided by zero in
    accuracy.  All three denominators now fall back to 0.
    """
    if not r:
        return {'accuracy': 0, 'precision': 0, 'recall': 0}

    tp = sum(1 for a, b in r if a == b == 1)
    tn = sum(1 for a, b in r if a == b == 0)
    accuracy = (tp + tn) / len(r)

    pred_pos = sum(a for a, _ in r)   # predicted positives
    gold_pos = sum(b for _, b in r)   # gold positives
    precision = tp / pred_pos if pred_pos != 0 else 0
    recall = tp / gold_pos if gold_pos != 0 else 0

    return {'accuracy': accuracy, 'precision': precision, 'recall': recall}
Beispiel #11
0
def combine_ranked_list(ranked_list_list):
    """Merge a list of {query: ranked_list} dicts into one merged dict.

    Ranked lists sharing a query across input dicts are merged with
    merge_ranked_list_list.
    """
    pairs = [(query, rl)
             for d in ranked_list_list
             for query, rl in d.items()]

    merged = {}
    for query, sub_pairs in group_by(pairs, lambda x: x[0]).items():
        merged[query] = merge_ranked_list_list(right(sub_pairs))
    return merged
Beispiel #12
0
def get_scores(option,
               pred_path: FilePath) -> Tuple[List[str], List[np.ndarray]]:
    """Load predictions and reduce each key's score list to one array.

    option: only "avg" (mean over axis 0) is supported.

    Fix: an unknown option previously hit `assert False`, which is
    silently stripped under `python -O`; it now raises ValueError.
    """
    raw_predictions: List[Tuple[str,
                                List[np.ndarray]]] = load_prediction(pred_path)
    if option == "avg":

        def reducer(data: List[np.ndarray]) -> np.ndarray:
            np_arr: np.ndarray = np.array(data)
            return np_arr.mean(axis=0)
    else:
        raise ValueError("unknown reduce option: {}".format(option))
    keys = left(raw_predictions)
    reduced_scores = lmap(reducer, right(raw_predictions))
    return keys, reduced_scores
Beispiel #13
0
def filter_with_ranked_list(
    qk_untis: List[QKUnit],
    ranked_list_d: Dict[str, List[TrecRankedListEntry]],
    threshold,
    top_k,
) -> List[QKUnit]:
    # For each (query, kdp-list): re-rank the KDPs by their score in the
    # query's ranked list, optionally drop those below `threshold`, and
    # optionally keep only the first `top_k` (None or -1 = no cap).
    # KDPs missing from the ranked list score -1e10 (sorted to the bottom).
    # NOTE: sorts each k_list in place, so callers' lists are reordered.
    # Queries missing from ranked_list_d are dropped entirely (KeyError path).

    out_qk_units = []
    for q, k_list in qk_untis:
        try:
            cur_ranked_list = ranked_list_d[q.query_id]
            entries: Dict[str, TrecRankedListEntry] = {
                e.doc_id: e
                for e in cur_ranked_list
            }
            n_k_list = len(k_list)

            not_found_set = set()

            def get_score(k: KDP):
                # Sentinel -1e10 pushes unknown KDPs below any real score.
                key = k.to_str()
                if key in entries:
                    s: TrecRankedListEntry = entries[key]
                    return s.score
                else:
                    not_found_set.add(key)
                    return -1e10

            k_list.sort(key=get_score, reverse=True)

            def higher(k: KDP) -> bool:
                return get_score(k) >= threshold

            if threshold is not None:
                k_list = lfilter(higher, k_list)

            if top_k is None or top_k == -1:
                pass
            else:
                k_list = k_list[:top_k]
            out_qk_units.append((q, k_list))
            if not_found_set:
                print("For query {}, {} of {} do not have score".format(
                    q.query_id, len(not_found_set), n_k_list))
        except KeyError as e:
            print(e, "KeyError", q.query_id)

    print(lmap(len, right(out_qk_units)))
    return out_qk_units
def load_multiple_ranked_list(dir_path, get_key_from_name):
    """Load every galago ranked list under dir_path and merge them by key.

    get_key_from_name maps a file's basename to its merge key; files that
    map to the same key have their ranked lists merged.
    """
    named_lists = []
    for file_path in get_dir_files(dir_path):
        name = os.path.basename(file_path)
        for _query, ranked_list in load_galago_ranked_list(file_path).items():
            named_lists.append((name, ranked_list))

    merged = {}
    grouped = group_by(named_lists, lambda pair: get_key_from_name(pair[0]))
    for key, sub_data in grouped.items():
        merged[key] = merge_ranked_list_list(right(sub_data))

    return merged
Beispiel #15
0
def eval(
    score_pred_file_name: FileName,
    cpid_resolute_file: FileName,
    n_way=3,
):
    # Evaluate datapoint predictions for the "abortion" topic against dev
    # labels, printing per-label results and the macro-averaged F1.
    # n_way=2 collapses gold labels 1 and 2 into a single positive class.
    # NOTE(review): this function shadows the builtin `eval`.
    topic = "abortion"
    pred_path: FilePath = pjoin(output_path, score_pred_file_name)
    dpid_resolute: Dict[str, DPID] = load_dpid_resolute(cpid_resolute_file)
    score_d: Dict[DPID,
                  np.ndarray] = get_datapoint_score(pred_path, dpid_resolute,
                                                    "avg")

    def argmax(arr: np.ndarray) -> int:
        return arr.argmax()

    # Predicted class = argmax over each datapoint's score vector.
    pred_d: Dict[DPID, int] = dict_value_map(argmax, score_d)

    dev_labels = get_dev_labels(topic)
    if n_way == 2:

        def merge_label(e):
            # Map 3-way gold labels {0, 1, 2} onto binary {0, 1}.
            dpid, label = e
            return dpid, {
                0: 0,
                1: 1,
                2: 1,
            }[label]

        dev_labels = lmap(merge_label, dev_labels)

    def fetch_pred(e: Tuple[DPID, int]):
        # Assumes every dev dpid has a prediction (KeyError otherwise).
        dpid, label = e
        pred = pred_d[dpid]
        return pred

    gold_list: List[int] = right(dev_labels)
    pred_list: List[int] = lmap(fetch_pred, dev_labels)
    if n_way == 3:
        all_result = eval_3label(gold_list, pred_list)
    elif n_way == 2:
        all_result = eval_2label(gold_list, pred_list)
    else:
        assert False
    print(all_result)
    # Macro-average F1 across the per-label results.
    f1 = sum([result['f1'] for result in all_result]) / n_way
    print("Avg F1 : ", f1)
Beispiel #16
0
def summarize_score(info_dir, prediction_file) -> Dict[CPIDPair, float]:
    # Aggregate per-item softmax scores into one score per CPID pair by
    # summing over each group's items.
    info = load_combine_info_jsons(info_dir)
    print("Info has {} entries".format(len(info)))
    # NOTE(review): unused alternative scorer (raw regression logit); kept,
    # presumably for manual switching — confirm before deleting.
    def logit_to_score_reg(logit):
        return logit[0]

    def logit_to_score_softmax(logit):
        # Probability of the positive class (index 1).
        return scipy.special.softmax(logit)[1]

    scores: Dict[DataID, Tuple[CPIDPair, float]] = collect_score.collect_scores(prediction_file, info, logit_to_score_softmax)
    grouped = group_by(scores.values(), lambda x: x[0])
    print("Group size:", len(grouped))
    out_d = {}
    for cpid, items in grouped.items():
        # Sum (not average) the per-item scores within a CPID group.
        final_score = sum(right(items))
        out_d[cpid] = final_score

    num_items_per_group = average(lmap(len, grouped.values()))
    print("Num items per group : ", num_items_per_group)
    return out_d
Beispiel #17
0
def reduce_score(raw_predictions: List[Tuple[str, List[float]]],
                 option) -> Tuple[List[str], List[float]]:
    """Reduce each key's list of scores to a single float.

    option: "avg" (arithmetic mean) or "max".

    Fixes: the old "avg" path computed the average twice, kept a dead
    outlier counter, and printed every score list (a debug leftover); the
    unknown-option branch used `assert False`, which disappears under
    `python -O` — it now raises ValueError.
    """
    if option == "avg":

        def reducer(scores: List[float]) -> float:
            return sum(scores) / len(scores)
    elif option == "max":
        reducer = max
    else:
        raise ValueError("unknown reduce option: {}".format(option))

    keys = [key for key, _ in raw_predictions]
    reduced_scores = [reducer(scores) for _, scores in raw_predictions]
    return keys, reduced_scores
Beispiel #18
0
def filter_qk_rel(qk_candidate: List[QKUnit],
                  query_lms: Dict[str, Counter],
                  top_n=50) -> List[QKUnit]:
    """Keep the top_n highest-LM-scoring KDPs for each query."""
    scorer = LMScorer(query_lms)

    result: List[QKUnit] = []
    ticker = TimeEstimator(len(qk_candidate))
    for query, kdps in qk_candidate:

        def score_of(kdp: KDP) -> float:
            return scorer.score(query.query_id, kdp.tokens)

        # In-place sort preserved: callers see the reordered candidate list.
        kdps.sort(key=score_of, reverse=True)
        result.append((query, kdps[:top_n]))
        ticker.tick()

    n_empty = sum(1 for _, kdp_list in result if not kdp_list)
    print("{} queries, {} has no kdp ".format(len(qk_candidate), n_empty))
    return result
Beispiel #19
0
def filter_qk(qk_candidate: List[QKUnit],
              query_lms: Dict[str, Counter],
              alpha=0.5) -> List[QKUnit]:
    """Keep only KDPs with a strictly positive LM score for their query."""
    scorer = LMScorer(query_lms, alpha)

    result: List[QKUnit] = []
    ticker = TimeEstimator(len(qk_candidate))
    for query, kdps in qk_candidate:

        def score_of(kdp: KDP) -> float:
            return scorer.score(query.query_id, kdp.tokens)

        positive_kdps: List[KDP] = [k for k in kdps if score_of(k) > 0]
        result.append((query, positive_kdps))
        ticker.tick()

    n_empty = sum(1 for _, kdp_list in result if not kdp_list)
    print("{} queries, {} has no kdp ".format(len(qk_candidate), n_empty))
    return result
Beispiel #20
0
def main():
    # Visualize pairwise-deletion results for alamri premise/hypothesis
    # pairs as HTML tables: one cell per (p_token, h_token) deletion pair,
    # with tooltips showing how scores change from one to two deletions.
    save_name = "alamri_pair"
    info_entries, output_d = load_from_pickle(save_name)
    html = HtmlVisualizer("alamri_pairing_deletion.html", use_tooltip=True)
    initial_text = load_p_h_pair_text(
        at_output_dir("alamri_pilot", "true_pair_small.csv"))
    per_group_summary: List[PerGroupSummary] = summarize_pair_deletion_results(
        info_entries, output_d)

    def float_arr_to_str_arr(float_arr):
        return list(map(two_digit_float, float_arr))

    # NOTE(review): the two cell helpers below are unused in this function.
    def float_arr_to_cell(head, float_arr):
        return [Cell(head)] + lmap(Cell, map(two_digit_float, float_arr))

    def float_arr_to_cell2(head, float_arr):
        return [Cell(head)] + lmap(Cell, map("{0:.4f}".format, float_arr))

    num_data = len(output_d['input_ids'])
    for data_idx, (p, h) in enumerate(initial_text):
        group_summary = per_group_summary[data_idx]

        p_tokens = p.split()
        h_tokens = h.split()

        # (-1, -1) = no deletion: the baseline prediction for this pair.
        base_score = group_summary.score_d[(-1, -1)]
        pred_str = make_prediction_summary_str(base_score)
        html.write_paragraph("Prediction: {}".format(pred_str))

        keys = list(group_summary.score_d.keys())
        p_idx_max = max(left(keys))
        h_idx_max = max(right(keys))

        def get_pair_score_by_h(key):
            p_score, h_score = group_summary.effect_d[key]
            return h_score

        def get_pair_score_by_p(key):
            p_score, h_score = group_summary.effect_d[key]
            return p_score

        def get_table(get_pair_score_at):
            # Rows = hypothesis tokens, columns = premise tokens; each cell
            # holds the effect score with a one-del -> two-del tooltip.
            head = [Cell("")] + [Cell(t) for t in p_tokens]
            rows = [head]
            for h_idx in range(h_idx_max + 1):
                row = [Cell(h_tokens[h_idx])]
                for p_idx in range(p_idx_max + 1):
                    s = get_pair_score_at((p_idx, h_idx))
                    one_del_score = group_summary.score_d[(p_idx, -1)]
                    two_del_score = group_summary.score_d[(p_idx, h_idx)]
                    tooltip_str = "{} -> {}".format(
                        float_arr_to_str_arr(one_del_score),
                        float_arr_to_str_arr(two_del_score))
                    row.append(
                        get_tooltip_cell(two_digit_float(s), tooltip_str))
                rows.append(row)
            return rows

        html.write_table(get_table(get_pair_score_by_p))
        html.write_table(get_table(get_pair_score_by_h))
        html.write_bar()
Beispiel #21
0
def analyze_gradient(data, tokenizer):
    # Render per-token gradient magnitudes of dictionary-definition inputs
    # as an HTML report (dict_grad.html), one table row per masked position.
    gradients = data['gradients']
    d_input_ids = data['d_input_ids']
    mask_input_ids = data['masked_input_ids']
    masked_lm_positions = data["masked_lm_positions"]

    n_inst, seq_len = mask_input_ids.shape
    n_inst2, def_len = d_input_ids.shape

    assert n_inst == n_inst2

    # NOTE(review): overwrites def_len unpacked from d_input_ids.shape above —
    # presumably the fixed reshape length differs from the data; confirm.
    def_len = 256
    hidden_dim = 768
    reshaped_grad = reshape_gradienet(gradients, n_inst, def_len, hidden_dim)
    print(reshaped_grad.shape)

    n_pred = reshaped_grad.shape[1]

    # L1 norm over the hidden dimension -> per-token gradient magnitude.
    grad_per_token = np.sum(np.abs(reshaped_grad), axis=3)

    html_writer = HtmlVisualizer("dict_grad.html", dark_mode=False)

    for inst_idx in range(n_inst):
        tokens = tokenizer.convert_ids_to_tokens(mask_input_ids[inst_idx])
        #ans_tokens = tokenizer.convert_ids_to_tokens(input_ids[inst_idx])
        for i in range(len(tokens)):
            # Tag mask positions and add line breaks after [SEP] for HTML.
            if tokens[i] == "[MASK]":
                tokens[i] = "[MASK_{}]".format(i)
            if tokens[i] == "[SEP]":
                tokens[i] = "[SEP]<br>"
        def_tokens = tokenizer.convert_ids_to_tokens(d_input_ids[inst_idx])
        s = tokenizer_wo_tf.pretty_tokens(tokens)

        lines = []

        grad_total_max = 0
        for pred_idx in range(n_pred):
            row = []
            max_val = max(grad_per_token[inst_idx, pred_idx])
            total = sum(grad_per_token[inst_idx, pred_idx])
            mask_pos = masked_lm_positions[inst_idx, pred_idx]

            if total > grad_total_max:
                grad_total_max = total

            row.append(Cell(mask_pos))
            row.append(Cell(int(total)))

            for def_idx in range(def_len):
                term = def_tokens[def_idx]
                # NOTE(review): tests def_tokens[def_idx] (the current token),
                # not def_idx + 1 — looks like it meant the next token;
                # left as-is, confirm before changing.
                cont_right = def_idx + 1 < def_len and def_tokens[
                    def_idx][:2] == "##"
                cont_left = term[:2] == "##"

                # NOTE(review): space_left/space_right are computed but unused.
                space_left = "&nbsp;" if not cont_left else ""
                space_right = "&nbsp;" if not cont_right else ""

                if term == "[PAD]":
                    break
                if term == "[unused5]":
                    term = "[\\n]"

                # Normalization scale (hidden_dim * 2) appears empirical.
                score = grad_per_token[inst_idx, pred_idx,
                                       def_idx] / (hidden_dim * 2)
                bg_color = get_color(score)

                row.append(Cell(term, score, not cont_left, not cont_right))
                print("{}({})".format(
                    term, grad_per_token[inst_idx, pred_idx, def_idx]),
                      end=" ")

            lines.append((mask_pos, row))
            print("")
        # Order rows by their mask position in the sequence.
        lines.sort(key=lambda x: x[0])

        s = s.replace("[unused4]", "<b>DictTerm</b>")
        html_writer.write_paragraph(s)

        if grad_total_max > 5000000:
            html_writer.write_headline("HIGH Gradient")

        rows = right(lines)
        html_writer.write_table(rows)

        print("----------")
    html_writer.close()
Beispiel #22
0
 def normalize_right(pair_list):
     """Normalize the second elements of the pairs, keeping keys aligned."""
     keys = left(pair_list)
     scaled = normalize(right(pair_list))
     return list(zip(keys, scaled))
Beispiel #23
0
def doc_lm_scoring():
    # Score retrieved passages for each training claim with a log-odds
    # claim language model vs. a background LM, and write the ranked
    # passages into doc_lm_doc_level.html.
    gold = get_claim_perspective_id_dict()

    d_ids = list(load_train_claim_ids())
    claims: List[Dict] = get_claims_from_ids(d_ids)
    claims = claims
    top_n = 10
    q_res_path = FilePath(
        "/mnt/nfs/work3/youngwookim/data/perspective/train_claim/q_res_100")
    ranked_list: Dict[
        str, List[SimpleRankedListEntry]] = load_galago_ranked_list(q_res_path)
    preload_docs(ranked_list, claims, top_n)
    claim_lms = build_gold_lms(claims)
    claim_lms_d = {lm.cid: lm for lm in claim_lms}
    # Background LM = average over all claim LMs; kept in log space below.
    bg_lm = average_counters(lmap(lambda x: x.LM, claim_lms))
    log_bg_lm = get_lm_log(bg_lm)

    stopwords = load_stopwords_for_query()
    alpha = 0.5  # smoothing weight between claim LM and background LM

    html_visualizer = HtmlVisualizer("doc_lm_doc_level.html")

    tokenizer = PCTokenizer()
    random_passages = []
    num_pos_sum = 0
    num_pos_exists = 0
    for c in claims:
        q_res: List[SimpleRankedListEntry] = ranked_list[str(c['cId'])]
        html_visualizer.write_headline("{} : {}".format(c['cId'], c['text']))
        # for cluster in clusters:
        #     html_visualizer.write_paragraph("---")
        #     p_text_list: List[str] = lmap(perspective_getter, cluster)
        #     for text in p_text_list:
        #         html_visualizer.write_paragraph(text)
        #     html_visualizer.write_paragraph("---")
        claim_lm = claim_lms_d[c['cId']]
        topic_lm_prob = smooth(claim_lm.LM, bg_lm, alpha)
        log_topic_lm = get_lm_log(smooth(claim_lm.LM, bg_lm, alpha))
        # Per-term log-odds: log P(t|claim) - log P(t|background).
        log_odd: Counter = subtract(log_topic_lm, log_bg_lm)

        claim_text = c['text']
        claim_tokens = tokenizer.tokenize_stem(claim_text)

        # Threshold = mean log-odds over the claim's own stemmed tokens.
        # NOTE(review): assumes at least one claim token appears in log_odd.
        scores = []
        for t in claim_tokens:
            if t in log_odd:
                scores.append(log_odd[t])
        threshold = average(scores)

        s = "\t".join(left(log_odd.most_common(30)))
        html_visualizer.write_paragraph("Log odd top: " + s)
        not_found = set()

        def get_log_odd(x):
            x = tokenizer.stemmer.stem(x)
            if x not in log_odd:
                not_found.add(x)
            return log_odd[x]  # Counter: missing stems score 0

        # NOTE(review): get_probs is unused in this function.
        def get_probs(x):
            x = tokenizer.stemmer.stem(x)
            if x not in topic_lm_prob:
                not_found.add(x)
            return topic_lm_prob[x]

        def get_passage_score(p):
            # Mean per-token log-odds; empty passages score 0.
            return sum([log_odd[tokenizer.stemmer.stem(t)]
                        for t in p]) / len(p) if len(p) > 0 else 0

        passages = iterate_passages(q_res, top_n, get_passage_score)

        passages.sort(key=lambda x: x[1], reverse=True)
        html_visualizer.write_paragraph("Threshold {}".format(threshold))

        top5_scores = right(passages[:5])
        bot5_scores = right(passages[-5:])

        # Baseline: scores of 5 passages sampled from earlier claims.
        if len(random_passages) > 5:
            random_sel_pssages = random.choices(random_passages, k=5)
        else:
            random_sel_pssages = []
        random5_scores = lmap(get_passage_score, random_sel_pssages)

        def score_line(scores):
            return " ".join(lmap(two_digit_float, scores))

        html_visualizer.write_paragraph("top 5: " + score_line(top5_scores))
        html_visualizer.write_paragraph("bot 5: " + score_line(bot5_scores))
        html_visualizer.write_paragraph("random 5: " +
                                        score_line(random5_scores))

        num_pos = len(lfilter(lambda x: x[1] > 0, passages))
        num_pos_sum += num_pos
        if num_pos > 0:
            num_pos_exists += 1

        def print_doc(doc, html_visualizer, score):
            cells = lmap(lambda x: get_cell_from_token(x, get_log_odd(x)), doc)
            html_visualizer.write_headline("score={}".format(score))
            html_visualizer.multirow_print(cells, width=20)

        random_passages.extend(left(passages))
        if threshold < 0:
            continue
        # Print passages scoring >= 0 (sorted order), then the bottom five.
        for doc, score in passages:
            if score < 0:
                break
            print_doc(doc, html_visualizer, score)

        html_visualizer.write_headline("Bottom 5")
        for doc, score in passages[-5:]:
            print_doc(doc, html_visualizer, score)

    print("{} claims. {} docs on {} claims".format(len(claims), num_pos_sum,
                                                   num_pos_exists))