Beispiel #1
0
def show_simple(run_name, data_id, tex_visulizer):
    """Write up to 20 explained instances per predicted class to a TeX visualizer.

    Entries are loaded from the pickle named ``save_view_{run_name}_{data_id}``;
    each entry unpacks to ``(x0, logits, scores)``. The predicted class is
    ``np.argmax(logits)``. For every kept entry a premise row and a hypothesis
    row of cells are built from the tokens and the normalized scores of the
    tag that corresponds to the prediction.

    Args:
        run_name: First component of the pickle name.
        data_id: Second component of the pickle name.
        tex_visulizer: Object whose ``write_instance(pred_str, rows)`` is
            called once per selected instance, grouped by predicted class.

    Returns:
        A list of three lists (indexed by predicted class), each holding
        ``(pred_str, [p_row, h_row])`` tuples in the order they were found.
    """
    num_select = 20
    tag_by_pred = ["match", "mismatch", "conflict"]
    label_by_pred = ["entailment", "neutral", "contradiction"]

    pickle_name = "save_view_{}_{}".format(run_name, data_id)
    tokenizer = get_tokenizer()

    data_loader = get_modified_data_loader2(HPSENLI3(), NLIExTrainConfig())

    explain_entries = load_from_pickle(pickle_name)

    selected_instances = [[], [], []]
    for x0, logits, scores in explain_entries:
        pred = np.argmax(logits)
        bucket = selected_instances[pred]
        # Entries for an already full class would be discarded anyway, so
        # skip the tokenization / scoring work for them.
        if len(bucket) >= num_select:
            continue

        input_ids = x0
        p, h = data_loader.split_p_h_with_input_ids(input_ids, input_ids)
        p_tokens = restore_capital_letter(tokenizer.convert_ids_to_tokens(p))
        h_tokens = restore_capital_letter(tokenizer.convert_ids_to_tokens(h))

        tag_name = tag_by_pred[pred]
        tag_idx = data_generator.NLI.nli_info.tags.index(tag_name)
        p_score, h_score = data_loader.split_p_h_with_input_ids(scores[tag_idx], input_ids)
        p_row = [Cell("\\textbf{P:}")] + cells_from_tokens(p_tokens, normalize(p_score))
        h_row = [Cell("\\textbf{H:}")] + cells_from_tokens(h_tokens, normalize(h_score))

        apply_color(p_row, tag_name)
        apply_color(h_row, tag_name)

        bucket.append((label_by_pred[pred], [p_row, h_row]))

        # The buckets only change on append, so this is the only place the
        # stop condition can become true.
        if all(len(s) == num_select for s in selected_instances):
            break

    for insts in selected_instances:
        for pred_str, rows in insts:
            tex_visulizer.write_instance(pred_str, rows)

    return selected_instances
Beispiel #2
0
def run(args):
    """Run ``predict_nli_ex`` with the options carried by ``args``.

    Builds the ``HPSENLI3`` hyperparameters, a ``BertNLI`` setting and the
    modified data loader, then forwards them together with the ``tag``,
    ``data_id``, ``model_path``, ``run_name`` and ``modeling_option``
    attributes of ``args`` (in that positional order).
    """
    hparams = hyperparams.HPSENLI3()
    setting = BertNLI()
    loader = get_modified_data_loader2(hparams, setting)

    option_values = (
        args.tag,
        args.data_id,
        args.model_path,
        args.run_name,
        args.modeling_option,
    )
    predict_nli_ex(hparams, setting, loader, *option_values)
Beispiel #3
0
def run(args):
    """Dispatch to the predictor that implements ``args.method_name``.

    Baseline methods are handled by ``nli_baseline_predict`` and attribution
    methods by ``nli_attribution_predict``. The method name is validated
    before the hyperparameters and data loader are built, so an unknown name
    fails immediately.

    Args:
        args: Object providing ``method_name``, ``tag``, ``data_id``,
            ``sub_range`` and ``model_path`` attributes.

    Raises:
        ValueError: If ``args.method_name`` is in neither method list.
    """
    baseline_methods = ('deletion_seq', "random", 'idf', 'deletion', 'LIME')
    attribution_methods = (
        "elrp",
        "deeplift",
        "saliency",
        "grad*input",
        "intgrad",
    )

    method_name = args.method_name
    use_baseline = method_name in baseline_methods
    if not use_baseline and method_name not in attribution_methods:
        # ValueError is still an Exception subclass, so callers catching
        # Exception keep working.
        raise ValueError(
            "method_name={} is not in the known method list.".format(
                method_name))

    hp = hyperparams.HPSENLI3()
    nli_setting = BertNLI()
    data_loader = get_modified_data_loader2(hp, nli_setting)

    predictor = nli_baseline_predict if use_baseline else nli_attribution_predict
    predictor(hp, nli_setting, data_loader, args.tag, method_name,
              args.data_id, args.sub_range, args.model_path)
Beispiel #4
0
def _format_score_cells(raw_scores, tag_name):
    """Build one row of score cells for a single tag.

    Each cell displays the raw score formatted to two decimals and receives
    the matching normalized score as its second ``Cell`` argument. Rows for
    the "mismatch" and "conflict" tags are recolored with "G" and "R".
    """
    norm_scores = normalize(raw_scores)
    cells = [
        Cell("{0:.2f}".format(raw), norm, False, False)
        for raw, norm in zip(raw_scores, norm_scores)
    ]
    if tag_name == "mismatch":
        set_cells_color(cells, "G")
    elif tag_name == "conflict":
        set_cells_color(cells, "R")
    return cells


def show_all(run_name, data_id):
    """Render one explained instance per predicted class to HTML and TeX.

    Entries are loaded from the pickle named ``save_view_{run_name}_{data_id}``;
    each entry unpacks to ``(x0, logits, scores)``. Output goes to
    ``<pickle_name>.html`` and ``<pickle_name>.tex``. For each kept entry the
    premise and hypothesis token rows are followed by one score row per tag.

    Args:
        run_name: First component of the pickle name.
        data_id: Second component of the pickle name.

    Returns:
        A list of three lists (indexed by predicted class) of
        ``(pred_str, p_tokens, h_tokens, p_score_list, h_score_list)`` tuples.
    """
    num_tags = 3
    num_select = 1
    pickle_name = "save_view_{}_{}".format(run_name, data_id)
    tokenizer = get_tokenizer()

    data_loader = get_modified_data_loader2(HPSENLI3(), NLIExTrainConfig())

    explain_entries = load_from_pickle(pickle_name)

    visualizer = HtmlVisualizer(pickle_name + ".html")
    tex_visulizer = TexTableNLIVisualizer(pickle_name + ".tex")
    tex_visulizer.begin_table()
    selected_instances = [[], [], []]
    for x0, logits, scores in explain_entries:
        pred = np.argmax(logits)
        bucket = selected_instances[pred]
        # Nothing is written for a class that is already full, so skip the
        # row construction for such entries.
        if len(bucket) >= num_select:
            continue

        input_ids = x0
        p, h = data_loader.split_p_h_with_input_ids(input_ids, input_ids)
        p_tokens = tokenizer.convert_ids_to_tokens(p)
        h_tokens = tokenizer.convert_ids_to_tokens(h)

        p_rows = [cells_from_tokens(p_tokens)]
        h_rows = [cells_from_tokens(h_tokens)]

        p_score_list = []
        h_score_list = []
        # NOTE(review): the previous code tried to keep only the tag matching
        # the prediction, but compared the integer `pred` with the strings
        # "0"/"1"/"2", which never matched, so every tag row was always
        # emitted. That behavior is kept here; if filtering is wanted,
        # compare against the integers 0/1/2 instead.
        for j in range(num_tags):
            tag_name = data_generator.NLI.nli_info.tags[j]
            p_score, h_score = data_loader.split_p_h_with_input_ids(scores[j], input_ids)

            p_rows.append(_format_score_cells(p_score, tag_name))
            h_rows.append(_format_score_cells(h_score, tag_name))

            p_score_list.append(p_score)
            h_score_list.append(h_score)

        pred_str = ["Entailment", "Neutral", "Contradiction"][pred]

        bucket.append((pred_str, p_tokens, h_tokens, p_score_list, h_score_list))
        visualizer.write_headline(pred_str)
        visualizer.multirow_print_from_cells_list(p_rows)
        visualizer.multirow_print_from_cells_list(h_rows)
        visualizer.write_instance(pred_str, p_rows, h_rows)

        tex_visulizer.write_paragraph(str(pred))
        tex_visulizer.multirow_print_from_cells_list(p_rows, width=13)
        tex_visulizer.multirow_print_from_cells_list(h_rows, width=13)

        # The buckets only change on append, so this is the only place the
        # stop condition can become true.
        if all(len(s) == num_select for s in selected_instances):
            break

    tex_visulizer.close_table()
    return selected_instances