예제 #1
0
def run_experiments(
    relations,
    data_path_pre,
    data_path_post,
    input_param=None,
    use_negated_probes=False,
):
    """Run LAMA-style probing over *relations* and report Precision@1.

    For each relation a parameter namespace is built, the dataset at
    ``{data_path_pre}{relation}{data_path_post}`` is loaded (relations whose
    file cannot be read are skipped), the language model named in
    ``input_param["models_names"]`` is built once and reused, and
    ``run_evaluation`` produces a P@1 score.  Per-relation scores are also
    appended to ``last_results.csv``.

    Args:
        relations: list of dicts, each with at least a ``"relation"`` key and
            optionally ``"template"``, ``"template_negated"`` and ``"type"``.
        data_path_pre: path prefix for each relation's dataset file.
        data_path_post: path suffix (e.g. ``".jsonl"``).
        input_param: model configuration merged over the per-relation
            parameters; defaults to a BERT-large setup.  The default is built
            inside the function to avoid the shared-mutable-default pitfall.
        use_negated_probes: forward negated templates to the evaluation.

    Returns:
        ``(mean_p1, all_Precision1)`` — the mean P@1 over the relations that
        were actually evaluated (0.0 when none were) and the per-relation
        score list.
    """
    if input_param is None:
        input_param = {
            "lm": "bert",
            "label": "bert_large",
            "models_names": ["bert"],
            "bert_model_name": "bert-large-cased",
            "bert_model_dir":
            "pre-trained_language_models/bert/cased_L-24_H-1024_A-16",
        }

    model = None
    pp = pprint.PrettyPrinter(width=41, compact=True)

    all_Precision1 = []
    type_Precision1 = defaultdict(list)
    type_count = defaultdict(list)

    # Context manager guarantees the CSV is closed even if evaluation raises.
    with open("last_results.csv", "w+") as results_file:
        for relation in relations:
            pp.pprint(relation)
            PARAMETERS = {
                "dataset_filename": "{}{}{}".format(
                    data_path_pre, relation["relation"], data_path_post),
                "common_vocab_filename":
                "pre-trained_language_models/common_vocab_cased.txt",
                "template": "",
                "bert_vocab_name": "vocab.txt",
                "batch_size": 32,
                "logdir": "output",
                "full_logdir": "output/results/{}/{}".format(
                    input_param["label"], relation["relation"]),
                "lowercase": False,
                "max_sentence_length": 100,
                "threads": -1,
                "interactive": False,
                "use_negated_probes": use_negated_probes,
            }

            if "template" in relation:
                PARAMETERS["template"] = relation["template"]
                if use_negated_probes:
                    PARAMETERS["template_negated"] = relation["template_negated"]

            # Caller-supplied configuration wins over the defaults above.
            PARAMETERS.update(input_param)
            print(PARAMETERS)

            args = argparse.Namespace(**PARAMETERS)

            # Skip relations whose dataset file is missing or unreadable.
            try:
                data = load_file(args.dataset_filename)
            except Exception as e:
                print("Relation {} excluded.".format(relation["relation"]))
                print("Exception: {}".format(e))
                continue

            # Build the (expensive) model lazily, on first successful load.
            if model is None:
                [model_type_name] = args.models_names
                model = build_model_by_name(model_type_name, args)

            Precision1 = run_evaluation(args, shuffle_data=False, model=model)
            print("P@1 : {}".format(Precision1), flush=True)
            all_Precision1.append(Precision1)

            results_file.write("{},{}\n".format(
                relation["relation"], round(Precision1 * 100, 2)))
            results_file.flush()

            if "type" in relation:
                type_Precision1[relation["type"]].append(Precision1)
                data = load_file(PARAMETERS["dataset_filename"])
                type_count[relation["type"]].append(len(data))

    # statistics.mean raises StatisticsError on an empty sequence; report 0.0
    # when every relation was excluded (or none were given).
    mean_p1 = statistics.mean(all_Precision1) if all_Precision1 else 0.0
    print("@@@ {} - mean P@1: {}".format(input_param["label"], mean_p1))

    for t, l in type_Precision1.items():
        print(
            "@@@ ",
            input_param["label"],
            t,
            statistics.mean(l),
            sum(type_count[t]),
            len(type_count[t]),
            flush=True,
        )

    return mean_p1, all_Precision1
예제 #2
0
def run_experiments(
    relations,
    data_path_pre,
    data_path_post,
    input_param=None,
    use_negated_probes=False,
):
    """Probe a language model over *relations*, tracking modified/unmodified splits.

    Variant of the LAMA evaluation loop that additionally:

    * accumulates mask features and answers for a kNN-LM datastore when
      ``args.output_feature_path`` is set (saved to
      ``datastore/ds_change32.pt``),
    * otherwise collects the uuids of correctly answered queries (saved to
      ``modification/correct_uuids.json``),
    * reports separate accuracies for "modified" and "unmodified" examples.

    Args:
        relations: list of dicts, each with at least a ``"relation"`` key and
            optionally ``"template"``, ``"template_negated"`` and ``"type"``.
        data_path_pre: path prefix for each relation's dataset file.
        data_path_post: path suffix (e.g. ``".jsonl"``).
        input_param: model configuration merged over the per-relation
            parameters; defaults to a BERT-large setup.  The default is built
            inside the function to avoid the shared-mutable-default pitfall.
        use_negated_probes: forward negated templates to the evaluation.

    Returns:
        ``(mean_p1, all_Precision1)`` — mean P@1 over the relations that were
        actually evaluated (0.0 when none were) and the per-relation scores.
    """
    if input_param is None:
        input_param = {
            "lm": "bert",
            "label": "bert_large",
            "models_names": ["bert"],
            "bert_model_name": "bert-large-cased",
            "bert_model_dir":
            "pre-trained_language_models/bert/cased_L-24_H-1024_A-16",
        }

    model = None
    pp = pprint.PrettyPrinter(width=41, compact=True)

    all_Precision1 = []
    type_Precision1 = defaultdict(list)
    type_count = defaultdict(list)

    uid_list_all, mask_feature_list_all, answers_list_all = [], [], []
    all_correct_uuids = []
    total_modified_correct, total_unmodified_correct = 0, 0
    total_modified_num, total_unmodified_num = 0, 0

    # Context manager guarantees the CSV is closed even if evaluation raises.
    with open("last_results.csv", "w+") as results_file:
        for relation in relations:
            # if "type" not in relation or relation["type"] != "1-1":
            #     continue

            pp.pprint(relation)
            PARAMETERS = {
                "dataset_filename": "{}{}{}".format(
                    data_path_pre, relation["relation"], data_path_post),
                "common_vocab_filename":
                'pre-trained_language_models/bert/cased_L-12_H-768_A-12/vocab.txt',  #"pre-trained_language_models/common_vocab_cased.txt",
                "template": "",
                "bert_vocab_name": "vocab.txt",
                "batch_size": 32,
                "logdir": "output",
                "full_logdir": "output/results/{}/{}".format(
                    input_param["label"], relation["relation"]),
                "lowercase": False,
                "max_sentence_length": 512,  # change to 512 later
                "threads": 2,
                "interactive": False,
                "use_negated_probes": use_negated_probes,
                "return_features": False,
                "uuid_list": []
            }

            if "template" in relation:
                PARAMETERS["template"] = relation["template"]
                if use_negated_probes:
                    PARAMETERS["template_negated"] = relation["template_negated"]

            # Caller-supplied configuration wins over the defaults above.
            PARAMETERS.update(input_param)
            print(PARAMETERS)

            args = argparse.Namespace(**PARAMETERS)

            # Skip relations whose dataset file is missing or unreadable.
            try:
                data = load_file(args.dataset_filename)
            except Exception as e:
                print("Relation {} excluded.".format(relation["relation"]))
                print("Exception: {}".format(e))
                continue

            # Build the (expensive) model lazily, on first successful load.
            if model is None:
                [model_type_name] = args.models_names
                model = build_model_by_name(model_type_name, args)

            if getattr(args, 'output_feature_path', ''):
                # Feature-dump path: also collect mask features for kNN-LM.
                # Ignore this part if only obtaining the correct-predicted
                # questions.
                (Precision1, total_unmodified, Precision1_modified,
                 total_modified, uid_list, mask_feature_list,
                 answers_list) = run_evaluation(
                     args, shuffle_data=False, model=model)
                if len(uid_list) > 0:
                    uid_list_all.extend(uid_list)
                    mask_feature_list_all.append(
                        torch.cat(mask_feature_list, dim=0))
                    answers_list_all.extend(answers_list)
            else:
                (Precision1, total_unmodified, Precision1_modified,
                 total_modified, correct_uuids) = run_evaluation(
                     args, shuffle_data=False, model=model)
                all_correct_uuids.extend(correct_uuids)

            total_modified_correct += Precision1_modified
            total_unmodified_correct += Precision1
            total_modified_num += total_modified
            total_unmodified_num += total_unmodified
            print("P@1 : {}".format(Precision1), flush=True)
            all_Precision1.append(Precision1)

            results_file.write("{},{}\n".format(
                relation["relation"], round(Precision1 * 100, 2)))
            results_file.flush()

            if "type" in relation:
                type_Precision1[relation["type"]].append(Precision1)
                data = load_file(PARAMETERS["dataset_filename"])
                type_count[relation["type"]].append(len(data))

    # statistics.mean raises StatisticsError on an empty sequence; report 0.0
    # when every relation was excluded (or none were given).
    mean_p1 = statistics.mean(all_Precision1) if all_Precision1 else 0.0
    print("@@@ {} - mean P@1: {}".format(input_param["label"], mean_p1))
    # The original guarded only the modified denominator; an empty run raised
    # ZeroDivisionError on the unmodified one.  Guard both.
    unmodified_acc = (total_unmodified_correct / float(total_unmodified_num)
                      if total_unmodified_num else 0)
    modified_acc = (total_modified_correct / float(total_modified_num)
                    if total_modified_num else 0)
    print("Unmodified acc: {}, modified acc: {}".format(
        unmodified_acc, modified_acc))

    for t, l in type_Precision1.items():
        print(
            "@@@ ",
            input_param["label"],
            t,
            statistics.mean(l),
            sum(type_count[t]),
            len(type_count[t]),
            flush=True,
        )

    if len(uid_list_all) > 0:
        # Persist the kNN-LM datastore gathered across all relations.
        out_dict = {
            'mask_features': torch.cat(mask_feature_list_all, dim=0),
            'uuids': uid_list_all,
            'obj_labels': answers_list_all
        }
        torch.save(out_dict, 'datastore/ds_change32.pt')
    if len(all_correct_uuids) > 0:
        if not os.path.exists('modification'):
            os.makedirs('modification')
        json.dump(all_correct_uuids,
                  open('modification/correct_uuids.json', 'w'))
    return mean_p1, all_Precision1
예제 #3
0
def get_TREx_parameters(data_path_pre="data/"):
    """Return ``(relations, data_path_pre, data_path_post)`` for T-REx probing.

    Loads the relation list from ``relations.jsonl`` under *data_path_pre*,
    then points the data-path prefix at the ``TREx/`` subdirectory with
    ``.jsonl`` as the per-relation file suffix.
    """
    relation_specs = load_file(data_path_pre + "relations.jsonl")
    trex_prefix = data_path_pre + "TREx/"
    return relation_specs, trex_prefix, ".jsonl"
예제 #4
0
def get_TREx_parameters(data_path_pre="data/"):
    """Return ``(relations, data_path_pre, data_path_post)`` for T-REx probing.

    Loads the relation list from ``relations.jsonl`` under *data_path_pre*,
    then points the data-path prefix at the ``TREx/`` subdirectory with
    ``.jsonl`` as the per-relation file suffix.
    """
    relations_path = os.path.join(data_path_pre, 'relations.jsonl')
    relation_specs = load_file(relations_path)
    trex_prefix = os.path.join(data_path_pre, "TREx/")
    return relation_specs, trex_prefix, ".jsonl"