コード例 #1
0
def evaluate(mode):
    """Print ranking metrics of the BERT predictions for every layer.

    For each layer 1..6 the layer's ``label2int`` mapping is loaded from
    ``output_dir``. If ``mode`` is a known split, the prediction scores
    are read from ``bert_output``, the gold labels for ``mode`` are turned
    into vectors with ``multihot`` and both are passed to ``mean_ap``. The
    two values it returns are printed after the layer number.

    Args:
        mode: ``"dev"`` scores ``eval_results.tsv``; ``"test"`` scores
            ``test_results.tsv``. For any other value nothing is scored
            or printed (the ``label2int`` file is still read per layer).
    """
    # The two splits differ only in the name of the results file.
    result_files = {"dev": "/eval_results.tsv", "test": "/test_results.tsv"}
    result_file = result_files.get(mode)

    for layer in range(1, 7):
        layer_dir = str(layer)
        label2int = read.read_from_json(
            os.path.join(output_dir, layer_dir + "/label2int"))
        if result_file is None:
            continue

        y_pred = np.asarray(
            read.read_from_tsv(
                os.path.join(bert_output, layer_dir + result_file)),
            np.float32)
        labels = read.read_from_json(
            os.path.join(output_dir, layer_dir + "/label_" + mode))
        y_true = np.asarray(
            [multihot(label_q, label2int) for label_q in labels])

        # Presumably mean average precision and precision@1 -- confirm
        # against mean_ap. Named so the builtin ``map`` is not shadowed.
        mean_avg_prec, pre_1 = mean_ap(y_true, y_pred)
        print("layer " + str(layer), mean_avg_prec, pre_1)
コード例 #2
0
def generate_dist(mode):
    """Write one class-distribution file per layer for the given split.

    For each layer 1..6 the layer's ``label2int`` mapping is loaded. When
    ``mode`` is ``"dev"`` or ``"test"`` the question ids and the BERT
    prediction scores of that split are read and handed, together with
    the mapping and the output path, to ``generate_dist_file``. Any other
    ``mode`` only loads the mapping.

    Args:
        mode: ``"dev"`` or ``"test"``.
    """
    # mode -> (suffix of the per-layer results file,
    #          prefix of the distribution file name)
    per_mode = {
        "dev": ("/eval_results.tsv", "dev/BERT-Base-DEV.L"),
        "test": ("/test_results.tsv", "test/BERT-Base-TEST.L"),
    }

    for layer in range(1, 7):
        layer_tag = str(layer)
        label2int = read.read_from_json(
            os.path.join(output_dir, layer_tag + "/label2int"))

        if mode not in per_mode:
            continue
        results_suffix, dist_prefix = per_mode[mode]

        question_id = read.read_from_json(
            os.path.join(output_dir, "question_id_" + mode))
        raw_scores = read.read_from_tsv(
            os.path.join(bert_output, layer_tag + results_suffix))
        scores = np.asarray(raw_scores, np.float32)

        dist_path = os.path.join(
            bert_output_distribution,
            dist_prefix + layer_tag + ".classdist.txt")
        generate_dist_file(question_id, scores, label2int, dist_path)
コード例 #3
0
def data2bert():
    """Export the processed questions as BERT input TSVs for layers 1-2.

    For every layer the ``label2int`` mapping stored under ``bert_input``
    is loaded. Then, for every entry of ``modes``, the layer-specific
    labels and the question texts are read from ``processed_dir`` and
    passed to ``generate_tsv``, which is given the target path
    ``<bert_input>/<layer>/trec_<mode>_<layer>.tsv``.
    """
    for layer in range(1, 3):
        layer_dir = str(layer)
        mapping_path = os.path.join(bert_input, layer_dir + "/label2int")
        label2int = read.read_from_json(mapping_path)

        for mode in modes:
            labels_path = os.path.join(
                processed_dir, layer_dir + "/" + "label_" + mode)
            labels = read.read_from_json(labels_path)

            questions_path = os.path.join(processed_dir, "question_" + mode)
            question_only_texts = read.read_from_json(questions_path)

            tsv_path = os.path.join(
                bert_input,
                layer_dir + "/trec_" + mode + "_" + layer_dir + ".tsv")
            generate_tsv(question_only_texts, labels, label2int,
                         tsv_path, mode)
コード例 #4
0
def split_label(level, mode):
    """Store the labels of one split reduced to the given level.

    Reads ``label_<mode>`` from ``processed_dir``, maps every label
    through ``get_label_at_level(label, level)`` and saves the resulting
    list as ``<level>/label_<mode>`` under ``processed_dir``.

    Args:
        level: Level passed to ``get_label_at_level``; also the name of
            the sub-directory the result is written to.
        mode: Split name used in the input and output file names.
    """
    source_path = os.path.join(processed_dir, "label_" + mode)
    labels_level = [
        get_label_at_level(full_label, level)
        for full_label in read.read_from_json(source_path)
    ]
    target_path = os.path.join(
        processed_dir, str(level) + "/" + "label_" + mode)
    read.save_in_json(target_path, labels_level)
コード例 #5
0
def label_dict():
    """Build label counts and a label-to-index mapping for layers 1-2.

    For each layer, every split in ``modes`` is first written by
    ``split_label`` and then read back; the stripped labels are counted
    (as floats) across all splits. Each distinct label gets an index in
    order of first appearance. The counts are saved as
    ``<layer>/label_counts`` under ``processed_dir`` and the mapping as
    ``<layer>/label2int`` under ``bert_input``.
    """
    for layer in range(1, 3):
        layer_dir = str(layer)
        counts = defaultdict(float)

        for mode in modes:
            split_label(layer, mode)
            layer_labels_path = os.path.join(
                processed_dir, layer_dir + "/" + "label_" + mode)
            for raw_label in read.read_from_json(layer_labels_path):
                counts[raw_label.strip()] += 1.0

        # Dict iteration order is insertion order, so indices follow the
        # order in which labels were first seen.
        label2int = {name: index for index, name in enumerate(counts)}

        counts_path = os.path.join(processed_dir, layer_dir + "/label_counts")
        read.save_in_json(counts_path, counts)

        mapping_path = os.path.join(bert_input, layer_dir + "/label2int")
        read.save_in_json(mapping_path, label2int)
コード例 #6
0
def generate_cv():
    """Create the per-fold data files for 10-fold cross-validation.

    ``read_questions(data_path)`` must return a ``(questions, labels)``
    pair. The question indices are split by
    ``k_fold_cross_validation(indices, 10, randomize=True)``, which yields
    ``(train_idx, validation_idx, test_idx)`` per fold. For every fold
    and every entry of ``modes`` the function writes:

    * ``index_label_<mode>_<fold>`` -- each question's labels mapped
      through ``label2int``;
    * ``question_<mode>_<fold>`` -- the ``(question, labels)`` pairs;
    * ``lat_<mode>_<fold>.tsv`` under ``bert_input`` via ``generate_tsv``.

    The i-th entry of ``modes`` is paired with the i-th of the train,
    validation and test splits -- presumably ``modes`` is ordered that
    way; confirm against its definition.
    """
    questions, all_labels = read_questions(data_path)
    label2int = read.read_from_json(os.path.join(bert_input, "label2int"))

    data_set = list(zip(questions, all_labels))
    question_idx = list(range(len(questions)))

    folds = k_fold_cross_validation(question_idx, 10, randomize=True)
    for fold, [train_idx, validation_idx, test_idx] in enumerate(folds):
        splits = [
            [data_set[question_id] for question_id in train_idx],
            [data_set[question_id] for question_id in validation_idx],
            [data_set[question_id] for question_id in test_idx],
        ]
        for idx, mode in enumerate(modes):
            split = splits[idx]
            texts = [item[0] for item in split]
            labels = [item[1] for item in split]
            # Each question's label entry is iterated element by element.
            idx_label = [
                [label2int[single] for single in question_labels]
                for question_labels in labels
            ]

            suffix = mode + "_" + str(fold)
            read.save_in_json(
                os.path.join(processed_dir, "index_label_" + suffix),
                idx_label)
            read.save_in_json(
                os.path.join(processed_dir, "question_" + suffix), split)
            # Pass this split's labels so they line up with ``texts``;
            # the whole-dataset label list was passed here before.
            generate_tsv(
                texts, labels, label2int,
                os.path.join(bert_input, "lat_" + suffix + ".tsv"),
                mode)