예제 #1
0
파일: create_fig.py 프로젝트: lrgr/sigma
def across_all_dir(path, files):
    """Collect the per-entry scores of every result file into one matrix.

    Each name in ``files`` is joined onto ``path`` and loaded with
    ``load_json``. For every entry under the file's
    ``'chromosomesToResults'`` key, its ``'score'`` is written into that
    file's row, in the iteration order of the mapping.

    Returns an array of shape ``(len(files), 24)``; columns beyond the number
    of entries found in a file keep their initial value of zero.
    """
    n_columns = 24  # fixed row width; presumably one slot per chromosome
    score_matrix = np.zeros((len(files), n_columns))
    for row, filename in enumerate(files):
        results = load_json(join(path, filename))['chromosomesToResults']
        for col, result in enumerate(results.values()):
            score_matrix[row, col] = result['score']
    return score_matrix
예제 #2
0
 def __init__(self):
     '''
     Load up front the resources needed to compute sentence vectors:
     word -> id, id -> embedding and id -> weight.
     '''
     # Word-to-id mapping, loaded via load_json from config.word_id_path.
     self.word_id = load_json(config.word_id_path)
     # Id-to-embedding table, loaded via load_pickle from config.id_emb_path.
     self.id_emb = load_pickle(config.id_emb_path)
     # Id-to-weight table, loaded via load_pickle from config.id_weight_path.
     self.id_weight = load_pickle(config.id_weight_path)
     # clean_seg is stored (not called) here; presumably it is invoked later
     # to tokenize input text -- confirm against its definition.
     self.tokenizer = clean_seg
예제 #3
0
    def abduce_batch(self, json_file_path, ahs, moneys, attrs, months, max_change_num):
        """Run ``self.abduce`` over a batch and keep only the successful results.

        The JSON at ``json_file_path`` is loaded with ``load_json`` and passed
        through ``self.build_dict``. The inputs ``ahs``, ``moneys``, ``attrs``
        and ``months`` are walked in lockstep; each ``ah`` is looked up in the
        built mapping (``None`` when absent) and handed to ``self.abduce``
        together with the matching ``money``, ``attr``, ``month`` and
        ``max_change_num``.

        Items for which ``self.abduce`` returns ``None`` as its first value
        are dropped, so the returned lists may be shorter than the inputs.

        Returns a tuple of three parallel lists:
        ``(abduced_attrs, abduced_months, judgement_jsons)``.
        """
        lookup = self.build_dict(load_json(json_file_path))

        kept_attrs, kept_months, kept_judgements = [], [], []
        for ah, money, attr, month in zip(ahs, moneys, attrs, months):
            new_attr, new_month, judgement = self.abduce(
                money, attr, month, lookup.get(ah, None), max_change_num)
            # Only results with a non-None attribute are recorded.
            if new_attr is not None:
                kept_attrs.append(new_attr)
                kept_months.append(new_month)
                kept_judgements.append(judgement)

        return kept_attrs, kept_months, kept_judgements
예제 #4
0
    recorder.write_pair("init_baseline", baseline)
    recorder.write_pair("init_rate", rate)

    tmp_json_path = "tmp/abl_predict_%d.json" % (0)
    filenames_new, ahs_new, money_new, labels_new, attrs_new = \
                                    get_bert_generate_label(perception, test_filename, test_money_filename, tmp_json_path, tags_list)

    best_mae, best_mse, _, _ = sentence.test(money_new, attrs_new, labels_new,
                                             filenames_new, ahs_new)
    ret_raw, ret_str = get_score("data/" + test_filename, tmp_json_path,
                                 "data/tags_for_test.txt")
    recorder.write_pair("init_MAE", best_mae)
    recorder.write_pair("init_MSE", best_mse)
    recorder.write_pair("init_f1_score", ret_str)

    pretrain_bert_train_data = load_json(perception.data_dir + "/" +
                                         pretrain_filename)

    model_idx = 0
    for t in range(abl_times):
        print("abduction times %d" % t, "model idx %d" % (model_idx))

        tmp_json_path = "tmp/abl_train_%d.json" % (t)
        filenames_new, ahs_new, money_new, labels_new, attrs_new = \
                                            get_bert_generate_label(perception, abl_train_filename, abl_train_money_filename, tmp_json_path, tags_list)

        t_b = time.time()
        abductor.set_predict_model(sentence)
        attrs_abduced, labels_abduced, judgement_jsons = abductor.abduce_batch(
            tmp_json_path, ahs_new, money_new, attrs_new, labels_new,
            abl_max_change_num)
        t_e = time.time()