config_file = 'default.ini' config = Configurable(config_file) # model model = CompanyPredict() # load data train_data = read_pkl(config.train_pkl) dev_data = None if config.para_dev_file: dev_data = read_pkl(config.dev_pkl) test_data = read_pkl(config.test_pkl) word_list = read_pkl(config.load_feature_voc) p_label_list, s_label_list = read_pkl(config.load_label_voc) word_voc = VocabSrc(word_list) p_label_voc = VocabTgt(p_label_list) s_label_voc = VocabTgt(s_label_list) embedding = None if os.path.isfile(config.embedding_pkl): embedding = read_pkl(config.embedding_pkl) # 开始 corrects, size = 0, 0 start_time = time.time() # 初始化宏平均 macro_averaging = {} for i in p_label_voc.i2w: macro_averaging[i] = {'tp': 0, 'fn': 0, 'fp': 0} for idx, data in enumerate(test_data):
# Preprocessing: analyze the dev/test corpora, then persist datasets,
# vocabularies, and optional pre-trained embeddings as pickle files.
#
# Fixes: every `pickle.dump(obj, open(path, 'wb'))` left its file handle
# open (resource leak, and writes may not be flushed promptly); all dumps
# now use `with` so handles are closed deterministically. `os.mkdir` is
# replaced by `os.makedirs` so missing parent directories don't crash.
analysis(dev_data, dev_sentence_len)
print('\n')

test_data, test_sentence_len = read_doc(config.para_test_file, config.sen_test_file, config.max_length)
analysis(test_data, test_sentence_len)

# ensure the output directory (and any missing parents) exists
if not os.path.isdir(config.save_dir):
    os.makedirs(config.save_dir)

with open(config.train_pkl, 'wb') as f:
    pickle.dump(train_data, f)
if config.para_dev_file:
    with open(config.dev_pkl, 'wb') as f:
        pickle.dump(dev_data, f)
with open(config.test_pkl, 'wb') as f:
    pickle.dump(test_data, f)

# vocab: keep only the `vocab_size` most frequent features; keep all labels
feature_list = [k for k, v in word_dict.most_common(config.vocab_size)]
para_label_list = list(para_label_dict)
s_label_list = list(s_label_dict)
with open(config.save_feature_voc, 'wb') as f:
    pickle.dump(feature_list, f)
with open(config.save_label_voc, 'wb') as f:
    pickle.dump((para_label_list, s_label_list), f)

feature_voc = VocabSrc(feature_list)
p_label_voc = VocabTgt(para_label_dict)
s_label_voc = VocabTgt(s_label_dict)

# embedding: only built (and cached) when an embedding file is configured
if config.embedding_file:
    embedding = feature_voc.create_vocab_embs(config.embedding_file)
    with open(config.embedding_pkl, 'wb') as f:
        pickle.dump(embedding, f)
# Prediction-script setup: parse CLI arguments, load configuration and
# vocabularies, select the model architecture, and resolve the checkpoint path.
parse.add_argument('--use_cuda', action='store_true', default=False)
parse.add_argument('--model', type=str, default='model.742')
args, extra_args = parse.parse_known_args()

config = Configurable(args.config_file, extra_args)
torch.set_num_threads(args.thread)

# CUDA is enabled only when a GPU is present AND the flag was passed
config.use_cuda = bool(gpu and args.use_cuda)
print("\nGPU using status: ", config.use_cuda)

# load vocab and model
feature_list = read_pkl(config.load_feature_voc)
label_list = read_pkl(config.load_label_voc)
feature_vec = VocabSrc(feature_list)
label_vec = VocabTgt(label_list)

# model: dispatch on the configured architecture name
model_classes = {
    'Vanilla': Vanilla,
    'Contextualized': Contextualized,
    'ContextualizedGates': ContextualizedGates,
}
if config.which_model not in model_classes:
    print('please choose right model')
    exit()
model = model_classes[config.which_model](config, feature_vec.size, config.embed_dim, PAD, label_vec.size)

model_path = os.path.join(config.load_model_path, args.model)
# Training-script setup: resolve the CUDA flag, load datasets and
# vocabularies, optionally restore pre-trained embeddings, and build the model.
#
# Fix: the original tested `if embedding` (truthiness) to decide whether a
# pickled embedding was loaded. That is ambiguous for tuple payloads and
# raises ValueError for numpy-array payloads; the intent — "was a pickle
# loaded" — is an explicit None check.
if gpu and args.use_cuda:
    config.use_cuda = True
print("\nGPU using status: ", config.use_cuda)

# load data (dev set is optional — only read when configured)
train_data = read_pkl(config.train_pkl)
dev_data = None
if config.dev_file:
    dev_data = read_pkl(config.dev_pkl)
test_data = read_pkl(config.test_pkl)

feature_list = read_pkl(config.feature_voc)
feature_voc = VocabSrc(feature_list)
label_list = read_pkl(config.label_voc)
label_voc = VocabTgt(label_list)

# pre-trained embeddings are optional; use them only if the pickle exists.
# NOTE(review): `embedding` appears to be a (weights, dim) pair — embedding[0]
# is the weight matrix, embedding[1] the embedding dimension; confirm against
# the preprocessing script that writes config.embedding_pkl.
embedding = None
if os.path.isfile(config.embedding_pkl):
    embedding = read_pkl(config.embedding_pkl)

# model
embed_dim = embedding[1] if embedding is not None else config.embed_dim
pretrained_weights = embedding[0] if embedding is not None else None
model = LSTM(config, feature_voc.size, embed_dim, PAD, label_voc.size, pretrained_weights)
if config.use_cuda:
    model = model.cuda()

# train