tag_to_ix=tag_to_id, embedding_dim=parameters['word_dim'], hidden_dim=parameters['word_lstm_dim'], use_gpu=use_gpu, char_to_ix=char_to_id, pre_word_embeds=word_embeds, use_crf=parameters['crf'], char_mode=parameters['char_mode'], char_embedding_dim=parameters['char_dim'], char_lstm_dim=parameters['char_lstm_dim'], alpha=parameters['alpha']) # n_cap=4, # cap_embedding_dim=10) if use_gpu: model.cuda() learning_rate = 0.015 optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9) losses = [] best_dev_F = -1.0 best_test_F = -1.0 best_train_F = -1.0 all_F = [[0, 0, 0]] plot_every = 10 eval_every = 20 sample_count = 0 best_idx = 0 if parameters['reload']:
def _str2bool(value):
    """Parse a command-line boolean option value.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so
    every non-empty value (including ``"False"``) becomes ``True``. This
    converter maps the usual spellings explicitly instead.

    Args:
        value: The raw option value (a string from the command line, or an
            actual ``bool``).

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a recognised
            spelling of a boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays falsy, exactly as it was under ``type=bool``.
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got %r' % (value,))


def main():
    """Parse command-line options, then evaluate or train a BiLSTM-CRF model.

    Datasets and embeddings are loaded through ``dataset.load_dataset`` and a
    ``BiLSTM_CRF`` model is built (moved to CUDA when available).

    * If ``--load_model`` is non-empty, the state dict at that path is loaded,
      the model is evaluated once on the test iterator, per-entity
      (LOC/PER/ORG) and micro-averaged scores are logged, and the function
      returns.
    * Otherwise the model is trained for ``--epoch`` epochs via
      ``train_model``; whenever the F1 returned for an epoch exceeds the best
      seen so far, the state dict is saved under ``results/``.

    Raises:
        SystemExit: With status 1 when the train path is empty, or when both
            the dev and test paths are empty.
    """
    # Local import: only needed here to create the checkpoint directory.
    import os

    parser = argparse.ArgumentParser()

    # parameters
    parser.add_argument("--epoch", default=100, type=int,
                        help="the number of epoches needed to train")
    parser.add_argument("--lr", default=1e-3, type=float,
                        help="the learning rate")
    parser.add_argument("--train_data_path", default='data/train.tsv', type=str,
                        help="train dataset path")
    parser.add_argument("--dev_data_path", default=None, type=str,
                        help="dev dataset path")
    parser.add_argument("--test_data_path", default='data/test.tsv', type=str,
                        help="test dataset path")
    parser.add_argument("--train_batch_size", default=128, type=int,
                        help="the batch size")
    parser.add_argument("--dev_batch_size", default=64, type=int,
                        help="the batch size")
    parser.add_argument("--test_batch_size", default=64, type=int,
                        help="the batch size")
    parser.add_argument("--embedding_path", default='data/sgns.renmin.bigram-char', type=str,
                        help="pre-trained word embeddings path")
    parser.add_argument("--embedding_size", default=300, type=int,
                        help="the word embedding size")
    parser.add_argument("--hidden_size", default=512, type=int,
                        help="the hidden size")
    # ``type=bool`` would turn "--fine_tuning False" into True; parse explicitly.
    parser.add_argument("--fine_tuning", default=True, type=_str2bool,
                        help="whether fine-tune word embeddings")
    parser.add_argument("--early_stopping", default=15, type=int,
                        help="Tolerance for early stopping (# of epochs).")
    # NOTE(review): this default is non-empty, so unless the caller passes
    # ``--load_model ""`` the script takes the evaluation-only branch below
    # and never trains. Confirm that this is intended.
    parser.add_argument("--load_model", default='results/20_Model_best.pt',
                        help="load pretrained model for testing")
    # NOTE(review): --fine_tuning and --early_stopping are parsed but not used
    # anywhere in the visible body of this function.

    args = parser.parse_args()

    # Report bad invocations with a non-zero exit status.
    if not args.train_data_path:
        logger.error("please input train dataset path")
        raise SystemExit(1)
    if not (args.dev_data_path or args.test_data_path):
        logger.error("please input dev or test dataset path")
        raise SystemExit(1)

    # TEXT, LABEL and dev_iter are returned by the loader but not used below.
    (TEXT, LABEL, vocab_size, word_embeddings,
     train_iter, dev_iter, test_iter, tag_dict) = dataset.load_dataset(
        args.train_data_path, args.dev_data_path,
        args.test_data_path, args.embedding_path, args.train_batch_size,
        args.dev_batch_size, args.test_batch_size)

    # Invert the tag -> index mapping so indices can be mapped back to tags.
    idx_tag = {index: tag for tag, index in tag_dict.items()}

    model = BiLSTM_CRF(args.embedding_size, args.hidden_size, vocab_size,
                       tag_dict, word_embeddings)
    if torch.cuda.is_available():
        model = model.cuda()

    # The dev data used during training is carved out of the training iterator.
    # Presumably 0.9 is the fraction kept for training; verify against
    # dataset.train_dev_split.
    train_data, dev_data = dataset.train_dev_split(train_iter, 0.9)

    if args.load_model:
        # Evaluation-only mode: load the weights, score the test set, and stop.
        model.load_state_dict(torch.load(args.load_model, map_location='cpu'))
        p, r, f1, eval_loss, all_assess = eval_model(model, test_iter, idx_tag)
        for entity in ('LOC', 'PER', 'ORG'):
            scores = all_assess[entity]
            logger.info('%s Test P:%.4f, Test R:%.4f, Test F1:%.4f',
                        entity, scores['P'], scores['R'], scores['F'])
        logger.info('Micro_AVG Test P:%.4f, Test R:%.4f, Test F1:%.4f',
                    p, r, f1)
        return

    # Make sure the checkpoint directory exists before training starts, so the
    # first save cannot fail after a full epoch of work.
    os.makedirs('results', exist_ok=True)

    best_score = 0.0
    for epoch in range(args.epoch):
        train_loss, p, r, f1, eval_loss = train_model(
            model, train_data, dev_data, epoch, args.lr, idx_tag)
        logger.info('Epoch:%d, Training Loss:%.4f', epoch, train_loss)
        logger.info('Epoch:%d, Eval Loss:%.4f, Eval P:%.4f, Eval R:%.4f, Eval F1:%.4f',
                    epoch, eval_loss, p, r, f1)
        # Keep a checkpoint every time the F1 from train_model improves.
        if f1 > best_score:
            best_score = f1
            torch.save(
                model.state_dict(),
                'results/%d_%s_%s.pt' % (epoch, 'Model', str(best_score)))