def train_and_val():
    """Train the BiLSTM-CRF on the resume data and keep the best checkpoint.

    Loads the vocabulary from disk, optionally resumes from a checkpoint,
    trains for up to ``stop_epoch`` epochs, and writes model + optimizer
    state to ``best_model_save_path`` whenever the validation score
    improves.  Training stops on the first epoch that does not improve
    the validation score.

    Fixes vs. original: ``is not None`` instead of ``!= None``; removed
    the dead ``unimprove_time`` counter (it was initialized and reset but
    never incremented or read — the loop already breaks immediately on
    the first non-improving epoch).
    """
    embedding_dim = 100
    hidden_dim = 100
    model_load_path = None  # set to a checkpoint path to resume training
    best_model_save_path = 'model/model_100_best_0223.pth'
    max_score = 0
    stop_epoch = 30
    val_json_path = '/home/agwave/Data/resume/val_0222.json'
    val_pdf_dir = '/home/agwave/Data/resume/val_0222/'

    training_data = get_data_from_data_txt(TRAIN_WORD_TO_TAG_PATH)
    with open('supporting_document/train_word_to_tag_0223.json', 'r') as j:
        word_to_ix = json.load(j)
    # BIO tag inventory plus the CRF's <start>/<stop> sentinels.
    tag_to_ix = {'b-name': 0, 'i-name': 1, 'b-bir': 2, 'i-bir': 3, 'b-gend': 4,
                 'i-gend': 5, 'b-tel': 6, 'i-tel': 7, 'b-acad': 8, 'i-acad': 9,
                 'b-nati': 10, 'i-nati': 11, 'b-live': 12, 'i-live': 13,
                 'b-poli': 14, 'i-poli': 15, 'b-unv': 16, 'i-unv': 17,
                 'b-comp': 18, 'i-comp': 19, 'b-work': 20, 'i-work': 21,
                 'b-post': 22, 'i-post': 23, 'b-proj': 24, 'i-proj': 25,
                 'b-resp': 26, 'i-resp': 27, 'b-degr': 28, 'i-degr': 29,
                 'b-grti': 30, 'i-grti': 31, 'b-woti': 32, 'i-woti': 33,
                 'b-prti': 34, 'i-prti': 35, 'o': 36, '<start>': 37, '<stop>': 38}

    model = BiLSTM_CRF(len(word_to_ix), tag_to_ix, embedding_dim, hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    start_epoch = 0
    if model_load_path is not None:
        print('load model...')
        checkpoint = torch.load(model_load_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1

    preliminary_score = get_score_by_model(model, val_json_path, val_pdf_dir)
    print('preliminary score:', preliminary_score)

    for epoch in range(start_epoch, stop_epoch):
        print("---------------------")
        print("running epoch : ", epoch)
        start_time = time.time()
        for sentence, tags in tqdm(training_data):
            model.zero_grad()
            sentence_in = prepare_sequence(sentence, word_to_ix)
            targets = torch.tensor([tag_to_ix[t] for t in tags], dtype=torch.long)
            loss = model.neg_log_likelihood(sentence_in, targets)
            loss.backward()
            # Clip gradients to unit norm to stabilise CRF training.
            nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()
        cur_epoch_score = get_score_by_model(model, val_json_path, val_pdf_dir)
        print('score', cur_epoch_score)
        print('running time:', time.time() - start_time)
        if cur_epoch_score > max_score:
            max_score = cur_epoch_score
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'epoch': epoch,
            }, best_model_save_path)
            print('save best model successfully.')
        else:
            # First non-improving epoch ends training.
            break
def train_all_data():
    """Train the BiLSTM-CRF on the full dataset and save the final model.

    Bug fix: the original guarded the save with ``if epoch == stop_epoch:``,
    which is never true inside ``range(stop_epoch)`` (epoch stops at
    ``stop_epoch - 1``), so the checkpoint was never written.  The model is
    now saved once, after the training loop completes.  Also fixes the
    misprinted progress message ("epon" -> "epoch").
    """
    embedding_dim = 100
    hidden_dim = 100
    stop_epoch = 1
    model_1_epoch = 'model/model_1_epoch_lr0001.pth'
    training_data = get_data_from_data_txt(DATA_PERFECT_PATH)
    word_to_ix = get_word_to_ix(training_data, min_word_freq=1)
    # BIO tags, CRF <start>/<stop> sentinels, plus the extra c-* continuation tags.
    tag_to_ix = {'b-name': 0, 'i-name': 1, 'b-bir': 2, 'i-bir': 3, 'b-gend': 4,
                 'i-gend': 5, 'b-tel': 6, 'i-tel': 7, 'b-acad': 8, 'i-acad': 9,
                 'b-nati': 10, 'i-nati': 11, 'b-live': 12, 'i-live': 13,
                 'b-poli': 14, 'i-poli': 15, 'b-unv': 16, 'i-unv': 17,
                 'b-comp': 18, 'i-comp': 19, 'b-work': 20, 'i-work': 21,
                 'b-post': 22, 'i-post': 23, 'b-proj': 24, 'i-proj': 25,
                 'b-resp': 26, 'i-resp': 27, 'b-degr': 28, 'i-degr': 29,
                 'b-grti': 30, 'i-grti': 31, 'b-woti': 32, 'i-woti': 33,
                 'b-prti': 34, 'i-prti': 35, 'o': 36, '<start>': 37,
                 '<stop>': 38, 'c-live': 39, 'c-proj': 40, 'c-woti': 41,
                 'c-post': 42, 'c-unv': 43, 'c-nati': 44, 'c-poli': 45,
                 'c-prti': 46, 'c-comp': 47}
    model = BiLSTM_CRF(len(word_to_ix), tag_to_ix, embedding_dim, hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(stop_epoch):
        print("---------------------")
        print("running epoch : ", epoch + 1)
        start_time = time.time()
        for sentence, tags in tqdm(training_data):
            model.zero_grad()
            sentence_in = prepare_sequence(sentence, word_to_ix)
            targets = torch.tensor([tag_to_ix[t] for t in tags], dtype=torch.long)
            loss = model.neg_log_likelihood(sentence_in, targets)
            loss.backward()
            # Looser clip than train_and_val (norm 15) — kept as original.
            nn.utils.clip_grad_norm_(model.parameters(), 15)
            optimizer.step()
        cur_epoch_score = get_score_by_model(model, TRAIN_JSON_PATH, TRAIN_PDF_DIR)
        print('score', cur_epoch_score)
        print('running time:', time.time() - start_time)
        print()

    # Save once training finishes (previously unreachable: the guard
    # `epoch == stop_epoch` could never hold inside range(stop_epoch)).
    torch.save({'model_state_dict': model.state_dict()}, model_1_epoch)
best_dev_F, new_dev_F, save = evaluating_batch(model, dev_batched, best_dev_F) if not disable_flag: if not early_stopping.early_stop: early_stopping(-new_dev_F, model, optimizer) else: print("Early stopping, now introduce adv examples") parameters['launch_epoch'] = epoch disable_flag = 1 sample_count = len(train_batched) else: if save: torch.save(model.state_dict(), model_name) best_idx = epoch best_test_F, new_test_F, _ = evaluating_batch(model, test_batched, best_test_F) all_F.append([0.0, new_dev_F, new_test_F]) sys.stdout.flush() print('Epoch %d : train/dev/test : %.2f / %.2f / %.2f - %d' % (epoch, new_train_F, new_dev_F, new_test_F, best_idx)) model.train(True) adjust_learning_rate(optimizer, lr=learning_rate / (1 + 0.05 * sample_count / len(train_data)))
# Epoch loop: train the model with Adam, evaluate after every epoch, and keep
# the checkpoint with the best eval score in 'model_best.pth'.
# NOTE(review): relies on `model`, `Config`, `train_dataloder`, `sort_batch_data`,
# `eval`, `eval_dataset` and `train_dataset` being defined earlier in this file.
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999),
                       eps=1e-08, weight_decay=0)
best_score = 0  # best eval score observed so far
for epoch in range(Config.epochs):
    model.train()
    total_loss = 0  # accumulated batch losses for the epoch-average printout
    for batch_sentence, batch_label, batch_length in train_dataloder:
        model.zero_grad()
        # Sort the batch by sequence length — presumably required for packed
        # sequences inside the model; confirm against sort_batch_data.
        batch_sentence, batch_label, batch_length, _ = sort_batch_data(
            batch_sentence, batch_label, batch_length)
        if Config.use_gpu:
            batch_sentence = batch_sentence.cuda()
            batch_label = batch_label.cuda()
        # (Model method name is spelled "likehood" in the project API.)
        loss = model.neg_log_likehood(batch_sentence, batch_label, batch_length)
        loss.backward()
        optimizer.step()
        total_loss += loss.cpu().item()
    # NOTE(review): `eval` shadows the builtin — it appears to be a
    # project-level evaluation helper; verify where it is defined.
    epoch_score = eval(eval_dataset, model)
    if epoch_score > best_score:
        best_score = epoch_score
        torch.save(model.state_dict(), 'model_best.pth')
    print('loss:{0}, epoch_score:{1}, best_score:{2}'.format(
        total_loss / len(train_dataset), epoch_score, best_score))
print(epoch) for i in range(len(batch_data)): model.zero_grad() sen_batch = [] tag_batch = [] for data in batch_data[len(batch_data) - 1 - i]: sen_batch.append(get_idxseq(data[0], word_to_ix)) tag_batch.append([tag_to_ix[t] for t in data[1]]) loss = model.neg_log( sen_batch, tag_batch, torch.tensor(mask[len(batch_data) - 1 - i], dtype=torch.long).cuda()) loss.backward() print(loss) optimizer.step() torch.save(model.state_dict(), './params5.pkl') print(crf.transitions.data) ''''' model.load_state_dict(torch.load('./params3.pkl')) print(crf.transitions.data) with codecs.open('./newtrain.txt', encoding='UTF-8') as f: train = f.readlines() for i in range(20): if i % 2 != 0: sen = train[i].strip(line_end) sen = get_idxseq(sen, word_to_ix) print(model(torch.tensor(sen, dtype=torch.long).cuda())[0]) ''' '' with codecs.open("./test.txt", 'r', encoding='UTF-8') as f: test_data = f.readlines()
batch_first=True).to(device) labs = pad_sequence(labs, batch_first=True).to(device) lens = torch.tensor(lens).to(device) lens, idx = torch.sort(lens, descending=True) sents = sents[idx] labs = labs[idx] score, preds = model(sents, lens) for i, l in enumerate(lens): true_labels.append( seqid2text(labs[i, :l], ix_to_lab)) pred_labels.append( seqid2text(preds[i, :l], ix_to_lab)) f1 = f1_score(true_labels, pred_labels) if (f1 > best_f1): torch.save(model.state_dict(), "models/model-27-02-20") best_f1 = f1 print("Accuracy: {:.4f}".format( accuracy_score(true_labels, pred_labels))) print("F1 score: {:.4f}".format(f1)) print(classification_report(true_labels, pred_labels)) model.train(True) if args.do_test: with torch.no_grad(): print("Evaluation on test set") model.load_state_dict( torch.load("models/model-27-02-20", map_location=device)) model.eval() true_labels = []
def main():
    """CLI entry point: evaluate a saved BiLSTM-CRF tagger, or train one.

    When ``--load_model`` is non-empty (note: it defaults to a checkpoint
    path, so evaluation is the default behaviour), loads the checkpoint,
    logs per-entity and micro-averaged test metrics, and returns.
    Otherwise trains for ``--epoch`` epochs, logging train/dev metrics and
    saving a checkpoint under results/ whenever the dev F1 improves.

    Bug fix: ``--fine_tuning`` used ``type=bool``; argparse then calls
    ``bool()`` on the raw string, and any non-empty string (including
    "False") is truthy, so the flag could never be turned off from the
    command line.  An explicit string-to-bool parser is used instead; the
    default (True) is unchanged.
    """
    def _str2bool(value):
        # argparse `type=` helper: map common false-y spellings to False.
        return str(value).lower() not in ('false', '0', 'no', '')

    parser = argparse.ArgumentParser()
    # parameters
    parser.add_argument("--epoch", default=100, type=int,
                        help="the number of epoches needed to train")
    parser.add_argument("--lr", default=1e-3, type=float,
                        help="the learning rate")
    parser.add_argument("--train_data_path", default='data/train.tsv', type=str,
                        help="train dataset path")
    parser.add_argument("--dev_data_path", default=None, type=str,
                        help="dev dataset path")
    parser.add_argument("--test_data_path", default='data/test.tsv', type=str,
                        help="test dataset path")
    parser.add_argument("--train_batch_size", default=128, type=int,
                        help="the batch size")
    parser.add_argument("--dev_batch_size", default=64, type=int,
                        help="the batch size")
    parser.add_argument("--test_batch_size", default=64, type=int,
                        help="the batch size")
    parser.add_argument("--embedding_path", default='data/sgns.renmin.bigram-char',
                        type=str, help="pre-trained word embeddings path")
    parser.add_argument("--embedding_size", default=300, type=int,
                        help="the word embedding size")
    parser.add_argument("--hidden_size", default=512, type=int,
                        help="the hidden size")
    parser.add_argument("--fine_tuning", default=True, type=_str2bool,
                        help="whether fine-tune word embeddings")
    parser.add_argument("--early_stopping", default=15, type=int,
                        help="Tolerance for early stopping (# of epochs).")
    parser.add_argument("--load_model", default='results/20_Model_best.pt',
                        help="load pretrained model for testing")
    args = parser.parse_args()

    if not args.train_data_path:
        logger.info("please input train dataset path")
        exit()
    if not (args.dev_data_path or args.test_data_path):
        logger.info("please input dev or test dataset path")
        exit()

    TEXT, LABEL, vocab_size, word_embeddings, train_iter, dev_iter, test_iter, tag_dict = \
        dataset.load_dataset(args.train_data_path, args.dev_data_path,
                             args.test_data_path, args.embedding_path,
                             args.train_batch_size, args.dev_batch_size,
                             args.test_batch_size)
    # Invert tag -> id into id -> tag for decoding predictions.
    idx_tag = {idx: tag for tag, idx in tag_dict.items()}

    model = BiLSTM_CRF(args.embedding_size, args.hidden_size, vocab_size,
                       tag_dict, word_embeddings)
    if torch.cuda.is_available():
        model = model.cuda()

    train_data, dev_data = dataset.train_dev_split(train_iter, 0.9)

    if args.load_model:
        # Evaluation-only path (default, since --load_model defaults to a path).
        model.load_state_dict(torch.load(args.load_model, map_location='cpu'))
        p, r, f1, eval_loss, all_assess = eval_model(model, test_iter, idx_tag)
        logger.info('LOC Test P:%.4f, Test R:%.4f, Test F1:%.4f',
                    all_assess['LOC']['P'], all_assess['LOC']['R'], all_assess['LOC']['F'])
        logger.info('PER Test P:%.4f, Test R:%.4f, Test F1:%.4f',
                    all_assess['PER']['P'], all_assess['PER']['R'], all_assess['PER']['F'])
        logger.info('ORG Test P:%.4f, Test R:%.4f, Test F1:%.4f',
                    all_assess['ORG']['P'], all_assess['ORG']['R'], all_assess['ORG']['F'])
        logger.info('Micro_AVG Test P:%.4f, Test R:%.4f, Test F1:%.4f',
                    p, r, f1)
        return

    best_score = 0.0
    for epoch in range(args.epoch):
        train_loss, p, r, f1, eval_loss = train_model(model, train_data,
                                                      dev_data, epoch,
                                                      args.lr, idx_tag)
        logger.info('Epoch:%d, Training Loss:%.4f', epoch, train_loss)
        logger.info('Epoch:%d, Eval Loss:%.4f, Eval P:%.4f, Eval R:%.4f, Eval F1:%.4f',
                    epoch, eval_loss, p, r, f1)
        if f1 > best_score:
            # Checkpoint whenever the dev F1 improves.
            best_score = f1
            torch.save(
                model.state_dict(),
                'results/%d_%s_%s.pt' % (epoch, 'Model', str(best_score)))
y_pre.append(y_hat) y_true.append(y.squeeze(1)).tolist() y_pre = [[idx2label[idx] for idx in y_pre_idx] for y_pre_idx in y_pre] y_true = [[idx2label[idx] for idx in y_true_idx] for y_true_idx in y_true] #print(y_pre[10],y_true[10]) # 评价指标 P = precision_score(y_true, y_pre) R = recall_score(y_true, y_pre) F1 = f1_score(y_true, y_pre) if F1 > best_f1: best_f1 = F1 torch.save(model.state_dict(), args.model_path) print('train_step %d,train_loss %.4f, P %.3f, R %.3f, F1 %.4f'%(steps,train_loss_sum%n,P,R,F1)) # print('test') # model = BiLSTM_CRF(args, label2idx, weight,device).to(device) # model.load_state_dict(torch.load('./model/best_f1.bin')) # model.eval() # m = 0 # y_pre,y_true=[],[] # for batch in valid_iter: # X, y = batch.TEXT, batch.LABEL # X, y = X.to(device).long(), y.to(device).long() # _, y_hat = model(X)
def my_train():
    """Train a BiLSTM-CRF NER model, evaluating on dev and test every epoch.

    Saves one checkpoint per epoch under model_result/ and stops early when
    the dev F1 has not improved for PATIENCE consecutive epochs, or as soon
    as both dev and test F1 exceed 0.9.
    Relies on module-level constants: DATASET, BATCH_SIZE, EMBEDDING_DIM,
    HIDDEN_DIM, DROPOUT, LEARN_RATE, EPOCHS, LOG_INTERVAL, PATIENCE.
    """
    os.makedirs(f"model_result", exist_ok=True)
    torch.manual_seed(1)  # fixed seed for reproducibility
    device = torch.device('cuda')  # NOTE(review): assumes a CUDA device is available
    data_dir = f"data/{DATASET}/processed"
    # Load datasets and the word/tag index mappings.
    train_data = NERDataset(os.path.join(data_dir, "train.pkl"))
    test_data = NERDataset(os.path.join(data_dir, "test.pkl"))
    dev_data = NERDataset(os.path.join(data_dir, "dev.pkl"))
    word_to_idx = load_obj(os.path.join(data_dir, "word_to_idx.pkl"))
    tag_to_idx = load_obj(os.path.join(data_dir, "tag_to_idx.pkl"))
    idx_to_tag = {n: m for m, n in tag_to_idx.items()}  # invert tag->id into id->tag
    train_loader = DataLoader(
        train_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )
    dev_loader = DataLoader(
        dev_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )
    test_loader = DataLoader(
        test_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )
    # Build the model.
    model = BiLSTM_CRF(len(word_to_idx), len(tag_to_idx), EMBEDDING_DIM,
                       HIDDEN_DIM, DROPOUT).to(device)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=LEARN_RATE)
    print("\n开始训练")
    f1_max = 0
    cur_patience = 0  # epochs since the last dev-F1 improvement (overfitting guard)
    for epoch in range(EPOCHS):
        model.train()
        for i, (seqs, tags, masks) in enumerate(train_loader, 1):
            optimizer.zero_grad()
            loss = model.loss(seqs.to(device), tags.to(device), masks.to(device))
            loss.backward()
            optimizer.step()
            if i % LOG_INTERVAL == 0:
                print("epoch {}: {:.0f}%\t\tLoss: {:.6f}".format(
                    epoch, 100.0 * i / len(train_loader), loss.item()))
        # Per-epoch evaluation on both dev and test splits.
        dev_precision, dev_recall, dev_f1 = evaluate(model, dev_loader, idx_to_tag)
        test_precision, test_recall, test_f1 = evaluate(
            model, test_loader, idx_to_tag)
        print(
            f"\ndev\tprecision: {dev_precision}, recall: {dev_recall}, f1: {dev_f1}"
        )
        print(
            f"test\tprecision: {test_precision}, recall: {test_recall}, f1: {test_f1}\n"
        )
        torch.save(model.state_dict(), f"model_result/{epoch}.pt")  # per-epoch checkpoint
        if dev_f1 > f1_max:
            # New best dev F1 — reset patience; stop outright once both
            # splits clear 0.9.
            f1_max = dev_f1
            cur_patience = 0
            if dev_f1 > 0.9 and test_f1 > 0.9:
                break
        else:
            cur_patience += 1
            if cur_patience >= PATIENCE:  # too many epochs below the best F1 — stop
                break
    print("Best dev F1: ", f1_max)
loss = model.loss(seqs.to(device), tags.to(device), masks.to(device)) loss.backward() optimizer.step() if i % args.log_interval == 0: print( "Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format( epoch + 1, i * seqs.size(1), len(train_loader.dataset), 100.0 * i / len(train_loader), loss.item(), ) ) print("Evaluating...") dev_precision, dev_recall, dev_f1 = evaluate(model, dev_loader, ix_to_tag) test_precision, test_recall, test_f1 = evaluate(model, test_loader, ix_to_tag) print(f"\ndev\tprecision: {dev_precision}, recall: {dev_recall}, f1: {dev_f1}") print(f"test\tprecision: {test_precision}, recall: {test_recall}, f1: {test_f1}\n") torch.save(model.state_dict(), f"checkpoints/{args.name}/model-epoch{epoch}.pt") if dev_f1 > best_dev_f1: best_dev_f1 = dev_f1 bad_count = 0 else: bad_count += 1 if bad_count >= args.patience: print("Early stopped!") break print("Best dev F1: ", best_dev_f1)