def my_train():
    """Train a BiLSTM-CRF NER model and checkpoint it each epoch.

    Loads the preprocessed train/dev/test splits and vocabularies from
    ``data/{DATASET}/processed``, trains for up to ``EPOCHS`` epochs with
    Adam, evaluates on dev and test after every epoch, saves a checkpoint
    per epoch under ``model_result/``, and early-stops either when both
    dev and test F1 exceed 0.9 or when dev F1 fails to improve for
    ``PATIENCE`` consecutive epochs.

    Relies on module-level helpers/constants defined elsewhere in this
    file: NERDataset, BatchPadding, BiLSTM_CRF, load_obj, evaluate,
    DATASET, BATCH_SIZE, EMBEDDING_DIM, HIDDEN_DIM, DROPOUT, LEARN_RATE,
    EPOCHS, LOG_INTERVAL, PATIENCE.
    """
    os.makedirs("model_result", exist_ok=True)
    torch.manual_seed(1)  # fixed seed for reproducible runs
    # Fall back to CPU when CUDA is unavailable (original hard-coded 'cuda',
    # which raises on CPU-only machines).
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_dir = f"data/{DATASET}/processed"

    # Load datasets and vocabularies produced by the preprocessing step.
    train_data = NERDataset(os.path.join(data_dir, "train.pkl"))
    test_data = NERDataset(os.path.join(data_dir, "test.pkl"))
    dev_data = NERDataset(os.path.join(data_dir, "dev.pkl"))
    word_to_idx = load_obj(os.path.join(data_dir, "word_to_idx.pkl"))
    tag_to_idx = load_obj(os.path.join(data_dir, "tag_to_idx.pkl"))
    idx_to_tag = {idx: tag for tag, idx in tag_to_idx.items()}

    train_loader = DataLoader(
        train_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )
    # Evaluation loaders do not need shuffling: metrics are independent of
    # batch order, and a fixed order keeps evaluation deterministic.
    dev_loader = DataLoader(
        dev_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=False,
        num_workers=2,
        pin_memory=True,
    )
    test_loader = DataLoader(
        test_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=False,
        num_workers=2,
        pin_memory=True,
    )

    # Build the model and optimizer.
    model = BiLSTM_CRF(
        len(word_to_idx), len(tag_to_idx), EMBEDDING_DIM, HIDDEN_DIM, DROPOUT
    ).to(device)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=LEARN_RATE)

    print("\n开始训练")
    f1_max = 0
    cur_patience = 0  # epochs since the last dev-F1 improvement (overfitting guard)
    for epoch in range(EPOCHS):
        model.train()
        for i, (seqs, tags, masks) in enumerate(train_loader, 1):
            optimizer.zero_grad()
            loss = model.loss(seqs.to(device), tags.to(device), masks.to(device))
            loss.backward()
            optimizer.step()
            if i % LOG_INTERVAL == 0:
                print("epoch {}: {:.0f}%\t\tLoss: {:.6f}".format(
                    epoch, 100.0 * i / len(train_loader), loss.item()))

        dev_precision, dev_recall, dev_f1 = evaluate(model, dev_loader, idx_to_tag)
        test_precision, test_recall, test_f1 = evaluate(
            model, test_loader, idx_to_tag)
        print(
            f"\ndev\tprecision: {dev_precision}, recall: {dev_recall}, f1: {dev_f1}"
        )
        print(
            f"test\tprecision: {test_precision}, recall: {test_recall}, f1: {test_f1}\n"
        )
        # Checkpoint every epoch so any epoch's weights can be recovered.
        torch.save(model.state_dict(), f"model_result/{epoch}.pt")

        if dev_f1 > f1_max:
            # New best dev F1: reset the patience counter.
            f1_max = dev_f1
            cur_patience = 0
            # Stop early once both dev and test F1 clear the 0.9 bar.
            if dev_f1 > 0.9 and test_f1 > 0.9:
                break
        else:
            cur_patience += 1
            # Dev F1 has not improved for PATIENCE epochs: stop to avoid
            # overfitting.
            if cur_patience >= PATIENCE:
                break
    print("Best dev F1: ", f1_max)
# Model model = BiLSTM_CRF( len(word_to_ix), len(tag_to_ix), args.embed_dim, args.hidden_dim, args.dropout ).to(device) print(model) optimizer = optim.Adam(model.parameters(), lr=args.lr) print("Training...") best_dev_f1 = 0 bad_count = 0 for epoch in range(args.epochs): model.train() for i, (seqs, tags, masks) in enumerate(train_loader, 1): optimizer.zero_grad() loss = model.loss(seqs.to(device), tags.to(device), masks.to(device)) loss.backward() optimizer.step() if i % args.log_interval == 0: print( "Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format( epoch + 1, i * seqs.size(1), len(train_loader.dataset), 100.0 * i / len(train_loader), loss.item(), ) ) print("Evaluating...") dev_precision, dev_recall, dev_f1 = evaluate(model, dev_loader, ix_to_tag) test_precision, test_recall, test_f1 = evaluate(model, test_loader, ix_to_tag)