import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import BertConfig

# CNewsDataset and BertClassifier are defined elsewhere in this project.

def main():
    # Hyperparameters
    batch_size = 4
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    epochs = 10
    learning_rate = 5e-6  # the learning rate should not be too large

    # Build the datasets
    train_dataset = CNewsDataset('data/cnews/cnews.train.txt')
    valid_dataset = CNewsDataset('data/cnews/cnews.val.txt')
    # test_data = load_data('cnews/cnews.test.txt')

    # Wrap them in dataloaders that yield batches
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
    # test_dataloader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)

    # Load the BERT configuration
    bert_config = BertConfig.from_pretrained('bert-base-chinese')
    num_labels = len(train_dataset.labels)

    # Initialize the model, optimizer, and loss
    model = BertClassifier(bert_config, num_labels).to(device)
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    best_acc = 0
    for epoch in range(1, epochs + 1):
        losses = 0    # cumulative loss
        accuracy = 0  # cumulative accuracy
        model.train()

        train_bar = tqdm(train_dataloader)
        for input_ids, token_type_ids, attention_mask, label_id in train_bar:
            model.zero_grad()
            train_bar.set_description('Epoch %i train' % epoch)

            output = model(
                input_ids=input_ids.to(device),
                attention_mask=attention_mask.to(device),
                token_type_ids=token_type_ids.to(device),
            )

            loss = criterion(output, label_id.to(device))
            losses += loss.item()

            pred_labels = torch.argmax(output, dim=1)  # predicted labels
            acc = torch.sum(pred_labels == label_id.to(device)).item() / len(pred_labels)
            accuracy += acc

            loss.backward()
            optimizer.step()
            train_bar.set_postfix(loss=loss.item(), acc=acc)

        average_loss = losses / len(train_dataloader)
        average_acc = accuracy / len(train_dataloader)
        print('\tTrain ACC:', average_acc, '\tLoss:', average_loss)

        # Validation
        model.eval()
        losses = 0
        accuracy = 0

        valid_bar = tqdm(valid_dataloader)
        with torch.no_grad():  # no gradients are needed for validation
            for input_ids, token_type_ids, attention_mask, label_id in valid_bar:
                valid_bar.set_description('Epoch %i valid' % epoch)

                output = model(
                    input_ids=input_ids.to(device),
                    attention_mask=attention_mask.to(device),
                    token_type_ids=token_type_ids.to(device),
                )

                loss = criterion(output, label_id.to(device))
                losses += loss.item()

                pred_labels = torch.argmax(output, dim=1)
                acc = torch.sum(pred_labels == label_id.to(device)).item() / len(pred_labels)
                accuracy += acc
                valid_bar.set_postfix(loss=loss.item(), acc=acc)

        average_loss = losses / len(valid_dataloader)
        average_acc = accuracy / len(valid_dataloader)
        print('\tValid ACC:', average_acc, '\tLoss:', average_loss)

        # Keep the checkpoint with the best validation accuracy
        if average_acc > best_acc:
            best_acc = average_acc
            torch.save(model.state_dict(), 'models/best_model.pkl')
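# The script above assumes a project-local BertClassifier that maps
# (input_ids, attention_mask, token_type_ids) to raw logits of shape
# (batch, num_labels). A minimal sketch of such a module, assuming the
# Hugging Face transformers BertModel API; the project's actual class
# may differ:
from transformers import BertModel

class BertClassifierSketch(nn.Module):
    def __init__(self, bert_config, num_labels):
        super().__init__()
        # Pretrained encoder matching the config loaded in main()
        self.bert = BertModel.from_pretrained('bert-base-chinese', config=bert_config)
        self.classifier = nn.Linear(bert_config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, token_type_ids):
        outputs = self.bert(input_ids=input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids)
        # Classify from the pooled [CLS] representation; CrossEntropyLoss
        # expects these unnormalized logits
        return self.classifier(outputs.pooler_output)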
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

# BertClassifier here is a different, project-local model (a head/body stance
# classifier); save_checkpoint, load_checkpoint, and BATCH_SIZE also come from
# the surrounding module.

def train(dataloader, head_trans, body_trans, classifier,
          load_model=False, save_model=True, num_epochs=2):
    torch.backends.cudnn.benchmark = True
    # device = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = 'cpu'
    print(device)

    learning_rate = 3e-3

    # For TensorBoard
    writer = SummaryWriter('runs/bert')
    step = 0

    # Initialize the model
    model = BertClassifier(head_trans, body_trans, classifier).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    if load_model:
        # Resume training from the saved checkpoint
        model, optimizer, step = load_checkpoint(
            torch.load('bert_chkpnt/my_checkpoint.pth.tar'), model, optimizer)

    for epoch in range(num_epochs):
        if save_model:
            checkpoint = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'step': step,
            }
            save_checkpoint(checkpoint)

        running_loss = 0.0
        running_accuracy = 0

        loop = tqdm(enumerate(dataloader), total=len(dataloader), leave=False)
        for batch, (head, body, stance) in loop:
            outputs = model(head.to(device), body.to(device))
            loss = criterion(outputs.float(), stance.to(device).long())
            writer.add_scalar('Training Loss', loss.item(), step)
            step += 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Update the progress bar
            loop.set_description(f'Epoch [{epoch + 1}/{num_epochs}]')
            loop.set_postfix(loss=loss.item())

            running_loss += loss.item()
            running_accuracy += ((torch.argmax(outputs, dim=1) ==
                                  stance.to(device)).sum().item()) / BATCH_SIZE

            # Log 10-batch running averages
            if (batch + 1) % 10 == 0:
                writer.add_scalar('Running Loss', running_loss / 10,
                                  epoch * len(dataloader) + batch)
                writer.add_scalar('Running Accuracy', running_accuracy / 10,
                                  epoch * len(dataloader) + batch)
                running_loss = 0.0
                running_accuracy = 0

    return model
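# save_checkpoint / load_checkpoint are not shown here. A minimal sketch that
# matches the call sites above (a dict with 'state_dict', 'optimizer', and
# 'step'); the default filename is an assumption based on the load path:

def save_checkpoint(checkpoint, filename='bert_chkpnt/my_checkpoint.pth.tar'):
    # Persist model/optimizer state plus the global step counter
    torch.save(checkpoint, filename)

def load_checkpoint(checkpoint, model, optimizer):
    # Restore state in place and return the step to resume logging from
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return model, optimizer, checkpoint['step']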
import copy

import torch
import tqdm
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# bert_classifier, args, progress_bar, the dataloaders, metrics, best_score,
# and best_weights are created earlier in the surrounding script.

for i, batch in enumerate(progress_bar, 1):
    outputs = bert_classifier.train_on_batch(batch)
    postfix = update_metrics(metrics, outputs, batch["labels"])
    progress_bar.set_postfix(postfix)
    # Evaluate every eval_every_n_batches steps (skipping an evaluation that
    # would fall too close to the end of the epoch) and once at the very end
    if (args.eval_every_n_batches > 0
            and i % args.eval_every_n_batches == 0
            and len(train_dataloader) - i >= args.eval_every_n_batches // 2) \
            or i == len(train_dataloader):
        dev_metrics = initialize_metrics()
        dev_progress_bar = tqdm.tqdm(dev_dataloader)
        for j, dev_batch in enumerate(dev_progress_bar):
            outputs = bert_classifier.validate_on_batch(dev_batch)
            postfix = update_metrics(dev_metrics, outputs, dev_batch["labels"])
            dev_progress_bar.set_postfix(postfix)
        if dev_metrics["accuracy"] > best_score:
            best_score = dev_metrics["accuracy"]
            best_weights = copy.deepcopy(bert_classifier.state_dict())

bert_classifier.load_state_dict(best_weights)  # load the best state
bert_classifier.eval()
if args.save_file is not None:
    torch.save(best_weights, args.save_file)

# Re-run the best model over the dev set and collect per-example predictions
probs, labels = [None] * len(dev_data), [None] * len(dev_data)
dev_dataloader = make_dataloader(dev_dataset, batch_size=args.dev_batch_size, shuffle=False)
dev_progress_bar = tqdm.tqdm(dev_dataloader)
for i, batch in enumerate(dev_progress_bar):
    outputs = bert_classifier.predict_on_batch(batch)
    for index, prob, label in zip(batch["index"], outputs["probs"], outputs["labels"]):
        probs[index], labels[index] = prob, label

# Score against the gold labels
corr_labels = [int(elem[args.answer_field] == args.pos_label) for elem in dev_data]
accuracy = accuracy_score(corr_labels, labels)
metrics = precision_recall_fscore_support(corr_labels, labels)
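# initialize_metrics / update_metrics are also defined elsewhere. One possible
# shape that fits their use above (a dict exposing an "accuracy" field, with a
# tqdm postfix dict returned); the keys of `outputs` here are assumptions:

def initialize_metrics():
    return {"n_batches": 0, "n": 0, "correct": 0, "loss_sum": 0.0, "accuracy": 0.0}

def update_metrics(metrics, outputs, labels):
    preds = torch.argmax(outputs["logits"], dim=-1)
    metrics["n_batches"] += 1
    metrics["n"] += len(labels)
    metrics["correct"] += (preds.cpu() == labels.cpu()).sum().item()
    metrics["loss_sum"] += outputs["loss"].item()
    metrics["accuracy"] = metrics["correct"] / metrics["n"]
    # Running averages shown in the progress bar
    return {"acc": round(metrics["accuracy"], 4),
            "loss": round(metrics["loss_sum"] / metrics["n_batches"], 4)}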
import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import BertConfig

# load_data, BertClassifier, and num_labels are defined elsewhere in this project.

def main():
    device = torch.device('cuda:3')

    # Load the datasets
    print('Loading training data')
    train_data = load_data('dataset/train.csv')
    print('Loading validation data')
    valid_data = load_data('dataset/test.csv')
    # test_data = load_data('cnews/cnews.test.txt')

    batch_size = 16

    # Build batches
    print('Building batches')
    train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=3)
    valid_dataloader = DataLoader(valid_data, batch_size=batch_size, shuffle=False, num_workers=3)
    # test_dataloader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)

    # Load the BERT configuration
    bert_config = BertConfig.from_pretrained('./chinese_wwm_pytorch')
    bert_config.num_labels = num_labels
    print(bert_config)

    # Initialize the model
    model = BertClassifier(bert_config)
    # model.to(device)

    # Hyperparameters
    EPOCHS = 20
    learning_rate = 5e-6  # the learning rate should not be too large
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    # Cross-entropy loss
    criterion = nn.CrossEntropyLoss()

    with open('output.txt', 'w') as wf:
        wf.write('Batch Size: ' + str(batch_size) + '\tLearning Rate: ' + str(learning_rate) + '\n')

    best_acc = 0

    # Set up data-parallel training. DataParallel keeps the master copy of the
    # parameters on device_ids[0], so it must match the cuda:3 device above.
    net = torch.nn.DataParallel(model, device_ids=[3, 4])
    net.to(device)
    # model.module.avgpool = nn.AdaptiveAvgPool2d(7)

    # Start training
    for Epoch in range(1, EPOCHS + 1):
        losses = 0    # cumulative loss
        accuracy = 0  # cumulative accuracy
        print('Epoch:', Epoch)
        model.train()

        for batch_index, batch in enumerate(train_dataloader):
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            token_type_ids = batch[2].to(device)
            label_ids = batch[3].to(device)

            # Feed the three inputs to the (data-parallel) model
            output = net(
                input_ids=input_ids,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
            )

            loss = criterion(output, label_ids)
            losses += loss.item()

            pred_labels = torch.argmax(output, dim=1)  # predicted labels
            acc = torch.sum(pred_labels == label_ids).item() / len(pred_labels)
            accuracy += acc
            # print('Epoch: %d | Train: | Batch: %d / %d | Acc: %f | Loss: %f'
            #       % (Epoch, batch_index + 1, len(train_dataloader), acc, loss.item()))

            # Zero the gradients, backpropagate, and update the weights
            model.zero_grad()
            loss.backward()
            optimizer.step()
            # torch.cuda.empty_cache()

        average_loss = losses / len(train_dataloader)
        average_acc = accuracy / len(train_dataloader)
        # Print this epoch's training results
        print('\tTrain ACC:', average_acc, '\tLoss:', average_loss)
        # with open('output.txt', 'a') as rf:
        #     rf.write('\nEpoch: ' + str(Epoch) + '\tTrain ACC:' + str(average_acc)
        #              + '\tLoss: ' + str(average_loss))

        # Validation
        model.eval()
        losses = 0
        accuracy = 0
        for batch_index, batch in enumerate(valid_dataloader):
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            token_type_ids = batch[2].to(device)
            label_ids = batch[3].to(device)

            with torch.no_grad():
                output = model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    token_type_ids=token_type_ids,
                )

            loss = criterion(output, label_ids)
            losses += loss.item()

            # Both steps below operate directly on the output tensor
            pred_labels = torch.argmax(output, dim=1)  # predicted labels
            acc = torch.sum(pred_labels == label_ids).item() / len(pred_labels)
            accuracy += acc

        average_loss = losses / len(valid_dataloader)
        average_acc = accuracy / len(valid_dataloader)
        print('\tValid ACC:', average_acc, '\tLoss:', average_loss)
        # with open('output.txt', 'a') as rf:
        #     rf.write('\nEpoch: ' + str(Epoch) + '\tValid ACC:' + str(average_acc)
        #              + '\tLoss: ' + str(average_loss) + '\n')

        # Keep the checkpoint with the best validation accuracy
        if average_acc > best_acc:
            best_acc = average_acc
            torch.save(model.state_dict(), 'best_model_on_trainset.pkl')
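# To reuse the saved weights later, rebuild the model with the same config and
# load the state dict. Because model.state_dict() (not the DataParallel
# wrapper's) was saved, the keys carry no 'module.' prefix. A minimal sketch,
# assuming the same project-local names as above:
def load_best_model():
    bert_config = BertConfig.from_pretrained('./chinese_wwm_pytorch')
    bert_config.num_labels = num_labels
    model = BertClassifier(bert_config)
    model.load_state_dict(torch.load('best_model_on_trainset.pkl', map_location='cpu'))
    model.eval()  # disable dropout for inference
    return model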