def train(fold_all):
    """K-fold fine-tuning of `bert-base-chinese` for sequence classification.

    For each of the FOLD folds: train on the remaining folds, validate after
    every epoch, and checkpoint the model with the best validation F1.

    Args:
        fold_all: list of FOLD lists of raw examples, consumed by
            utils.bert_input.

    Side effects:
        Saves '<fold>折_best_model.m' whenever the validation F1 improves;
        prints running training/validation metrics.

    Relies on module-level FOLD, USE_GPU, LR, WARMUP_STEPS, T_TOTAL,
    BATCH_SIZE, EPOCH and helpers utils, layers, val.
    """
    config = BertConfig.from_pretrained('bert-base-chinese')
    print('开始训练...')
    for fold_index in range(FOLD):
        # Per-fold bookkeeping.
        BEST_F1 = 0
        BEST_EPOCH = 0
        loss_list = []
        f1_list = []
        flag = 0  # batch counter driving the periodic metric report

        print('正在加载模型...')
        if USE_GPU:
            model = BertForSequenceClassification.from_pretrained(
                'bert-base-chinese', config=config).cuda()
        else:
            model = BertForSequenceClassification.from_pretrained(
                'bert-base-chinese', config=config)
        optimizer = AdamW(model.parameters(), lr=LR, correct_bias=False)
        scheduler = WarmupLinearSchedule(optimizer,
                                         warmup_steps=WARMUP_STEPS,
                                         t_total=T_TOTAL)

        # Build the train/dev split for this fold.
        # BUG FIX: the loop was hard-coded to range(5); use FOLD so the split
        # stays correct if the fold count is changed.
        train_list = []
        for i in range(FOLD):
            if i != fold_index:
                train_list = train_list + fold_all[i]
        dev_list = fold_all[fold_index]

        train_bert_list = utils.bert_input(train_list)
        dev_bert_list = utils.bert_input(dev_list)
        train_dataset = layers.Train_Dataset(train_bert_list)
        dev_dataset = layers.Train_Dataset(dev_bert_list)
        train_dataloader = DataLoader(dataset=train_dataset,
                                      batch_size=BATCH_SIZE,
                                      shuffle=True)
        dev_dataloader = DataLoader(dataset=dev_dataset,
                                    batch_size=BATCH_SIZE,
                                    shuffle=False)

        for epoch in range(EPOCH):
            model.train()
            for text, label in train_dataloader:
                # Convert the raw text/label batches to tensors.
                text = [sub_text.tolist() for sub_text in text]
                label = [int(sub_label) for sub_label in label]
                if USE_GPU:
                    # .t(): the dataloader yields token-major batches;
                    # presumably the model expects (batch, seq_len) —
                    # TODO(review): confirm against layers.Train_Dataset.
                    text = torch.tensor(text).t().cuda()
                    label = torch.tensor(label).cuda()
                else:
                    text = torch.tensor(text).t()
                    label = torch.tensor(label)
                # Forward pass; the model computes the loss from `labels`.
                outputs = model(text, labels=label)
                loss, logits = outputs[:2]
                # Optimizer step.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                scheduler.step()
                # Track per-batch f1/loss.
                f1 = utils.batch_f1(logits, label)
                f1_list.append(f1)
                loss_list.append(loss.item())
                flag += 1
                # Report running means every 200 batches, then reset.
                if flag % 200 == 0:
                    f1_mean = np.mean(f1_list)
                    loss_mean = np.mean(loss_list)
                    f1_list = []
                    loss_list = []
                    print('fold: {} | epoch: {} | f1: {} | loss: {}'.format(
                        fold_index, epoch, f1_mean, loss_mean))
            # Validate once per epoch; keep the best checkpoint.
            f1_val = val(model, dev_dataloader)
            print('***********************************************************************')
            print('fold: {} | epoch: {} | 验证集F1值: {}'.format(
                fold_index, epoch, f1_val))
            if f1_val > BEST_F1:
                BEST_F1 = f1_val
                BEST_EPOCH = epoch
                torch.save(model, str(fold_index) + '折_' + 'best_model.m')
            # torch.cuda.empty_cache()
            print('fold: {} | 验证集最优F1值: {}'.format(fold_index, BEST_F1))
            print('fold: {} | 验证集最优epoch: {}'.format(fold_index, BEST_EPOCH))
            print('***********************************************************************')
def train(fold_all):
    """K-fold fine-tuning of a locally stored bert-base-chinese model.

    Trains one model per fold, validates after every epoch, checkpoints the
    best model, and dumps the collected per-batch f1/loss curves to .npy
    files for later plotting.

    Args:
        fold_all: list of FOLD lists of raw examples, consumed by
            utils.bert_input.

    Side effects:
        Saves the best checkpoint per fold and writes 'data/total_f1.npy' /
        'data/total_loss.npy'. Relies on module-level FOLD, USE_GPU, LR,
        WARMUP_STEPS, T_TOTAL, BATCH_SIZE, EPOCH and helpers utils, layers,
        val.
    """
    config = BertConfig.from_pretrained(
        '../../model_lib/bert/pytorch/bert-base-chinese/bert_config.json')
    print('开始训练...')
    for fold_index in range(FOLD):
        # Per-fold bookkeeping.
        BEST_F1 = 0
        BEST_EPOCH = 0
        loss_list = []
        f1_list = []
        total_loss_list = []   # full per-batch history kept for plotting
        total_f1_list = []
        flag = 0               # batch counter driving the periodic report

        print('正在加载模型...')
        if USE_GPU:
            model = BertForSequenceClassification.from_pretrained(
                '../../model_lib/bert/pytorch/bert-base-chinese/',
                config=config).cuda()
        else:
            model = BertForSequenceClassification.from_pretrained(
                '../../model_lib/bert/pytorch/bert-base-chinese/',
                config=config)
        optimizer = AdamW(model.parameters(), lr=LR, correct_bias=False)
        scheduler = WarmupLinearSchedule(optimizer,
                                         warmup_steps=WARMUP_STEPS,
                                         t_total=T_TOTAL)

        # Build the cross-validation split for this fold.
        # BUG FIX: the loop was hard-coded to range(3), which silently
        # dropped folds >= 3 from the training set; use FOLD instead.
        train_list = []
        for i in range(FOLD):
            if i != fold_index:
                train_list = train_list + fold_all[i]
        dev_list = fold_all[fold_index]

        train_bert_list = utils.bert_input(train_list)
        dev_bert_list = utils.bert_input(dev_list)
        train_dataset = layers.Train_Dataset(train_bert_list)
        dev_dataset = layers.Train_Dataset(dev_bert_list)
        train_dataloader = DataLoader(dataset=train_dataset,
                                      batch_size=BATCH_SIZE,
                                      shuffle=True)
        dev_dataloader = DataLoader(dataset=dev_dataset,
                                    batch_size=BATCH_SIZE,
                                    shuffle=False)

        for epoch in range(EPOCH):
            model.train()
            for text, label in train_dataloader:
                # Convert the raw text/label batches to tensors.
                text = [sub_text.tolist() for sub_text in text]
                label = [int(sub_label) for sub_label in label]
                if USE_GPU:
                    # .t(): dataloader yields token-major batches; presumably
                    # the model expects (batch, seq_len) — TODO(review):
                    # confirm against layers.Train_Dataset.
                    text = torch.tensor(text).t().cuda()
                    label = torch.tensor(label).cuda()
                else:
                    text = torch.tensor(text).t()
                    label = torch.tensor(label)
                # Forward pass; the model computes the loss from `labels`.
                outputs = model(text, labels=label)
                loss, logits = outputs[:2]
                # Optimizer step.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                scheduler.step()
                # Track per-batch f1/loss.
                f1 = utils.batch_f1(logits, label)
                f1_list.append(f1)
                loss_list.append(loss.item())
                flag += 1
                # Report running means every 200 batches; roll the window
                # into the full history before resetting it.
                if flag % 200 == 0:
                    f1_mean = np.mean(f1_list)
                    loss_mean = np.mean(loss_list)
                    total_f1_list.extend(f1_list)
                    total_loss_list.extend(loss_list)
                    f1_list = []
                    loss_list = []
                    print('fold: {} | epoch: {} | f1: {} | loss: {}'.format(
                        fold_index, epoch, f1_mean, loss_mean))
            # Validate once per epoch; keep the best checkpoint.
            f1_val = val(model, dev_dataloader)
            print(
                '***********************************************************************'
            )
            print('fold: {} | epoch: {} | 验证集F1值: {}'.format(
                fold_index, epoch, f1_val))
            if f1_val > BEST_F1:
                BEST_F1 = f1_val
                BEST_EPOCH = epoch
                torch.save(
                    model, 'bert_base_chinese_epoch5_lr1e5_ml128_bs16_100000_' +
                    str(fold_index) + 'k_' + 'best_model.m')
            # torch.cuda.empty_cache()
            print('fold: {} | 验证集最优F1值: {}'.format(fold_index, BEST_F1))
            print('fold: {} | 验证集最优epoch: {}'.format(fold_index, BEST_EPOCH))
            print(
                '***********************************************************************'
            )
        # BUG FIX: flush the samples collected since the last 200-batch
        # report so the saved curves are complete (they were dropped before).
        total_f1_list.extend(f1_list)
        total_loss_list.extend(loss_list)
        f1_list = []
        loss_list = []
        # Persist the curves for later plotting.
        # NOTE(review): written once per fold, so each fold overwrites the
        # previous fold's files — confirm this is intended.
        np.save('data/total_f1.npy', np.array(total_f1_list))
        np.save('data/total_loss.npy', np.array(total_loss_list))
def train(fold_all):
    """Fine-tune RoBERTa-wwm-large using all folds as training data.

    This is deliberately NOT a true cross-validation: every fold's data is
    concatenated into the training set, while fold_all[fold_index] still
    serves as the validation set (so validation scores are optimistic).
    The best checkpoint per fold is saved and each epoch's wall-clock time
    is printed. Relies on module-level FOLD, USE_GPU, LR, WARMUP_STEPS,
    T_TOTAL, BATCH_SIZE, EPOCH and helpers utils, layers, val.
    """
    config = BertConfig.from_pretrained(
        '../../model_lib/robert/pytorch/chinese_roberta_wwm_large_ext_pytorch/bert_config.json'
    )
    print('开始训练...')
    for fold_index in range(FOLD):
        best_f1 = 0
        best_epoch = 0
        batch_f1s = []
        batch_losses = []
        seen_batches = 0

        print('正在加载模型...')
        model = BertForSequenceClassification.from_pretrained(
            '../../model_lib/robert/pytorch/chinese_roberta_wwm_large_ext_pytorch/',
            config=config)
        if USE_GPU:
            model = model.cuda()
        optimizer = AdamW(model.parameters(), lr=LR, correct_bias=False)
        scheduler = WarmupLinearSchedule(optimizer,
                                         warmup_steps=WARMUP_STEPS,
                                         t_total=T_TOTAL)

        # Pseudo cross-validation: train on every fold, validate on this one.
        train_list = []
        for k in range(FOLD):
            train_list.extend(fold_all[k])
        dev_list = fold_all[fold_index]

        train_inputs = utils.bert_input(train_list)
        dev_inputs = utils.bert_input(dev_list)
        train_loader = DataLoader(dataset=layers.Train_Dataset(train_inputs),
                                  batch_size=BATCH_SIZE,
                                  shuffle=True)
        dev_loader = DataLoader(dataset=layers.Train_Dataset(dev_inputs),
                                batch_size=BATCH_SIZE,
                                shuffle=False)

        for epoch in range(EPOCH):
            start_time = time.time()
            model.train()
            for text, label in train_loader:
                # Tensorize the batch; the transpose reshapes the
                # token-major batch for the model.
                rows = [t.tolist() for t in text]
                ids = torch.tensor(rows).t()
                target = torch.tensor([int(y) for y in label])
                if USE_GPU:
                    ids = ids.cuda()
                    target = target.cuda()
                # Forward pass; loss is computed internally from `labels`.
                loss, logits = model(ids, labels=target)[:2]
                # Backprop and step.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                scheduler.step()
                # Accumulate per-batch metrics.
                batch_f1s.append(utils.batch_f1(logits, target))
                batch_losses.append(loss.item())
                seen_batches += 1
                # Every 200 batches: report windowed means, then reset.
                if seen_batches % 200 == 0:
                    print('fold: {} | epoch: {} | f1: {} | loss: {}'.format(
                        fold_index, epoch, np.mean(batch_f1s),
                        np.mean(batch_losses)))
                    batch_f1s = []
                    batch_losses = []
            # Validate once per epoch and checkpoint on improvement.
            f1_val = val(model, dev_loader)
            print(
                '***********************************************************************'
            )
            print('fold: {} | epoch: {} | 验证集F1值: {}'.format(
                fold_index, epoch, f1_val))
            if f1_val > best_f1:
                best_f1 = f1_val
                best_epoch = epoch
                torch.save(
                    model, 'robert_wwm_large_ext_f5k_epoch3_lr1e5_ml64_bs12_' +
                    str(fold_index) + 'k_' + 'best_model.m')
            # torch.cuda.empty_cache()
            print('fold: {} | 验证集最优F1值: {}'.format(fold_index, best_f1))
            print('fold: {} | 验证集最优epoch: {}'.format(fold_index, best_epoch))
            print(
                '***********************************************************************'
            )
            end_time = time.time()
            print('epoch cost time:', end_time - start_time)