import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm
from pytorch_pretrained_bert import BertConfig, BertForSequenceClassification

# Binarize the targets: the training set carries a fractional `target` score,
# while the validation set carries six per-class toxicity flags.
train_df = train_df.drop(['comment_text'], axis=1)
train_df['target'] = (train_df['target'] >= 0.5).astype(float)

valid_df = valid_df.fillna(0)
valid_df = valid_df.drop(['comment_text'], axis=1)
valid_df['target'] = (valid_df['toxic'] == 1) | (valid_df['severe_toxic'] == 1)
valid_df['target'] = valid_df['target'] | (valid_df['obscene'] == 1)
valid_df['target'] = valid_df['target'] | (valid_df['threat'] == 1)
valid_df['target'] = valid_df['target'] | (valid_df['insult'] == 1)
valid_df['target'] = valid_df['target'] | (valid_df['identity_hate'] == 1)
valid_df['target'] = valid_df['target'].astype(float)

# Load the pretrained weights and freeze every parameter
model = BertForSequenceClassification(bert_config, num_labels=1)
model.load_state_dict(torch.load("./datas/bert_pytorch.bin"))
model.to(device)
for param in model.parameters():
    param.requires_grad = False

# Merge the training and validation examples into one training set
X = train_seqs[:]
y = train_df['target'].values[:]
valid_X = valid_seqs[:]
valid_y = valid_df['target'].values[:]
# axis=0 stacks samples as rows; the original axis=1 would only work if both
# sets had the same number of rows, and would misalign X against y
X = np.concatenate((X, valid_X), axis=0)
y = np.concatenate((y, valid_y), axis=0)
train_dataset = torch.utils.data.TensorDataset(
    torch.tensor(X, dtype=torch.long),
    torch.tensor(y, dtype=torch.float))

output_model_file = "./datas/mybert.bin"
lr = 2e-5
batch_size = 32
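The block above assumes `train_seqs` and `valid_seqs` already hold the padded token-id matrices. A minimal sketch of how they could be built with `BertTokenizer` from `pytorch_pretrained_bert` follows; the helper name `convert_lines` and the maximum length of 220 are assumptions, not taken from this post, and these calls would have to run before `comment_text` is dropped from the DataFrames above. Zero-padding matters here because the attention mask later in this post is computed as `(x_batch > 0)`.

import numpy as np
from pytorch_pretrained_bert import BertTokenizer

MAX_LEN = 220  # assumed maximum sequence length

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def convert_lines(texts, max_len, tokenizer):
    """Tokenize, truncate, wrap in [CLS]/[SEP], and zero-pad each text."""
    all_ids = []
    for text in texts:
        tokens = tokenizer.tokenize(text)[:max_len - 2]
        ids = tokenizer.convert_tokens_to_ids(['[CLS]'] + tokens + ['[SEP]'])
        all_ids.append(ids + [0] * (max_len - len(ids)))  # 0 is BERT's pad id
    return np.array(all_ids)

train_seqs = convert_lines(train_df['comment_text'].fillna(''), MAX_LEN, tokenizer)
valid_seqs = convert_lines(valid_df['comment_text'].fillna(''), MAX_LEN, tokenizer)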
            # (tail of the per-batch accuracy update; the statement is truncated here)
            torch.float)).item() / len(train_loader)
        tq.set_postfix(avg_loss=avg_loss, avg_accuracy=avg_accuracy)

    torch.save(model.state_dict(),
               output_model_file + '_epoch_' + str(epoch) + '.bin')

    # validate
    test_model = BertForSequenceClassification(bert_config,
                                               num_labels=len(y_columns))
    # parallelism
    test_model = nn.DataParallel(test_model)
    test_model.load_state_dict(
        torch.load(output_model_file + '_epoch_' + str(epoch) + '.bin'))
    test_model.to(device)
    for param in test_model.parameters():
        param.requires_grad = False
    test_model.eval()

    valid_preds = np.zeros(len(X_val))
    print(valid_preds.size)
    valid = torch.utils.data.TensorDataset(
        torch.tensor(X_val, dtype=torch.long))
    valid_loader = torch.utils.data.DataLoader(valid,
                                               batch_size=256,
                                               shuffle=False)
    tk0 = tqdm(valid_loader)
    for i, (x_batch, ) in enumerate(tk0):
        pred = test_model(x_batch.to(device),
                          attention_mask=(x_batch > 0).to(device),
                          labels=None)
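The validation loop above is cut off before `pred` is written anywhere. A hedged sketch of a typical continuation, assuming the first of the `y_columns` logits is the main toxicity score and that the slice width matches the loader's batch size of 256:

    # hypothetical continuation: fill valid_preds batch by batch
    with torch.no_grad():
        for i, (x_batch, ) in enumerate(tqdm(valid_loader)):
            pred = test_model(x_batch.to(device),
                              attention_mask=(x_batch > 0).to(device),
                              labels=None)
            # squash the first logit through a sigmoid and store this batch
            valid_preds[i * 256:(i + 1) * 256] = torch.sigmoid(
                pred[:, 0]).cpu().numpy()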
def train_unfixed():
    # config file
    cf = Config('./config.yaml')
    # use the GPU if one is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # training data
    train_data = NewsDataset("./data/cnews_final_train.txt", cf.max_seq_len)
    train_dataloader = DataLoader(train_data,
                                  batch_size=cf.batch_size,
                                  shuffle=True)
    # test data (doubles as the validation set)
    test_data = NewsDataset("./data/cnews_final_test.txt", cf.max_seq_len)
    test_dataloader = DataLoader(test_data,
                                 batch_size=cf.batch_size,
                                 shuffle=True)

    # model
    config = BertConfig("./output/pytorch_bert_config.json")
    model = BertForSequenceClassification(config, num_labels=cf.num_labels)
    model.load_state_dict(torch.load("./output/pytorch_model.bin"))

    # BertAdam optimizer; every parameter is fine-tuned ("unfixed"), and
    # weight decay is disabled for biases and LayerNorm parameters
    for param in model.parameters():
        param.requires_grad = True
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in param_optimizer
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01
    }, {
        'params': [p for n, p in param_optimizer
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]
    num_train_optimization_steps = int(
        len(train_data) / cf.batch_size) * cf.epoch
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=cf.lr,
                         t_total=num_train_optimization_steps)

    # move the model to the target device
    model.to(device)
    # parallelize across GPUs if more than one is present
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # training
    start_time = time.time()
    total_batch = 0  # total number of batches seen
    best_acc_val = 0.0  # best validation accuracy so far
    last_improved = 0  # batch at which the last improvement occurred
    require_improvement = 1500  # stop early after 1500 batches without improvement

    # get the current validation accuracy as the baseline
    model.eval()
    _, best_acc_val = evaluate(model, test_dataloader, device)

    flag = False
    model.train()
    for epoch_id in range(cf.epoch):
        print("Epoch %d" % epoch_id)
        for step, batch in enumerate(
                tqdm(train_dataloader,
                     desc="batch",
                     total=len(train_dataloader))):
            label_id = batch['label_id'].squeeze(1).to(device)
            word_ids = batch['word_ids'].to(device)
            segment_ids = batch['segment_ids'].to(device)
            word_mask = batch['word_mask'].to(device)

            loss = model(word_ids, segment_ids, word_mask, label_id)
            loss = loss.mean()  # DataParallel returns one loss per GPU
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            total_batch += 1
            if total_batch % cf.print_per_batch == 0:
                model.eval()
                with torch.no_grad():
                    loss_train, acc_train = get_model_loss_acc(
                        model, word_ids, segment_ids, word_mask, label_id)
                    loss_val, acc_val = evaluate(model, test_dataloader,
                                                 device)

                if acc_val > best_acc_val:
                    # save the best result so far; unwrap DataParallel first
                    best_acc_val = acc_val
                    last_improved = total_batch
                    model_to_save = model.module if hasattr(
                        model, 'module') else model
                    torch.save(model_to_save.state_dict(),
                               "./output/pytorch_model.bin")
                    with open("./output/pytorch_bert_config.json", 'w') as f:
                        f.write(model_to_save.config.to_json_string())
                    improved_str = "*"
                else:
                    improved_str = ""

                time_dif = get_time_dif(start_time)
                msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                      + ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}'
                print(
                    msg.format(total_batch, loss_train, acc_train, loss_val,
                               acc_val, time_dif, improved_str))

                model.train()

            if total_batch - last_improved > require_improvement:
                print("No improvement for too long; stopping early")
                flag = True
                break
        if flag:
            break
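`train_unfixed` calls two helpers that are not shown in this post, `evaluate` and `get_model_loss_acc`. A minimal sketch consistent with how they are invoked above (the originals may differ); it relies on the `pytorch_pretrained_bert` convention that calling the model with labels returns the loss and without labels returns the logits:

import torch

def get_model_loss_acc(model, word_ids, segment_ids, word_mask, label_id):
    """Loss and accuracy on a single batch (model already in eval mode)."""
    loss = model(word_ids, segment_ids, word_mask, label_id)
    logits = model(word_ids, segment_ids, word_mask)  # no labels -> logits
    preds = torch.argmax(logits, dim=1)
    acc = (preds == label_id).float().mean().item()
    return loss.mean().item(), acc

def evaluate(model, dataloader, device):
    """Average loss and accuracy over a whole dataloader."""
    total_loss, total_correct, total_seen = 0.0, 0, 0
    with torch.no_grad():
        for batch in dataloader:
            label_id = batch['label_id'].squeeze(1).to(device)
            word_ids = batch['word_ids'].to(device)
            segment_ids = batch['segment_ids'].to(device)
            word_mask = batch['word_mask'].to(device)
            loss = model(word_ids, segment_ids, word_mask, label_id)
            logits = model(word_ids, segment_ids, word_mask)
            total_loss += loss.mean().item() * label_id.size(0)
            total_correct += (torch.argmax(logits, dim=1)
                              == label_id).sum().item()
            total_seen += label_id.size(0)
    return total_loss / total_seen, total_correct / total_seen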