import sys

import torch
import torch.nn.functional as F


def train(args):
    # split the data into training and validation sets
    train_iter, dev_iter = data_processor.load_data(args)
    print('Finished loading data')
    model = TextRNN(args)
    if args.cuda:
        model.cuda()
    """
    Q5: Please give optimizer here
    """
    optimizer = torch.optim.Adam(model.parameters())

    steps = 0
    best_acc = 0
    last_step = 0
    model.train()
    for epoch in range(1, args.epoch + 1):
        for batch in train_iter:
            feature, target = batch.text, batch.label
            # t_() would transpose (max_len, batch_size) to (batch_size, max_len)
            with torch.no_grad():
                # feature.t_()
                target.sub_(1)  # shift labels from 1-based to 0-based
            if args.cuda:
                feature, target = feature.cuda(), target.cuda()
            optimizer.zero_grad()
            logits = model(feature)
            loss = F.cross_entropy(logits, target)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()
            steps += 1
            if steps % args.log_interval == 0:
                # torch.max(logits, 1) returns each row's maximum value together
                # with its column index; [1] picks the indices, i.e. the
                # predicted classes
                corrects = (torch.max(logits, 1)[1] == target).sum().item()
                train_acc = 100.0 * corrects / batch.batch_size
                sys.stdout.write(
                    '\rBatch[{}] - loss: {:.6f} acc: {:.4f}%({}/{})'.format(
                        steps, loss.item(), train_acc, corrects, batch.batch_size))
            if steps % args.test_interval == 0:
                dev_acc = eval(dev_iter, model, args)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    last_step = steps
                    if args.save_best:
                        print('Saving best model, acc: {:.4f}%\n'.format(best_acc))
                        save(model, args.save_dir, 'best', steps)
                else:
                    if steps - last_step >= args.early_stopping:
                        print('\nearly stop by {} steps, acc: {:.4f}%'.format(
                            args.early_stopping, best_acc))
                        raise KeyboardInterrupt
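
# The loop above calls eval(dev_iter, model, args), which is not defined in
# this snippet. A minimal sketch of such a helper, assuming the same
# torchtext-style batches and 1-based labels as the training loop; only the
# name (which shadows the builtin eval) and the signature come from the call
# site, the body is an assumption.
def eval(data_iter, model, args):
    model.eval()
    corrects, total = 0, 0
    with torch.no_grad():
        for batch in data_iter:
            feature, target = batch.text, batch.label
            target = target - 1  # same 1-based -> 0-based shift as in training
            if args.cuda:
                feature, target = feature.cuda(), target.cuda()
            logits = model(feature)
            corrects += (torch.max(logits, 1)[1] == target).sum().item()
            total += batch.batch_size
    model.train()
    return 100.0 * corrects / total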

def train(args):
    # split the data into training and validation sets
    train_iter, dev_iter = data_processor.load_data(args)
    print('Finished loading data')
    model = TextRNN(args)
    cuda = torch.cuda.is_available()
    if cuda and args.cuda:
        model.cuda()
    """
    Q5: Please give optimizer here
    Add lr_scheduler to adjust learning rate.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.8)

    steps = 0
    best_acc = 0
    last_step = 0
    model.train()
    for epoch in range(1, args.epoch + 1):
        for batch in train_iter:
            feature, target = batch.text, batch.label
            # t_() transposes (max_len, batch_size) to (batch_size, max_len)
            with torch.no_grad():
                feature.t_()
                target.sub_(1)  # shift labels from 1-based to 0-based
            if args.cuda and cuda:
                feature, target = feature.cuda(), target.cuda()
            optimizer.zero_grad()
            logits = model(feature)
            loss = F.cross_entropy(logits, target)
            loss.backward()
            optimizer.step()
            steps += 1
            if steps % args.log_interval == 0:
                # torch.max(logits, 1) returns each row's maximum value together
                # with its column index; [1] picks the predicted classes
                corrects = (torch.max(logits, 1)[1] == target).sum().item()
                train_acc = 100.0 * corrects / batch.batch_size
                sys.stdout.write(
                    '\rBatch[{}] - loss: {:.6f} acc: {:.4f}%({}/{})'.format(
                        steps, loss.item(), train_acc, corrects, batch.batch_size))
            if steps % args.test_interval == 0:
                dev_acc = eval(dev_iter, model, args)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    last_step = steps
                    if args.save_best:
                        print('Saving best model, acc: {:.4f}%\n'.format(best_acc))
                        save(model, args.save_dir, 'best', steps)
                else:
                    # decay the learning rate when dev accuracy stops improving
                    scheduler.step()
                    print('lr decayed to {}'.format(
                        optimizer.state_dict()['param_groups'][0]['lr']))
                    if steps - last_step >= args.early_stopping:
                        print('\nearly stop by {} steps, acc: {:.4f}%'.format(
                            args.early_stopping, best_acc))
                        raise KeyboardInterrupt
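
# Both variants above also call save(model, save_dir, 'best', steps), which is
# not shown. A minimal sketch; the signature comes from the call sites, while
# the state_dict format and the '<prefix>_steps_<steps>.pt' filename scheme
# are assumptions.
import os

def save(model, save_dir, save_prefix, steps):
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, '{}_steps_{}.pt'.format(save_prefix, steps))
    torch.save(model.state_dict(), save_path)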

def train():
    model = TextRNN().to(device)
    # loss function for one-hot / multi-label targets
    Loss = nn.MultiLabelSoftMarginLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    # track the best validation accuracy, starting from 0
    best_val_acc = 0
    for epoch in range(10):
        # mini-batch training
        accuracy_array0 = np.array([])
        for step, (x_batch, y_batch) in enumerate(train_loader):
            x = x_batch.to(device)
            y = y_batch.to(device)
            out = model(x)
            loss = Loss(out, y)
            # backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            accuracy0 = np.mean(
                (torch.argmax(out, 1) == torch.argmax(y, 1)).cpu().numpy())
            accuracy_array0 = np.append(accuracy_array0, accuracy0)
        accuracy_train = np.mean(accuracy_array0)
        print('accuracy_train:', accuracy_train)
        # validate the model
        if (epoch + 1) % 5 == 0:
            # accumulate per-batch accuracies over the whole validation set
            accuracy_array1 = np.array([])
            with torch.no_grad():
                for step, (x_batch, y_batch) in enumerate(val_loader):
                    x = x_batch.to(device)
                    y = y_batch.to(device)
                    out = model(x)
                    # per-batch accuracy
                    accuracy1 = np.mean(
                        (torch.argmax(out, 1) == torch.argmax(y, 1)).cpu().numpy())
                    accuracy_array1 = np.append(accuracy_array1, accuracy1)
            accuracy_val = np.mean(accuracy_array1)
            print('accuracy_val:', accuracy_val)
            if accuracy_val > best_val_acc:
                torch.save(model, 'model.pkl')
                best_val_acc = accuracy_val
                print('model.pkl saved')
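
# The loop above checkpoints the whole model object with torch.save(model,
# 'model.pkl'). A short loading sketch for inference; note that unpickling a
# full model object (as opposed to a state_dict) requires the TextRNN class
# definition to be importable at load time.
model = torch.load('model.pkl', map_location=device)
model.eval()
with torch.no_grad():
    # x: a batch of inputs shaped like the training batches
    pred = torch.argmax(model(x.to(device)), 1)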

def main():
    reviews_ints, labels, features, word_int_dict = data_processing(300)
    train_data, test_data, train_label, test_label = split_train_test(
        features, labels, 0.1)
    textrnn = TextRNN(300 * len(train_data), embed_size, hidden_size, 1)
    criterion = nn.CrossEntropyLoss()
    optimizer = t.optim.Adam(textrnn.parameters(), lr=0.01)
    # number of batches per epoch (ceiling division, so the last,
    # possibly smaller, batch is not dropped and no empty batch is produced)
    process_bar = (len(train_data) + batch_size - 1) // batch_size
    for epoch in range(num_epochs):
        for i in range(process_bar):
            x = train_data[batch_size * i:batch_size * (i + 1)]
            y = train_label[batch_size * i:batch_size * (i + 1)]
            # x: [batch_size, seq_length]
            x = t.LongTensor(x)
            y = t.LongTensor(y)
            # Inside the model: the embedded input is
            # [batch_size, seq_length, embed_size],
            # h0 is [num_layers * num_directions, batch_size, hidden_size],
            # output is [batch_size, seq_length, num_directions * hidden_size],
            # and ht is [num_layers * num_directions, batch_size, hidden_size].
            output = textrnn(x)
            loss = criterion(output, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(str(datetime.datetime.now()) + ' || epoch ' + str(epoch + 1)
                  + ' || step ' + str(i + 1) + ' | loss: ' + str(loss.item()))
            if i % 5 == 0:
                # evaluate on the full test set
                test = t.LongTensor(test_data)
                output = textrnn(test)
                pre_y = t.max(output, dim=1)[1].data.numpy().squeeze()
                acc = sum(pre_y == test_label) / len(test_label)
                print('acc:', acc)
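
# The manual index slicing above re-implements batching by hand. For
# comparison, a sketch of the same batching via TensorDataset/DataLoader,
# which also reshuffles between epochs; it assumes train_data and train_label
# are list- or array-like, as in the snippet.
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(t.LongTensor(train_data), t.LongTensor(train_label))
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
for epoch in range(num_epochs):
    for x, y in loader:  # each batch is a (LongTensor, LongTensor) pair
        loss = criterion(textrnn(x), y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()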

def train(lr, train_loader, test_loader):
    model = TextRNN().cuda()
    loss_fn = nn.MultiLabelSoftMarginLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    best_acc = 0
    for epoch in range(train_epochs):
        for step, (x_batch, y_batch) in enumerate(train_loader):
            x, y = x_batch.cuda(), y_batch.cuda()
            # forward pass
            y_pred = model(x)
            loss = loss_fn(y_pred, y)
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # accuracy on the last training batch of the epoch
        acc = np.mean(
            (torch.argmax(y_pred, 1) == torch.argmax(y, 1)).cpu().numpy())
        print('Training epoch {:}, loss = {:}, acc = {:}'.format(
            epoch + 1, loss.item(), acc))
        if (epoch + 1) % 5 == 0:
            with torch.no_grad():
                for step, (x_batch, y_batch) in enumerate(test_loader):
                    x, y = x_batch.cuda(), y_batch.cuda()
                    # forward pass only
                    y_pred = model(x)
                    acc = np.mean(
                        (torch.argmax(y_pred, 1) == torch.argmax(y, 1)).cpu().numpy())
                    if acc > best_acc:
                        best_acc = acc
                        torch.save(model.state_dict(), 'model_params.pkl')
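
# This variant checkpoints only the parameters ('model_params.pkl'), so
# restoring requires instantiating TextRNN first. A minimal loading sketch;
# saving a state_dict keeps the checkpoint independent of how the model class
# itself is pickled, unlike saving the whole model object.
model = TextRNN().cuda()
model.load_state_dict(torch.load('model_params.pkl'))
model.eval()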

def train():
    model = TextRNN().to(device)
    # loss function for one-hot / multi-label targets
    Loss = nn.MultiLabelSoftMarginLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    # track the best validation accuracy, starting from 0
    best_val_acc = 0
    costs = []
    early_stop = 0
    min_loss = float('inf')
    for epoch in range(5):
        # mini-batch training
        losses = []
        accuracy_array0 = np.array([])
        for step, (x_batch, y_batch) in enumerate(train_loader):
            x = x_batch.to(device)
            y = y_batch.to(device)
            out = model(x)
            loss = Loss(out, y)
            losses.append(loss.item())
            # backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            accuracy0 = np.mean(
                (torch.argmax(out, 1) == torch.argmax(y, 1)).cpu().numpy())
            accuracy_array0 = np.append(accuracy_array0, accuracy0)
        meanloss = np.mean(losses)
        costs.append(meanloss)
        # validate the model
        if (epoch + 1) % 5 == 0:
            accuracy_train = np.mean(accuracy_array0)
            print('accuracy_train:', accuracy_train)
            # accumulate per-batch accuracies over the whole validation set
            accuracy_array1 = np.array([])
            with torch.no_grad():
                for step, (x_batch, y_batch) in enumerate(val_loader):
                    x = x_batch.to(device)
                    y = y_batch.to(device)
                    out = model(x)
                    # per-batch accuracy
                    accuracy1 = np.mean(
                        (torch.argmax(out, 1) == torch.argmax(y, 1)).cpu().numpy())
                    accuracy_array1 = np.append(accuracy_array1, accuracy1)
            accuracy_val = np.mean(accuracy_array1)
            print('accuracy_val:', accuracy_val)
            if accuracy_val > best_val_acc:
                torch.save(model, 'model.pkl')
                best_val_acc = accuracy_val
                print('model.pkl saved')
        # early stopping on the mean training loss
        if meanloss < min_loss:
            min_loss = meanloss
            early_stop = 0
        else:
            early_stop += 1
            if early_stop > 5:
                print(f"loss has not decreased for {early_stop} consecutive epochs, stopping")
                break
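
# costs accumulates the mean training loss per epoch but is never consumed in
# the snippet above. One plausible use is plotting a learning curve; matplotlib
# is an assumption here, it is not imported by the original.
import matplotlib.pyplot as plt

plt.plot(range(1, len(costs) + 1), costs)
plt.xlabel('epoch')
plt.ylabel('mean training loss')
plt.savefig('loss_curve.png')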

def train():
    # configuration
    cf = Config('./config.yaml')
    # use the GPU when available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # training data
    train_data = NewsDataset("./data/cnews_final_train.txt", cf.max_seq_len)
    train_dataloader = DataLoader(train_data, batch_size=cf.batch_size, shuffle=True)
    # test data
    test_data = NewsDataset("./data/cnews_final_test.txt", cf.max_seq_len)
    test_dataloader = DataLoader(test_data, batch_size=cf.batch_size, shuffle=True)
    # pretrained word-embedding matrix
    embedding_matrix = get_pre_embedding_matrix("./data/final_vectors")
    # model
    model = TextRNN(cf, torch.tensor(embedding_matrix))
    # Adam optimizer over the trainable parameters only
    optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()))
    # move the model to the target device
    model.to(device)
    # parallelize across GPUs when more than one is present
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # training
    start_time = time.time()
    total_batch = 0             # total number of batches seen
    best_acc_val = 0.0          # best validation accuracy
    last_improved = 0           # batch at which the last improvement happened
    require_improvement = 1000  # stop early after 1000 batches without improvement
    flag = False
    model.train()
    for epoch_id in trange(cf.epoch, desc="Epoch"):
        for step, batch in enumerate(train_dataloader):
            label_id = batch['label_id'].squeeze(1).to(device)
            seq_len = batch["seq_len"].to(device)
            segment_ids = batch['segment_ids'].to(device)
            # sort the sequences by length in descending order
            seq_len, perm_idx = seq_len.sort(0, descending=True)
            label_id = label_id[perm_idx]
            segment_ids = segment_ids[perm_idx].transpose(0, 1)
            loss = model(segment_ids, seq_len, label_id)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            total_batch += 1
            if total_batch % cf.print_per_batch == 0:
                model.eval()
                with torch.no_grad():
                    loss_train, acc_train = model.get_loss_acc(
                        segment_ids, seq_len, label_id)
                    loss_val, acc_val = evaluate(model, test_dataloader, device)
                if acc_val > best_acc_val:
                    # save the best result so far
                    best_acc_val = acc_val
                    last_improved = total_batch
                    torch.save(model.state_dict(), "./output/model.bin")
                    improved_str = "*"
                else:
                    improved_str = ""
                time_dif = get_time_dif(start_time)
                msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                      + ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}'
                print(msg.format(total_batch, loss_train, acc_train,
                                 loss_val, acc_val, time_dif, improved_str))
                model.train()
            if total_batch - last_improved > require_improvement:
                print("No improvement for too long; stopping training early")
                flag = True
                break
        if flag:
            break
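
# train() above relies on an evaluate(model, test_dataloader, device) helper
# that is not shown. A sketch of one plausible shape: it assumes
# model.get_loss_acc returns a (loss, accuracy) pair per batch, as at the
# training call site, and that a simple average over batches is acceptable.
def evaluate(model, dataloader, device):
    total_loss, total_acc, n_batches = 0.0, 0.0, 0
    with torch.no_grad():
        for batch in dataloader:
            label_id = batch['label_id'].squeeze(1).to(device)
            seq_len = batch['seq_len'].to(device)
            segment_ids = batch['segment_ids'].to(device)
            # same length-descending sort as in the training loop
            seq_len, perm_idx = seq_len.sort(0, descending=True)
            label_id = label_id[perm_idx]
            segment_ids = segment_ids[perm_idx].transpose(0, 1)
            loss, acc = model.get_loss_acc(segment_ids, seq_len, label_id)
            total_loss += float(loss)
            total_acc += float(acc)
            n_batches += 1
    return total_loss / n_batches, total_acc / n_batches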

import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

EPOCH = 30
batch_size = 32
best_epoch, best_acc = 0, 0
# filename for the best model checkpoint
file_name = 'cnews_best.pt'

train_data = textData(train=True)
val_data = textData(val=True)
test_data = textData()
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

model = TextRNN()
# loss function: cross entropy
criterion = nn.CrossEntropyLoss()
# optimizer: Adam
optimizer = optim.Adam(model.parameters(), lr=0.001)
# device: GPU or CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# training
for epoch in range(EPOCH):
    start_time = time.time()
    for i, data in enumerate(train_loader):
        model.train()
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        # forward pass
        outputs = model(inputs)
        # compute the loss and update the parameters
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
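
    # val_loader, best_epoch/best_acc, and file_name are set up above but the
    # snippet ends before using them. A hedged sketch of the per-epoch
    # validation and checkpointing those variables suggest; placement and
    # details are assumptions.
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            preds = model(inputs).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    val_acc = correct / total
    if val_acc > best_acc:
        best_acc, best_epoch = val_acc, epoch
        torch.save(model.state_dict(), file_name)  # cnews_best.pt
    print('epoch {}: val_acc {:.4f} ({:.1f}s)'.format(
        epoch, val_acc, time.time() - start_time))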