Example #1

def train(args):
    train_iter, dev_iter = data_processor.load_data(args)  # split the data into training and validation sets
    print('Data loading complete')
    model = TextRNN(args)
    Cuda = torch.cuda.is_available()
    if Cuda and args.cuda: 
        model.cuda()
    """
    Q5:
        Please give optimizer here
		
		Add lr_scheduler to adjust learning rate.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.8)
    steps = 0
    best_acc = 0
    last_step = 0
    model.train()
    for epoch in range(1, args.epoch + 1):
        for batch in train_iter:
            feature, target = batch.text, batch.label
            
            # t_() transposes feature from (max_len, batch_size) to (batch_size, max_len)
            with torch.no_grad():
                feature.t_(), target.sub_(1)  # shift labels from 1-based to 0-based

            if args.cuda and Cuda:
                feature, target = feature.cuda(), target.cuda()
            optimizer.zero_grad()
            logits = model(feature)
            loss = F.cross_entropy(logits, target)
            loss.backward()
            optimizer.step()
            steps += 1
            if steps % args.log_interval == 0:
                # torch.max(logits, 1) returns each row's maximum value together with its column index
                corrects = (torch.max(logits, 1)[1] == target).sum().item()
                train_acc = 100.0 * corrects / batch.batch_size
                sys.stdout.write(
                    '\rBatch[{}] - loss: {:.6f}  acc: {:.4f}%({}/{})'.format(steps,
                                                                             loss.item(),
                                                                             train_acc,
                                                                             corrects,
                                                                             batch.batch_size))
            if steps % args.test_interval == 0:
                dev_acc = eval(dev_iter, model, args)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    last_step = steps
                    if args.save_best:
                        print('Saving best model, acc: {:.4f}%\n'.format(best_acc))
                        save(model, args.save_dir, 'best', steps)
                else:
                    scheduler.step()
                    print('lr decayed to {}'.format(optimizer.state_dict()['param_groups'][0]['lr']))
                    if steps - last_step >= args.early_stopping:
                        print('\nearly stop by {} steps, acc: {:.4f}%'.format(args.early_stopping, best_acc))
                        raise KeyboardInterrupt
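
Example #1 (and Example #2 below) calls an eval(dev_iter, model, args) helper that this listing does not include. A minimal sketch of what it plausibly looks like, mirroring the preprocessing in the training loop above; everything in it is an assumption, not the original helper:

import torch
import torch.nn.functional as F

def eval(dev_iter, model, args):  # shadows the built-in eval(), matching the call above
    # Hypothetical validation helper; the original listing does not show it.
    model.eval()
    corrects, total = 0, 0
    with torch.no_grad():
        for batch in dev_iter:
            feature, target = batch.text, batch.label
            feature.t_(), target.sub_(1)  # same preprocessing as in train()
            if args.cuda and torch.cuda.is_available():
                feature, target = feature.cuda(), target.cuda()
            logits = model(feature)
            corrects += (torch.max(logits, 1)[1] == target).sum().item()
            total += target.size(0)
    model.train()
    return 100.0 * corrects / total  # accuracy as a percentage, as train() expects
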
Example #2

def train(args):
    train_iter, dev_iter = data_processor.load_data(args)  # split the data into training and validation sets
    print('Data loading complete')
    model = TextRNN(args)
    if args.cuda:
        model.cuda()
    """
    Q5:
        Please give optimizer here
    """
    optimizer = torch.optim.Adam(model.parameters())
    steps = 0
    best_acc = 0
    last_step = 0
    model.train()
    for epoch in range(1, args.epoch + 1):
        for batch in train_iter:
            feature, target = batch.text, batch.label

            # unlike Example #1, feature is not transposed here: the model
            # consumes input of shape (max_len, batch_size) directly
            with torch.no_grad():
                target.sub_(1)  # shift labels from 1-based to 0-based

            if args.cuda:
                feature, target = feature.cuda(), target.cuda()
            optimizer.zero_grad()
            logits = model(feature)
            loss = F.cross_entropy(logits, target)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()
            steps += 1
            if steps % args.log_interval == 0:
                # torch.max(logits, 1) returns each row's maximum value together with its column index
                corrects = (torch.max(logits, 1)[1] == target).sum().item()
                train_acc = 100.0 * corrects / batch.batch_size
                sys.stdout.write(
                    '\rBatch[{}] - loss: {:.6f}  acc: {:.4f}%({}/{})'.format(
                        steps, loss.item(), train_acc, corrects,
                        batch.batch_size))
            if steps % args.test_interval == 0:
                dev_acc = eval(dev_iter, model, args)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    last_step = steps
                    if args.save_best:
                        print('Saving best model, acc: {:.4f}%\n'.format(
                            best_acc))
                        save(model, args.save_dir, 'best', steps)
                else:
                    if steps - last_step >= args.early_stopping:
                        print('\nearly stop by {} steps, acc: {:.4f}%'.format(
                            args.early_stopping, best_acc))
                        raise KeyboardInterrupt
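
Both variants also call save(model, args.save_dir, 'best', steps), which is defined elsewhere. A plausible minimal implementation; the filename pattern is an assumption:

import os
import torch

def save(model, save_dir, save_prefix, steps):
    # Hypothetical checkpoint helper; not shown in the original listing.
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, '{}_steps_{}.pt'.format(save_prefix, steps))
    torch.save(model.state_dict(), save_path)
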
Example #3

def train():
    # configuration file
    cf = Config('./config.yaml')
    # use the GPU if one is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # training data
    train_data = NewsDataset("./data/cnews_final_train.txt", cf.max_seq_len)
    train_dataloader = DataLoader(train_data,
                                  batch_size=cf.batch_size,
                                  shuffle=True)
    # test data (also used as the validation set below)
    test_data = NewsDataset("./data/cnews_final_test.txt", cf.max_seq_len)
    test_dataloader = DataLoader(test_data,
                                 batch_size=cf.batch_size,
                                 shuffle=True)

    # pretrained word-embedding matrix
    embedding_matrix = get_pre_embedding_matrix("./data/final_vectors")
    # model
    model = TextRNN(cf, torch.tensor(embedding_matrix))
    # Adam optimizer over the trainable parameters only
    optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()))

    # move the model to the chosen device
    model.to(device)

    # run the model in parallel across multiple GPUs
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # training
    start_time = time.time()

    total_batch = 0  # total number of batches seen
    best_acc_val = 0.0  # best validation accuracy so far
    last_improved = 0  # batch at which the last improvement happened
    require_improvement = 1000  # stop early if there is no improvement within 1000 batches

    flag = False
    model.train()
    for epoch_id in trange(cf.epoch, desc="Epoch"):
        for step, batch in enumerate(train_dataloader):

            label_id = batch['label_id'].squeeze(1).to(device)
            seq_len = batch["seq_len"].to(device)
            segment_ids = batch['segment_ids'].to(device)

            # sort the sequences in the batch by descending length
            seq_len, perm_idx = seq_len.sort(0, descending=True)
            label_id = label_id[perm_idx]
            segment_ids = segment_ids[perm_idx].transpose(0, 1)

            loss = model(segment_ids, seq_len, label_id)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            total_batch += 1

            if total_batch % cf.print_per_batch == 0:
                model.eval()
                with torch.no_grad():
                    loss_train, acc_train = model.get_loss_acc(
                        segment_ids, seq_len, label_id)
                loss_val, acc_val = evaluate(model, test_dataloader, device)

                if acc_val > best_acc_val:
                    # save the best result so far
                    best_acc_val = acc_val
                    last_improved = total_batch
                    torch.save(model.state_dict(), "./output/model.bin")
                    improved_str = "*"
                else:
                    improved_str = ""

                time_dif = get_time_dif(start_time)
                msg = ('Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},'
                       ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}')
                print(
                    msg.format(total_batch, loss_train, acc_train, loss_val,
                               acc_val, time_dif, improved_str))

                model.train()

            if total_batch - last_improved > require_improvement:
                print("长时间未优化")
                flag = True
                break
        if flag:
            break
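
The descending sort on seq_len suggests the model packs its input with pack_padded_sequence, whose default enforce_sorted=True requires exactly that ordering. A self-contained sketch of a forward pass that consumes the sorted batch; the class and layer names are assumptions, not the TextRNN used above:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence

class PackedTextRNN(nn.Module):
    # Illustrative stand-in for the TextRNN above; all names are assumptions.
    def __init__(self, vocab_size, embed_dim, hidden_dim, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, segment_ids, seq_len, label_id):
        # segment_ids: (max_len, batch_size), sorted by descending seq_len
        embedded = self.embedding(segment_ids)
        packed = pack_padded_sequence(embedded, seq_len.cpu())  # lengths must live on the CPU
        _, (hidden, _) = self.lstm(packed)
        logits = self.fc(hidden[-1])  # hidden state of the last LSTM layer
        return F.cross_entropy(logits, label_id)
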
Example #4

model = TextRNN()

# loss function: cross-entropy
criterion = nn.CrossEntropyLoss()
# optimizer: Adam
optimizer = optim.Adam(model.parameters(), lr=0.001)

# device : GPU or CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# training loop
for epoch in range(EPOCH):
    start_time = time.time()
    for i, data in enumerate(train_loader):
        model.train()
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        # forward pass
        outputs = model(inputs)
        # compute the loss
        loss = criterion(outputs, labels)
        # clear the gradients from the previous step
        optimizer.zero_grad()
        # backward pass
        loss.backward()
        # update the parameters
        optimizer.step()
        accuracy = torch.mean((torch.argmax(outputs, 1) == labels).float()).item()
        print('epoch{} loss:{:.4f} acc:{:.4f} time:{:.4f}'.format(
            epoch, loss.item(), accuracy, time.time() - start_time))
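
Example #4 never leaves training mode to measure held-out accuracy. A minimal validation pass that would pair with the loop above; test_loader is an assumption:

import torch

def evaluate(model, test_loader, device):
    # Hypothetical companion to the training loop above; test_loader is assumed.
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            correct += (torch.argmax(outputs, 1) == labels).sum().item()
            total += labels.size(0)
    model.train()
    return correct / total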