コード例 #1
0
def main():
    """Fine-tune a BERT classifier on the CNews splits and keep the best weights.

    Runs a fixed number of epochs. Each epoch trains on the training split,
    then evaluates on the validation split. Whenever the mean per-batch
    validation accuracy beats the best value seen so far, the model's
    ``state_dict`` is written to ``models/best_model.pkl``.
    """
    import os

    # Hyper-parameters
    batch_size = 4
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    epochs = 10
    learning_rate = 5e-6  # keep the learning rate small when fine-tuning

    # torch.save does not create missing directories; make sure the
    # checkpoint folder exists before spending time on training.
    os.makedirs('models', exist_ok=True)

    # Datasets
    train_dataset = CNewsDataset('data/cnews/cnews.train.txt')
    valid_dataset = CNewsDataset('data/cnews/cnews.val.txt')

    # Batch loaders
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=batch_size,
                                  shuffle=False)

    # BERT configuration; the number of classes comes from the training set
    bert_config = BertConfig.from_pretrained('bert-base-chinese')
    num_labels = len(train_dataset.labels)

    # Model, optimizer and loss
    model = BertClassifier(bert_config, num_labels).to(device)

    optimizer = AdamW(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    best_acc = 0

    for epoch in range(1, epochs + 1):
        losses = 0  # summed per-batch loss
        accuracy = 0  # summed per-batch accuracy

        # ---- training ----
        model.train()
        train_bar = tqdm(train_dataloader)
        train_bar.set_description('Epoch %i train' % epoch)
        for input_ids, token_type_ids, attention_mask, label_id in train_bar:
            label_id = label_id.to(device)
            model.zero_grad()

            output = model(
                input_ids=input_ids.to(device),
                attention_mask=attention_mask.to(device),
                token_type_ids=token_type_ids.to(device),
            )

            loss = criterion(output, label_id)
            losses += loss.item()

            pred_labels = torch.argmax(output, dim=1)  # predicted class ids
            acc = torch.sum(pred_labels == label_id).item() / len(pred_labels)
            accuracy += acc

            loss.backward()
            optimizer.step()
            train_bar.set_postfix(loss=loss.item(), acc=acc)

        average_loss = losses / len(train_dataloader)
        average_acc = accuracy / len(train_dataloader)

        print('\tTrain ACC:', average_acc, '\tLoss:', average_loss)

        # ---- validation ----
        model.eval()
        losses = 0
        accuracy = 0
        valid_bar = tqdm(valid_dataloader)
        valid_bar.set_description('Epoch %i valid' % epoch)
        # No gradients are needed for evaluation; skipping graph construction
        # saves memory and time.
        with torch.no_grad():
            for input_ids, token_type_ids, attention_mask, label_id in valid_bar:
                label_id = label_id.to(device)
                output = model(
                    input_ids=input_ids.to(device),
                    attention_mask=attention_mask.to(device),
                    token_type_ids=token_type_ids.to(device),
                )

                loss = criterion(output, label_id)
                losses += loss.item()

                pred_labels = torch.argmax(output, dim=1)  # predicted class ids
                acc = torch.sum(pred_labels == label_id).item() / len(
                    pred_labels)
                accuracy += acc
                valid_bar.set_postfix(loss=loss.item(), acc=acc)

        average_loss = losses / len(valid_dataloader)
        average_acc = accuracy / len(valid_dataloader)

        print('\tValid ACC:', average_acc, '\tLoss:', average_loss)

        # Keep only the weights that score best on the validation split
        if average_acc > best_acc:
            best_acc = average_acc
            torch.save(model.state_dict(), 'models/best_model.pkl')
コード例 #2
0
def train(dataloader,
          head_trans,
          body_trans,
          classifier,
          load_model=False,
          save_model=True,
          num_epochs=2):
    """Train a ``BertClassifier`` on (head, body, stance) batches.

    Args:
        dataloader: Iterable with a length that yields ``(head, body, stance)``
            tensor triples.
        head_trans: Passed through to ``BertClassifier`` as its first argument.
        body_trans: Passed through to ``BertClassifier`` as its second argument.
        classifier: Passed through to ``BertClassifier`` as its third argument.
        load_model: When true, restore the model from
            ``bert_chkpnt/my_checkpoint.pth.tar`` and return it immediately
            without training.
        save_model: When true, write a checkpoint at the start of every epoch.
        num_epochs: Number of passes over ``dataloader``.

    Returns:
        The trained (or restored) model.
    """
    torch.backends.cudnn.benchmark = True
    # Training is pinned to the CPU here.
    device = 'cpu'
    print(device)

    learning_rate = 3e-3

    # For tensorboard
    writer = SummaryWriter('runs/bert')
    step = 0

    # Initialize model, loss and optimizer
    model = BertClassifier(head_trans, body_trans, classifier).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    if load_model:
        model, optimizer, step = load_checkpoint(
            torch.load('bert_chkpnt/my_checkpoint.pth.tar'), model, optimizer)
        writer.close()
        return model

    for epoch in range(num_epochs):
        if save_model:
            checkpoint = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'step': step
            }
            save_checkpoint(checkpoint)

        # Accumulators for the 10-batch running averages. They must exist
        # before the first `+=`, and are reset per epoch so a partial window
        # from the previous epoch does not leak into the next one.
        running_loss = 0.0
        running_accuracy = 0.0

        loop = tqdm(enumerate(dataloader), total=len(dataloader), leave=False)

        for batch, (head, body, stance) in loop:
            labels = stance.to(device)

            outputs = model(head.to(device), body.to(device))
            loss = criterion(outputs.float(), labels.long())

            writer.add_scalar('Training Loss', loss.item(), step)
            step += 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Update progress bar
            loop.set_description(f'Epoch [{epoch+1}/{num_epochs}]')
            loop.set_postfix(loss=loss.item())

            running_loss += loss.item()
            # Divide by the actual size of this batch so a short final batch
            # is scored correctly.
            running_accuracy += (
                (torch.argmax(outputs, dim=1) == labels).sum().item()
            ) / labels.size(0)
            if (batch + 1) % 10 == 0:
                global_batch = epoch * len(dataloader) + batch
                writer.add_scalar('Running Loss', running_loss / 10,
                                  global_batch)
                writer.add_scalar('Running Accuracy', running_accuracy / 10,
                                  global_batch)

                running_loss = 0.0
                running_accuracy = 0.0

    # Flush pending events to disk and release the log file.
    writer.close()
    return model
コード例 #3
0
         # NOTE(review): this excerpt starts inside enclosing blocks that are
         # not visible here; `progress_bar` presumably wraps `train_dataloader`.
         # Batches are numbered from 1 so `i` can be compared directly with
         # len(train_dataloader) below.
         for i, batch in enumerate(progress_bar, 1):
             outputs = bert_classifier.train_on_batch(batch)
             postfix = update_metrics(metrics, outputs, batch["labels"])
             progress_bar.set_postfix(postfix)
             # Evaluate on the dev set every `eval_every_n_batches` training
             # batches (unless fewer than half an interval remains before the
             # last training batch), and always after the last training batch.
             if (args.eval_every_n_batches > 0 and i % args.eval_every_n_batches == 0 and
                         len(train_dataloader) - i >= args.eval_every_n_batches // 2) or\
                     i == len(train_dataloader):
                 dev_metrics = initialize_metrics()
                 dev_progress_bar = tqdm.tqdm(dev_dataloader)
                 # Note: this inner loop rebinds `batch`, `outputs` and `postfix`.
                 for j, batch in enumerate(dev_progress_bar):
                     outputs = bert_classifier.validate_on_batch(batch)
                     postfix = update_metrics(dev_metrics, outputs, batch["labels"])
                     dev_progress_bar.set_postfix(postfix)
                 # Snapshot the weights whenever dev accuracy improves.
                 if dev_metrics["accuracy"] > best_score:
                     best_score = dev_metrics["accuracy"]
                     best_weights = copy.deepcopy(bert_classifier.state_dict())
     # Restore the best weights recorded so far.
     bert_classifier.load_state_dict(best_weights)
 ## load the best state
 bert_classifier.eval()
 if args.save_file is not None:
     torch.save(best_weights, args.save_file)
 # One slot per dev example so predictions can be stored by their index.
 probs, labels = [None] * len(dev_data), [None] * len(dev_data)
 dev_dataloader = make_dataloader(dev_dataset, batch_size=args.dev_batch_size, shuffle=False)
 dev_progress_bar = tqdm.tqdm(dev_dataloader)
 for i, batch in enumerate(dev_progress_bar):
     outputs = bert_classifier.predict_on_batch(batch)
     # Place each prediction at the position given by the example's "index".
     for index, prob, label in zip(batch["index"], outputs["probs"], outputs["labels"]):
         probs[index], labels[index] = prob, label
 # Gold labels: 1 when the answer field equals the positive label, else 0.
 corr_labels = [int(elem[args.answer_field]==args.pos_label) for elem in dev_data]
 accuracy = accuracy_score(corr_labels, labels)
 # Note: rebinds `metrics`, which was passed to update_metrics in the training loop above.
 metrics = precision_recall_fscore_support(corr_labels, labels)
コード例 #4
0
def main():
    """Fine-tune a BERT classifier under ``DataParallel`` and keep the best weights.

    Loads the training and validation data, trains for a fixed number of
    epochs on GPUs 3 and 4, and evaluates after every epoch. Whenever the
    mean per-batch validation accuracy improves, the underlying model's
    ``state_dict`` is saved to ``best_model_on_trainset.pkl``.
    """
    # Primary device. DataParallel expects the module's parameters to live on
    # device_ids[0], so this must match the first id passed below.
    device = torch.device('cuda:3')

    # Load the datasets
    print('加载训练数据')
    train_data = load_data('dataset/train.csv')
    print('加载验证数据')
    valid_data = load_data('dataset/test.csv')

    batch_size = 16

    # Build the batch loaders
    print('生成batch')
    train_dataloader = DataLoader(train_data,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=3)
    valid_dataloader = DataLoader(valid_data,
                                  batch_size=batch_size,
                                  shuffle=False,
                                  num_workers=3)

    # Read the BERT configuration
    bert_config = BertConfig.from_pretrained('./chinese_wwm_pytorch')
    # NOTE(review): `num_labels` is not defined inside this function;
    # presumably a module-level name — confirm.
    bert_config.num_labels = num_labels
    print(bert_config)

    # Build the model
    model = BertClassifier(bert_config)

    # Hyper-parameters
    EPOCHS = 20
    learning_rate = 5e-6  # keep the learning rate small when fine-tuning
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    # Cross-entropy loss
    criterion = nn.CrossEntropyLoss()

    # Start a fresh log file that records the run's hyper-parameters
    with open('output.txt', 'w') as wf:
        wf.write('Batch Size: ' + str(batch_size) + '\tLearning Rate: ' +
                 str(learning_rate) + '\n')

    best_acc = 0
    # Wrap the model for multi-GPU execution; parameters are placed on
    # device_ids[0], i.e. the cuda:3 device selected above.
    net = torch.nn.DataParallel(model, device_ids=[3, 4])
    net.to(device)

    for Epoch in range(1, EPOCHS + 1):
        losses = 0  # summed per-batch loss
        accuracy = 0  # summed per-batch accuracy
        print('Epoch:', Epoch)

        # ---- training ----
        net.train()
        for batch_index, batch in enumerate(train_dataloader):
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            token_type_ids = batch[2].to(device)
            label_ids = batch[3].to(device)

            # Forward pass through the data-parallel wrapper
            output = net(
                input_ids=input_ids,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
            )

            loss = criterion(output, label_ids)
            losses += loss.item()

            pred_labels = torch.argmax(output, dim=1)  # predicted class ids
            acc = torch.sum(pred_labels == label_ids).item() / len(pred_labels)
            accuracy += acc

            # Clear old gradients, back-propagate, update the weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        average_loss = losses / len(train_dataloader)
        average_acc = accuracy / len(train_dataloader)
        print('\tTrain ACC:', average_acc, '\tLoss:', average_loss)

        # ---- validation ----
        net.eval()
        losses = 0
        accuracy = 0
        # No gradients are needed for evaluation. The forward pass goes
        # through the same DataParallel wrapper that training uses, so both
        # phases run on the same devices.
        with torch.no_grad():
            for batch_index, batch in enumerate(valid_dataloader):
                input_ids = batch[0].to(device)
                attention_mask = batch[1].to(device)
                token_type_ids = batch[2].to(device)
                label_ids = batch[3].to(device)

                output = net(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    token_type_ids=token_type_ids,
                )
                loss = criterion(output, label_ids)
                losses += loss.item()

                pred_labels = torch.argmax(output, dim=1)  # predicted class ids
                acc = torch.sum(pred_labels == label_ids).item() / len(
                    pred_labels)
                accuracy += acc

        average_loss = losses / len(valid_dataloader)
        average_acc = accuracy / len(valid_dataloader)

        print('\tValid ACC:', average_acc, '\tLoss:', average_loss)

        # Save the unwrapped model's weights when validation accuracy improves
        if average_acc > best_acc:
            best_acc = average_acc
            torch.save(model.state_dict(), 'best_model_on_trainset.pkl')