def main(paras):
    """Fine-tune a BERT classifier and keep the checkpoint with the best F1.

    Configures logging, loads the pretrained tokenizer/encoder named by
    ``paras.model_name``, builds the train/test data loaders, trains for
    ``paras.num_train_epochs`` epochs and runs ``evaluation`` on the test
    loader after every epoch.  Whenever the F1-score improves (and always
    while no checkpoint has been recorded yet), the whole classifier object
    is saved with ``torch.save`` and a short metrics summary is written to
    ``paras.checkpoint_file``.

    Args:
        paras: Run configuration object.  The attributes read here are
            ``save_log_file``, ``logging_level``, ``log_save_path``,
            ``train_log_file``, ``model_name``, ``batch_size``, ``shuffle``,
            ``drop_last``, ``hidden_size``, ``label_number``,
            ``dropout_prob``, ``optimizer``, ``learning_rate``,
            ``adam_epsilon`` (AdamW only), ``num_train_epochs``,
            ``max_sequence_length``, ``model_save_name`` and
            ``checkpoint_file``.
    """
    logger = logging.getLogger(__name__)

    # Same format for console and file logging; only the destination differs.
    log_config = {
        'format': '%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        'datefmt': '%m/%d/%Y %H:%M:%S',
        'level': paras.logging_level,
    }
    if paras.save_log_file:
        log_config['filename'] = f'{paras.log_save_path}/{paras.train_log_file}'
        log_config['filemode'] = 'w'
    logging.basicConfig(**log_config)

    logger.info(f'Loading model: {paras.model_name}')
    tokenizer = BertTokenizer.from_pretrained(paras.model_name)
    bert = BertModel.from_pretrained(paras.model_name)

    train_dataset = RE_Dataset(paras, 'train')
    train_dataloader = DataLoader(train_dataset, batch_size=paras.batch_size,
                                  shuffle=paras.shuffle, drop_last=paras.drop_last)
    label_to_index = train_dataset.label_to_index

    # Register the dataset's special tokens with the tokenizer.  Newly added
    # tokens receive ids beyond the pretrained vocabulary, so the encoder's
    # embedding matrix must grow with it or those ids index out of range.
    special_token_list = list(train_dataset.special_token_set)
    num_added_tokens = tokenizer.add_special_tokens(
        {'additional_special_tokens': special_token_list})
    if num_added_tokens > 0:
        bert.resize_token_embeddings(len(tokenizer))

    # NOTE(review): the test loader reuses the training shuffle/drop_last
    # settings; with drop_last=True some test samples are never evaluated.
    test_dataset = RE_Dataset(paras, 'test')
    test_dataloader = DataLoader(test_dataset, batch_size=paras.batch_size,
                                 shuffle=paras.shuffle, drop_last=paras.drop_last)

    bert_classifier = BertClassifier(bert, paras.hidden_size, paras.label_number,
                                     paras.dropout_prob)

    if paras.optimizer == 'adamw':
        logger.info('Loading AdamW optimizer.')
        # Biases and LayerNorm weights are excluded from weight decay.
        no_decay = ['bias', 'LayerNorm.weight']
        decay_params, no_decay_params = [], []
        for name, param in bert_classifier.named_parameters():
            if any(nd in name for nd in no_decay):
                no_decay_params.append(param)
            else:
                decay_params.append(param)
        optimizer_grouped_parameters = [
            {'params': decay_params, 'weight_decay': 0.01},
            {'params': no_decay_params, 'weight_decay': 0.0},
        ]
        # The epsilon comes from the configuration passed to this function
        # (the original referenced an undefined ``args``).
        optimizer = AdamW(optimizer_grouped_parameters, lr=paras.learning_rate,
                          eps=paras.adam_epsilon)
    else:
        # Anything other than 'adam'/'adamw' falls back to Adam with a warning.
        if paras.optimizer != 'adam':
            logger.warning(f'optimizer must be "Adam" or "AdamW", but got {paras.optimizer}.')
        logger.info('Loading Adam optimizer.')
        optimizer = torch.optim.Adam(bert_classifier.parameters(),
                                     lr=paras.learning_rate)

    logger.info('Training Start.')
    best_eval = {'acc': 0, 'precision': 0, 'recall': 0, 'f1': 0, 'loss': 0}
    for epoch in range(paras.num_train_epochs):
        epoch_loss = 0
        bert_classifier.train()
        for step, batch in enumerate(train_dataloader):
            optimizer.zero_grad()

            batch_data, batch_label = batch

            encoded_data = tokenizer(batch_data,
                                     padding=True,
                                     truncation=True,
                                     return_tensors='pt',
                                     max_length=paras.max_sequence_length)

            label_tensor = batch_label_to_idx(batch_label, label_to_index)

            # The classifier returns the training loss when given labels.
            loss = bert_classifier(encoded_data, label_tensor)

            epoch_loss += loss_to_int(loss)

            logger.info(f'epoch: {epoch}, step: {step}, loss: {loss:.4f}')

            loss.backward()
            optimizer.step()

        epoch_loss = epoch_loss / len(train_dataloader)

        acc, precision, recall, f1 = evaluation(bert_classifier, tokenizer, test_dataloader,
                                                paras.max_sequence_length, label_to_index)

        logger.info(f'Epoch: {epoch}, Epoch-Average Loss: {epoch_loss:.4f}')
        logger.info(f'Accuracy: {acc:.4f}, Precision: {precision:.4f}, '
                    f'Recall: {recall:.4f}, F1-score: {f1:.4f}')

        # A stored loss of 0 means nothing has been saved yet, so the first
        # epoch is always checkpointed; afterwards only F1 improvements are.
        if best_eval['loss'] == 0 or f1 > best_eval['f1']:
            best_eval['loss'] = epoch_loss
            best_eval['acc'] = acc
            best_eval['precision'] = precision
            best_eval['recall'] = recall
            best_eval['f1'] = f1
            torch.save(bert_classifier, f'{paras.log_save_path}/{paras.model_save_name}')

            with open(f'{paras.log_save_path}/{paras.checkpoint_file}', 'w') as wf:
                wf.write(f'Save time: {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}\n')
                wf.write(f'Best F1-score: {best_eval["f1"]:.4f}\n')
                wf.write(f'Precision: {best_eval["precision"]:.4f}\n')
                wf.write(f'Recall: {best_eval["recall"]:.4f}\n')
                wf.write(f'Accuracy: {best_eval["acc"]:.4f}\n')
                wf.write(f'Epoch-Average Loss: {best_eval["loss"]:.4f}\n')

            logger.info(f'Updated model, best F1-score: {best_eval["f1"]:.4f}\n')

    logger.info(f'Train complete, Best F1-score: {best_eval["f1"]:.4f}.')
Example #2
0
def main():
    """Fine-tune a BERT classifier on the CNews data and keep the best weights.

    Trains for a fixed number of epochs, printing the mean per-batch accuracy
    and loss for the training and validation splits after each epoch.  The
    model's ``state_dict`` is written to ``models/best_model.pkl`` whenever
    the validation accuracy improves.
    """
    import os  # local import: only needed to create the checkpoint directory

    # Hyper-parameters
    batch_size = 4
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    epochs = 10
    learning_rate = 5e-6  # keep the learning rate small for fine-tuning

    # Datasets
    train_dataset = CNewsDataset('data/cnews/cnews.train.txt')
    valid_dataset = CNewsDataset('data/cnews/cnews.val.txt')

    # Batch loaders
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=batch_size,
                                  shuffle=False)

    # BERT configuration; the number of classes comes from the training set
    bert_config = BertConfig.from_pretrained('bert-base-chinese')
    num_labels = len(train_dataset.labels)

    # Model, optimizer and loss
    model = BertClassifier(bert_config, num_labels).to(device)

    optimizer = AdamW(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    best_acc = 0

    for epoch in range(1, epochs + 1):
        losses = 0  # summed batch losses
        accuracy = 0  # summed batch accuracies

        model.train()
        train_bar = tqdm(train_dataloader)
        for input_ids, token_type_ids, attention_mask, label_id in train_bar:
            model.zero_grad()
            train_bar.set_description('Epoch %i train' % epoch)

            label_id = label_id.to(device)  # move once, reuse for loss and accuracy
            output = model(
                input_ids=input_ids.to(device),
                attention_mask=attention_mask.to(device),
                token_type_ids=token_type_ids.to(device),
            )

            loss = criterion(output, label_id)
            losses += loss.item()

            pred_labels = torch.argmax(output, dim=1)  # predicted class per sample
            acc = torch.sum(pred_labels == label_id).item() / len(pred_labels)
            accuracy += acc

            loss.backward()
            optimizer.step()
            train_bar.set_postfix(loss=loss.item(), acc=acc)

        average_loss = losses / len(train_dataloader)
        average_acc = accuracy / len(train_dataloader)

        print('\tTrain ACC:', average_acc, '\tLoss:', average_loss)

        # Validation
        model.eval()
        losses = 0  # summed batch losses
        accuracy = 0  # summed batch accuracies
        valid_bar = tqdm(valid_dataloader)
        # No gradients are needed for validation; skipping the autograd graph
        # saves memory and time.
        with torch.no_grad():
            for input_ids, token_type_ids, attention_mask, label_id in valid_bar:
                valid_bar.set_description('Epoch %i valid' % epoch)

                label_id = label_id.to(device)
                output = model(
                    input_ids=input_ids.to(device),
                    attention_mask=attention_mask.to(device),
                    token_type_ids=token_type_ids.to(device),
                )

                loss = criterion(output, label_id)
                losses += loss.item()

                pred_labels = torch.argmax(output, dim=1)  # predicted class per sample
                acc = torch.sum(pred_labels == label_id).item() / len(pred_labels)
                accuracy += acc
                valid_bar.set_postfix(loss=loss.item(), acc=acc)

        average_loss = losses / len(valid_dataloader)
        average_acc = accuracy / len(valid_dataloader)

        print('\tValid ACC:', average_acc, '\tLoss:', average_loss)

        if average_acc > best_acc:
            best_acc = average_acc
            # torch.save does not create missing parent directories.
            os.makedirs('models', exist_ok=True)
            torch.save(model.state_dict(), 'models/best_model.pkl')
Example #3
0
def main():
    """Train a BERT classifier with ``DataParallel`` and keep the best weights.

    Loads the train/validation data, trains for a fixed number of epochs and
    prints the mean per-batch accuracy and loss for both splits after every
    epoch.  The underlying model's ``state_dict`` is saved to
    ``best_model_on_trainset.pkl`` whenever the validation accuracy improves.

    GPUs 3 and 4 are used when the host has them.  On hosts with fewer GPUs
    the run falls back to whichever of those exist, then to GPU 0, then to
    the CPU, instead of failing on a missing device.
    """
    # Preferred GPUs; the first usable one hosts the parameters and the
    # gathered outputs.
    preferred_gpu_ids = [3, 4]
    gpu_count = torch.cuda.device_count() if torch.cuda.is_available() else 0
    gpu_ids = [gpu_id for gpu_id in preferred_gpu_ids if gpu_id < gpu_count]
    if not gpu_ids and gpu_count > 0:
        gpu_ids = [0]
    device = torch.device('cuda', gpu_ids[0]) if gpu_ids else torch.device('cpu')

    # Datasets
    print('加载训练数据')
    train_data = load_data('dataset/train.csv')
    print('加载验证数据')
    valid_data = load_data('dataset/test.csv')

    batch_size = 16

    # Batch loaders
    print('生成batch')
    train_dataloader = DataLoader(train_data,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=3)
    valid_dataloader = DataLoader(valid_data,
                                  batch_size=batch_size,
                                  shuffle=False,
                                  num_workers=3)

    # BERT configuration
    bert_config = BertConfig.from_pretrained('./chinese_wwm_pytorch')
    # NOTE(review): ``num_labels`` is not defined in this function; presumably
    # a module-level constant — confirm it exists where this script runs.
    bert_config.num_labels = num_labels
    print(bert_config)

    # Model
    model = BertClassifier(bert_config)

    # Hyper-parameters
    EPOCHS = 20
    learning_rate = 5e-6  # keep the learning rate small for fine-tuning
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    # Cross-entropy loss
    criterion = nn.CrossEntropyLoss()

    with open('output.txt', 'w') as wf:
        wf.write('Batch Size: ' + str(batch_size) + '\tLearning Rate: ' +
                 str(learning_rate) + '\n')

    best_acc = 0
    # DataParallel keeps the parameters on device_ids[0], which is why
    # ``device`` is derived from the first entry of ``gpu_ids`` above.
    net = torch.nn.DataParallel(model, device_ids=gpu_ids) if gpu_ids else model
    net.to(device)

    # Training
    for Epoch in range(1, EPOCHS + 1):
        losses = 0  # summed batch losses
        accuracy = 0  # summed batch accuracies
        print('Epoch:', Epoch)

        model.train()
        for batch_index, batch in enumerate(train_dataloader):
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            token_type_ids = batch[2].to(device)
            label_ids = batch[3].to(device)
            # Forward pass through the (possibly data-parallel) wrapper
            output = net(
                input_ids=input_ids,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
            )

            loss = criterion(output, label_ids)
            losses += loss.item()

            pred_labels = torch.argmax(output, dim=1)  # predicted class per sample
            acc = torch.sum(pred_labels == label_ids).item() / len(pred_labels)
            accuracy += acc

            # Clear old gradients, back-propagate, update the weights
            model.zero_grad()
            loss.backward()
            optimizer.step()

        average_loss = losses / len(train_dataloader)
        average_acc = accuracy / len(train_dataloader)
        # Training summary for this epoch
        print('\tTrain ACC:', average_acc, '\tLoss:', average_loss)

        # Validation (runs on the unwrapped model, i.e. on ``device`` only)
        model.eval()
        losses = 0  # summed batch losses
        accuracy = 0  # summed batch accuracies
        with torch.no_grad():
            for batch_index, batch in enumerate(valid_dataloader):
                input_ids = batch[0].to(device)
                attention_mask = batch[1].to(device)
                token_type_ids = batch[2].to(device)
                label_ids = batch[3].to(device)
                output = model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    token_type_ids=token_type_ids,
                )
                loss = criterion(output, label_ids)
                losses += loss.item()

                pred_labels = torch.argmax(output, dim=1)  # predicted class per sample
                acc = torch.sum(pred_labels == label_ids).item() / len(pred_labels)
                accuracy += acc

        average_loss = losses / len(valid_dataloader)
        average_acc = accuracy / len(valid_dataloader)

        print('\tValid ACC:', average_acc, '\tLoss:', average_loss)

        if average_acc > best_acc:
            best_acc = average_acc
            torch.save(model.state_dict(), 'best_model_on_trainset.pkl')