예제 #1
0
def train(fold_all):
    """Fine-tune ``bert-base-chinese`` once per fold and keep each fold's best model.

    For every fold index in ``range(FOLD)`` a fresh model, optimizer and
    scheduler are created. The model is trained on all folds of ``fold_all``
    except the held-out one and validated on the held-out fold after every
    epoch. Whenever the validation F1 improves, the whole model object is
    saved to ``'<fold_index>折_best_model.m'`` in the working directory.

    Args:
        fold_all: Indexable, sized collection of folds. ``fold_all[i]`` is
            the list of samples (as accepted by ``utils.bert_input``) that
            serves as the validation set of run ``i``.

    Returns:
        None. Progress is printed and checkpoints are written to disk.
    """
    config = BertConfig.from_pretrained('bert-base-chinese')

    print('开始训练...')
    for fold_index in range(FOLD):
        # Per-fold bookkeeping.
        best_f1 = 0
        best_epoch = 0
        loss_list = []
        f1_list = []
        flag = 0  # batches processed so far in this fold

        print('正在加载模型...')
        # Load once, then move to the GPU only if requested.
        model = BertForSequenceClassification.from_pretrained('bert-base-chinese', config=config)
        if USE_GPU:
            model = model.cuda()
        optimizer = AdamW(model.parameters(), lr=LR, correct_bias=False)
        scheduler = WarmupLinearSchedule(optimizer, warmup_steps=WARMUP_STEPS, t_total=T_TOTAL)

        # Train on every fold except the held-out one. The number of folds is
        # taken from the data itself rather than a hard-coded 5, so the split
        # stays correct for any fold count.
        train_list = []
        for other_index in range(len(fold_all)):
            if other_index != fold_index:
                train_list.extend(fold_all[other_index])
        dev_list = fold_all[fold_index]

        train_bert_list = utils.bert_input(train_list)
        dev_bert_list = utils.bert_input(dev_list)
        train_dataset = layers.Train_Dataset(train_bert_list)
        dev_dataset = layers.Train_Dataset(dev_bert_list)
        train_dataloader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        dev_dataloader = DataLoader(dataset=dev_dataset, batch_size=BATCH_SIZE, shuffle=False)

        for epoch in range(EPOCH):
            model.train()
            for text, label in train_dataloader:
                # Convert the collated batch into tensors. The transpose
                # presumably turns a (seq_len, batch) layout produced by the
                # DataLoader into (batch, seq_len) -- TODO confirm against
                # layers.Train_Dataset.
                text = torch.tensor([sub_text.tolist() for sub_text in text]).t()
                label = torch.tensor([int(sub_label) for sub_label in label])
                if USE_GPU:
                    text = text.cuda()
                    label = label.cuda()

                # Forward pass; with labels supplied the first two outputs
                # are unpacked as (loss, logits).
                outputs = model(text, labels=label)
                loss, logits = outputs[:2]

                # Optimisation step.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                scheduler.step()

                # Record this batch's F1 and loss.
                f1 = utils.batch_f1(logits, label)
                f1_list.append(f1)
                loss_list.append(loss.item())
                flag += 1

                # Report running means every 200 batches, then reset them.
                if flag % 200 == 0:
                    f1_mean = np.mean(f1_list)
                    loss_mean = np.mean(loss_list)
                    f1_list = []
                    loss_list = []
                    print('fold: {} | epoch: {} | f1: {} | loss: {}'.format(fold_index, epoch, f1_mean, loss_mean))

            # Validate once per epoch on the held-out fold.
            f1_val = val(model, dev_dataloader)

            print('***********************************************************************')
            print('fold: {} | epoch: {} | 验证集F1值: {}'.format(fold_index, epoch, f1_val))
            if f1_val > best_f1:
                best_f1 = f1_val
                best_epoch = epoch
                # Overwrites this fold's previous best checkpoint.
                torch.save(model, str(fold_index) + '折_' + 'best_model.m')
            print('fold: {} | 验证集最优F1值: {}'.format(fold_index, best_f1))
            print('fold: {} | 验证集最优epoch: {}'.format(fold_index, best_epoch))
            print('***********************************************************************')
예제 #2
0
def train(fold_all):
    """Fine-tune a local ``bert-base-chinese`` checkpoint with k-fold validation.

    For every fold index in ``range(FOLD)`` a fresh model, optimizer and
    scheduler are created. The model is trained on all folds of ``fold_all``
    except the held-out one and validated on the held-out fold after every
    epoch. Whenever the validation F1 improves, the whole model object is
    saved to
    ``'bert_base_chinese_epoch5_lr1e5_ml128_bs16_100000_<fold_index>k_best_model.m'``.

    After all folds have run, the per-batch training F1 and loss of the whole
    run (all folds, in processing order) are written to ``data/total_f1.npy``
    and ``data/total_loss.npy`` for later plotting.

    Args:
        fold_all: Indexable, sized collection of folds. ``fold_all[i]`` is
            the list of samples (as accepted by ``utils.bert_input``) that
            serves as the validation set of run ``i``.

    Returns:
        None. Progress is printed; checkpoints and curves are written to disk.
    """
    # Other checkpoints previously tried under ../../model_lib/:
    # robert/pytorch/chinese_roberta_wwm_large_ext_pytorch and bert/pytorch/xs.
    config = BertConfig.from_pretrained(
        '../../model_lib/bert/pytorch/bert-base-chinese/bert_config.json')

    # Whole-run training curves. These are created before the fold loop so
    # the files saved at the end cover every fold, not just the last one.
    total_loss_list = []
    total_f1_list = []

    print('开始训练...')
    for fold_index in range(FOLD):
        # Per-fold bookkeeping.
        best_f1 = 0
        best_epoch = 0
        loss_list = []
        f1_list = []
        flag = 0  # batches processed so far in this fold

        print('正在加载模型...')
        # Load once, then move to the GPU only if requested.
        model = BertForSequenceClassification.from_pretrained(
            '../../model_lib/bert/pytorch/bert-base-chinese/',
            config=config)
        if USE_GPU:
            model = model.cuda()
        optimizer = AdamW(model.parameters(), lr=LR, correct_bias=False)
        # WarmupLinearSchedule is the older scheduler API; newer transformers
        # releases provide get_linear_schedule_with_warmup instead.
        scheduler = WarmupLinearSchedule(optimizer,
                                         warmup_steps=WARMUP_STEPS,
                                         t_total=T_TOTAL)

        # Build the cross-validation split: train on every fold except the
        # held-out one. The fold count comes from the data rather than a
        # hard-coded 3, so the split stays correct for any number of folds.
        train_list = []
        for other_index in range(len(fold_all)):
            if other_index != fold_index:
                train_list.extend(fold_all[other_index])
        dev_list = fold_all[fold_index]

        train_bert_list = utils.bert_input(train_list)
        dev_bert_list = utils.bert_input(dev_list)
        train_dataset = layers.Train_Dataset(train_bert_list)
        dev_dataset = layers.Train_Dataset(dev_bert_list)
        train_dataloader = DataLoader(dataset=train_dataset,
                                      batch_size=BATCH_SIZE,
                                      shuffle=True)
        dev_dataloader = DataLoader(dataset=dev_dataset,
                                    batch_size=BATCH_SIZE,
                                    shuffle=False)

        for epoch in range(EPOCH):
            model.train()
            for text, label in train_dataloader:
                # Convert the collated batch into tensors. The transpose
                # presumably turns a (seq_len, batch) layout produced by the
                # DataLoader into (batch, seq_len) -- TODO confirm against
                # layers.Train_Dataset.
                text = torch.tensor([sub_text.tolist() for sub_text in text]).t()
                label = torch.tensor([int(sub_label) for sub_label in label])
                if USE_GPU:
                    text = text.cuda()
                    label = label.cuda()

                # Forward pass; with labels supplied the first two outputs
                # are unpacked as (loss, logits).
                outputs = model(text, labels=label)
                loss, logits = outputs[:2]

                # Optimisation step.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                scheduler.step()

                # Record this batch's F1 and loss, both in the short window
                # used for progress reports and in the whole-run curves.
                # Recording per batch keeps the trailing batches that never
                # reach a 200-batch boundary.
                f1 = utils.batch_f1(logits, label)
                batch_loss = loss.item()
                f1_list.append(f1)
                loss_list.append(batch_loss)
                total_f1_list.append(f1)
                total_loss_list.append(batch_loss)
                flag += 1

                # Report running means every 200 batches, then reset them.
                if flag % 200 == 0:
                    f1_mean = np.mean(f1_list)
                    loss_mean = np.mean(loss_list)
                    f1_list = []
                    loss_list = []
                    print('fold: {} | epoch: {} | f1: {} | loss: {}'.format(
                        fold_index, epoch, f1_mean, loss_mean))

            # Validate once per epoch on the held-out fold.
            f1_val = val(model, dev_dataloader)

            print(
                '***********************************************************************'
            )
            print('fold: {} | epoch: {} | 验证集F1值: {}'.format(
                fold_index, epoch, f1_val))
            if f1_val > best_f1:
                best_f1 = f1_val
                best_epoch = epoch
                # Overwrites this fold's previous best checkpoint.
                torch.save(
                    model,
                    'bert_base_chinese_epoch5_lr1e5_ml128_bs16_100000_' +
                    str(fold_index) + 'k_' + 'best_model.m')
            print('fold: {} | 验证集最优F1值: {}'.format(fold_index, best_f1))
            print('fold: {} | 验证集最优epoch: {}'.format(fold_index, best_epoch))
            print(
                '***********************************************************************'
            )

    # Persist the whole-run curves so they can be plotted later.
    np.save('data/total_f1.npy', np.array(total_f1_list))
    np.save('data/total_loss.npy', np.array(total_loss_list))
예제 #3
0
def train(fold_all):
    """Fine-tune the local RoBERTa-wwm-large-ext checkpoint, one run per fold.

    For every fold index in ``range(FOLD)`` a fresh model, optimizer and
    scheduler are created and trained for ``EPOCH`` epochs, validating on
    ``fold_all[fold_index]`` after each epoch. Whenever the validation F1
    improves, the whole model object is saved to
    ``'robert_wwm_large_ext_f5k_epoch3_lr1e5_ml64_bs12_<fold_index>k_best_model.m'``.

    NOTE: this is deliberately not a true cross-validation. Every one of the
    first ``FOLD`` folds, including the held-out one, is used for training,
    so the validation set overlaps the training set.

    Args:
        fold_all: Indexable collection of folds; each fold is a list of
            samples accepted by ``utils.bert_input``.

    Returns:
        None. Progress and per-epoch wall time are printed; checkpoints are
        written to disk.
    """
    model_dir = '../../model_lib/robert/pytorch/chinese_roberta_wwm_large_ext_pytorch/'
    config = BertConfig.from_pretrained(
        '../../model_lib/robert/pytorch/chinese_roberta_wwm_large_ext_pytorch/bert_config.json'
    )
    separator = '***********************************************************************'

    print('开始训练...')
    for fold_index in range(FOLD):
        # Per-fold state.
        top_f1, top_epoch = 0, 0
        window_loss, window_f1 = [], []
        step = 0  # batches processed so far in this fold

        print('正在加载模型...')
        model = BertForSequenceClassification.from_pretrained(model_dir,
                                                              config=config)
        if USE_GPU:
            model = model.cuda()
        optimizer = AdamW(model.parameters(), lr=LR, correct_bias=False)
        # NOTE(review): T_TOTAL presumably has to match the number of
        # optimizer steps taken per fold -- confirm.
        scheduler = WarmupLinearSchedule(optimizer,
                                         warmup_steps=WARMUP_STEPS,
                                         t_total=T_TOTAL)

        # Training data is the concatenation of ALL folds (see docstring);
        # the validation data is the current fold.
        train_list = [
            sample for index in range(FOLD) for sample in fold_all[index]
        ]
        dev_list = fold_all[fold_index]

        train_dataset = layers.Train_Dataset(utils.bert_input(train_list))
        dev_dataset = layers.Train_Dataset(utils.bert_input(dev_list))
        train_dataloader = DataLoader(dataset=train_dataset,
                                      batch_size=BATCH_SIZE,
                                      shuffle=True)
        dev_dataloader = DataLoader(dataset=dev_dataset,
                                    batch_size=BATCH_SIZE,
                                    shuffle=False)

        for epoch in range(EPOCH):
            start_time = time.time()
            model.train()
            for text, label in train_dataloader:
                # Turn the collated batch into tensors. The transpose
                # presumably converts a (seq_len, batch) layout into
                # (batch, seq_len) -- TODO confirm against the dataset.
                token_rows = [column.tolist() for column in text]
                targets = [int(item) for item in label]
                text = torch.tensor(token_rows).t()
                label = torch.tensor(targets)
                if USE_GPU:
                    text, label = text.cuda(), label.cuda()

                # Forward pass: first two outputs are taken as loss, logits.
                loss, logits = model(text, labels=label)[:2]

                # Backward pass and parameter / learning-rate update.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                scheduler.step()

                # Track per-batch metrics for the progress report.
                window_f1.append(utils.batch_f1(logits, label))
                window_loss.append(loss.item())
                step += 1

                # Every 200 batches print the window means and start over.
                if step % 200 != 0:
                    continue
                f1_mean = np.mean(window_f1)
                loss_mean = np.mean(window_loss)
                window_f1, window_loss = [], []
                print('fold: {} | epoch: {} | f1: {} | loss: {}'.format(
                    fold_index, epoch, f1_mean, loss_mean))

            # Validate once per epoch.
            f1_val = val(model, dev_dataloader)

            print(separator)
            print('fold: {} | epoch: {} | 验证集F1值: {}'.format(
                fold_index, epoch, f1_val))
            if f1_val > top_f1:
                top_f1, top_epoch = f1_val, epoch
                # Overwrites this fold's previous best checkpoint.
                checkpoint_name = ('robert_wwm_large_ext_f5k_epoch3_lr1e5_ml64_bs12_' +
                                   str(fold_index) + 'k_' + 'best_model.m')
                torch.save(model, checkpoint_name)
            print('fold: {} | 验证集最优F1值: {}'.format(fold_index, top_f1))
            print('fold: {} | 验证集最优epoch: {}'.format(fold_index, top_epoch))
            print(separator)
            end_time = time.time()
            print('epoch cost time:', end_time - start_time)