Example 1
# Assumed imports for this example: torch and BertAdam (from the pytorch-pretrained-bert
# package); Config, Model, build_dataset and DatasetIterater come from the surrounding project.
import torch
from pytorch_pretrained_bert import BertAdam


def train():
    device = Config.device
    # Prepare the data
    train_data, dev_data = build_dataset(Config)
    train_iter = DatasetIterater(train_data, Config)
    dev_iter = DatasetIterater(dev_data, Config)

    model = Model().to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}]

    # optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    # Here we use the BertAdam optimizer

    # BertAdam handles warmup and linear decay internally (warmup=0.05 means 5% of t_total)
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=Config.learning_rate,
                         warmup=0.05,
                         t_total=len(train_iter) * Config.num_epochs)

    model.to(device)
    model.train()

    train_loss = []
    best_loss = 100000.0
    for epoch in range(Config.num_epochs):
        print('Epoch [{}/{}]'.format(epoch + 1, Config.num_epochs))
        for step, batch in enumerate(train_iter):
            input_ids, input_mask, start_positions, end_positions = \
                batch[0], batch[1], batch[2], batch[3]
            input_ids, input_mask, start_positions, end_positions = \
                input_ids.to(device), input_mask.to(device), start_positions.to(device), end_positions.to(device)

            loss, _, _ = model(input_ids, attention_mask=input_mask,
                               start_positions=start_positions, end_positions=end_positions)

            optimizer.zero_grad()  # clear gradients from the previous step before backprop
            loss.backward()
            optimizer.step()
            print('epoch:{}, step:{}, loss:{}'.format(epoch + 1, step, loss.item()))
            train_loss.append(loss.item())

            if step % 100 == 0:
                eval_loss = evaluate(model, dev_iter)
                if eval_loss < best_loss:
                    best_loss = eval_loss
                    torch.save(model.state_dict(), './save_model/' + 'best_model')
                # evaluate() leaves the model in eval mode, so switch back either way
                model.train()
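
The training loop above relies on an evaluate(model, dev_iter) helper that is not shown in this example. A minimal sketch of what such a helper could look like, assuming the dev iterator yields the same (input_ids, input_mask, start_positions, end_positions) batches and the model returns the loss as its first output:

def evaluate(model, dev_iter):
    # Hypothetical helper (not part of the original example): average dev-set loss
    model.eval()
    total_loss, num_batches = 0.0, 0
    with torch.no_grad():
        for batch in dev_iter:
            input_ids, input_mask, start_positions, end_positions = \
                batch[0].to(Config.device), batch[1].to(Config.device), \
                batch[2].to(Config.device), batch[3].to(Config.device)
            loss, _, _ = model(input_ids, attention_mask=input_mask,
                               start_positions=start_positions, end_positions=end_positions)
            total_loss += loss.item()
            num_batches += 1
    return total_loss / max(num_batches, 1)
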
Example 2

# Assumed imports for this example: AdamW and get_linear_schedule_with_warmup come from the
# transformers package; Config, Model, build_dataset, DatasetIterater, valid and rainbow
# come from the surrounding project.
import time
import datetime
import torch
from transformers import AdamW, get_linear_schedule_with_warmup


def train():
    device = Config.device
    # Prepare the data
    train_data, dev_data = build_dataset(Config)
    train_iter = DatasetIterater(train_data, Config)
    dev_iter = DatasetIterater(dev_data, Config)

    model = Model().to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]

    # optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    # Here we use AdamW (the transformers replacement for BertAdam);
    # set correct_bias=False to reproduce BertAdam's specific behaviour.
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=Config.learning_rate,
                      correct_bias=False)
    # PyTorch-style scheduler: linear warmup followed by linear decay.
    # num_warmup_steps must be a step count, not a ratio, so convert the 5% warmup.
    total_steps = len(train_iter) * Config.num_epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(0.05 * total_steps),
        num_training_steps=total_steps)

    model.to(device)
    model.train()

    train_loss = []
    best_loss = 100000.0
    for epoch in range(Config.num_epochs):
        print('Epoch [{}/{}]'.format(epoch + 1, Config.num_epochs))
        for step, batch in enumerate(train_iter):
            start_time = time.time()
            ids, input_ids, input_mask, start_positions, end_positions = \
                batch[0], batch[1], batch[2], batch[3], batch[4]
            input_ids, input_mask, start_positions, end_positions = \
                input_ids.to(device), input_mask.to(device), start_positions.to(device), end_positions.to(device)

            # print(input_ids.size())
            # print(input_mask.size())
            # print(start_positions.size())
            # print(end_positions.size())

            loss, _, _ = model(input_ids,
                               attention_mask=input_mask,
                               start_positions=start_positions,
                               end_positions=end_positions)

            loss.backward()
            # clip_grad_norm_'s keyword is max_norm, not max_grad_norm
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=20)
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()  # reset gradients for the next step

            time_str = datetime.datetime.now().isoformat()
            log_str = 'time:{}, epoch:{}, step:{}, loss:{:8f}, spend_time:{:6f}'.format(
                time_str, epoch, step, loss.item(),
                time.time() - start_time)
            rainbow(log_str)

            train_loss.append(loss.item())  # store a Python float, not a graph-holding tensor

        # Validate after every epoch and keep the checkpoint with the lowest dev loss
        if epoch % 1 == 0:
            eval_loss = valid(model, dev_iter)
            if eval_loss < best_loss:
                best_loss = eval_loss
                torch.save(model.state_dict(),
                           './save_model/' + 'best_model.bin')
            # valid() leaves the model in eval mode, so switch back either way
            model.train()
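
Both versions save only the model's state_dict, so to reuse the best checkpoint later the model has to be rebuilt and the weights loaded back in. A minimal sketch, assuming the same Config and Model classes as above:

model = Model().to(Config.device)
state_dict = torch.load('./save_model/best_model.bin', map_location=Config.device)
model.load_state_dict(state_dict)
model.eval()  # switch to inference mode before making predictions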