Example #1
def main(args):
    train_loader, test_loader = load_data(args)

    if not os.path.isdir('checkpoints'):
        os.mkdir('checkpoints')

    args.vocab_len = len(args.vocab['stoi'])  # size of the string-to-index vocab

    model = BERT(args.vocab_len, args.max_len, args.heads, args.embedding_dim,
                 args.N)
    if args.cuda:
        model = model.cuda()

    if args.task:
        print('Start Downstream Task')
        args.epochs = 3
        args.lr = 3e-5

        state_dict = torch.load(args.checkpoints)
        model.load_state_dict(state_dict['model_state_dict'])

        criterion = {'mlm': None, 'nsp': nn.CrossEntropyLoss()}

        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        for epoch in range(1, args.epochs + 1):
            train_mlm_loss, train_nsp_loss, train_loss, train_mlm_acc, train_nsp_acc = _train(
                epoch, train_loader, model, optimizer, criterion, args)
            test_mlm_loss, test_nsp_loss, test_loss, test_mlm_acc, test_nsp_acc = _eval(
                epoch, test_loader, model, criterion, args)
            save_checkpoint(model, optimizer, args, epoch)
    else:
        print('Start Pre-training')
        criterion = {
            'mlm': nn.CrossEntropyLoss(ignore_index=0),
            'nsp': nn.CrossEntropyLoss()
        }
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        for epoch in range(1, args.epochs + 1):
            train_mlm_loss, train_nsp_loss, train_loss, train_mlm_acc, train_nsp_acc = _train(
                epoch, train_loader, model, optimizer, criterion, args)
            test_mlm_loss, test_nsp_loss, test_loss, test_mlm_acc, test_nsp_acc = _eval(
                epoch, test_loader, model, criterion, args)
            save_checkpoint(model, optimizer, args, epoch)
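
# Note: save_checkpoint() is not defined in this snippet. A minimal sketch of
# what it might look like, assuming the same checkpoint keys written by
# Example #2 below (the file name and key set are assumptions, not from the
# original code):
def save_checkpoint(model, optimizer, args, epoch):
    path = os.path.join('checkpoints', 'epoch_{}.pt'.format(epoch))
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'args': vars(args),
    }, path)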
Example #2
def run_training_bert(args, dataset, train_loader, val_loader, vocab_size):
    checkpoint_path = os.path.join(args.checkpoint_path, args.checkpoint)
    device = torch.device("cuda:" +
                          args.device if torch.cuda.is_available() else "cpu")

    model = BERT().to(device)

    # The model computes its loss internally, so no separate criterion is needed.
    # Adam optimizer with a small weight decay for regularization
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-5)
    model.train()  # turn on training mode
    # Training Loop
    print("Starting Training Loop...")
    # For each epoch
    for epoch in range(args.epochs):
        # For each batch in the dataloader
        losses = []
        running_corrects = 0
        for i, batch in enumerate(train_loader):
            # format batch
            text, context, label = batch.text, batch.context, batch.label
            # print(text.tolist()[0])
            # print(label.tolist()[0])
            label = label.type(torch.LongTensor).to(device)
            text = text.type(torch.LongTensor).to(device)

            output = model(text, label)
            loss, _ = output

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            losses.append(loss.item())
        epoch_loss = sum(losses) / len(losses)
        print('Epoch: {}, Training Loss: {:.4f}'.format(epoch, epoch_loss))
        # save a checkpoint every epoch (epoch % 1 == 0 is always true; raise
        # the modulus to checkpoint less often)
        if epoch % 1 == 0 or epoch == args.epochs - 1:
            torch.save(
                {
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'vocab_size': vocab_size,
                    'args': vars(args)
                }, checkpoint_path)
            if args.eval:
                model.eval()
                with torch.no_grad():
                    preds = []
                    labels = []
                    eval_losses = []
                    for i, batch in enumerate(val_loader if val_loader
                                              is not None else train_loader):
                        text, context, label = batch.text, batch.context, batch.label
                        label = label.type(torch.LongTensor).to(device)
                        text = text.type(torch.LongTensor).to(device)
                        output = model(text, label)
                        loss, output = output
                        pred = torch.argmax(output, 1).tolist()
                        preds.extend(pred)
                        labels.extend(label.tolist())
                        eval_losses.append(loss.item())
                    print("{} Precision: {}, Recall: {}, F1: {}, Loss: {}".
                          format(
                              "Train" if val_loader is None else "Valid",
                              sklearn.metrics.precision_score(
                                  np.array(labels).astype('int32'),
                                  np.array(preds)),
                              sklearn.metrics.recall_score(
                                  np.array(labels).astype('int32'),
                                  np.array(preds)),
                              sklearn.metrics.f1_score(
                                  np.array(labels).astype('int32'),
                                  np.array(preds)), np.average(eval_losses)))
                model.train()  # restore training mode for the next epoch
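
# run_training_bert() expects an argparse-style namespace. A sketch of the
# fields the function actually reads (names inferred from the code above;
# the defaults are illustrative assumptions):
import argparse

def build_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint_path', default='checkpoints')
    parser.add_argument('--checkpoint', default='bert.pt')
    parser.add_argument('--device', default='0')         # CUDA device index as a string
    parser.add_argument('--lr', type=float, default=2e-5)
    parser.add_argument('--epochs', type=int, default=3)
    parser.add_argument('--eval', action='store_true')   # validate after each checkpoint
    return parser.parse_args()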
Example #3
    # Build the vocabulary; ids 0-3 are assumed reserved for special tokens
    # such as [PAD]/[CLS]/[SEP]/[MASK], so corpus words start at id 4
    for i, w in enumerate(word_list):
        word_dict[w] = i + 4
    number_dict = {v: k for k, v in word_dict.items()}  # id -> word
    vocab_size = len(word_dict)

    # Convert each sentence into its sequence of token ids
    token_list = list()
    for sentence in sentences:
        arr = [word_dict[s] for s in sentence.split()]
        token_list.append(arr)

    model = BERT()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    batch = make_batch()
    input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(
        torch.LongTensor, zip(*batch))

    for epoch in range(100):
        optimizer.zero_grad()
        logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)
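        # nn.CrossEntropyLoss expects the class dimension at index 1, so the
        # [batch, n_masked, vocab] MLM logits are transposed to
        # [batch, vocab, n_masked] to match masked_tokens of shape [batch, n_masked]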
        loss_lm = criterion(logits_lm.transpose(1, 2),
                            masked_tokens)  # for masked LM
        loss_lm = (loss_lm.float()).mean()
        loss_clsf = criterion(logits_clsf,
                              isNext)  # for sentence classification
        loss = loss_lm + loss_clsf
        if (epoch + 1) % 10 == 0:
            # the if-body is truncated in the original; logging the loss is the
            # evident intent of the every-10-epochs guard
            print('Epoch: %04d, cost = %.6f' % (epoch + 1, loss.item()))
        loss.backward()
        optimizer.step()
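
# make_batch() is not shown in this snippet. A condensed sketch of the usual
# pattern behind the call above; the globals (batch_size, max_pred, maxlen,
# sentences, token_list, word_dict) and the exact masking policy are
# assumptions inferred from how its outputs are consumed. Assumes an even
# batch_size so positives and negatives can be balanced:
from random import randrange, shuffle

def make_batch():
    batch = []
    positive = negative = 0
    while positive != batch_size // 2 or negative != batch_size // 2:
        a, b = randrange(len(sentences)), randrange(len(sentences))
        tokens_a, tokens_b = token_list[a], token_list[b]
        input_ids = ([word_dict['[CLS]']] + tokens_a + [word_dict['[SEP]']]
                     + tokens_b + [word_dict['[SEP]']])
        segment_ids = [0] * (len(tokens_a) + 2) + [1] * (len(tokens_b) + 1)

        # mask 15% of the non-special tokens (at least 1, at most max_pred)
        n_pred = min(max_pred, max(1, int(round(len(input_ids) * 0.15))))
        candidates = [i for i, tok in enumerate(input_ids)
                      if tok != word_dict['[CLS]'] and tok != word_dict['[SEP]']]
        shuffle(candidates)
        masked_tokens, masked_pos = [], []
        for pos in candidates[:n_pred]:
            masked_pos.append(pos)
            masked_tokens.append(input_ids[pos])
            input_ids[pos] = word_dict['[MASK]']  # simplified: always use [MASK]

        # zero-pad the sequence and the mask slots to fixed lengths
        input_ids += [0] * (maxlen - len(input_ids))
        segment_ids += [0] * (maxlen - len(segment_ids))
        masked_tokens += [0] * (max_pred - n_pred)
        masked_pos += [0] * (max_pred - n_pred)

        # label consecutive sentence pairs as IsNext, all others as NotNext
        if a + 1 == b and positive < batch_size // 2:
            batch.append([input_ids, segment_ids, masked_tokens, masked_pos, True])
            positive += 1
        elif a + 1 != b and negative < batch_size // 2:
            batch.append([input_ids, segment_ids, masked_tokens, masked_pos, False])
            negative += 1
    return batch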
Example #4
    return total_correct / total


if __name__ == '__main__':
    mnli = BERTMNLI(TRAIN_DATA_DIR, bert_type=BERT_TYPE)
    match = BERTMNLI(MATCH_DATA_DIR, bert_type=BERT_TYPE)
    mismatch = BERTMNLI(MISMATCH_DATA_DIR, bert_type=BERT_TYPE)

    checkpoint = torch.load('storage/bert-base-dnli.pt')
    model = BERT(bert_type=BERT_TYPE)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)

    optimizer = Adam(model.parameters(), lr=LEARNING_RATE)
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    criterion = nn.CrossEntropyLoss()

    best_acc = 0

    for epoch in range(1, NUM_EPOCHS+1):
        train_loss = train(mnli, model, criterion, optimizer, device)
        match_acc = eval(match, model, device)
        mismatch_acc = eval(mismatch, model, device)
        print(f'Epoch {epoch}, Train Loss: {train_loss}, Match Acc: {match_acc}, Mismatch Acc: {mismatch_acc}')
        if match_acc+mismatch_acc > best_acc:
            best_acc = match_acc+mismatch_acc
            torch.save({
                # the original snippet is truncated here; these keys mirror the
                # checkpoint loaded above, and the save path is a placeholder
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, 'storage/bert-base-dnli-best.pt')
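
# The eval() helper called above is truncated in this snippet (only its
# `return total_correct / total` tail survives before __main__). A minimal
# accuracy-loop sketch consistent with that tail; the batch format yielded by
# BERTMNLI is an assumption. Note it shadows the builtin eval, as the original
# calls do:
def eval(dataset, model, device):
    model.eval()
    total_correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in dataset:
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs)
            total_correct += (logits.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
    return total_correct / total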
Example #5
# The opening of this conditional is truncated in the original; the CUDA
# branch presumably selects the GPU:
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                          do_lower_case=True)
bert_model = BertModel.from_pretrained("bert-base-uncased")
# tokenizer = AlbertTokenizer.from_pretrained('albert-base-v1', do_lower_case=True)
# bert_model = AlbertModel.from_pretrained("albert-base-v1")

model = BERT(2, bert_model)
model = model.to(device)

train_dataloader, validation_dataloader, test_dataloader = get_baseline_dataloader(
    args.data_file, args.batch_size, tokenizer)

optimizer = AdamW(model.parameters(), lr=args.lr)
total_steps = len(train_dataloader) * args.epochs
if new_version:
    # newer transformers releases use this keyword signature
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(0.1 * total_steps),  # warm up over the first 10% of steps
        num_training_steps=total_steps)
else:
    # older releases used different keyword names for the same linear schedule
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        warmup_steps=int(0.1 * total_steps),
        t_total=total_steps)
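
# "new_version" is set somewhere above this excerpt. One way to derive it is
# to probe which argument names the installed scheduler function accepts
# (this probe is an illustration, not part of the original code):
import inspect
new_version = 'num_warmup_steps' in inspect.signature(
    get_linear_schedule_with_warmup).parameters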