def main(args):
    train_loader, test_loader = load_data(args)
    if not os.path.isdir('checkpoints'):
        os.mkdir('checkpoints')
    args.vocab_len = len(args.vocab['stoi'])
    model = BERT(args.vocab_len, args.max_len, args.heads, args.embedding_dim, args.N)
    if args.cuda:
        model = model.cuda()

    if args.task:
        print('Start Downstream Task')
        args.epochs = 3
        args.lr = 3e-5
        state_dict = torch.load(args.checkpoints)
        model.load_state_dict(state_dict['model_state_dict'])
        # No MLM loss during fine-tuning; only the NSP/classification head is trained.
        criterion = {'mlm': None, 'nsp': nn.CrossEntropyLoss()}
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        for epoch in range(1, args.epochs + 1):
            train_mlm_loss, train_nsp_loss, train_loss, train_mlm_acc, train_nsp_acc = _train(
                epoch, train_loader, model, optimizer, criterion, args)
            test_mlm_loss, test_nsp_loss, test_loss, test_mlm_acc, test_nsp_acc = _eval(
                epoch, test_loader, model, criterion, args)
            save_checkpoint(model, optimizer, args, epoch)
    else:
        print('Start Pre-training')
        criterion = {
            'mlm': nn.CrossEntropyLoss(ignore_index=0),  # ignore padded positions in the MLM loss
            'nsp': nn.CrossEntropyLoss()
        }
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        for epoch in range(1, args.epochs + 1):
            train_mlm_loss, train_nsp_loss, train_loss, train_mlm_acc, train_nsp_acc = _train(
                epoch, train_loader, model, optimizer, criterion, args)
            test_mlm_loss, test_nsp_loss, test_loss, test_mlm_acc, test_nsp_acc = _eval(
                epoch, test_loader, model, criterion, args)
            save_checkpoint(model, optimizer, args, epoch)
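# save_checkpoint() is called above but not shown. A minimal sketch is given below; the
# field names are assumptions chosen to mirror what the fine-tuning branch later reads
# back via state_dict['model_state_dict'], not the original implementation.
import os
import torch

def save_checkpoint(model, optimizer, args, epoch):
    # Hypothetical layout: one file per epoch under the 'checkpoints' directory created in main().
    payload = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'args': vars(args),
    }
    torch.save(payload, os.path.join('checkpoints', f'bert_epoch_{epoch}.pt'))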
def run_training_bert(args, dataset, train_loader, val_loader, vocab_size):
    checkpoint_path = os.path.join(args.checkpoint_path, args.checkpoint)
    device = torch.device(("cuda:" + args.device) if torch.cuda.is_available() else "cpu")
    model = BERT().to(device)
    # Set up the Adam optimizer with a small weight decay
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-5)
    model.train()  # turn on training mode

    # Training loop
    print("Starting Training Loop...")
    for epoch in range(args.epochs):
        losses = []
        for i, batch in enumerate(train_loader):
            # Format the batch and move tensors to the target device
            text, context, label = batch.text, batch.context, batch.label
            label = label.type(torch.LongTensor).to(device)
            text = text.type(torch.LongTensor).to(device)
            loss, _ = model(text, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        epoch_loss = sum(losses) / len(losses)
        print('Epoch: {}, Training Loss: {:.4f}'.format(epoch, epoch_loss))

        # Save a checkpoint every epoch (and at the final epoch)
        if epoch % 1 == 0 or epoch == args.epochs - 1:
            torch.save(
                {
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'vocab_size': vocab_size,
                    'args': vars(args)
                }, checkpoint_path)

        if args.eval:
            model.eval()
            with torch.no_grad():
                preds = []
                labels = []
                eval_losses = []
                for i, batch in enumerate(val_loader if val_loader is not None else train_loader):
                    text, context, label = batch.text, batch.context, batch.label
                    label = label.type(torch.LongTensor).to(device)
                    text = text.type(torch.LongTensor).to(device)
                    loss, output = model(text, label)
                    preds.extend(torch.argmax(output, 1).tolist())
                    labels.extend(label.tolist())
                    eval_losses.append(loss.item())
                print("{} Precision: {}, Recall: {}, F1: {}, Loss: {}".format(
                    "Train" if val_loader is None else "Valid",
                    sklearn.metrics.precision_score(np.array(labels).astype('int32'), np.array(preds)),
                    sklearn.metrics.recall_score(np.array(labels).astype('int32'), np.array(preds)),
                    sklearn.metrics.f1_score(np.array(labels).astype('int32'), np.array(preds)),
                    np.average(eval_losses)))
            model.train()  # switch back to training mode before the next epoch
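# The loop above assumes model(text, label) returns a (loss, logits) pair. A minimal
# sketch of such a wrapper is shown below; the encoder interface, hidden size, number
# of classes, and [CLS]-position pooling are assumptions for illustration, not the
# original BERT class.
import torch.nn as nn

class BERTClassifierSketch(nn.Module):
    def __init__(self, encoder, hidden_dim=768, num_classes=2):
        super().__init__()
        self.encoder = encoder                  # any module mapping token ids -> (batch, seq, hidden)
        self.classifier = nn.Linear(hidden_dim, num_classes)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, text, label):
        hidden = self.encoder(text)             # (batch, seq_len, hidden_dim)
        logits = self.classifier(hidden[:, 0])  # classify from the first ([CLS]) position
        loss = self.loss_fn(logits, label)
        return loss, logits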
# Build the vocabulary (ids 0-3 are assumed reserved for special tokens such as [PAD]/[MASK])
for i, w in enumerate(word_list):
    word_dict[w] = i + 4
number_dict = {i: w for i, w in enumerate(word_dict)}
vocab_size = len(word_dict)

# Convert each sentence into its sequence of token ids
token_list = list()
for sentence in sentences:
    arr = [word_dict[s] for s in sentence.split()]
    token_list.append(arr)

model = BERT()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

batch = make_batch()
input_ids, segment_ids, masked_tokens, masked_pos, isNext = map(
    torch.LongTensor, zip(*batch))

for epoch in range(100):
    optimizer.zero_grad()
    logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)
    loss_lm = criterion(logits_lm.transpose(1, 2), masked_tokens)  # masked LM loss
    loss_lm = (loss_lm.float()).mean()
    loss_clsf = criterion(logits_clsf, isNext)  # next-sentence classification loss
    loss = loss_lm + loss_clsf
    if (epoch + 1) % 10 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
    loss.backward()
    optimizer.step()
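# make_batch() is not shown above. In this style of from-scratch BERT pre-training it
# typically samples sentence pairs, masks a fraction of tokens for the MLM objective,
# and records whether the second sentence follows the first (NSP). The sketch below
# illustrates that contract; the sampling and padding details are assumptions, and each
# element is (input_ids, segment_ids, masked_tokens, masked_pos, is_next).
from random import randrange, shuffle

def make_batch_sketch(batch_size=6, max_len=30, max_pred=5):
    batch = []
    while len(batch) < batch_size:
        a, b = randrange(len(token_list)), randrange(len(token_list))
        tokens_a, tokens_b = token_list[a], token_list[b]
        input_ids = [word_dict['[CLS]']] + tokens_a + [word_dict['[SEP]']] + tokens_b + [word_dict['[SEP]']]
        segment_ids = [0] * (len(tokens_a) + 2) + [1] * (len(tokens_b) + 1)

        # mask up to max_pred non-special positions for the MLM objective
        n_pred = min(max_pred, max(1, int(round(len(input_ids) * 0.15))))
        cand_pos = [i for i, t in enumerate(input_ids)
                    if t not in (word_dict['[CLS]'], word_dict['[SEP]'])]
        shuffle(cand_pos)
        masked_tokens, masked_pos = [], []
        for pos in cand_pos[:n_pred]:
            masked_pos.append(pos)
            masked_tokens.append(input_ids[pos])
            input_ids[pos] = word_dict['[MASK]']

        # pad token ids, segments, and mask bookkeeping to fixed lengths
        input_ids += [0] * (max_len - len(input_ids))
        segment_ids += [0] * (max_len - len(segment_ids))
        masked_tokens += [0] * (max_pred - len(masked_tokens))
        masked_pos += [0] * (max_pred - len(masked_pos))

        is_next = 1 if b == a + 1 else 0  # simplistic NSP label, for the sketch only
        batch.append([input_ids, segment_ids, masked_tokens, masked_pos, is_next])
    return batch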
    return total_correct / total


if __name__ == '__main__':
    mnli = BERTMNLI(TRAIN_DATA_DIR, bert_type=BERT_TYPE)
    match = BERTMNLI(MATCH_DATA_DIR, bert_type=BERT_TYPE)
    mismatch = BERTMNLI(MISMATCH_DATA_DIR, bert_type=BERT_TYPE)

    # Resume from the DNLI checkpoint
    checkpoint = torch.load('storage/bert-base-dnli.pt')
    model = BERT(bert_type=BERT_TYPE)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)

    optimizer = Adam(model.parameters(), lr=LEARNING_RATE)
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    criterion = nn.CrossEntropyLoss()

    best_acc = 0
    for epoch in range(1, NUM_EPOCHS + 1):
        train_loss = train(mnli, model, criterion, optimizer, device)
        match_acc = eval(match, model, device)
        mismatch_acc = eval(mismatch, model, device)
        print(f'Epoch {epoch}, Train Loss: {train_loss}, Match Acc: {match_acc}, Mismatch Acc: {mismatch_acc}')
        if match_acc + mismatch_acc > best_acc:
            best_acc = match_acc + mismatch_acc
            torch.save({
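# The train/eval helpers used above are not shown. A minimal sketch of the evaluation
# helper is given below; the assumption that BERTMNLI behaves like a torch Dataset
# yielding (input_ids, attention_mask, labels) tensors, and that the model returns
# class logits, is made for illustration only.
import torch

def eval_sketch(dataset, model, device, batch_size=32):
    model.eval()
    total_correct, total = 0, 0
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)
    with torch.no_grad():
        for input_ids, attention_mask, labels in loader:
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            labels = labels.to(device)
            logits = model(input_ids, attention_mask)
            preds = logits.argmax(dim=-1)
            total_correct += (preds == labels).sum().item()
            total += labels.size(0)
    return total_correct / total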
else:
    device = torch.device("cpu")

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
bert_model = BertModel.from_pretrained("bert-base-uncased")
# Alternative ALBERT backbone:
# tokenizer = AlbertTokenizer.from_pretrained('albert-base-v1', do_lower_case=True)
# bert_model = AlbertModel.from_pretrained("albert-base-v1")
model = BERT(2, bert_model)
model = model.to(device)

train_dataloader, validation_dataloader, test_dataloader = get_baseline_dataloader(
    args.data_file, args.batch_size, tokenizer)

optimizer = AdamW(model.parameters(), lr=args.lr)
total_steps = len(train_dataloader) * args.epochs
# The warmup arguments were renamed across transformers releases: newer versions take
# num_warmup_steps/num_training_steps, older ones warmup_steps/t_total.
if new_version:
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(0.1 * total_steps),  # warm up over the first 10% of steps
        num_training_steps=total_steps)
else:
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        warmup_steps=int(0.1 * total_steps),
        t_total=total_steps)
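# get_linear_schedule_with_warmup is stepped once per optimizer update, so a typical
# training loop for the setup above looks roughly like the sketch below. The batch
# unpacking (input_ids, attention_mask, labels) and the model returning a (loss, logits)
# pair are assumptions about this codebase, not taken from it.
for epoch in range(args.epochs):
    model.train()
    for batch in train_dataloader:
        input_ids, attention_mask, labels = (t.to(device) for t in batch)
        optimizer.zero_grad()
        loss, logits = model(input_ids, attention_mask, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # commonly paired with AdamW + warmup
        optimizer.step()
        scheduler.step()  # advance the linear warmup/decay schedule every step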