print("Loading Marker Dataset\n") train_dataset = MarkerDataset(fname=fn_train, MD=MD) test_dataset = MarkerDataset(fname=fn_test, MD=MD) epoch = marker_epoch # Trainer Config tconf = TrainerConfig(max_epochs=epoch, batch_size=358, learning_rate=6e-4, lr_decay=True, warmup_tokens=1024, final_tokens=epoch * len(train_dataset) * (MD.vocab_size + 1), num_workers=6) # Create the first training round print("Training: ", str(current_it)) trainer = Trainer(model, train_dataset, test_dataset, tconf) trainer.train() trainer.save_checkpoint(exp_folder, str(current_it)) # Examine the model and create new dataset if current_it % 2 == 0: print("Exam and new dataset-------------\n") print("Training exam \n") examiner.exam(fn_train, train_dataset, trainer) print("Test exam \n") examiner.exam(fn_test, test_dataset, trainer) current_it += 1
from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                  n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig

# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=2, batch_size=512, learning_rate=6e-4,
                      lr_decay=True, warmup_tokens=512 * 20,
                      final_tokens=2 * len(train_dataset) * train_dataset.block_size,
                      num_workers=4, ckpt_path="./model.bin")
trainer = Trainer(model, train_dataset, None, tconf)
trainer.train()
trainer.save_checkpoint()

# alright, let's sample from the trained character-level model
import torch
from mingpt.utils import sample

context = "Today, Amazon announced "
x = torch.tensor([train_dataset.stoi[s] for s in context],
                 dtype=torch.long)[None, ...].to(trainer.device)
y = sample(model, x, 2000, temperature=1.0, sample=True, top_k=10)[0]
completion = ''.join([train_dataset.itos[int(i)] for i in y])
print(completion)
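# For intuition, mingpt.utils.sample() generates autoregressively: it feeds
# the last block_size tokens through the model, scales the final-position
# logits by temperature, optionally masks everything outside the top_k
# candidates, and then samples (or argmaxes) the next token. A condensed,
# illustrative sketch of a single generation step (not the library source
# verbatim):
import torch.nn.functional as F

@torch.no_grad()
def sample_step(model, x, temperature=1.0, top_k=None):
    # crop the context to the model's block size; keep logits at the last position
    logits, _ = model(x[:, -model.block_size:])
    logits = logits[:, -1, :] / temperature
    if top_k is not None:
        v, _ = torch.topk(logits, top_k)
        logits[logits < v[:, [-1]]] = -float('inf')  # drop everything below the k-th logit
    probs = F.softmax(logits, dim=-1)
    next_token = torch.multinomial(probs, num_samples=1)
    return torch.cat((x, next_token), dim=1)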