def train(config):
    training_t0 = datetime.datetime.now()
    block_size = config['block_size']

    print("config['train_filename']:", config['train_filename'])
    text = open(config['train_filename'], 'r').read()
    train_dataset = CharDataset(config['chars'], text, block_size, debug=True)  # one line is 63 characters

    # create model
    mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                      n_layer=config['n_layer'], n_head=config['n_head'], n_embd=config['n_embd'])
    model = GPT(mconf)
    pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('ANN parameters: %d' % pytorch_total_params)

    # train
    tconf = TrainerConfig(max_epochs=2, batch_size=config['batch_size'], learning_rate=6e-4,
                          lr_decay=True, warmup_tokens=512 * 20,
                          final_tokens=2 * len(train_dataset) * block_size,
                          num_workers=4, tqdm=False)  # not config['do_finetune']
    trainer = Trainer(model, train_dataset, None, tconf)
    trainer.train()

    training_t1 = datetime.datetime.now()
    training_duration = training_t1 - training_t0
    print('training_duration', training_duration)

    torch.save(model.state_dict(), config['model_filename'])
    return model
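# A minimal, hypothetical call to the train(config) function above. The keys mirror the ones
# the function reads; the file paths and the character set shown here are assumptions.
config = {
    'train_filename': 'input.txt',                         # assumed path to the training text
    'chars': sorted(set(open('input.txt', 'r').read())),   # character vocabulary for CharDataset
    'block_size': 128,
    'n_layer': 8,
    'n_head': 8,
    'n_embd': 512,
    'batch_size': 64,
    'model_filename': 'char_gpt.pt',                       # assumed output path for the weights
}
model = train(config)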
def train(model, n_epochs, train_dataset, test_dataset, checkpoint_path):
    tokens_per_epoch = len(train_dataset) * train_dataset.block_size

    # initialize a trainer instance and kick off training
    tconf = TrainerConfig(
        max_epochs=n_epochs,
        batch_size=4,
        learning_rate=3e-3,
        betas=(0.9, 0.95),
        weight_decay=0,
        lr_decay=True,
        warmup_tokens=tokens_per_epoch,
        final_tokens=n_epochs * tokens_per_epoch,
        ckpt_path=checkpoint_path,
        num_workers=1
    )
    trainer = Trainer(model, train_dataset, test_dataset, tconf)
    trainer.train()
    return trainer
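# A hypothetical call to this variant, reusing the model-construction pattern from the other
# snippets. The two datasets are assumed to exist already, the checkpoint path is made up,
# and ckpt_path is where minGPT's Trainer writes the weights when the test loss improves.
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                  n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf)
trainer = train(model, n_epochs=10, train_dataset=train_dataset,
                test_dataset=test_dataset, checkpoint_path='gpt_ckpt.pt')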
def train():
    train_dataset = get_train_dataset()
    test_dataset = get_test_dataset()
    model = get_model()
    epochs = 20
    tokens_per_epoch = len(train_dataset) * BLOCK_SIZE
    conf = TrainerConfig(
        max_epochs=epochs,
        batch_size=4,
        learning_rate=3e-5,
        lr_decay=False,
        warmup_tokens=tokens_per_epoch,
        final_tokens=epochs * tokens_per_epoch,
        num_workers=4,
    )
    trainer = Trainer(model, train_dataset, test_dataset, conf)
    trainer.train()
    torch.save(model, MODEL_PATH)
    print(f"saved model to {MODEL_PATH}")
    return model
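# Note: this snippet (and the next) pickle the whole module with torch.save(model, MODEL_PATH),
# unlike the first snippet, which saves only the state_dict. Reloading is then a single call,
# provided the model class is importable at load time (recent PyTorch versions additionally
# require weights_only=False to unpickle a full module):
model = torch.load(MODEL_PATH)
model.eval()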
def train():
    train_dataset = ReorderDataset("train", 10000)
    test_dataset = ReorderDataset("test", 1000)
    model = get_model()
    epochs = 100
    # NOTE: I think final_tokens is set wrong
    conf = TrainerConfig(
        max_epochs=epochs,
        batch_size=512,
        learning_rate=6e-4,
        lr_decay=True,
        warmup_tokens=1024,
        final_tokens=epochs * len(train_dataset) * len(TOKENS),
        num_workers=4,
    )
    trainer = Trainer(model, train_dataset, test_dataset, conf)
    trainer.train()
    torch.save(model, MODEL_PATH)
    print(f"saved model to {MODEL_PATH}")
    return model
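# Re: the NOTE above -- in the other snippets final_tokens is the total number of tokens seen
# over training, i.e. epochs * len(train_dataset) * block_size, so substituting len(TOKENS)
# (the vocabulary size) does look suspect. A sketch of that convention, assuming the dataset
# exposes a block_size attribute like the CharDataset variants do:
tokens_per_epoch = len(train_dataset) * train_dataset.block_size  # assumed attribute
final_tokens = epochs * tokens_per_epoch                          # total tokens the LR schedule decays over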
import torch

    # Tail of CharDataset.__getitem__ (the rest of the class definition is truncated; the
    # method signature is restored here for readability): slice a (block_size + 1)-character
    # chunk, encode it, and return input/target tensors shifted by one position.
    def __getitem__(self, idx):
        chunk = self.data[idx:idx + self.block_size + 1]
        dix = [self.stoi[s] for s in chunk]
        x = torch.tensor(dix[:-1], dtype=torch.long)
        y = torch.tensor(dix[1:], dtype=torch.long)
        return x, y

block_size = 128
text = open('input.txt', 'r').read()
train_dataset = CharDataset(text, block_size)

from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                  n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig
from mingpt.utils import sample

while True:
    tconf = TrainerConfig(max_epochs=1, batch_size=256, learning_rate=6e-4)
    trainer = Trainer(model, train_dataset, None, tconf)  # no test dataset
    trainer.train()

    # sample a short completion from the current model after each epoch
    context = "O God, O God!"
    x = torch.tensor([train_dataset.stoi[s] for s in context], dtype=torch.long)[None, ...].to(trainer.device)
    y = sample(model, x, 100, temperature=1.0, sample=True, top_k=10)[0]
    completion = ''.join([train_dataset.itos[int(i)] for i in y])
    print(completion)
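# The loop above trains and samples indefinitely without ever persisting the weights. A small,
# hypothetical addition at the end of the while-body (mirroring the first snippet) would
# checkpoint after each pass:
#     torch.save(model.state_dict(), 'char_gpt.pt')   # 'char_gpt.pt' is a made-up path
# The saved weights can later be restored into a model built from the same GPTConfig:
model = GPT(mconf)
model.load_state_dict(torch.load('char_gpt.pt'))
model.eval()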