def train(config):
    training_t0 = datetime.datetime.now()
    block_size = config['block_size']
    print("config['train_filename']:", config['train_filename'])
    text = open(config['train_filename'], 'r').read()
    train_dataset = CharDataset(config['chars'], text, block_size, debug=True)  # one line is 63 characters

    # create model
    mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                      n_layer=config['n_layer'], n_head=config['n_head'], n_embd=config['n_embd'])
    model = GPT(mconf)
    pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('ANN parameters: %d' % pytorch_total_params)

    # train
    tconf = TrainerConfig(max_epochs=2, batch_size=config['batch_size'], learning_rate=6e-4,
                          lr_decay=True, warmup_tokens=512 * 20,
                          final_tokens=2 * len(train_dataset) * block_size,
                          num_workers=4, tqdm=False)  # not config['do_finetune'])
    trainer = Trainer(model, train_dataset, None, tconf)
    trainer.train()
    training_t1 = datetime.datetime.now()
    training_duration = training_t1 - training_t0
    print('training_duration', training_duration)

    torch.save(model.state_dict(), config['model_filename'])
    return model
def train(model, n_epochs, train_dataset, test_dataset, checkpoint_path):
    tokens_per_epoch = len(train_dataset) * train_dataset.block_size

    # initialize a trainer instance and kick off training
    tconf = TrainerConfig(
        max_epochs=n_epochs,
        batch_size=4,
        learning_rate=3e-3,
        betas=(0.9, 0.95),
        weight_decay=0,
        lr_decay=True,
        warmup_tokens=tokens_per_epoch,
        final_tokens=n_epochs * tokens_per_epoch,
        ckpt_path=checkpoint_path,
        num_workers=1,
    )
    trainer = Trainer(model, train_dataset, test_dataset, tconf)
    trainer.train()
    return trainer
def train():
    train_dataset = get_train_dataset()
    test_dataset = get_test_dataset()
    model = get_model()

    epochs = 20
    tokens_per_epoch = len(train_dataset) * BLOCK_SIZE
    conf = TrainerConfig(
        max_epochs=epochs,
        batch_size=4,
        learning_rate=3e-5,
        lr_decay=False,
        warmup_tokens=tokens_per_epoch,
        final_tokens=epochs * tokens_per_epoch,
        num_workers=4,
    )
    trainer = Trainer(model, train_dataset, test_dataset, conf)
    trainer.train()

    torch.save(model, MODEL_PATH)
    print(f"saved model to {MODEL_PATH}")
    return model
def train():
    train_dataset = ReorderDataset("train", 10000)
    test_dataset = ReorderDataset("test", 1000)
    model = get_model()

    epochs = 100
    # NOTE: I think final_tokens is set wrong
    conf = TrainerConfig(
        max_epochs=epochs,
        batch_size=512,
        learning_rate=6e-4,
        lr_decay=True,
        warmup_tokens=1024,
        final_tokens=epochs * len(train_dataset) * len(TOKENS),
        num_workers=4,
    )
    trainer = Trainer(model, train_dataset, test_dataset, conf)
    trainer.train()

    torch.save(model, MODEL_PATH)
    print(f"saved model to {MODEL_PATH}")
    return model
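The NOTE above flags final_tokens. In minGPT's TrainerConfig, final_tokens is the token count at which the cosine learning-rate decay bottoms out, and the other snippets in this collection derive it from the dataset length times the block size rather than the vocabulary size. Below is a minimal sketch of that convention, assuming the dataset exposes a block_size attribute and keeping the other hyperparameters from the snippet above; make_trainer_config is a hypothetical helper, not part of the original code.

from mingpt.trainer import TrainerConfig

def make_trainer_config(train_dataset, epochs=100, batch_size=512):
    # total token budget: every position in every training sequence, once per epoch
    tokens_per_epoch = len(train_dataset) * train_dataset.block_size
    return TrainerConfig(
        max_epochs=epochs,
        batch_size=batch_size,
        learning_rate=6e-4,
        lr_decay=True,
        warmup_tokens=1024,                      # keep the short warmup used above
        final_tokens=epochs * tokens_per_epoch,  # cosine decay spans the whole run
        num_workers=4,
    )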
from mingpt.model import GPT, GPTConfig

mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                  n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig

# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=2, batch_size=512, learning_rate=6e-4,
                      lr_decay=True, warmup_tokens=512 * 20,
                      final_tokens=2 * len(train_dataset) * block_size,
                      num_workers=4, device='cpu', ckpt_path="./prfaqgen.bin")
trainer = Trainer(model, train_dataset, None, tconf)
# trainer.train()
trainer.load_checkpoint()
# trainer.save_checkpoint()

# alright, let's sample some character-level Shakespeare
from mingpt.utils import sample

context = "The launch of Skills Store Arabic was GREEN. The team "
x = torch.tensor([train_dataset.stoi[s] for s in context],
block_size = 128  # spatial extent of the model for its context

# you can download this file at https://github.com/karpathy/char-rnn/blob/master/data/tinyshakespeare/input.txt
text = open('input.txt', 'r').read()  # don't worry we won't run out of file handles
train_dataset = CharDataset(text, block_size)  # one line of poem is roughly 50 characters

from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                  n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig

# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=2, batch_size=512, learning_rate=6e-4,
                      # not sure what final_tokens is about (minGPT uses it as the token
                      # count at which the lr decay reaches its minimum)
                      lr_decay=True, warmup_tokens=512 * 20,
                      final_tokens=2 * len(train_dataset) * block_size,
                      num_workers=4)
trainer = Trainer(model, train_dataset, None, tconf)
trainer.train()

# alright, let's sample some character-level Shakespeare
from mingpt.utils import sample

context = "O God, O God!"
x = torch.tensor([train_dataset.stoi[s] for s in context],
                 dtype=torch.long)[None, ...].to(trainer.device)
y = sample(model, x, 2000, temperature=1.0, sample=True, top_k=10)[0]
completion = ''.join([train_dataset.itos[int(i)] for i in y])
print(completion)
def __getitem__(self, idx):  # signature reconstructed for context
    # grab a chunk of block_size + 1 characters and encode them as integer indices
    chunk = self.data[idx:idx + self.block_size + 1]
    dix = [self.stoi[s] for s in chunk]
    # x and y are the same chunk shifted by one: predict the next character at every position
    x = torch.tensor(dix[:-1], dtype=torch.long)
    y = torch.tensor(dix[1:], dtype=torch.long)
    return x, y

block_size = 128
text = open('input.txt', 'r').read()
train_dataset = CharDataset(text, block_size)

from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                  n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig
from mingpt.utils import sample

while True:
    tconf = TrainerConfig(max_epochs=1, batch_size=256, learning_rate=6e-4)
    trainer = Trainer(model, train_dataset, None, tconf)  # Trainer expects a test_dataset argument (None here)
    trainer.train()

    context = "O God, O God!"
    x = torch.tensor([train_dataset.stoi[s] for s in context],
                     dtype=torch.long)[None, ...].to(trainer.device)
    y = sample(model, x, 100, temperature=1.0, sample=True, top_k=10)[0]
    completion = ''.join([train_dataset.itos[int(i)] for i in y])
    print(completion)
if current_it % 2 == 0:
    print("Loading Main Dataset\n")
    train_dataset = MathDataset(fname=fn_train, MD=MD)
    test_dataset = MathDataset(fname=fn_test, MD=MD)
    epoch = main_epoch
else:
    print("Loading Marker Dataset\n")
    train_dataset = MarkerDataset(fname=fn_train, MD=MD)
    test_dataset = MarkerDataset(fname=fn_test, MD=MD)
    epoch = marker_epoch

# Trainer Config
tconf = TrainerConfig(max_epochs=epoch, batch_size=358, learning_rate=6e-4,
                      lr_decay=True, warmup_tokens=1024,
                      final_tokens=epoch * len(train_dataset) * (MD.vocab_size + 1),
                      num_workers=6)

# Create the first training round
print("Training: ", str(current_it))
trainer = Trainer(model, train_dataset, test_dataset, tconf)
trainer.train()
trainer.save_checkpoint(exp_folder, str(current_it))

# Examine the model and create new dataset
if current_it % 2 == 0:
    print("Exam and new dataset-------------\n")
    print("Training exam \n")
    examiner.exam(fn_train, train_dataset, trainer)
def test(config, task, language, df_results, debug=False):
    model = get_model(config)

    # following two lines are copied from train()
    block_size = config['block_size']
    text = open(config['train_filename'], 'r').read()
    train_dataset = CharDataset(config['chars'], text, block_size)

    tconf = TrainerConfig(max_epochs=2, batch_size=config['batch_size'], learning_rate=6e-4,
                          lr_decay=True, warmup_tokens=512 * 20,
                          final_tokens=2 * len(train_dataset) * block_size,
                          num_workers=4)
    trainer = Trainer(model, train_dataset, None, tconf)

    # test
    testing_t0 = datetime.datetime.now()
    df_test = pd.read_csv(config['test_filename'], header=None, names=config['features'])
    n = 0
    n_ok = 0
    n_ko = 0
    for index, row in df_test.iterrows():
        if row.Task != task or row.Language != language:
            continue
        n += 1
        # build the context
        context = language + ',' + task + ',' + row.Input + ','
        # get the predicted output string
        prediction_padded = predict_pron(config, train_dataset, trainer, model, context)
        # identify where the interesting output is in the raw output
        if prediction_padded.startswith(context):
            # remove unwanted prefix
            prediction_padded = prediction_padded[len(context):]
            # remove unwanted postfix (i.e. remove padding)
            eos = prediction_padded.find('\n', 1)
            #eos_p = prediction_padded.find(',', 1)
            #eos_t = row.Output.find(',', 1)
            #if eos_p < 0 or eos_t < 0:
            if eos < 0:
                n_ko += 1
            else:
                #prediction = prediction_padded[:eos_p]
                #target = row.Output[:eos_t]
                prediction = prediction_padded[:eos]
                target = row.Output
                # check if prediction is same as target
                if prediction == target:
                    n_ok += 1
                else:
                    if debug and language != 'eno':
                        print('language:%s, target:%s, prediction:%s,' % (language, target, prediction))
                    n_ko += 1
        else:
            n_ko += 1

    pctg_ok = int(n_ok / n * 100)
    pctg_ko = 100 - pctg_ok
    if config['episodes'] == 1:
        print('%s %5s: n=%d, n_ok=%d, n_ko=%d => %%n_ok=%d%%' % (language, task, n, n_ok, n_ko, pctg_ok))
    testing_t1 = datetime.datetime.now()
    test_duration = testing_t1 - testing_t0
    dict_res = {
        'lang': language,
        'task': task,
        'test_accuracy': n_ok / n,
        #'training_duration': training_duration,
        'test_duration': test_duration
    }
    return dict_res
If you don't have as many computational resources, you will have to bring down the batch_size
until the model fits into your memory, and then you may also need to adjust the learning rate
(e.g. decrease it a bit). Alternatively, you can use an even smaller model up above, bringing
down the number of layers, number of heads, and the embedding size.
"""

tokens_per_epoch = len(train_data) * train_dataset.block_size
train_epochs = 20  # todo run a bigger model and longer, this is tiny

# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=train_epochs, batch_size=16*8, learning_rate=3e-3,
                      betas=(0.9, 0.95), weight_decay=0,
                      lr_decay=True, warmup_tokens=tokens_per_epoch,
                      final_tokens=train_epochs*tokens_per_epoch,
                      ckpt_path='cifar10_model.pt', num_workers=8)
trainer = Trainer(model, train_dataset, test_dataset, tconf)
trainer.train()

# load the state of the best model we've seen based on early stopping
checkpoint = torch.load('cifar10_model.pt')
model.load_state_dict(checkpoint)

# to sample we also have to technically "train" a separate model for the first token in the sequence
# we are going to do so below simply by calculating and normalizing the histogram of the first token
counts = torch.ones(ncluster)  # start counts as 1 not zero, this is called "smoothing"
rp = torch.randperm(len(train_dataset))
nest = 5000  # how many images to use for the estimation
for i in range(nest):
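As a hedged illustration of the scaling-down advice above, here is one way it might look for the same CIFAR-10 setup: a smaller GPTConfig (fewer layers and heads, narrower embedding), a reduced batch size, and a slightly lower learning rate. The specific values, the checkpoint filename, and the reuse of train_dataset, test_dataset, and train_epochs from the surrounding notebook are assumptions for illustration, not settings from the original run.

from mingpt.model import GPT, GPTConfig
from mingpt.trainer import Trainer, TrainerConfig

# a smaller model than the one trained above: fewer layers/heads, narrower embedding
small_mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                        n_layer=6, n_head=4, n_embd=256)
small_model = GPT(small_mconf)

tokens_per_epoch = len(train_dataset) * train_dataset.block_size
small_tconf = TrainerConfig(max_epochs=train_epochs,
                            batch_size=16,       # shrink until the model fits in memory
                            learning_rate=1e-3,  # decreased a bit alongside the batch size
                            betas=(0.9, 0.95), weight_decay=0,
                            lr_decay=True, warmup_tokens=tokens_per_epoch,
                            final_tokens=train_epochs * tokens_per_epoch,
                            ckpt_path='cifar10_model_small.pt', num_workers=2)
Trainer(small_model, train_dataset, test_dataset, small_tconf).train()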
train_dataset = CharDataset(text, block_size=sample_block_size)
model_config = GPTConfig(
    train_dataset.vocab_size,
    train_dataset.block_size,
    n_layer=8,
    n_head=8,
    n_embd=512,
)
model = GPT(model_config)

trainer_config = TrainerConfig(
    max_epochs=200,
    batch_size=512,
    learning_rate=6e-4,
    lr_decay=True,
    warmup_tokens=512 * 20,
    final_tokens=200 * len(train_dataset) * sample_block_size,
    num_workers=4,
)
trainer = Trainer(model, train_dataset, None, trainer_config)
trainer.train()

context = "대한민국의 대통령"  # "The President of the Republic of Korea"
sample_x = torch.tensor(
    [train_dataset.char2idx[s] for s in context],
    dtype=torch.long,
)[None, ...].to(trainer.device)
sample_y = sample(model, sample_x, 2000,