    # learning_rate=6e-4
    # lr_decay=True
    # warmup_tokens=512*20
    # final_tokens=200*len(pretrain_dataset)*block_size
    # num_workers=4
    # Hyperparameters for finetuning WITH a pretrained model:
    # max_epochs=10
    # batch_size=256
    # learning_rate=6e-4
    # lr_decay=True
    # warmup_tokens=512*20
    # final_tokens=200*len(pretrain_dataset)*block_size
    # num_workers=4
    with open(args.finetune_corpus_path, 'r') as infile:
        data = infile.read()  # don't worry, we won't run out of file handles
    train_dataset = dataset.NameDataset(pretrain_dataset, data)
    if args.reading_params_path:
        max_epochs = 10
        model.load_state_dict(torch.load(args.reading_params_path))
    else:
        max_epochs = 75
    tconf = dict(batch_size=256, learning_rate=6e-4, lr_decay=True,
                 max_epochs=max_epochs, warmup_tokens=512 * 20,
                 final_tokens=200 * len(pretrain_dataset) * block_size,
                 num_workers=4, ckpt_path=args.writing_params_path)
    tconf = trainer.TrainerConfig(**tconf)
    trainer = trainer.Trainer(model, train_dataset, None, tconf)
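    # NOTE (illustrative only): warmup_tokens and final_tokens above drive the
    # trainer's token-based learning-rate schedule. The helper below sketches
    # that schedule under the assumption of a minGPT-style trainer (linear
    # warmup to the base rate, then cosine decay toward a 10% floor); the
    # authoritative logic lives in trainer.py, and this function is never called.
    def _lr_multiplier_sketch(tokens_seen, warmup_tokens, final_tokens):
        import math
        if tokens_seen < warmup_tokens:
            # linear warmup from 0 up to the base learning rate
            return tokens_seen / max(1, warmup_tokens)
        # cosine decay from the base rate down to 10% of it by final_tokens
        progress = (tokens_seen - warmup_tokens) / max(1, final_tokens - warmup_tokens)
        return max(0.1, 0.5 * (1.0 + math.cos(math.pi * progress)))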
    if args.reading_params_path:  # contains pretrained model
        model.load_state_dict(torch.load(args.reading_params_path))
        model = model.to(device)
        tconf = trainer.TrainerConfig(max_epochs=10, batch_size=256,
                                      learning_rate=6e-4, lr_decay=True,
                                      warmup_tokens=512*20,
                                      final_tokens=200*len(pretrain_dataset)*block_size,
                                      num_workers=4)
    else:  # fine-tune without pre-training
        tconf = trainer.TrainerConfig(max_epochs=75, batch_size=256,
                                      learning_rate=6e-4, lr_decay=True,
                                      warmup_tokens=512*20,
                                      final_tokens=200*len(pretrain_dataset)*block_size,
                                      num_workers=4)
    train_dataset = dataset.NameDataset(pretrain_dataset,
                                        open(args.finetune_corpus_path, 'r').read())
    ft_trainer = trainer.Trainer(model, train_dataset, None, tconf)
    ft_trainer.train()
    torch.save(model.state_dict(), args.writing_params_path)
elif args.function == 'evaluate':
    assert args.outputs_path is not None
    assert args.reading_params_path is not None
    assert args.eval_corpus_path is not None
    model.load_state_dict(torch.load(args.reading_params_path))
    model = model.to(device)
    correct = 0
    total = 0
    with open(args.outputs_path, 'w') as fout:
        predictions = []
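# Usage sketch (hypothetical paths; the exact positional arguments and flag
# names are defined by the argparse setup earlier in this file, not shown here):
#   python src/run.py finetune vanilla wiki.txt \
#       --reading_params_path vanilla.pretrain.params \
#       --writing_params_path vanilla.finetune.params \
#       --finetune_corpus_path birth_places_train.tsv
#   python src/run.py evaluate vanilla wiki.txt \
#       --reading_params_path vanilla.finetune.params \
#       --eval_corpus_path birth_dev.tsv \
#       --outputs_path vanilla.finetune.dev.predictions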
def main():
    """ Don't change above here; write your code below """
    if args.variant == 'vanilla':
        model = GPT(mconf)  # TODO [part c]: Make some model here
    elif args.variant == 'synthesizer':
        # TODO [part g]: Make some other model here
        mconf.synthesizer = True
        model = GPT(mconf)

    # From here on, your code should be identical independent of which
    # variant (vanilla or synthesizer) has been chosen.
    if args.function == 'pretrain':
        assert args.pretrain_corpus_path is not None
        assert args.writing_params_path is not None
        # TODO [part f]:
        # - Given:
        #     1. A corpus specified in args.pretrain_corpus_path
        #     2. An output path args.writing_params_path for the model parameters
        # - Goals:
        #     1. Pretrain the model on this corpus
        #     2. Save the resulting model in args.writing_params_path
        # - Make sure to use the following hyperparameters for pretraining:
        #     max_epochs=650
        #     batch_size=128
        #     learning_rate=6e-3
        #     lr_decay=True
        #     warmup_tokens=512*20
        #     final_tokens=200*len(pretrain_dataset)*block_size
        #     num_workers=4
        # pretrain_text = open(args.pretrain_corpus_path, 'r', encoding='utf-8').read()
        # pretrain_dataset =
        tconf = TrainerConfig(max_epochs=650, batch_size=128,
                              learning_rate=6e-3, lr_decay=True,
                              warmup_tokens=512 * 20,
                              final_tokens=200 * len(pretrain_dataset) * block_size,
                              num_workers=4)
        trainer = Trainer(model, pretrain_dataset, None, tconf)
        trainer.train()
        torch.save(model.state_dict(), args.writing_params_path)
    elif args.function == 'finetune':
        assert args.writing_params_path is not None
        assert args.finetune_corpus_path is not None
        # TODO [part c] [part f]:
        # - Given:
        #     1. A finetuning corpus specified in args.finetune_corpus_path
        #     2. A path args.reading_params_path containing pretrained model
        #        parameters, or None if finetuning without a pretrained model
        #     3. An output path args.writing_params_path for the model parameters
        # - Goals:
        #     1. If args.reading_params_path is specified, load these parameters
        #        into the model
        #     2. Finetune the model on this corpus
        #     3. Save the resulting model in args.writing_params_path
        # - Make sure to use the following hyperparameters:
        #   Hyperparameters for finetuning WITHOUT a pretrained model:
        #     max_epochs=75
        #     batch_size=256
        #     learning_rate=6e-4
        #     lr_decay=True
        #     warmup_tokens=512*20
        #     final_tokens=200*len(pretrain_dataset)*block_size
        #     num_workers=4
        #   Hyperparameters for finetuning WITH a pretrained model:
        #     max_epochs=10
        #     batch_size=256
        #     learning_rate=6e-4
        #     lr_decay=True
        #     warmup_tokens=512*20
        #     final_tokens=200*len(pretrain_dataset)*block_size
        #     num_workers=4
        if args.reading_params_path is not None:
            model.load_state_dict(torch.load(args.reading_params_path))
            max_epochs = 10   # finetuning WITH a pretrained model
        else:
            max_epochs = 75   # finetuning WITHOUT a pretrained model
        tconf = TrainerConfig(max_epochs=max_epochs, batch_size=256,
                              learning_rate=6e-4, lr_decay=True,
                              warmup_tokens=512 * 20,
                              final_tokens=200 * len(pretrain_dataset) * block_size,
                              num_workers=4)
        text = open(args.finetune_corpus_path, 'r').read()
        train_dataset = dataset.NameDataset(pretrain_dataset, text)
        trainer = Trainer(model, train_dataset, None, tconf)
        trainer.train()
        # Save the finetuned parameters to args.writing_params_path
        torch.save(model.state_dict(), args.writing_params_path)
    elif args.function == 'evaluate':
        assert args.outputs_path is not None
        assert args.reading_params_path is not None
        assert args.eval_corpus_path is not None
        model.load_state_dict(torch.load(args.reading_params_path))
        model = model.to(device)
        correct = 0
        total = 0
        with open(args.outputs_path, 'w') as fout:
            predictions = []
            for line in tqdm(open(args.eval_corpus_path)):
                x = line.split('\t')[0]
                x = x + '⁇'
                x = torch.tensor([pretrain_dataset.stoi[s] for s in x],
                                 dtype=torch.long)[None, ...].to(device)
                pred = utils.sample(model, x, 32, sample=False)[0]
                completion = ''.join(
                    [pretrain_dataset.itos[int(i)] for i in pred])
                pred = completion.split('⁇')[1]
                predictions.append(pred)
                fout.write(pred + '\n')
        total, correct = utils.evaluate_places(args.eval_corpus_path, predictions)
        if total > 0:
            print('Correct: {} out of {}: {}%'.format(
                correct, total, correct / total * 100))
        else:
            print('Predictions written to {}; no targets provided'.format(
                args.outputs_path))
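# The accuracy report above relies on utils.evaluate_places. The helper below is
# only a hypothetical re-implementation sketch, assuming each eval line is
# "question<TAB>gold place" and scoring is exact string match; the authoritative
# version is the one in utils.py, and this function is never called here.
def _exact_match_sketch(eval_corpus_path, predictions):
    lines = [line.strip() for line in open(eval_corpus_path, encoding='utf-8')]
    golds = [line.split('\t')[1] for line in lines if '\t' in line]
    if len(golds) != len(predictions):
        return 0, 0  # no (or incomplete) gold targets available
    correct = sum(1 for gold, pred in zip(golds, predictions) if gold == pred)
    return len(golds), correct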
    # batch_size=256
    # learning_rate=6e-4
    # lr_decay=True
    # warmup_tokens=512*20
    # final_tokens=200*len(pretrain_dataset)*block_size
    # num_workers=4
    # Hyperparameters for finetuning WITH a pretrained model:
    # max_epochs=10
    # batch_size=256
    # learning_rate=6e-4
    # lr_decay=True
    # warmup_tokens=512*20
    # final_tokens=200*len(pretrain_dataset)*block_size
    # num_workers=4
    finetuneCorpus = open(args.finetune_corpus_path).read()
    finetuneDataset = dataset.NameDataset(pretrain_dataset, finetuneCorpus)
    if args.reading_params_path is not None:
        model.load_state_dict(torch.load(args.reading_params_path))
        max_epochs = 10   # finetuning WITH a pretrained model
    else:
        max_epochs = 75   # finetuning WITHOUT a pretrained model
    tconf = trainer.TrainerConfig(max_epochs=max_epochs, batch_size=256,
                                  learning_rate=6e-4, lr_decay=True,
                                  warmup_tokens=512 * 20,
                                  final_tokens=200 * len(pretrain_dataset) * block_size,
                                  num_workers=4,
                                  ckpt_path=args.writing_params_path)
    trainer = trainer.Trainer(model, finetuneDataset, None, tconf)
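    # NOTE: passing ckpt_path=args.writing_params_path above lets the trainer
    # write checkpoints itself (assuming a minGPT-style Trainer that saves to
    # config.ckpt_path); other variants in this file instead call torch.save
    # explicitly after training. Illustrative helper only, never called here:
    def _save_and_reload_sketch(model, params_path, device):
        # explicit round trip: save the finetuned weights, then reload them
        # onto the right device with a device-safe map_location
        torch.save(model.state_dict(), params_path)
        model.load_state_dict(
            torch.load(params_path, map_location=torch.device(device)))
        return model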
    assert args.writing_params_path is not None
    assert args.finetune_corpus_path is not None
    # - Given:
    #     1. A finetuning corpus specified in args.finetune_corpus_path
    #     2. A path args.reading_params_path containing pretrained model
    #        parameters, or None if finetuning without a pretrained model
    #     3. An output path args.writing_params_path for the model parameters
    # - Goals:
    #     1. If args.reading_params_path is specified, load these parameters
    #        into the model
    #     2. Finetune the model on this corpus
    #     3. Save the resulting model in args.writing_params_path

    # Load the finetuning corpus and create the finetuning dataset
    finetune = open(args.finetune_corpus_path, 'r', encoding="utf8").read()
    finetune_dataset = dataset.NameDataset(pretrain_dataset, finetune)
    # If it is provided, load any model before finetuning or evaluating.
    if args.reading_params_path:
        model.load_state_dict(torch.load(args.reading_params_path))
        # Hyperparameters for finetuning WITH a pretrained model:
        tconf = trainer.TrainerConfig(max_epochs=10, batch_size=256,
                                      learning_rate=6e-4, lr_decay=True,
                                      warmup_tokens=512 * 20,
                                      final_tokens=200 * len(pretrain_dataset) * block_size,
                                      num_workers=4,
                                      ckpt_path=args.writing_params_path)
    else:
        # Hyperparameters for finetuning without a pretrained model:
        tconf = trainer.TrainerConfig(max_epochs=75, batch_size=256,
                                      learning_rate=6e-4, lr_decay=True,
                                      warmup_tokens=512 * 20,
                                      final_tokens=200 * len(pretrain_dataset) * block_size,
                                      num_workers=4)
    trainer = trainer.Trainer(model, finetune_dataset, None, tconf)
    trainer.train()
    # Save the resulting model in args.writing_params_path
    torch.save(model.state_dict(), args.writing_params_path)
elif args.function == 'evaluate':
    assert args.outputs_path is not None
    assert args.reading_params_path is not None
    assert args.eval_corpus_path is not None
    model.load_state_dict(torch.load(args.reading_params_path))
    model = model.to(device)
    correct = 0
    total = 0
    with open(args.outputs_path, 'w') as fout:
        predictions = []
    if args.reading_params_path is not None:
        model.load_state_dict(torch.load(args.reading_params_path))
        tconf = trainer.TrainerConfig(
            max_epochs=10,
            batch_size=256,
            learning_rate=6e-4,
            lr_decay=True,
            warmup_tokens=512*20,
            final_tokens=200*len(pretrain_dataset)*block_size,
            num_workers=4
        )
    else:
        tconf = trainer.TrainerConfig(
            max_epochs=75,
            batch_size=256,
            learning_rate=6e-4,
            lr_decay=True,
            warmup_tokens=512*20,
            final_tokens=200*len(pretrain_dataset)*block_size,
            num_workers=4
        )
    finetune_text = open(args.finetune_corpus_path).read()
    finetune_name = dataset.NameDataset(pretrain_dataset, finetune_text)
    modelTrain = trainer.Trainer(model, finetune_name, None, tconf)
    modelTrain.train()
    torch.save(model.state_dict(), args.writing_params_path)
elif args.function == 'evaluate':
    assert args.outputs_path is not None
    assert args.reading_params_path is not None
    assert args.eval_corpus_path is not None
    model.load_state_dict(torch.load(args.reading_params_path))
    correct = 0
    total = 0
    model.to(device)
    with open(args.outputs_path, 'w') as fout:
        predictions = []
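# The evaluation loops in this file append the '⁇' separator to each question,
# greedily decode 32 characters with utils.sample, and keep the text between
# the separators as the predicted birth place. The helper below is just a
# compact, illustrative restatement of that loop body (never called here):
def _greedy_place_sketch(model, pretrain_dataset, question, device):
    x = question + '⁇'
    idx = torch.tensor([pretrain_dataset.stoi[c] for c in x],
                       dtype=torch.long)[None, ...].to(device)
    pred = utils.sample(model, idx, 32, sample=False)[0]
    completion = ''.join(pretrain_dataset.itos[int(i)] for i in pred)
    return completion.split('⁇')[1]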
    # lr_decay=True
    # warmup_tokens=512*20
    # final_tokens=200*len(pretrain_dataset)*block_size
    # num_workers=4
    if args.reading_params_path:
        model.load_state_dict(torch.load(args.reading_params_path), strict=False)
        tconf = trainer.TrainerConfig(max_epochs=10, batch_size=256,
                                      learning_rate=6e-4, lr_decay=True,
                                      warmup_tokens=512*20,
                                      final_tokens=200*len(pretrain_dataset)*block_size,
                                      num_workers=4)
    else:
        tconf = trainer.TrainerConfig(max_epochs=75, batch_size=256,
                                      learning_rate=6e-4, lr_decay=True,
                                      warmup_tokens=512*20,
                                      final_tokens=200*len(pretrain_dataset)*block_size,
                                      num_workers=4)
    finetune_dataset = dataset.NameDataset(
        pretrain_dataset, open(args.finetune_corpus_path, encoding="utf8").read())
    t = trainer.Trainer(model, finetune_dataset, None, tconf)
    t.train()
    torch.save(model.state_dict(), args.writing_params_path)
elif args.function == 'evaluate':
    assert args.outputs_path is not None
    assert args.reading_params_path is not None
    assert args.eval_corpus_path is not None
    model.load_state_dict(torch.load(args.reading_params_path))
    correct = 0
    total = 0
    with open(args.outputs_path, 'w') as fout:
        predictions = []
        for line in tqdm(open(args.eval_corpus_path)):
            x = line.split('\t')[0]
    # lr_decay=True
    # warmup_tokens=512*20
    # final_tokens=200*len(pretrain_dataset)*block_size
    # num_workers=4
    # Hyperparameters for finetuning WITH a pretrained model:
    # max_epochs=10
    # batch_size=256
    # learning_rate=6e-4
    # lr_decay=True
    # warmup_tokens=512*20
    # final_tokens=200*len(pretrain_dataset)*block_size
    # num_workers=4
    # start code
    tconf = None
    ft_text = open(args.finetune_corpus_path).read()
    train_dataset = dataset.NameDataset(pretrain_dataset, ft_text)
    if args.reading_params_path is not None:
        model.load_state_dict(
            torch.load(args.reading_params_path, map_location=torch.device(device)))
        tconf = trainer.TrainerConfig(max_epochs=10, batch_size=256,
                                      learning_rate=6e-4, lr_decay=True,
                                      warmup_tokens=512 * 20,
                                      final_tokens=200 * len(pretrain_dataset) * block_size,
                                      num_workers=4)
    else:
        tconf = trainer.TrainerConfig(max_epochs=75,