def main(data_path, version, config_args, train_args, func, save_dir, pretrain_state=None):
    if pretrain_state:
        pretrain_vocab = {'itos': pretrain_state['itos'], 'stoi': pretrain_state['stoi']}
        state_dict = pretrain_state['state_dict']
    else:
        pretrain_vocab = None
        state_dict = None

    # get device
    device = torch.cuda.current_device() if torch.cuda.is_available() else 'cpu'

    # load pretraining text
    with open(data_path) as f:
        games = f.read()

    # build dataset
    print('\nProcessing dataset...')
    train_dataset = dataset.Directory(games, version, config_args, pretrain_vocab)()

    # build model config
    mconf = model.GPTConfig(
        vocab_size=train_dataset.vocab_size,
        args_dict=config_args
    )

    # build model
    gpt_model = model.GPT(mconf)
    gpt_model = gpt_model.to(device)

    # build trainer and run training
    train_config = trainer.TrainerConfig(func=func, state_dict=state_dict, args_dict=train_args)
    model_trainer = trainer.Trainer(gpt_model, train_dataset, save_dir, config=train_config)
    model_trainer.train()
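
# A minimal, hypothetical driver for main() above. It assumes the config and
# training hyperparameters are plain dicts (main() forwards them as args_dict)
# and that a saved pretraining checkpoint is a torch pickle holding 'itos',
# 'stoi', and 'state_dict'. The flag names and example values are illustrative,
# not part of the project's actual CLI.
if __name__ == '__main__':
    import argparse
    import torch

    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', required=True)
    parser.add_argument('--version', default='v1')
    parser.add_argument('--func', default='pretrain', choices=['pretrain', 'finetune'])
    parser.add_argument('--save_dir', default='ckpts')
    parser.add_argument('--pretrain_ckpt', default=None)  # hypothetical flag
    cli = parser.parse_args()

    pretrain_state = torch.load(cli.pretrain_ckpt) if cli.pretrain_ckpt else None
    config_args = {'n_layer': 8, 'n_head': 8, 'n_embd': 512}                   # illustrative
    train_args = {'max_epochs': 10, 'batch_size': 128, 'learning_rate': 6e-4}  # illustrative

    main(cli.data_path, cli.version, config_args, train_args,
         cli.func, cli.save_dir, pretrain_state=pretrain_state)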
    model = model.GPT(mconf)
    model = model.to(device)
elif args.variant == 'synthesizer':
    mconf = model.GPTConfig(pretrain_dataset.vocab_size, pretrain_dataset.block_size,
                            n_layer=4, n_head=8, n_embd=256, synthesizer=True)
    model = model.GPT(mconf)
    model = model.to(device)

if args.function == 'pretrain':
    assert args.pretrain_corpus_path is not None
    assert args.writing_params_path is not None
    tconf = trainer.TrainerConfig(max_epochs=650, batch_size=128, learning_rate=6e-3,
                                  lr_decay=True, warmup_tokens=512 * 20,
                                  final_tokens=200 * len(pretrain_dataset) * block_size,
                                  num_workers=4)
    ft_trainer = trainer.Trainer(model, pretrain_dataset, None, tconf)  # create a trainer object
    ft_trainer.train()
    torch.save(model.state_dict(), args.writing_params_path)  # save model state to this path
elif args.function == 'finetune':
    assert args.writing_params_path is not None
    assert args.finetune_corpus_path is not None
    tconf = None
    if args.reading_params_path:  # contains pretrained model parameters
        model.load_state_dict(torch.load(args.reading_params_path))
        model = model.to(device)
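
# Context for the configuration objects used above: in minGPT-style projects,
# TrainerConfig is typically a plain bag of attributes whose class-level
# defaults are overridden by keyword arguments. The class below is a
# standalone sketch of that pattern; it is an assumption about how this
# project's trainer.TrainerConfig behaves, not the project's code.
class _TrainerConfigSketch:
    # class-level defaults, overridden by whatever kwargs the caller passes
    max_epochs = 10
    batch_size = 64
    learning_rate = 3e-4
    lr_decay = False
    num_workers = 0
    ckpt_path = None

    def __init__(self, **kwargs):
        for k, v in kwargs.items():
            setattr(self, k, v)

# usage: _TrainerConfigSketch(max_epochs=650, batch_size=128).max_epochs -> 650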
    # pretraining hyperparameters
    tconf = dict(batch_size=128, learning_rate=6e-3, lr_decay=True, max_epochs=650,
                 warmup_tokens=512 * 20,
                 final_tokens=200 * len(pretrain_dataset) * block_size,
                 num_workers=4, ckpt_path=args.writing_params_path)
    tconf = trainer.TrainerConfig(**tconf)
    pretrain_trainer = trainer.Trainer(model, pretrain_dataset, None, tconf)
    pretrain_trainer.train()
elif args.function == 'finetune':
    assert args.writing_params_path is not None
    assert args.finetune_corpus_path is not None
    # TODO [part c] [part f]:
    # - Given:
    #     1. A finetuning corpus specified in args.finetune_corpus_path
    #     2. A path args.reading_params_path containing pretrained model
    #        parameters, or None if finetuning without a pretrained model
    #     3. An output path args.writing_params_path for the model parameters
    # - Goals:
    #     1. If args.reading_params_path is specified, load these parameters
    #        into the model
    #     2. Finetune the model on this corpus
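
    # A hedged sketch of one way this branch could be filled in; it is not the
    # reference solution. It assumes dataset.NameDataset(pretrain_dataset, text)
    # wraps the finetuning corpus with the pretraining vocabulary, and the
    # hyperparameter values are placeholders to be tuned.
    if args.reading_params_path is not None:
        model.load_state_dict(torch.load(args.reading_params_path))
    with open(args.finetune_corpus_path) as f:
        finetune_text = f.read()
    finetune_dataset = dataset.NameDataset(pretrain_dataset, finetune_text)  # assumed wrapper
    tconf = trainer.TrainerConfig(max_epochs=10, batch_size=256, learning_rate=6e-4,
                                  lr_decay=True, warmup_tokens=512 * 20,
                                  final_tokens=200 * len(pretrain_dataset) * block_size,
                                  num_workers=4, ckpt_path=args.writing_params_path)
    finetune_trainer = trainer.Trainer(model, finetune_dataset, None, tconf)
    finetune_trainer.train()
    torch.save(model.state_dict(), args.writing_params_path)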