def main(data_path, version, config_args, train_args, func, save_dir, pretrain_state=None):
    """Build the dataset, model, and trainer from the given configs and run training.

    Args:
        data_path: path to the raw games text corpus read from disk.
        version: dataset version tag forwarded to ``dataset.Directory``.
        config_args: dict of model-config overrides (``GPTConfig`` ``args_dict``).
        train_args: dict of trainer-config overrides (``TrainerConfig`` ``args_dict``).
        func: training mode/function forwarded to ``TrainerConfig``.
        save_dir: directory where the trainer writes checkpoints.
        pretrain_state: optional dict with keys ``'itos'``, ``'stoi'`` and
            ``'state_dict'`` from a pretraining run; when given, its vocab is
            reused for the dataset and its weights seed the trainer.
    """
    if pretrain_state:
        pretrain_vocab = {'itos': pretrain_state['itos'], 'stoi': pretrain_state['stoi']}
        state_dict = pretrain_state['state_dict']
    else:
        pretrain_vocab = None
        state_dict = None

    # get device (CUDA ordinal if available, otherwise CPU)
    device = torch.cuda.current_device() if torch.cuda.is_available() else 'cpu'

    # load pretrain dataset — use a context manager so the file handle is
    # closed deterministically (original `open(...).read()` leaked the handle)
    with open(data_path) as corpus_file:
        games = corpus_file.read()

    # build datasets
    print('\nProcessing dataset...')
    train_dataset = dataset.Directory(games, version, config_args, pretrain_vocab)()

    # build model config from the dataset vocabulary
    mconf = model.GPTConfig(
        vocab_size=train_dataset.vocab_size,
        args_dict=config_args
    )

    # build model and move it to the selected device
    gpt_model = model.GPT(mconf)
    gpt_model = gpt_model.to(device)

    # configure and run the trainer; state_dict (if any) seeds the weights
    train_config = trainer.TrainerConfig(func=func,
                                         state_dict=state_dict,
                                         args_dict=train_args)
    model_trainer = trainer.Trainer(gpt_model, train_dataset, save_dir, config=train_config)
    model_trainer.train()
# Select device: CUDA device ordinal when available, else the string 'cpu'.
device = torch.cuda.current_device() if torch.cuda.is_available() else 'cpu'
block_size = 128
# NOTE(review): file handle from open() is never closed — flagged, not changed here.
text = open(args.pretrain_corpus_path).read()
pretrain_dataset = dataset.CharCorruptionDataset(text, block_size)
mconf = model.GPTConfig(pretrain_dataset.vocab_size, pretrain_dataset.block_size, n_layer=4, n_head=8, n_embd=256)
""" Don't change above here; write your code below """
if args.variant == 'vanilla':
    # vanilla variant: use the base config as-is
    # NOTE(review): `model = model.GPT(...)` rebinds the module name `model`
    # to the instance — any later use of the `model` module would fail.
    model = model.GPT(mconf)
    model = model.to(device)
elif args.variant == 'synthesizer':
    # synthesizer variant: rebuild the config with synthesizer=True
    mconf = model.GPTConfig(pretrain_dataset.vocab_size, pretrain_dataset.block_size, n_layer=4, n_head=8, n_embd=256,synthesizer = True)
    model = model.GPT(mconf)
    model = model.to(device)
if args.function == 'pretrain':
    # pretraining requires both a corpus and an output path for the weights
    assert args.pretrain_corpus_path is not None
    assert args.writing_params_path is not None
    # NOTE(review): this chunk is cut off mid-call — the TrainerConfig
    # argument list continues beyond the visible source.
    tconf = trainer.TrainerConfig(max_epochs=650, batch_size=128, learning_rate=6e-3, lr_decay=True, warmup_tokens=512*20,
block_size = 128
# NOTE(review): file handle from open() is never closed — flagged, not changed here.
text = open(args.pretrain_corpus_path).read()
pretrain_dataset = dataset.CharCorruptionDataset(text, block_size)
# We don't suggest you change these hyperparameters, as they're known to work.
# use them for both the vanilla and the synthesizer models
mconf = model.GPTConfig(pretrain_dataset.vocab_size, pretrain_dataset.block_size, n_layer=4, n_head=8, n_embd=256)
""" Don't change above here; write your code below """
# Select the attention implementation by name; presumably AttentionMode is an
# enum keyed by the CLI variant string ('vanilla'/'synthesizer') — TODO confirm.
mconf.attention_mode = attention.AttentionMode[args.variant]
model = model.GPT(mconf).to(device)
# From here on, your code should be identical independent of which
# variant (vanilla or synthesizer) has been chosen.
if args.function == 'pretrain':
    # pretraining requires both a corpus and an output path for the weights
    assert args.pretrain_corpus_path is not None
    assert args.writing_params_path is not None
    # TODO [part f]:
    # - Given:
    #     1. A corpus specified in args.pretrain_corpus_path
    #     2. An output path args.writing_params_path for the model parameters
    # - Goals:
    #     1. Pretrain the model on this corpus
    #     2. Save the resulting model in args.writing_params_path
    # - Make sure to use the following hyperparameters for pretraining:
# Build the save name from the commentary checkpoint path plus a mode suffix.
suffix = '_with_chept' if chept_ckpt else '_score_eval'
args.save_name = comm_ckpt.split('/')[1] + suffix
# get ckpt — note `comm_ckpt` is rebound from a path string to the loaded dict
comm_ckpt = torch.load(comm_ckpt, map_location=torch.device(device))
comm_model_config = comm_ckpt['model_config']
comm_itos = comm_ckpt['itos']
comm_stoi = comm_ckpt['stoi']
comm_vocabs = {'itos': comm_itos, 'stoi': comm_stoi}
# build model config from the checkpoint's saved config and vocab size
comm_mconf = model.GPTConfig(vocab_size=len(comm_itos), args_dict=comm_model_config.__dict__)
# load model weights
comm_model = model.GPT(comm_mconf)
comm_model = comm_model.to(device)
comm_model.load_state_dict(comm_ckpt['state_dict'])
if chept_ckpt:
    # Optionally load the ChePT checkpoint the same way (path rebound to dict).
    chept_ckpt = torch.load(chept_ckpt, map_location=torch.device(device))
    chept_model_config = chept_ckpt['model_config']
    chept_itos = chept_ckpt['itos']
    chept_stoi = chept_ckpt['stoi']
    chept_vocabs = {'itos': chept_itos, 'stoi': chept_stoi}
    # build model config
    chept_mconf = model.GPTConfig(vocab_size=len(chept_itos), args_dict=chept_model_config.__dict__)
parser.add_argument('--n_tries', type=int, default=5, help='Number of retries to give ChePT')
args = parser.parse_args()
if not args.ckpt:
    # No checkpoint given: fall back to the most recent finetune checkpoint.
    ckpt_path = get_recent_ckpt('ckpts/finetune_default')
    print("\nWARNING: NO CHECKPOINT GIVEN")
    print(f"Using {ckpt_path}")
else:
    ckpt_path = args.ckpt
# Second path component becomes the save name (assumes 'ckpts/<name>/...'
# layout — TODO confirm against how checkpoints are written).
args.save_name = ckpt_path.split('/')[1]
# get ckpt
ckpt = torch.load(ckpt_path, map_location=torch.device(device))
model_config = ckpt['model_config']
itos = ckpt['itos']
stoi = ckpt['stoi']
# build model config from the checkpoint's saved config and vocab size
mconf = model.GPTConfig(
    vocab_size=len(itos),
    args_dict=model_config.__dict__
)
# load model weights
gpt_model = model.GPT(mconf)
gpt_model = gpt_model.to(device)
gpt_model.load_state_dict(ckpt['state_dict'])
main(gpt_model, stoi, itos, args)
pretrain_dataset = dataset.CharCorruptionDataset(text, block_size) # We don't suggest you change these hyperparameters, as they're known to work. # use them for both the vanilla and the synthesizer models mconf = model.GPTConfig(pretrain_dataset.vocab_size, pretrain_dataset.block_size, n_layer=4, n_head=8, n_embd=256) """ Don't change above here; write your code below """ if args.variant == 'vanilla': # TODO [part c]: Make some model here model = model.GPT(mconf, "vanilla") elif args.variant == 'synthesizer': # TODO [part g]: Make some other model here model = model.GPT(mconf, "synthesizer") # From here on, your code should be identical independent of which # variant (vanilla or synthesizer) has been chosen. if args.function == 'pretrain': assert args.pretrain_corpus_path is not None assert args.writing_params_path is not None # TODO [part f]: # - Given: # 1. A corpus specified in args.pretrain_corpus_path # 2. An output path args.writing_params_path for the model parameters # - Goals: # 1. Pretrain the model on this corpus
pretrain_dataset = dataset.CharCorruptionDataset(text, block_size)
# We don't suggest you change these hyperparameters, as they're known to work.
# use them for both the vanilla and the synthesizer models
mconf = model.GPTConfig(pretrain_dataset.vocab_size, pretrain_dataset.block_size, n_layer=4, n_head=8, n_embd=256)
""" Don't change above here; write your code below """
if args.variant == 'vanilla':
    # TODO [part c]: Make some model here
    # (removed a redundant `pass` that preceded this statement — dead code,
    # since the branch already contains a real statement)
    model = model.GPT(mconf, 'vanilla')
elif args.variant == 'synthesizer':
    # TODO [part g]: Make some other model here
    # (removed a redundant `pass` here as well)
    model = model.GPT(mconf, 'synthesizer')
# From here on, your code should be identical independent of which
# variant (vanilla or synthesizer) has been chosen.
if args.function == 'pretrain':
    # pretraining requires both a corpus and an output path for the weights
    assert args.pretrain_corpus_path is not None
    assert args.writing_params_path is not None
    # TODO [part f]:
    # - Given:
    #     1. A corpus specified in args.pretrain_corpus_path
    #     2. An output path args.writing_params_path for the model parameters
    # - Goals: