Example #1
import torch

import dataset   # project-local module providing dataset.Directory
import model     # project-local module providing model.GPT / model.GPTConfig
import trainer   # project-local module providing trainer.Trainer / trainer.TrainerConfig


def main(data_path, version, config_args, train_args, func, save_dir, pretrain_state=None):

    if pretrain_state:
        pretrain_vocab = {'itos': pretrain_state['itos'],
                          'stoi': pretrain_state['stoi']}

        state_dict = pretrain_state['state_dict']
    else:
        pretrain_vocab = None
        state_dict = None

    # get device
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # load pretrain dataset
    with open(data_path) as f:
        games = f.read()

    # build datasets
    print('\nProcessing dataset...')

    train_dataset = dataset.Directory(games,
                                      version,
                                      config_args,
                                      pretrain_vocab)()
    # load model
    mconf = model.GPTConfig(
        vocab_size=train_dataset.vocab_size,
        args_dict=config_args
    )

    # build model
    gpt_model = model.GPT(mconf)
    gpt_model = gpt_model.to(device)

    train_config = trainer.TrainerConfig(func=func,
                                         state_dict=state_dict,
                                         args_dict=train_args)

    model_trainer = trainer.Trainer(gpt_model,
                                    train_dataset,
                                    save_dir,
                                    config=train_config)
    model_trainer.train()
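
A minimal invocation sketch for the main() function above. All paths and hyperparameter values here are hypothetical placeholders, not values taken from the original project:

if __name__ == '__main__':
    # hypothetical arguments; the real project presumably parses these from a CLI
    config_args = {'block_size': 128, 'n_layer': 8, 'n_head': 8, 'n_embd': 512}
    train_args = {'max_epochs': 10, 'batch_size': 64, 'learning_rate': 3e-4}

    main(data_path='data/games.txt',   # plain-text corpus read by main()
         version='v1',                 # dataset version tag consumed by dataset.Directory
         config_args=config_args,
         train_args=train_args,
         func='pretrain',              # forwarded to TrainerConfig
         save_dir='checkpoints')       # where Trainer writes checkpoints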
Example #2
    model = model.GPT(mconf)
    model = model.to(device)
    
    
elif args.variant == 'synthesizer':
    mconf = model.GPTConfig(pretrain_dataset.vocab_size, pretrain_dataset.block_size,
                            n_layer=4, n_head=8, n_embd=256, synthesizer=True)
    model = model.GPT(mconf)
    model = model.to(device)

if args.function == 'pretrain':
    assert args.pretrain_corpus_path is not None
    assert args.writing_params_path is not None

    tconf = trainer.TrainerConfig(max_epochs=650, batch_size=128,
                                  learning_rate=6e-3, lr_decay=True, warmup_tokens=512*20,
                                  final_tokens=200*len(pretrain_dataset)*block_size, num_workers=4)

    ft_trainer = trainer.Trainer(model, pretrain_dataset, None, tconf)  # create a trainer object
    ft_trainer.train()
    torch.save(model.state_dict(), args.writing_params_path)  # save the model state to this path

elif args.function == 'finetune':
    assert args.writing_params_path is not None
    assert args.finetune_corpus_path is not None

    tconf = None
    if args.reading_params_path:  # contains a pretrained model
        model.load_state_dict(torch.load(args.reading_params_path))
        model = model.to(device)
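
One portability caveat with the load_state_dict pattern above: a checkpoint saved on a GPU machine will not load on a CPU-only one unless a map_location is given. A minimal sketch of the safer variant, reusing the example's model and args names:

import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# map_location remaps GPU-saved tensors onto whatever device is available,
# so the same checkpoint loads on both CUDA and CPU-only machines
state_dict = torch.load(args.reading_params_path, map_location=torch.device(device))
model.load_state_dict(state_dict)
model = model.to(device)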
Example #3
File: run.py Project: yb-ml/nlp-cs224
    #     max_epochs=650
    #     batch_size=128
    #     learning_rate=6e-3
    #     lr_decay=True
    #     warmup_tokens=512*20
    #     final_tokens=200*len(pretrain_dataset)*block_size
    #     num_workers=4
    tconf = dict(batch_size=128,
                 learning_rate=6e-3,
                 lr_decay=True,
                 max_epochs=650,
                 warmup_tokens=512 * 20,
                 final_tokens=200 * len(pretrain_dataset) * block_size,
                 num_workers=4,
                 ckpt_path=args.writing_params_path)
    tconf = trainer.TrainerConfig(**tconf)
    # use a distinct name so the `trainer` module isn't shadowed by the instance
    model_trainer = trainer.Trainer(model, pretrain_dataset, None, tconf)
    model_trainer.train()
elif args.function == 'finetune':
    assert args.writing_params_path is not None
    assert args.finetune_corpus_path is not None
    # TODO [part c] [part f]:
    # - Given:
    #     1. A finetuning corpus specified in args.finetune_corpus_path
    #     2. A path args.reading_params_path containing pretrained model
    #         parameters, or None if finetuning without a pretrained model
    #     3. An output path args.writing_params_path for the model parameters
    # - Goals:
    #     1. If args.reading_params_path is specified, load these parameters
    #         into the model
    #     2. Finetune the model on this corpus
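
The TODO above spells out the finetune contract: optionally load pretrained weights, then train on the finetune corpus and write the parameters out. One way the branch could be completed is sketched below; FinetuneDataset is a hypothetical wrapper class, and the hyperparameters are illustrative rather than the project's actual values:

    if args.reading_params_path is not None:
        # warm-start from the pretrained parameters
        model.load_state_dict(torch.load(args.reading_params_path))

    # build the finetuning dataset from the corpus text
    # (FinetuneDataset is a placeholder; the real project presumably
    # ships its own dataset class for this corpus)
    with open(args.finetune_corpus_path) as f:
        finetune_text = f.read()
    finetune_dataset = FinetuneDataset(pretrain_dataset, finetune_text)

    tconf = trainer.TrainerConfig(max_epochs=75, batch_size=256,
                                  learning_rate=6e-4, lr_decay=True,
                                  warmup_tokens=512 * 20,
                                  final_tokens=200 * len(pretrain_dataset) * block_size,
                                  num_workers=4,
                                  ckpt_path=args.writing_params_path)
    model_trainer = trainer.Trainer(model, finetune_dataset, None, tconf)
    model_trainer.train()

    # persist the finetuned parameters
    torch.save(model.state_dict(), args.writing_params_path)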