Example no. 1
import torch

import dataset
import model
import trainer


def main(data_path, version, config_args, train_args, func, save_dir, pretrain_state=None):

    if pretrain_state:
        pretrain_vocab = {'itos': pretrain_state['itos'],
                          'stoi': pretrain_state['stoi']}

        state_dict = pretrain_state['state_dict']
    else:
        pretrain_vocab = None
        state_dict = None

    # get device
    device = torch.cuda.current_device() if torch.cuda.is_available() else 'cpu'

    # load pretrain dataset
    games = open(data_path).read()

    # build datasets
    print('\nProcessing dataset...')

    train_dataset = dataset.Directory(games,
                                      version,
                                      config_args,
                                      pretrain_vocab)()
    # load model
    mconf = model.GPTConfig(
        vocab_size=train_dataset.vocab_size,
        args_dict=config_args
    )

    # build model
    gpt_model = model.GPT(mconf)
    gpt_model = gpt_model.to(device)

    train_config = trainer.TrainerConfig(func=func,
                                         state_dict=state_dict,
                                         args_dict=train_args)

    model_trainer = trainer.Trainer(gpt_model,
                                    train_dataset,
                                    save_dir,
                                    config=train_config)
    model_trainer.train()
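
As a rough usage sketch (not part of the original script), main() above could be driven as follows; the paths, the contents of config_args/train_args, and the 'finetune' value for func are illustrative assumptions, while the checkpoint keys mirror the ones main() itself reads.

# Sketch only: paths, argument dictionaries, and the 'finetune' func value are
# hypothetical; the checkpoint keys ('itos', 'stoi', 'state_dict') mirror the
# ones consumed by main() above.
import os

config_args = {'block_size': 128, 'n_layer': 4, 'n_head': 8, 'n_embd': 256}
train_args = {'max_epochs': 2, 'batch_size': 256, 'learning_rate': 6e-4}

pretrain_state = None
ckpt_path = 'ckpts/pretrain_default/model.pt'  # hypothetical path
if os.path.isfile(ckpt_path):
    ckpt = torch.load(ckpt_path, map_location='cpu')
    pretrain_state = {'itos': ckpt['itos'],
                      'stoi': ckpt['stoi'],
                      'state_dict': ckpt['state_dict']}

main(data_path='data/games.txt',  # hypothetical path
     version='v1',
     config_args=config_args,
     train_args=train_args,
     func='finetune',
     save_dir='ckpts/finetune_default',
     pretrain_state=pretrain_state)
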
Example no. 2
device = torch.cuda.current_device() if torch.cuda.is_available() else 'cpu'

# Keep the block size 128
# Why is the pretraining corpus always required (even if we're not pretraining?)
# It's because we're using it as a hack to always have the same vocabulary
# (that is, the same mapping from character to integer, and we build the
# vocab from the pretraining corpus.)
block_size = 128
text = open(args.pretrain_corpus_path).read()
pretrain_dataset = dataset.CharCorruptionDataset(text, block_size)

# We don't suggest you change these hyperparameters, as they're known to work.
# use them for both the vanilla and the synthesizer models
mconf = model.GPTConfig(pretrain_dataset.vocab_size,
                        pretrain_dataset.block_size,
                        n_layer=4,
                        n_head=8,
                        n_embd=256)
"""
Don't change above here; write your code below
"""

if args.variant == 'vanilla':
    pass  # TODO [part c]: Make some model here
elif args.variant == 'synthesizer':
    pass  # TODO [part g]: Make some other model here

# From here on, your code should be identical independent of which
# variant (vanilla or synthesizer) has been chosen.
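
A minimal sketch of the vocabulary hack described in the comments above: building the char-to-int tables directly from the pretraining corpus gives every run the same encoding (sketch only; the real CharCorruptionDataset may add special tokens on top, so its vocab_size can be larger than len(chars)).

# Sketch of the vocabulary hack: derive a fixed char<->int mapping from the
# pretraining corpus so every run shares the same encoding.
chars = sorted(set(text))
stoi = {ch: i for i, ch in enumerate(chars)}   # character -> integer
itos = {i: ch for ch, i in stoi.items()}       # integer -> character

# Encoding and decoding with these tables round-trips exactly.
assert ''.join(itos[stoi[ch]] for ch in text[:block_size]) == text[:block_size]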

Example no. 3
    if args.function == 'pretrain':
        assert os.path.isfile(test_file)
    else:
        assert os.path.isfile(chept_ckpt)

    suffix = '_with_chept' if chept_ckpt else '_score_eval'
    args.save_name = comm_ckpt.split('/')[1] + suffix
    # get ckpt
    comm_ckpt = torch.load(comm_ckpt, map_location=torch.device(device))
    comm_model_config = comm_ckpt['model_config']
    comm_itos = comm_ckpt['itos']
    comm_stoi = comm_ckpt['stoi']

    comm_vocabs = {'itos': comm_itos, 'stoi': comm_stoi}

    # build model config
    comm_mconf = model.GPTConfig(vocab_size=len(comm_itos),
                                 args_dict=comm_model_config.__dict__)

    # load model weights
    comm_model = model.GPT(comm_mconf)
    comm_model = comm_model.to(device)

    comm_model.load_state_dict(comm_ckpt['state_dict'])

    if chept_ckpt:
        chept_ckpt = torch.load(chept_ckpt, map_location=torch.device(device))
        chept_model_config = chept_ckpt['model_config']
        chept_itos = chept_ckpt['itos']
        chept_stoi = chept_ckpt['stoi']

        chept_vocabs = {'itos': chept_itos, 'stoi': chept_stoi}
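
The checkpoints read above imply a matching save step somewhere in training that stores 'state_dict', 'model_config', 'itos', and 'stoi'; a hedged sketch of what that call could look like (the path and the gpt_model/mconf/train_dataset names are assumptions, not the project's actual trainer code).

# Sketch of the save side implied by the keys loaded above.
torch.save({'state_dict': gpt_model.state_dict(),
            'model_config': mconf,
            'itos': train_dataset.itos,
            'stoi': train_dataset.stoi},
           'ckpts/finetune_default/model.pt')  # hypothetical path
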
Example no. 4
device = torch.cuda.current_device() if torch.cuda.is_available() else 'cpu'

# Keep the block size 128
# Why is the pretraining corpus always required (even if we're not pretraining?)
# It's because we're using it as a hack to always have the same vocabulary
# (that is, the same mapping from character to integer, and we build the
# vocab from the pretraining corpus.)
block_size = 128
text = open(args.pretrain_corpus_path).read()
pretrain_dataset = dataset.CharCorruptionDataset(text, block_size)

# We suggest you don't change these hyperparameters, as they're known to work.
# use them for both the vanilla and the synthesizer models
mconf = model.GPTConfig(pretrain_dataset.vocab_size,
                        pretrain_dataset.block_size,
                        n_layer=4,
                        n_head=8,
                        n_embd=256)
"""
Don't change above here; write your code below
"""
if args.variant == 'vanilla':
    model = model.GPT(mconf)
    model = model.to(device)
elif args.variant == 'synthesizer':
    mconf = model.GPTConfig(vocab_size=pretrain_dataset.vocab_size,
                            block_size=pretrain_dataset.block_size,
                            n_layer=4,
                            n_head=8,
                            n_embd=256,
                            synthesizer=True)
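
GPTConfig is called with several different signatures across these examples (positional vocab_size/block_size, keyword flags such as synthesizer=True or variant="vanilla", and an args_dict); below is a minimal sketch of a config object compatible with all of these call styles, not the project's actual class.

# Sketch (not the real GPTConfig): absorbs positional sizes, an optional
# args_dict, and arbitrary keyword flags as attributes.
class GPTConfigSketch:
    def __init__(self, vocab_size, block_size=None, args_dict=None, **kwargs):
        self.vocab_size = vocab_size
        self.block_size = block_size
        for source in (args_dict or {}, kwargs):
            for key, value in source.items():
                setattr(self, key, value)
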
Example no. 5
    parser.add_argument('--n_tries', type=int, default=5,
                        help='Number of retries to give ChePT')
    args = parser.parse_args()

    if not args.ckpt:
        ckpt_path = get_recent_ckpt('ckpts/finetune_default')
        print("\nWARNING: NO CHECKPOINT GIVEN")
        print(f"Using {ckpt_path}")
    else:
        ckpt_path = args.ckpt
    args.save_name = ckpt_path.split('/')[1]
    # get ckpt
    ckpt = torch.load(ckpt_path, map_location=torch.device(device))
    model_config = ckpt['model_config']
    itos = ckpt['itos']
    stoi = ckpt['stoi']

    # build model config
    mconf = model.GPTConfig(
        vocab_size=len(itos),
        args_dict=model_config.__dict__
    )

    # load model weights
    gpt_model = model.GPT(mconf)
    gpt_model = gpt_model.to(device)

    gpt_model.load_state_dict(ckpt['state_dict'])

    main(gpt_model, stoi, itos, args)
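
get_recent_ckpt() is used above but not shown; here is a plausible sketch that returns the newest checkpoint file in a directory (the *.pt glob pattern and the error handling are assumptions about the real helper).

# Sketch of the get_recent_ckpt helper used above.
import glob
import os


def get_recent_ckpt(ckpt_dir):
    paths = glob.glob(os.path.join(ckpt_dir, '*.pt'))
    if not paths:
        raise FileNotFoundError(f'No checkpoints found in {ckpt_dir}')
    return max(paths, key=os.path.getmtime)
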
Example no. 6
device = torch.cuda.current_device() if torch.cuda.is_available() else 'cpu'

# Keep the block size 128
# Why is the pretraining corpus always required (even if we're not pretraining?)
# It's because we're using it as a hack to always have the same vocabulary
# (that is, the same mapping from character to integer, and we build the
# vocab from the pretraining corpus.)
block_size = 128
text = open(args.pretrain_corpus_path).read()
pretrain_dataset = dataset.CharCorruptionDataset(text, block_size)

# We don't suggest you change these hyperparameters, as they're known to work.
# use them for both the vanilla and the synthesizer models
mconf = model.GPTConfig(pretrain_dataset.vocab_size,
                        pretrain_dataset.block_size,
                        n_layer=4,
                        n_head=8,
                        n_embd=256)
"""
Don't change above here; write your code below
"""

if args.variant == 'vanilla':
    # [part c]:
    mconf = model.GPTConfig(pretrain_dataset.vocab_size,
                            pretrain_dataset.block_size,
                            n_layer=4,
                            n_head=8,
                            n_embd=256,
                            variant="vanilla")
elif args.variant == 'synthesizer':