Example 1
    #         learning_rate=6e-4
    #         lr_decay=True
    #         warmup_tokens=512*20
    #         final_tokens=200*len(pretrain_dataset)*block_size
    #         num_workers=4
    #     Hyperparameters for finetuning WITH a pretrained model:
    #         max_epochs=10
    #         batch_size=256
    #         learning_rate=6e-4
    #         lr_decay=True
    #         warmup_tokens=512*20
    #         final_tokens=200*len(pretrain_dataset)*block_size
    #         num_workers=4
    with open(args.finetune_corpus_path, 'r') as infile:
        data = infile.read()  # don't worry we won't run out of file handles
    train_dataset = dataset.NameDataset(pretrain_dataset, data)
    if args.reading_params_path:
        max_epochs = 10
        model.load_state_dict(torch.load(args.reading_params_path))
    else:
        max_epochs = 75

    tconf = dict(batch_size=256,
                 learning_rate=6e-4,
                 lr_decay=True,
                 max_epochs=max_epochs,
                 warmup_tokens=512 * 20,
                 final_tokens=200 * len(pretrain_dataset) * block_size,
                 num_workers=4,
                 ckpt_path=args.writing_params_path)
    tconf = trainer.TrainerConfig(**tconf)
    trainer = trainer.Trainer(model, train_dataset, None, tconf)
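
This excerpt stops right after the Trainer is constructed. A minimal sketch of the remaining steps, matching the train-and-save pattern the later examples use (note that on the last line above the name trainer now refers to the Trainer instance rather than the imported trainer module):

    trainer.train()
    torch.save(model.state_dict(), args.writing_params_path)

Since ckpt_path is already set to args.writing_params_path, the Trainer may also write checkpoints there during training, depending on its implementation; the explicit torch.save simply mirrors the other examples.
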
Example 2
    if args.reading_params_path:  # contains a pretrained model
        
        model.load_state_dict(torch.load(args.reading_params_path))
        model = model.to(device)

        tconf = trainer.TrainerConfig(max_epochs=10, batch_size=256,
                            learning_rate=6e-4, lr_decay=True, warmup_tokens=512*20,
                            final_tokens=200*len(pretrain_dataset)*block_size,num_workers=4)

    else:
        # fine-tune without pre-training
        tconf = trainer.TrainerConfig(max_epochs=75, batch_size=256,
                            learning_rate=6e-4, lr_decay=True, warmup_tokens=512*20,
                            final_tokens=200*len(pretrain_dataset)*block_size, num_workers=4)
            
    train_dataset = dataset.NameDataset(pretrain_dataset, open(args.finetune_corpus_path,'r').read())
    ft_trainer = trainer.Trainer(model, train_dataset,None,tconf)
    ft_trainer.train()
    torch.save(model.state_dict(),args.writing_params_path)


elif args.function == 'evaluate':
    assert args.outputs_path is not None
    assert args.reading_params_path is not None
    assert args.eval_corpus_path is not None
    model.load_state_dict(torch.load(args.reading_params_path))
    model = model.to(device)
    correct = 0
    total = 0
    with open(args.outputs_path, 'w') as fout:
        predictions = []
Example 3
def main():
    """
    Don't change above here; write your code below
    """

    if args.variant == 'vanilla':
        model = GPT(mconf)  # TODO [part c]: Make some model here
    elif args.variant == 'synthesizer':
        # TODO [part g]: Make some other model here
        mconf.synthesizer = True
        model = GPT(mconf)

    # From here on, your code should be identical independent of which
    # variant (vanilla or synthesizer) has been chosen.

    if args.function == 'pretrain':
        assert args.pretrain_corpus_path is not None
        assert args.writing_params_path is not None
        # TODO [part f]:
        # - Given:
        #     1. A corpus specified in args.pretrain_corpus_path
        #     2. An output path args.writing_params_path for the model parameters
        # - Goals:
        #     1. Pretrain the model on this corpus
        #     2. Save the resulting model in args.writing_params_path
        # - Make sure to use the following hyperparameters for pretraining:
        #     max_epochs=650
        #     batch_size=128
        #     learning_rate=6e-3
        #     lr_decay=True
        #     warmup_tokens=512*20
        #     final_tokens=200*len(pretrain_dataset)*block_size
        #     num_workers=4
        # pretrain_text = open(args.pretrain_corpus_path, 'r', encoding='utf-8').read()
        # pretrain_dataset =
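        # A sketch of the dataset construction hinted at by the commented-out
        # lines above. It assumes the assignment's dataset.CharCorruptionDataset
        # and its (text, block_size) constructor; in the starter code
        # pretrain_dataset is usually built earlier in the file, so this is
        # illustrative only:
        #     pretrain_text = open(args.pretrain_corpus_path, 'r', encoding='utf-8').read()
        #     pretrain_dataset = dataset.CharCorruptionDataset(pretrain_text, block_size)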
        tconf = TrainerConfig(max_epochs=650,
                              batch_size=128,
                              learning_rate=6e-3,
                              lr_decay=True,
                              warmup_tokens=512 * 20,
                              final_tokens=200 * len(pretrain_dataset) *
                              block_size,
                              num_workers=4)
        trainer = Trainer(model, pretrain_dataset, None, tconf)
        trainer.train()
        torch.save(model.state_dict(), args.writing_params_path)

    elif args.function == 'finetune':
        assert args.writing_params_path is not None
        assert args.finetune_corpus_path is not None
        # TODO [part c] [part f]:
        # - Given:
        #     1. A finetuning corpus specified in args.finetune_corpus_path
        #     2. A path args.reading_params_path containing pretrained model
        #         parameters, or None if finetuning without a pretrained model
        #     3. An output path args.writing_params_path for the model parameters
        # - Goals:
        #     1. If args.reading_params_path is specified, load these parameters
        #         into the model
        #     2. Finetune the model on this corpus
        #     3. Save the resulting model in args.writing_params_path
        # - Make sure to use the following hyperparameters:
        #     Hyperparameters for finetuning WITHOUT a pretrained model:
        #         max_epochs=75
        #         batch_size=256
        #         learning_rate=6e-4
        #         lr_decay=True
        #         warmup_tokens=512*20
        #         final_tokens=200*len(pretrain_dataset)*block_size
        #         num_workers=4
        #     Hyperparameters for finetuning WITH a pretrained model:
        #         max_epochs=10
        #         batch_size=256
        #         learning_rate=6e-4
        #         lr_decay=True
        #         warmup_tokens=512*20
        #         final_tokens=200*len(pretrain_dataset)*block_size
        #         num_workers=4
        if args.reading_params_path is not None:
            model.load_state_dict(torch.load(args.reading_params_path))
        # max_epochs=10 when finetuning WITH a pretrained model, 75 WITHOUT one
        max_epochs = 10 if args.reading_params_path is not None else 75
        tconf = TrainerConfig(max_epochs=max_epochs,
                              batch_size=256,
                              learning_rate=6e-4,
                              lr_decay=True,
                              warmup_tokens=512 * 20,
                              final_tokens=200 * len(pretrain_dataset) *
                              block_size,
                              num_workers=4)
        text = open(args.finetune_corpus_path, 'r').read()
        train_dataset = dataset.NameDataset(pretrain_dataset, text)
        trainer = Trainer(model, train_dataset, None, tconf)
        trainer.train()
        # save to args.writing_params_path
        torch.save(model.state_dict(), args.writing_params_path)

    elif args.function == 'evaluate':
        assert args.outputs_path is not None
        assert args.reading_params_path is not None
        assert args.eval_corpus_path is not None
        model.load_state_dict(torch.load(args.reading_params_path))
        model = model.to(device)
        correct = 0
        total = 0
        with open(args.outputs_path, 'w') as fout:
            predictions = []
            for line in tqdm(open(args.eval_corpus_path)):
                x = line.split('\t')[0]
                x = x + '⁇'
                x = torch.tensor([pretrain_dataset.stoi[s] for s in x],
                                 dtype=torch.long)[None, ...].to(device)
                pred = utils.sample(model, x, 32, sample=False)[0]
                completion = ''.join(
                    [pretrain_dataset.itos[int(i)] for i in pred])
                pred = completion.split('⁇')[1]
                predictions.append(pred)
                fout.write(pred + '\n')
            total, correct = utils.evaluate_places(args.eval_corpus_path,
                                                   predictions)
        if total > 0:
            print('Correct: {} out of {}: {}%'.format(correct, total,
                                                      correct / total * 100))
        else:
            print('Predictions written to {}; no targets provided'.format(
                args.outputs_path))
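
The completion.split('⁇')[1] step in the evaluate branch relies on the prompt format the dataset uses: the model is prompted with the person's name followed by a mask character, and the decoded string is expected to contain the predicted birth place between the first and second mask characters. A minimal, self-contained sketch of that slicing with a hypothetical decoded string (the trailing '□' padding characters are an assumption about the dataset's pad symbol):

    # Hypothetical decoded completion; only the split logic is the point here.
    completion = 'Albert Einstein⁇Ulm⁇□□□□'
    pred = completion.split('⁇')[1]  # text between the first two mask characters
    print(pred)  # -> 'Ulm'
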
Example 4
    #         batch_size=256
    #         learning_rate=6e-4
    #         lr_decay=True
    #         warmup_tokens=512*20
    #         final_tokens=200*len(pretrain_dataset)*block_size
    #         num_workers=4
    #     Hyperparameters for finetuning WITH a pretrained model:
    #         max_epochs=10
    #         batch_size=256
    #         learning_rate=6e-4
    #         lr_decay=True
    #         warmup_tokens=512*20
    #         final_tokens=200*len(pretrain_dataset)*block_size
    #         num_workers=4
    finetuneCorpus = open(args.finetune_corpus_path).read()
    finetuneDataset = dataset.NameDataset(pretrain_dataset, finetuneCorpus)

    # Load any pretrained weights first so the epoch budget can be chosen accordingly.
    if args.reading_params_path is not None:
        model.load_state_dict(torch.load(args.reading_params_path))

    # max_epochs=10 when finetuning WITH a pretrained model, 75 WITHOUT one
    tconf = trainer.TrainerConfig(max_epochs=10 if args.reading_params_path is not None else 75,
                                  batch_size=256,
                                  learning_rate=6e-4,
                                  lr_decay=True,
                                  warmup_tokens=512 * 20,
                                  final_tokens=200 * len(pretrain_dataset) *
                                  block_size,
                                  num_workers=4,
                                  ckpt_path=args.writing_params_path)

    trainer = trainer.Trainer(model, finetuneDataset, None, tconf)
Example 5
    assert args.writing_params_path is not None
    assert args.finetune_corpus_path is not None
    # - Given:
    #     1. A finetuning corpus specified in args.finetune_corpus_path
    #     2. A path args.reading_params_path containing pretrained model
    #         parameters, or None if finetuning without a pretrained model
    #     3. An output path args.writing_params_path for the model parameters
    # - Goals:
    #     1. If args.reading_params_path is specified, load these parameters
    #         into the model
    #     2. Finetune the model on this corpus
    #     3. Save the resulting model in args.writing_params_path

    # Load the finetuning corpus and create the finetuning dataset
    finetune = open(args.finetune_corpus_path, 'r').read()
    finetune_dataset = dataset.NameDataset(pretrain_dataset, finetune)

    # If provided, load the pretrained parameters into the model before finetuning.
    if args.reading_params_path:
        model.load_state_dict(torch.load(args.reading_params_path))
        # Hyperparameters for finetuning WITH a pretrained model:
        tconf = trainer.TrainerConfig(max_epochs=10,
                                      batch_size=256,
                                      learning_rate=6e-4,
                                      lr_decay=True,
                                      warmup_tokens=512 * 20,
                                      final_tokens=200 *
                                      len(pretrain_dataset) * block_size,
                                      num_workers=4,
                                      ckpt_path=args.writing_params_path)
Example 6
                                      warmup_tokens=512 * 20,
                                      final_tokens=200 *
                                      len(pretrain_dataset) * block_size,
                                      num_workers=4)
    # without a pretrained model
    else:
        tconf = trainer.TrainerConfig(max_epochs=75,
                                      batch_size=256,
                                      learning_rate=6e-4,
                                      lr_decay=True,
                                      warmup_tokens=512 * 20,
                                      final_tokens=200 *
                                      len(pretrain_dataset) * block_size,
                                      num_workers=4)
    finetune_corpus = open(args.finetune_corpus_path, encoding="utf8").read()
    finetune_dat = dataset.NameDataset(pretrain_dataset, finetune_corpus)
    trainer = trainer.Trainer(model, finetune_dat, None, tconf)
    trainer.train()
    # Save the resulting model in args.writing_params_path
    torch.save(model.state_dict(), args.writing_params_path)

elif args.function == 'evaluate':
    assert args.outputs_path is not None
    assert args.reading_params_path is not None
    assert args.eval_corpus_path is not None
    model.load_state_dict(torch.load(args.reading_params_path))
    model = model.to(device)
    correct = 0
    total = 0
    with open(args.outputs_path, 'w') as fout:
        predictions = []
Example 7
            final_tokens=200*len(pretrain_dataset)*block_size,
            num_workers=4
        )

    else:
        tconf = trainer.TrainerConfig(
            max_epochs=75,
            batch_size=256,
            learning_rate=6e-4,
            lr_decay=True,
            warmup_tokens=512*20,
            final_tokens=200*len(pretrain_dataset)*block_size,
            num_workers=4
        )
    finetune_text = open(args.finetune_corpus_path).read()
    finetune_name = dataset.NameDataset(pretrain_dataset, finetune_text)
    
    modelTrain = trainer.Trainer(model, finetune_name, None, tconf)
    modelTrain.train()
    torch.save(model.state_dict(), args.writing_params_path)
    
elif args.function == 'evaluate':
    assert args.outputs_path is not None
    assert args.reading_params_path is not None
    assert args.eval_corpus_path is not None
    model.load_state_dict(torch.load(args.reading_params_path))
    correct = 0
    total = 0
    model.to(device)
    with open(args.outputs_path, 'w') as fout:
        predictions = []
Example 8
    #         lr_decay=True
    #         warmup_tokens=512*20
    #         final_tokens=200*len(pretrain_dataset)*block_size
    #         num_workers=4

    if args.reading_params_path:
        # strict=False silently skips missing/unexpected keys; the default strict loading
        # would catch accidental mismatches between the saved and current architectures.
        model.load_state_dict(torch.load(args.reading_params_path), strict=False)
        tconf = trainer.TrainerConfig(max_epochs=10, batch_size=256, learning_rate=6e-4,
                      lr_decay=True, warmup_tokens=512*20, final_tokens=200*len(pretrain_dataset)*block_size,
                      num_workers=4)
    else:
        tconf = trainer.TrainerConfig(max_epochs=75, batch_size=256, learning_rate=6e-4,
                      lr_decay=True, warmup_tokens=512*20, final_tokens=200*len(pretrain_dataset)*block_size,
                      num_workers=4)

    finetune_dataset = dataset.NameDataset(pretrain_dataset, open(args.finetune_corpus_path, encoding="utf8").read())
    t = trainer.Trainer(model, finetune_dataset, None, tconf)
    t.train()

    torch.save(model.state_dict(), args.writing_params_path)
elif args.function == 'evaluate':
    assert args.outputs_path is not None
    assert args.reading_params_path is not None
    assert args.eval_corpus_path is not None
    model.load_state_dict(torch.load(args.reading_params_path))
    model = model.to(device)
    correct = 0
    total = 0
    with open(args.outputs_path, 'w') as fout:
        predictions = []
        for line in tqdm(open(args.eval_corpus_path)):
            x = line.split('\t')[0]
Example 9
    #         lr_decay=True
    #         warmup_tokens=512*20
    #         final_tokens=200*len(pretrain_dataset)*block_size
    #         num_workers=4
    #     Hyperparameters for finetuning WITH a pretrained model:
    #         max_epochs=10
    #         batch_size=256
    #         learning_rate=6e-4
    #         lr_decay=True
    #         warmup_tokens=512*20
    #         final_tokens=200*len(pretrain_dataset)*block_size
    #         num_workers=4
    # start code
    tconf = None
    ft_text = open(args.finetune_corpus_path).read()
    train_dataset = dataset.NameDataset(pretrain_dataset, ft_text)

    if args.reading_params_path is not None:
        model.load_state_dict(
            torch.load(args.reading_params_path,
                       map_location=torch.device(device)))
        tconf = trainer.TrainerConfig(max_epochs=10,
                                      batch_size=256,
                                      learning_rate=6e-4,
                                      lr_decay=True,
                                      warmup_tokens=512 * 20,
                                      final_tokens=200 *
                                      len(pretrain_dataset) * block_size,
                                      num_workers=4)
    else:
        tconf = trainer.TrainerConfig(max_epochs=75,