Example 1
    class create_runner(object):
        def __init__(self, params):
            self.trainer, self.early_stopping = None, None

            m = LM(params['emb_dim'],
                   params['hid_dim'],
                   d,
                   num_layers=params['num_layers'],
                   cell=params['cell'],
                   dropout=params['dropout'],
                   train_init=params['train_init'],
                   deepout_layers=params['deepout_layers'],
                   maxouts=params['maxouts'],
                   word_dropout=params['word_dropout'])
            u.initialize_model(m)

            optimizer = getattr(optim, args.optim)(m.parameters(), lr=args.lr)

            self.early_stopping = EarlyStopping(5, patience=3)

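            # checkpoint hook: feed the summed validation loss to EarlyStopping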
            def early_stop_hook(trainer, epoch, batch_num, num_checkpoints):
                valid_loss = trainer.validate_model()
                self.early_stopping.add_checkpoint(sum(valid_loss.pack()))

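            # assemble the Trainer with the datasets and optimizer, register
            # the early-stopping hook and log progress to stdout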
            trainer = Trainer(m, {
                "train": train,
                "test": test,
                "valid": valid
            },
                              optimizer,
                              max_norm=args.max_norm)
            trainer.add_hook(early_stop_hook, hooks_per_epoch=5)
            trainer.add_loggers(StdLogger())

            self.trainer = trainer

        def __call__(self, n_iters):
            # max run will be 5 epochs
            batches = len(self.trainer.datasets['train'])
            batches = int((batches / args.max_iter) * args.max_epochs)

            if args.gpu:
                self.trainer.model.cuda()
            (_, loss), _ = self.trainer.train_batches(batches * n_iters, 10)
            self.trainer.model.cpu()

            return {'loss': loss, 'early_stop': self.early_stopping.stopped}
Example 2
    u.initialize_model(model,
                       rnn={
                           'type': 'orthogonal',
                           'args': {
                               'gain': 1.0
                           }
                       })

    optimizer = getattr(optim, args.optim)(model.parameters(), lr=args.lr)

    print(model)
    print()
    print('* number of parameters: {}'.format(model.n_params()))

    model.to(device=args.device)

    early_stopping = EarlyStopping(args.patience)
    trainer = Trainer(model, {
        'train': train,
        'valid': valid
    },
                      optimizer,
                      losses=('ppl', ),
                      early_stopping=early_stopping,
                      max_norm=args.max_norm,
                      checkpoint=Checkpoint('EncoderDecoder',
                                            mode='nlast',
                                            keep=3).setup(args))
    trainer.add_loggers(StdLogger())
    # trainer.add_loggers(VisdomLogger(env='encdec'))
    trainer.add_loggers(TensorboardLogger(comment='encdec'))
Example 3
    print(m)
    print('* number of parameters: {}'.format(m.n_params()))

    optimizer = getattr(optim, args.optim)(
        m.parameters(), lr=args.lr, betas=(0., 0.99), eps=1e-5)

    # create trainer
    trainer = Trainer(
        m, {"train": train, "test": test, "valid": valid}, optimizer,
        max_norm=args.max_norm)

    # hooks
    # - general hook
    early_stopping = None
    if args.patience > 0:
        early_stopping = EarlyStopping(10, patience=args.patience)
    model_hook = u.make_lm_hook(
        d, temperature=args.temperature, max_seq_len=args.max_seq_len,
        gpu=args.gpu, level=args.level, early_stopping=early_stopping)
    trainer.add_hook(model_hook, hooks_per_epoch=args.hooks_per_epoch)
    # - scheduled sampling hook
    if args.use_schedule:
        schedule = inflection_sigmoid(
            len(train) * args.schedule_inflection, args.schedule_steepness,
            a=args.schedule_init, inverse=True)
        trainer.add_hook(
            u.make_schedule_hook(schedule, verbose=True), hooks_per_epoch=10e4)
    # - lr schedule hook
    hook = make_lr_hook(
        optimizer, args.lr_schedule_factor, args.lr_schedule_checkpoints)
    # run the hook every args.checkpoint * 4 batches
Example 4
    # kl annealing
    kl_schedule = kl_sigmoid_annealing_schedule(inflection=args.inflection)

    class VAETrainer(Trainer):
        def on_batch_end(self, epoch, batch, loss):
            # update the kl weight according to the annealing schedule
            total_batches = len(self.datasets['train'])
            self.model.encoder.kl_weight = kl_schedule(
                batch + total_batches * epoch)

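    # report the reconstruction and KL terms as separate losses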
    losses = [{'loss': 'rec'},
              {'loss': 'kl', 'format': lambda loss: loss}]

    trainer = VAETrainer(
        model, {'train': train, 'valid': valid, 'test': test}, optimizer,
        losses=losses, early_stopping=EarlyStopping(5, patience=args.patience),
        max_norm=args.max_norm, scheduler=scheduler)

    # hooks
    trainer.add_hook(make_generate_hook(args.level),
                     hooks_per_epoch=args.hooks_per_epoch)
    trainer.add_hook(kl_weight_hook, hooks_per_epoch=100)
    if args.use_schedule:
        hook = u.make_schedule_hook(
            inflection_sigmoid(len(train) * 2, 1.75, inverse=True))
        trainer.add_hook(hook, hooks_per_epoch=1000)

    trainer.add_loggers(
        StdLogger(),
        # VisdomLogger(env='vae', losses=('rec', 'kl'), max_y=600)
    )
Example 5
               deepout_layers=args.deepout_layers,
               deepout_act=args.deepout_act,
               word_dropout=args.word_dropout,
               target_code=lang_d.get_unk(),
               conds=conds)
        u.initialize_model(m)

    print(m)
    print(' * number of parameters: {}'.format(m.n_params()))

    if args.gpu:
        m.cuda()

    optimizer = getattr(optim, args.optim)(m.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, 1, 0.5)
    early_stopping = EarlyStopping(max(args.patience, 10),
                                   patience=args.patience)

    # hook
    check_hook = u.make_clm_hook(d,
                                 max_seq_len=args.max_seq_len,
                                 gpu=args.gpu,
                                 sampled_conds=5,
                                 method=args.decoding_method,
                                 temperature=args.temperature)
    # logger
    std_logger = StdLogger()
    # trainer
    trainer = Trainer(m, {
        'train': train,
        'valid': valid,
        'test': test
Example 6
def main():
    parser = argparse.ArgumentParser()
    # dataset
    parser.add_argument('--input', type=str, default='data')
    parser.add_argument('--min_item_freq', type=int, default=50)
    parser.add_argument('--max_vocab_size', type=int, default=20000)
    parser.add_argument('--min_len', default=1, type=int)
    parser.add_argument('--max_len', default=15, type=int)
    parser.add_argument('--dev', default=0.1, type=float)
    parser.add_argument('--rnd_seed', default=12345, type=int)
    parser.add_argument('--max_items', default=None, type=int)
    parser.add_argument('--task', default='sentences', type=str)
    parser.add_argument('--level', default='word', type=str)
    parser.add_argument('--focus_size', default=15, type=int)
    parser.add_argument('--left_size', default=15, type=int)
    parser.add_argument('--right_size', default=15, type=int)
    parser.add_argument('--shingle_stride', default=None, type=int)
    parser.add_argument('--allow_overlap', action='store_true', default=False)
    parser.add_argument('--shuffle', action='store_true')
    parser.add_argument('--tokenize', action='store_true', default=False)
    parser.add_argument('--grow', action='store_true')
    parser.add_argument('--grow_n_epochs', default=1, type=int)

    # training
    parser.add_argument('--epochs', default=5, type=int)
    parser.add_argument('--gpu', action='store_true')
    parser.add_argument('--batch_size', default=30, type=int)
    parser.add_argument('--optim', default='Adam', type=str)
    parser.add_argument('--lr', default=0.0003, type=float)
    parser.add_argument('--max_norm', default=10., type=float)
    parser.add_argument('--dropout', default=0.25, type=float)
    parser.add_argument('--word_dropout', default=0.0, type=float)
    parser.add_argument('--use_schedule', action='store_true')
    parser.add_argument('--patience', default=10, type=int)
    parser.add_argument('--reverse', action='store_true')
    parser.add_argument('--batches_for_checkpoint', default=50, type=int)
    parser.add_argument('--checkpoints_for_hooks', default=1, type=int)
    parser.add_argument('--target', default='Ze was', type=str)
    parser.add_argument('--bidi', action='store_true')
    parser.add_argument('--beam', action='store_true')
    parser.add_argument('--plot', action='store_true')
    parser.add_argument('--json', type=str, default='history.json')

    # model
    parser.add_argument('--model_path', default='./model_storage', type=str)
    parser.add_argument('--num_layers', default=1, type=int)
    parser.add_argument('--emb_dim', default=64, type=int)
    parser.add_argument('--hid_dim', default=150, type=int)
    parser.add_argument('--cell', default='GRU')
    parser.add_argument('--train_init', action='store_true')
    parser.add_argument('--add_init_jitter', action='store_true')
    parser.add_argument('--encoder-summary', default='inner-attention')
    parser.add_argument('--deepout_layers', type=int, default=0)
    parser.add_argument('--att_type', type=str, default='none')

    args = parser.parse_args()

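    # lowercase and split the target string according to the chosen level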
    if args.task == 'sentences' and args.level == 'word':
        args.target = [t.lower() for t in word_tokenize(args.target)]
    elif args.task == 'sentences' and args.level == 'char':
        args.target = tuple(args.target.lower())

    train, valid, vocab_dict = uz.shingle_dataset(args, vocab_dict=None)

    print(f' * vocabulary size {len(vocab_dict)}')
    print(f' * number of train batches {len(train)}')
    print(f' * number of dev batches {len(valid)}')
    print(f' * maximum batch size {args.batch_size}')

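    # build the skip-thoughts model on top of the extracted vocabulary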
    model = make_skipthoughts_model(num_layers=args.num_layers,
                                    emb_dim=args.emb_dim,
                                    hid_dim=args.hid_dim,
                                    src_dict=vocab_dict,
                                    cell=args.cell,
                                    bidi=args.bidi,
                                    encoder_summary=args.encoder_summary,
                                    att_type=args.att_type,
                                    task=args.task,
                                    tie_weights=False)

    u.initialize_model(model,
                       rnn={
                           'type': 'orthogonal',
                           'args': {
                               'gain': 1.0
                           }
                       })

    optimizer = getattr(optim, args.optim)(model.parameters(),
                                           lr=args.lr)  #, amsgrad=True)

    print(model)
    print('* number of parameters: {}'.format(model.n_params()))

    if args.gpu:
        model.cuda()

    early_stopping = EarlyStopping(patience=args.patience, maxsize=1)
    trainer = SkipthoughtsTrainer(model, {
        'train': train,
        'valid': valid
    },
                                  optimizer,
                                  early_stopping=early_stopping,
                                  max_norm=args.max_norm)

    if args.json:
        logger = JsonLogger(json_file=args.json)
    else:
        logger = StdLogger()

    trainer.add_loggers(logger)

    trainer.set_additional_params(args, vocab_dict)

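    # hook that periodically decodes args.target to monitor training progress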
    hook = make_translation_hook(args.target,
                                 args.gpu,
                                 beam=args.beam,
                                 max_len=args.right_size)
    trainer.add_hook(hook, num_checkpoints=args.checkpoints_for_hooks)

    #hook = u.make_schedule_hook(
    #    inflection_sigmoid(len(train) * 2, 1.75, inverse=True))
    #trainer.add_hook(hook, num_checkpoints=args.checkpoints_for_hooks)

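    # train; returns the best model with its validation loss, plus the test loss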
    (best_model,
     valid_loss), test_loss = trainer.train(args.epochs,
                                            args.batches_for_checkpoint,
                                            shuffle=True,
                                            use_schedule=args.use_schedule)

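    # persist the best model along with the run configuration and vocabulary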
    u.save_checkpoint(args.model_path,
                      best_model,
                      vars(args),
                      d=vocab_dict,
                      ppl=valid_loss,
                      suffix='final')