Example #1
        def __init__(self, params):
            self.trainer, self.early_stopping = None, None

            m = LM(params['emb_dim'],
                   params['hid_dim'],
                   d,
                   num_layers=params['num_layers'],
                   cell=params['cell'],
                   dropout=params['dropout'],
                   train_init=params['train_init'],
                   deepout_layers=params['deepout_layers'],
                   maxouts=params['maxouts'],
                   word_dropout=params['word_dropout'])
            u.initialize_model(m)

            optimizer = getattr(optim, args.optim)(m.parameters(), lr=args.lr)

            self.early_stopping = EarlyStopping(5, patience=3)

            def early_stop_hook(trainer, epoch, batch_num, num_checkpoints):
                valid_loss = trainer.validate_model()
                self.early_stopping.add_checkpoint(sum(valid_loss.pack()))

            trainer = Trainer(m, {
                "train": train,
                "test": test,
                "valid": valid
            },
                              optimizer,
                              max_norm=args.max_norm)
            trainer.add_hook(early_stop_hook, hooks_per_epoch=5)
            trainer.add_loggers(StdLogger())

            self.trainer = trainer
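
The hook above relies on seqmod's Trainer and EarlyStopping classes. As a rough, generic illustration of the patience pattern only (the class name, constructor and return value below are assumptions, not seqmod's implementation):

# Minimal, generic sketch of a patience-based early-stopping tracker.
# This is NOT seqmod's EarlyStopping; names and behaviour are assumptions.
class SimpleEarlyStopping:
    def __init__(self, patience):
        self.patience = patience
        self.best = float('inf')
        self.fails = 0

    def add_checkpoint(self, valid_loss):
        # Returns True once the loss has failed to improve `patience` times
        if valid_loss < self.best:
            self.best, self.fails = valid_loss, 0
        else:
            self.fails += 1
        return self.fails >= self.patience
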
Example #2
    def from_lm(cls, lm, embeddings=None, **kwargs):
        if embeddings is not None:
            if embeddings.weight.size(1) != lm.embeddings.weight.size(1):
                raise ValueError("Uncompatible embedding matrices")

            # Initialize all rows from the LM's unknown-word embedding if available,
            # otherwise to random values, so that OOV words get a sensible start
            vocab, unk = len(embeddings.d), lm.embeddings.d.get_unk()
            if unk is not None:
                embeddings.weight.data.copy_(
                    lm.embeddings.weight.data[unk].unsqueeze(0).expand(
                        vocab, embeddings.embedding_dim))
            else:
                import seqmod.utils as u
                u.initialize_model(embeddings)

            found, target = 0, {w: idx for idx, w in enumerate(lm.embeddings.d.vocab)}
            for idx, w in enumerate(embeddings.d.vocab):
                if w not in target:
                    continue
                found += 1
                embeddings.weight.data[idx].copy_(lm.embeddings.weight.data[target[w]])
            logging.warn("Initialized [%d/%d] embs from LM" % (found, vocab))

        else:
            logging.warn("Reusing LM embedding vocabulary. This vocabulary might not "
                         "correspond to the input data if it wasn't processed with "
                         "the same Dict")
            embeddings = lm.embeddings

        hid_dim, layers = lm.rnn.hidden_size, lm.rnn.num_layers
        cell, bidi = type(lm.rnn).__name__, kwargs.pop('bidi', False)
        if bidi:
            logging.warn('Cannot initialize bidirectional layers from sequential LM. '
                         'The bidirectional option will be ignored')
        inst = cls(embeddings, hid_dim, layers, cell, bidi=False, **kwargs)
        for param, weight in inst.rnn.named_parameters():
            weight.data.copy_(getattr(lm.rnn, param).data)

        return inst
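
The vocabulary-matching loop in from_lm can be illustrated in isolation with plain PyTorch; the vocabularies and dimensions below are made up for the sketch:

# Stand-alone sketch of the row-copy pattern used in from_lm above:
# copy embedding rows for words shared between two vocabularies.
import torch
import torch.nn as nn

src_vocab = ['the', 'cat', 'sat']          # pretrained model's vocabulary
tgt_vocab = ['the', 'dog', 'sat', 'down']  # new model's vocabulary

src_emb = nn.Embedding(len(src_vocab), 4)
tgt_emb = nn.Embedding(len(tgt_vocab), 4)

src_index = {w: i for i, w in enumerate(src_vocab)}
found = 0
with torch.no_grad():
    for i, w in enumerate(tgt_vocab):
        if w in src_index:
            tgt_emb.weight[i].copy_(src_emb.weight[src_index[w]])
            found += 1
print('Initialized [%d/%d] embeddings' % (found, len(tgt_vocab)))
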
Example #3
        reuse_hidden=args.att_type.lower() != 'none',
        dropout=args.dropout,
        input_feed=args.input_feed,
        word_dropout=args.word_dropout,
        deepout_layers=args.deepout_layers,
        tie_weights=args.tie_weights,
        reverse=args.reverse)

    # model.freeze_submodule('encoder')
    # model.encoder.register_backward_hook(u.log_grad)
    # model.decoder.register_backward_hook(u.log_grad)

    u.initialize_model(model,
                       rnn={
                           'type': 'orthogonal',
                           'args': {
                               'gain': 1.0
                           }
                       })

    optimizer = getattr(optim, args.optim)(model.parameters(), lr=args.lr)

    print(model)
    print()
    print('* number of parameters: {}'.format(model.n_params()))

    model.to(device=args.device)

    early_stopping = EarlyStopping(args.patience)
    trainer = Trainer(model, {
        'train': train,
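
A plain-PyTorch sketch of what the rnn={'type': 'orthogonal', ...} option amounts to (how seqmod's initialize_model dispatches internally is assumed, not shown):

# Orthogonal initialization of recurrent weight matrices, zero biases.
import torch.nn as nn
import torch.nn.init as init

rnn = nn.LSTM(input_size=64, hidden_size=128, num_layers=2)
for name, param in rnn.named_parameters():
    if 'weight' in name:
        init.orthogonal_(param, gain=1.0)   # (semi-)orthogonal weight matrices
    elif 'bias' in name:
        init.zeros_(param)
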
Example #4
    print("Building model...")
    model = SequenceVAE(args.emb_dim,
                        args.hid_dim,
                        args.z_dim,
                        train.d['src'],
                        num_layers=args.num_layers,
                        cell=args.cell,
                        dropout=args.dropout,
                        add_z=args.add_z,
                        word_dropout=args.word_dropout,
                        tie_weights=args.tie_weights,
                        project_init=args.project_init,
                        inflection_point=args.inflection_point)
    print(model)

    u.initialize_model(model)

    if args.load_embeddings:
        weight = load_embeddings(train.d['src'].vocab, args.flavor,
                                 args.suffix, '~/data/word_embeddings')
        model.init_embeddings(weight)

    if args.gpu:
        model.cuda()

    def on_lr_update(old_lr, new_lr):
        trainer.log("info", "Resetting lr [%g -> %g]" % (old_lr, new_lr))

    optimizer = Optimizer(
        model.parameters(),
        args.optim,
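
Several of these examples pass a word_dropout rate. A common reading, assumed here rather than taken from seqmod, is to replace input tokens with the unknown index with probability p at training time:

# Illustrative sketch of word dropout (replace tokens with <unk> with
# probability p during training); an assumption about the flag, not
# seqmod's actual code.
import torch

def word_dropout(inp, p, unk_index, training=True):
    if not training or p == 0.0:
        return inp
    mask = torch.rand_like(inp, dtype=torch.float) < p
    return inp.masked_fill(mask, unk_index)

tokens = torch.tensor([[2, 5, 7, 3], [4, 9, 1, 3]])
print(word_dropout(tokens, 0.2, unk_index=0))
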
Example #5
                data, d, args.batch_size, args.bptt, device=args.device,
            ).splits(test=args.test_split, dev=args.dev_split)

    print(' * vocabulary size. {}'.format(len(d)))
    print(' * number of train batches. {}'.format(len(train)))

    print('Building model...')
    m = LM(args.emb_dim, args.hid_dim, d, exposure_rate=args.schedule_init,
           num_layers=args.num_layers, cell=args.cell, dropout=args.dropout,
           att_dim=args.att_dim, tie_weights=args.tie_weights, mixtures=args.mixtures,
           deepout_layers=args.deepout_layers, train_init=args.train_init,
           deepout_act=args.deepout_act, maxouts=args.maxouts,
           sampled_softmax=args.sampled_softmax, word_dropout=args.word_dropout)

    u.initialize_model(
        m,
        rnn={'type': 'orthogonal_', 'args': {'gain': 1.0}},
        emb={'type': 'uniform_', 'args': {'a': -0.05, 'b': 0.05}})

    m.to(device=args.device)

    print(m)
    print('* number of parameters: {}'.format(m.n_params()))

    if args.optim == 'Adam':
        optimizer = getattr(optim, args.optim)(
            m.parameters(), lr=args.lr, betas=(0., 0.99), eps=1e-5)
    else:
        optimizer = getattr(optim, args.optim)(m.parameters(), lr=args.lr)

    # create trainer
    loss_type = 'bpc' if args.level == 'char' else 'ppl'
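
The loss_type switch picks bits-per-character for character-level models and perplexity otherwise; both follow from the average negative log-likelihood in nats (standard definitions, not seqmod-specific):

# Standard conversions from average NLL (nats per token) to perplexity
# and bits-per-character.
import math

def perplexity(nll_nats):
    return math.exp(nll_nats)

def bits_per_char(nll_nats):
    return nll_nats / math.log(2)

avg_nll = 1.2   # hypothetical validation loss in nats per token
print(perplexity(avg_nll), bits_per_char(avg_nll))
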
Example #6
                                     word_dropout=args.word_dropout,
                                     reuse_hidden=False,
                                     input_feed=False,
                                     att_type=None,
                                     cond_dims=cond_dims,
                                     cond_vocabs=cond_vocabs,
                                     add_init_jitter=True,
                                     train_init=args.train_init,
                                     reverse=args.reverse)

        losses, weights = ('ppl', ), None

    print(m)
    print('* number of params: ', sum(p.nelement() for p in m.parameters()))

    u.initialize_model(m)
    if args.init_embeddings:
        m.encoder.embeddings.init_embeddings_from_file(args.embeddings_path,
                                                       verbose=True)

    m.to(args.device)

    optimizer = getattr(optim, args.optimizer)(m.parameters(), lr=args.lr)
    # Decrease lr by a factor after each epoch
    scheduler = optim.lr_scheduler.StepLR(optimizer, args.lr_schedule_epochs,
                                          args.lr_schedule_factor)

    model_name = 'AE.GRL{}.C{}'.format(str(args.grl), str(conditional))
    trainer = Trainer(m, {
        'train': train,
        'valid': valid
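
The StepLR scheduler above shrinks the learning rate by a constant factor on a fixed epoch schedule; a minimal stand-alone sketch with placeholder parameters:

# Step the scheduler once per epoch; the model and training loop are
# placeholders, only the StepLR mechanics are shown.
import torch
import torch.optim as optim

params = [torch.nn.Parameter(torch.zeros(10))]
optimizer = optim.SGD(params, lr=0.1)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5)

for epoch in range(3):
    # ... run one training epoch ...
    optimizer.step()
    scheduler.step()                     # halve the lr after every epoch
    print(epoch, scheduler.get_last_lr())
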
Example #7
                    args.mode,
                    cell=args.cell,
                    hid_dim=args.hid_dim,
                    num_layers=args.num_layers,
                    summary=args.summary,
                    softmax=args.softmax,
                    dropout=args.dropout)

    print("Initializing parameters ...")
    utils.initialize_model(m,
                           rnn={
                               'type': 'rnn_orthogonal',
                               'args': {
                                   'forget_bias': True
                               }
                           },
                           emb={
                               'type': 'uniform_',
                               'args': {
                                   'a': -0.1,
                                   'b': 0.1
                               }
                           })

    if args.init_embeddings:
        embeddings.init_embeddings_from_file(args.embeddings_path,
                                             verbose=True)

    m.to(device=args.device)

    optimizer = getattr(optim, args.optim)(m.parameters(), lr=args.lr)
    losses = [{
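
The 'forget_bias': True option presumably sets LSTM forget-gate biases to 1, a common initialization trick; a plain-PyTorch sketch of that trick (whether seqmod's initializer does exactly this is an assumption):

# Set LSTM forget-gate biases to 1; PyTorch orders the gate chunks (i, f, g, o).
import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=32, hidden_size=64)
with torch.no_grad():
    for name, param in lstm.named_parameters():
        if 'bias' in name:
            hid = lstm.hidden_size
            param[hid:2 * hid].fill_(1.0)   # forget-gate slice
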
Example #8
def main():
    parser = argparse.ArgumentParser()
    # dataset
    parser.add_argument('--input', type=str, default='data')
    parser.add_argument('--min_item_freq', type=int, default=50)
    parser.add_argument('--max_vocab_size', type=int, default=20000)
    parser.add_argument('--min_len', default=1, type=int)
    parser.add_argument('--max_len', default=15, type=int)
    parser.add_argument('--dev', default=0.1, type=float)
    parser.add_argument('--rnd_seed', default=12345, type=int)
    parser.add_argument('--max_items', default=None, type=int)
    parser.add_argument('--task', default='sentences', type=str)
    parser.add_argument('--level', default='word', type=str)
    parser.add_argument('--focus_size', default=15, type=int)
    parser.add_argument('--left_size', default=15, type=int)
    parser.add_argument('--right_size', default=15, type=int)
    parser.add_argument('--shingle_stride', default=None, type=int)
    parser.add_argument('--allow_overlap', action='store_true', default=False)
    parser.add_argument('--shuffle', action='store_true')
    parser.add_argument('--tokenize', action='store_true', default=False)
    parser.add_argument('--grow', action='store_true')
    parser.add_argument('--grow_n_epochs', default=1, type=int)

    # training
    parser.add_argument('--epochs', default=5, type=int)
    parser.add_argument('--gpu', action='store_true')
    parser.add_argument('--batch_size', default=30, type=int)
    parser.add_argument('--optim', default='Adam', type=str)
    parser.add_argument('--lr', default=0.0003, type=float)
    parser.add_argument('--max_norm', default=10., type=float)
    parser.add_argument('--dropout', default=0.25, type=float)
    parser.add_argument('--word_dropout', default=0.0, type=float)
    parser.add_argument('--use_schedule', action='store_true')
    parser.add_argument('--patience', default=10, type=int)
    parser.add_argument('--reverse', action='store_true')
    parser.add_argument('--batches_for_checkpoint', default=50, type=int)
    parser.add_argument('--checkpoints_for_hooks', default=1, type=int)
    parser.add_argument('--target', default='Ze was', type=str)
    parser.add_argument('--bidi', action='store_true')
    parser.add_argument('--beam', action='store_true')
    parser.add_argument('--plot', action='store_true')
    parser.add_argument('--json', type=str, default='history.json')

    # model
    parser.add_argument('--model_path', default='./model_storage', type=str)
    parser.add_argument('--num_layers', default=1, type=int)
    parser.add_argument('--emb_dim', default=64, type=int)
    parser.add_argument('--hid_dim', default=150, type=int)
    parser.add_argument('--cell', default='GRU')
    parser.add_argument('--train_init', action='store_true')
    parser.add_argument('--add_init_jitter', action='store_true')
    parser.add_argument('--encoder-summary', default='inner-attention')
    parser.add_argument('--deepout_layers', type=int, default=0)
    parser.add_argument('--att_type', type=str, default='none')

    args = parser.parse_args()

    if args.task == 'sentences' and args.level == 'word':
        args.target = [t.lower() for t in word_tokenize(args.target)]
    elif args.task == 'sentences' and args.level == 'char':
        args.target = tuple(args.target.lower())

    train, valid, vocab_dict = uz.shingle_dataset(args, vocab_dict=None)

    print(f' * vocabulary size {len(vocab_dict)}')
    print(f' * number of train batches {len(train)}')
    print(f' * number of dev batches {len(valid)}')
    print(f' * maximum batch size {args.batch_size}')

    model = make_skipthoughts_model(num_layers=args.num_layers,
                                    emb_dim=args.emb_dim,
                                    hid_dim=args.hid_dim,
                                    src_dict=vocab_dict,
                                    cell=args.cell,
                                    bidi=args.bidi,
                                    encoder_summary=args.encoder_summary,
                                    att_type=args.att_type,
                                    task=args.task,
                                    tie_weights=False)

    u.initialize_model(model,
                       rnn={
                           'type': 'orthogonal',
                           'args': {
                               'gain': 1.0
                           }
                       })

    optimizer = getattr(optim, args.optim)(model.parameters(),
                                           lr=args.lr)  #, amsgrad=True)

    print(model)
    print('* number of parameters: {}'.format(model.n_params()))

    if args.gpu:
        model.cuda()

    early_stopping = EarlyStopping(patience=args.patience, maxsize=1)
    trainer = SkipthoughtsTrainer(model, {
        'train': train,
        'valid': valid
    },
                                  optimizer,
                                  early_stopping=early_stopping,
                                  max_norm=args.max_norm)

    if args.json:
        logger = JsonLogger(json_file=args.json)
    else:
        logger = StdLogger()

    trainer.add_loggers(logger)

    trainer.set_additional_params(args, vocab_dict)

    hook = make_translation_hook(args.target,
                                 args.gpu,
                                 beam=args.beam,
                                 max_len=args.right_size)
    trainer.add_hook(hook, num_checkpoints=args.checkpoints_for_hooks)

    #hook = u.make_schedule_hook(
    #    inflection_sigmoid(len(train) * 2, 1.75, inverse=True))
    #trainer.add_hook(hook, num_checkpoints=args.checkpoints_for_hooks)

    (best_model,
     valid_loss), test_loss = trainer.train(args.epochs,
                                            args.batches_for_checkpoint,
                                            shuffle=True,
                                            use_schedule=args.use_schedule)

    u.save_checkpoint(args.model_path,
                      best_model,
                      vars(args),
                      d=vocab_dict,
                      ppl=valid_loss,
                      suffix='final')