Example #1
def fork_target(self, **init_opts):
    """Copy the model, freeze its source side, and reinitialize its target side."""
    import copy
    model = copy.deepcopy(self)
    # keep the trained source embeddings and encoder fixed
    model.freeze_submodule('src_embeddings')
    model.freeze_submodule('encoder')
    # reinitialize decoder and output projection for the new target task
    model.decoder.apply(u.make_initializer(**init_opts))
    model.project.apply(u.make_initializer(**init_opts))
    return model
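The freeze_submodule helper called above is not part of this listing. As a point of reference, here is a minimal sketch of what such a method typically does, assuming submodules are plain attributes of an nn.Module; only the method name and call sites come from the example, the body is an assumption:

    import torch.nn as nn

    class Seq2SeqBase(nn.Module):
        def freeze_submodule(self, name):
            # Exclude all parameters of the named submodule
            # (e.g. 'encoder', 'src_embeddings') from gradient updates.
            for param in getattr(self, name).parameters():
                param.requires_grad = False

Parameters with requires_grad=False receive no gradients, so only the reinitialized decoder and projection are trained in the forked model.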
Example #3
    model = EncoderDecoder((args.layers, args.dec_layers),
                           args.emb_dim,
                           args.hid_dim,
                           args.att_dim,
                           src_dict,
                           att_type=args.att_type,
                           dropout=args.dropout,
                           word_dropout=args.word_dropout,
                           bidi=not args.non_bidi,
                           cell=args.cell,
                           maxout=args.maxout,
                           tie_weights=args.tie_weights,
                           init_hidden=args.init_hidden)

    # model.freeze_submodule('encoder')
    # model.encoder.register_backward_hook(u.log_grad)
    # model.decoder.register_backward_hook(u.log_grad)

    model.apply(
        u.make_initializer(rnn={
            'type': 'orthogonal',
            'args': {
                'gain': 1.0
            }
        }))

    optimizer = Optimizer(model.parameters(),
                          args.optim,
                          args.learning_rate,
                          args.max_grad_norm,
                          lr_decay=args.learning_rate_decay,
                          start_decay_at=args.start_decay_at)

    criterion = make_criterion(len(src_dict), src_dict.get_pad())

    print('* number of parameters: %d' % model.n_params())
    print(model)
    print(' * maximum batch size. %d' % batch_size)
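u.make_initializer is used throughout these examples but never shown. Below is a minimal sketch of an initializer factory compatible with nn.Module.apply, restricted to the rnn={'type': 'orthogonal', ...} case seen above; the real helper in these projects supports more parameter groups, so this body is an assumption:

    import torch.nn as nn

    def make_initializer(rnn=None):
        rnn = rnn or {'type': 'orthogonal', 'args': {'gain': 1.0}}

        def initialize(m):
            # Orthogonal init for recurrent weight matrices, zeros for biases.
            if isinstance(m, (nn.RNN, nn.GRU, nn.LSTM)):
                for name, param in m.named_parameters():
                    if 'weight' in name:
                        getattr(nn.init, rnn['type'] + '_')(param, **rnn['args'])
                    elif 'bias' in name:
                        nn.init.constant_(param, 0.0)

        return initialize

The no-argument calls (u.make_initializer()) in the later examples would then fall back on the default scheme.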

Example #4

    print('Building model...')

    model = EncoderDecoder(
        (args.layers, args.dec_layers), args.emb_dim, args.hid_dim,
        args.att_dim, src_dict, att_type=args.att_type, dropout=args.dropout,
        word_dropout=args.word_dropout,
        bidi=not args.non_bidi, cell=args.cell, maxout=args.maxout,
        tie_weights=args.tie_weights, init_hidden=args.init_hidden)

    # model.freeze_submodule('encoder')
    # model.encoder.register_backward_hook(u.log_grad)
    # model.decoder.register_backward_hook(u.log_grad)

    model.apply(u.make_initializer(
        rnn={'type': 'orthogonal', 'args': {'gain': 1.0}}))

    optimizer = Optimizer(
        model.parameters(), args.optim, args.learning_rate, args.max_grad_norm,
        lr_decay=args.learning_rate_decay, start_decay_at=args.start_decay_at)

    criterion = make_criterion(len(src_dict), src_dict.get_pad())

    print('* number of parameters: %d' % model.n_params())
    print(model)

    if args.gpu:
        model.cuda()
        criterion.cuda()

    trainer = EncoderDecoderTrainer(
        model, {'train': train, 'valid': valid}, criterion, optimizer)
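make_criterion is likewise external to this listing. In OpenNMT-style sequence models it is commonly a vocabulary-sized loss whose padding class is zero-weighted, so padded target positions contribute nothing; a sketch under that assumption:

    import torch
    import torch.nn as nn

    def make_criterion(vocab_size, pad_idx):
        # Zero weight on the padding index masks padded target positions.
        weight = torch.ones(vocab_size)
        weight[pad_idx] = 0.0
        return nn.NLLLoss(weight=weight, reduction='sum')

This pairs with a log-softmax output layer; reduction='sum' accumulates the loss over all non-padding tokens in the batch.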
Example #5
        train.set_batch_size(args.batch_size)
        test.set_batch_size(args.batch_size)
        valid.set_batch_size(args.batch_size)
    datasets = {'train': train, 'valid': valid, 'test': test}
    vocab = len(train.d['src'].vocab)
    print("* Number of train batches %d" % len(train))

    print("Building model...")
    hid_dim = args.hid_dim if args.dec_hid_dim == 0 else (args.hid_dim, args.dec_hid_dim)
    layers = args.layers if args.dec_layers == 0 else (args.layers, args.dec_layers)
    model = SequenceVAE(
        args.emb_dim, hid_dim, args.z_dim, train.d['src'],
        num_layers=layers, cell=args.cell, bidi=not args.non_bidi,
        dropout=args.dropout, add_z=args.add_z, word_dropout=args.word_dropout,
        tie_weights=args.tie_weights, project_init=args.project_init)
    print(model)
    model.apply(u.make_initializer())
    # model.encoder.register_backward_hook(u.log_grad)

    if args.load_embeddings:
        weight = load_embeddings(
            train.d['src'].vocab,
            args.flavor,
            args.suffix,
            '~/data/word_embeddings')
        model.init_embeddings(weight)

    criterion = vae_criterion(vocab, train.d['src'].get_pad())

    if args.gpu:
        model.cuda()
        criterion.cuda()
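vae_criterion is not shown either. For a sequence VAE the loss is usually the masked reconstruction term plus the analytic KL divergence between the Gaussian posterior N(mu, sigma^2) and a standard normal prior. A sketch with an assumed (log_probs, targets, mu, logvar) signature:

    import torch
    import torch.nn as nn

    def vae_criterion(vocab_size, pad_idx):
        weight = torch.ones(vocab_size)
        weight[pad_idx] = 0.0
        reconstruction = nn.NLLLoss(weight=weight, reduction='sum')

        def criterion(log_probs, targets, mu, logvar):
            # ELBO objective: reconstruction + KL(q(z|x) || N(0, I)),
            # with the KL term computed in closed form from mu and log-variance.
            rec = reconstruction(log_probs, targets)
            kl = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
            return rec + kl

        return criterion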
Example #6
            test_data, d, args.batch_size, args.bptt, gpu=args.gpu,
            evaluation=True)
        del train_data, valid_data, test_data

    print(' * vocabulary size. %d' % len(d))
    print(' * number of train batches. %d' % len(train))

    print('Building model...')
    model = LM(len(d), args.emb_dim, args.hid_dim,
               num_layers=args.layers, cell=args.cell, dropout=args.dropout,
               att_dim=args.att_dim, tie_weights=args.tie_weights,
               deepout_layers=args.deepout_layers,
               deepout_act=args.deepout_act, word_dropout=args.word_dropout,
               target_code=d.get_unk())

    model.apply(u.make_initializer())
    if args.gpu:
        model.cuda()

    print(model)
    print('* number of parameters: %d' % model.n_params())

    optim = Optimizer(
        model.parameters(), args.optim, args.learning_rate, args.max_grad_norm,
        lr_decay=args.learning_rate_decay, start_decay_at=args.start_decay_at,
        decay_every=args.decay_every)
    criterion = nn.CrossEntropyLoss()

    # create trainer
    trainer = LMTrainer(model, {"train": train, "test": test, "valid": valid},
                        criterion, optim)
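The word_dropout/target_code pair passed to LM above suggests the standard word-dropout regularizer: during training, each input token id is replaced by the <unk> code with probability p. A sketch of that operation (the function name and signature are assumptions; real implementations usually also exempt reserved symbols such as padding):

    import torch

    def word_dropout(inp, p, target_code):
        # Replace each token id with `target_code` (the <unk> code)
        # with probability p; apply only during training.
        if p == 0.0:
            return inp
        mask = torch.bernoulli(
            torch.full_like(inp, p, dtype=torch.float)).bool()
        return inp.masked_fill(mask, target_code)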