def fork_target(self, **init_opts):
    """Return a frozen copy of the model with freshly initialized target-side parameters."""
    import copy
    model = copy.deepcopy(self)
    # keep the copied source-side parameters fixed
    model.freeze_submodule('src_embeddings')
    model.freeze_submodule('encoder')
    # reinitialize the target-side parameters
    model.decoder.apply(u.make_initializer(**init_opts))
    model.project.apply(u.make_initializer(**init_opts))
    return model
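`fork_target` depends on `freeze_submodule` to keep the copied encoder and source embeddings fixed while the new decoder is trained. That helper is not shown in this excerpt; the snippet below is only a minimal sketch of the standard PyTorch idiom it presumably wraps (the standalone function form and the lookup of the child module by attribute name are assumptions, not the library's actual code).

def freeze_submodule(model, name):
    # Hypothetical helper: switch off gradients for every parameter of the
    # named child module so the optimizer leaves it untouched.
    submodule = getattr(model, name)
    for param in submodule.parameters():
        param.requires_grad = False

# e.g. freeze_submodule(model, 'encoder') before training only the decoder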
print(' * maximum batch size. %d' % batch_size)

print('Building model...')
model = EncoderDecoder(
    (args.layers, args.dec_layers), args.emb_dim, args.hid_dim,
    args.att_dim, src_dict, att_type=args.att_type,
    dropout=args.dropout, word_dropout=args.word_dropout,
    bidi=not args.non_bidi, cell=args.cell, maxout=args.maxout,
    tie_weights=args.tie_weights, init_hidden=args.init_hidden)

# model.freeze_submodule('encoder')
# model.encoder.register_backward_hook(u.log_grad)
# model.decoder.register_backward_hook(u.log_grad)

model.apply(u.make_initializer(
    rnn={'type': 'orthogonal', 'args': {'gain': 1.0}}))

optimizer = Optimizer(
    model.parameters(), args.optim, args.learning_rate, args.max_grad_norm,
    lr_decay=args.learning_rate_decay, start_decay_at=args.start_decay_at)
criterion = make_criterion(len(src_dict), src_dict.get_pad())

print('* number of parameters: %d' % model.n_params())
print(model)

if args.gpu:
    model.cuda()
    criterion.cuda()

trainer = EncoderDecoderTrainer(
    model, {'train': train, 'valid': valid}, criterion, optimizer)
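`make_criterion` is built from the vocabulary size and the padding index but is not defined in this excerpt. A common way to construct such a sequence loss is an NLLLoss whose class-weight vector zeroes out the padding entry; the sketch below assumes that pattern and is not the library's actual implementation.

import torch
import torch.nn as nn

def make_criterion(vocab_size, pad_idx):
    # Assumed pattern: weight 1 for every class except padding, so padded
    # positions contribute nothing; size_average=False sums per-token losses
    # (older PyTorch keyword).
    weight = torch.ones(vocab_size)
    weight[pad_idx] = 0
    return nn.NLLLoss(weight=weight, size_average=False)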
train.set_batch_size(args.batch_size)
test.set_batch_size(args.batch_size)
valid.set_batch_size(args.batch_size)
datasets = {'train': train, 'valid': valid, 'test': test}
vocab = len(train.d['src'].vocab)
print("* Number of train batches %d" % len(train))

print("Building model...")
hid_dim = (args.hid_dim if args.dec_hid_dim == 0
           else (args.hid_dim, args.dec_hid_dim))
layers = (args.layers if args.dec_layers == 0
          else (args.layers, args.dec_layers))
model = SequenceVAE(
    args.emb_dim, hid_dim, args.z_dim, train.d['src'],
    num_layers=layers, cell=args.cell, bidi=not args.non_bidi,
    dropout=args.dropout, add_z=args.add_z,
    word_dropout=args.word_dropout, tie_weights=args.tie_weights,
    project_init=args.project_init)
print(model)

model.apply(u.make_initializer())
# model.encoder.register_backward_hook(u.log_grad)

if args.load_embeddings:
    weight = load_embeddings(
        train.d['src'].vocab, args.flavor, args.suffix,
        '~/data/word_embeddings')
    model.init_embeddings(weight)

criterion = vae_criterion(vocab, train.d['src'].get_pad())

if args.gpu:
    model.cuda()
    criterion.cuda()
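`vae_criterion` is also external to this excerpt. For a sequence VAE the loss is typically the padding-masked reconstruction term plus the analytical KL divergence between the Gaussian posterior and the standard-normal prior; the sketch below assumes that form (the returned callable's signature and the unweighted, unannealed KL term are assumptions — the real implementation may anneal or reweight it).

import torch
import torch.nn as nn

def vae_criterion(vocab_size, pad_idx):
    # Assumed pattern: masked reconstruction loss + KL(N(mu, sigma^2) || N(0, 1)).
    weight = torch.ones(vocab_size)
    weight[pad_idx] = 0
    reconstruction = nn.NLLLoss(weight=weight, size_average=False)

    def criterion(log_probs, targets, mu, logvar):
        rec = reconstruction(log_probs, targets)
        kl = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        return rec + kl

    return criterion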
    test_data, d, args.batch_size, args.bptt, gpu=args.gpu, evaluation=True)
del train_data, valid_data, test_data

print(' * vocabulary size. %d' % len(d))
print(' * number of train batches. %d' % len(train))

print('Building model...')
model = LM(len(d), args.emb_dim, args.hid_dim, num_layers=args.layers,
           cell=args.cell, dropout=args.dropout, att_dim=args.att_dim,
           tie_weights=args.tie_weights, deepout_layers=args.deepout_layers,
           deepout_act=args.deepout_act, word_dropout=args.word_dropout,
           target_code=d.get_unk())
model.apply(u.make_initializer())

if args.gpu:
    model.cuda()

print(model)
print('* number of parameters: %d' % model.n_params())

optim = Optimizer(
    model.parameters(), args.optim, args.learning_rate, args.max_grad_norm,
    lr_decay=args.learning_rate_decay, start_decay_at=args.start_decay_at,
    decay_every=args.decay_every)
criterion = nn.CrossEntropyLoss()

# create trainer
trainer = LMTrainer(model, {"train": train, "test": test, "valid": valid},
                    criterion, optim)
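The `Optimizer` wrapper used by these scripts bundles a torch optimizer with gradient clipping and a learning-rate decay schedule driven by `lr_decay`, `start_decay_at`, and `decay_every`. The class below is only a rough sketch of that pattern under those assumptions; the method names and the exact decay rule are guesses, not the library's real interface.

import torch.optim
from torch.nn.utils import clip_grad_norm_

class Optimizer(object):
    # Hypothetical sketch: clip gradients on every step and decay the
    # learning rate once `start_decay_at` is reached, then again every
    # `decay_every` epochs.
    def __init__(self, params, method, lr, max_grad_norm,
                 lr_decay=1.0, start_decay_at=None, decay_every=1):
        self.params = list(params)
        self.lr, self.lr_decay = lr, lr_decay
        self.max_grad_norm = max_grad_norm
        self.start_decay_at, self.decay_every = start_decay_at, decay_every
        self.optim = getattr(torch.optim, method)(self.params, lr=lr)

    def step(self):
        clip_grad_norm_(self.params, self.max_grad_norm)
        self.optim.step()

    def maybe_decay_lr(self, epoch):
        if self.start_decay_at is None or epoch < self.start_decay_at:
            return
        if (epoch - self.start_decay_at) % self.decay_every == 0:
            self.lr *= self.lr_decay
            for group in self.optim.param_groups:
                group['lr'] = self.lr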