def __load(self):
    """Build the NMT model on CPU and snapshot its freshly-initialized state.

    Constructs the encoder/decoder/generator from the stored metadata and
    dictionaries, applies the configured initializer, then records a flat
    state dict (generator parameters stored under a ``generator.`` prefix)
    so the model can later be reset to this pristine state.
    """
    # Assemble the seq2seq model from the stored configuration.
    enc = Models.Encoder(self.metadata, self.src_dict)
    dec = Models.Decoder(self.metadata, self.trg_dict)
    nmt = Models.NMTModel(enc, dec)
    gen = nn.Sequential(
        nn.Linear(self.metadata.rnn_size, self.trg_dict.size()),
        nn.LogSoftmax(dim=1))

    nmt.cpu()
    gen.cpu()

    self._initializer(nmt, gen)

    nmt.generator = gen
    nmt.eval()
    self.model = nmt

    # Snapshot the initial parameters: model entries first (skipping any
    # that belong to the generator), then the generator's own entries
    # keyed with a "generator." prefix.
    model_state_dict, generator_state_dict = self._get_state_dicts()
    init_state = {k: v for k, v in sorted(model_state_dict.items())
                  if 'generator' not in k}
    for k, v in sorted(generator_state_dict.items()):
        init_state["generator." + k] = v
    self._model_init_state = init_state

    self._model_loaded = False
def get_decoder(self, dec, opt, dicts):
    """Build and return the decoder module selected by ``dec``.

    :param dec: decoder type, one of 'nse', 'n2n', 'dnc', 'lstm'
    :param opt: options namespace; mutated in place (``seq``, and per-type
                fields such as ``layers`` / ``rnn_size``)
    :param dicts: dictionary map; only ``dicts['tgt']`` is used ('lstm')
    :raises ValueError: if ``dec`` names an unknown decoder type
    """
    opt.seq = 'decoder'
    if dec == 'nse':
        opt.layers = 2
        return nse.NSE(opt)
    elif dec == 'n2n':
        # implicit assumption encoder == nse
        self.embed_A = util.EmbMem(opt.word_vec_size, 'relu')
        self.embed_C = util.EmbMem(opt.word_vec_size, 'relu')
        return n2n.N2N(opt)
    elif dec == 'dnc':
        if opt.mem == 'lstm_dnc':
            opt.rnn_size = opt.word_vec_size
        return dnc.DNC(opt)
    elif dec == 'lstm':
        opt.rnn_size = opt.word_vec_size
        return Models.Decoder(opt, dicts['tgt'])
    # Previously an unknown type silently returned None, deferring the
    # failure to some later attribute access; fail fast instead.
    raise ValueError("unknown decoder type: %r" % (dec,))
def load_from_checkpoint(checkpoint_path, using_cuda):
    """Restore an NMTEngine (model, generator, optimizer) from a checkpoint.

    :param checkpoint_path: path to a torch-serialized training checkpoint
                            containing 'opt', 'dicts', 'model', 'generator'
                            and 'optim' entries
    :param using_cuda: move model and generator to GPU when True, CPU otherwise
    :return: a fully initialized, eval-mode NMTEngine
    """
    # Always deserialize tensors onto CPU first; devices are chosen below.
    checkpoint = torch.load(checkpoint_path,
                            map_location=lambda storage, loc: storage)

    model_opt = NMTEngine.Parameters()
    model_opt.__dict__.update(checkpoint['opt'])

    src_dict = checkpoint['dicts']['src']
    trg_dict = checkpoint['dicts']['tgt']

    encoder = Models.Encoder(model_opt, src_dict)
    decoder = Models.Decoder(model_opt, trg_dict)
    model = Models.NMTModel(encoder, decoder)
    model.load_state_dict(checkpoint['model'])

    # dim=1 normalizes over the vocabulary axis of the (batch, vocab)
    # scores; the implicit-dim form is deprecated in PyTorch and this
    # matches how the generator is constructed at model-creation time.
    generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, trg_dict.size()),
        nn.LogSoftmax(dim=1))
    generator.load_state_dict(checkpoint['generator'])

    if using_cuda:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()

    model.generator = generator
    model.eval()

    optim = checkpoint['optim']
    optim.set_parameters(model.parameters())
    # NOTE(review): `optim` *is* checkpoint['optim'], so this reloads the
    # optimizer's own state onto itself after set_parameters(); presumably
    # set_parameters resets internal state — confirm before removing.
    optim.optimizer.load_state_dict(
        checkpoint['optim'].optimizer.state_dict())

    return NMTEngine(model_opt, src_dict, trg_dict, model, optim,
                     checkpoint, using_cuda)
def new_instance(src_dict, trg_dict, model_params=None, random_seed=None,
                 gpu_ids=None, init_value=0.1):
    """Create a fresh NMTEngineTrainer with a randomly initialized model.

    :param src_dict: source-language dictionary
    :param trg_dict: target-language dictionary
    :param model_params: model hyper-parameters; defaults to
                         NMTEngine.Parameters() when None
    :param random_seed: forwarded to the trainer
    :param gpu_ids: list of GPU ids; None/empty keeps everything on CPU,
                    more than one enables DataParallel
    :param init_value: half-width of the uniform range used to initialize
                       all model parameters
    :return: a configured NMTEngineTrainer
    """
    if model_params is None:
        from nmmt import NMTEngine
        model_params = NMTEngine.Parameters()

    if gpu_ids is not None and len(gpu_ids) > 0:
        torch.cuda.set_device(gpu_ids[0])

    encoder = Models.Encoder(model_params, src_dict)
    decoder = Models.Decoder(model_params, trg_dict)
    # dim=1 normalizes over the vocabulary axis of the (batch, vocab)
    # scores; the implicit-dim LogSoftmax form is deprecated in PyTorch.
    generator = nn.Sequential(
        nn.Linear(model_params.rnn_size, trg_dict.size()),
        nn.LogSoftmax(dim=1))
    model = Models.NMTModel(encoder, decoder)

    if gpu_ids is not None and len(gpu_ids) > 0:
        model.cuda()
        generator.cuda()
        if len(gpu_ids) > 1:
            # The model is batch-second (dim=1), the generator batch-first.
            model = nn.DataParallel(model, device_ids=gpu_ids, dim=1)
            generator = nn.DataParallel(generator, device_ids=gpu_ids, dim=0)
    else:
        model.cpu()
        generator.cpu()

    model.generator = generator

    # Uniform init covers the generator too, since it was attached above.
    for p in model.parameters():
        p.data.uniform_(-init_value, init_value)

    optim = Optim(model_params.optim, model_params.learning_rate,
                  model_params.max_grad_norm,
                  lr_decay=model_params.learning_rate_decay,
                  start_decay_at=model_params.start_decay_at)
    optim.set_parameters(model.parameters())

    return NMTEngineTrainer(model, optim, src_dict, trg_dict,
                            model_params=model_params, gpu_ids=gpu_ids,
                            random_seed=random_seed)