def __load(self):
    """Build the CPU model/generator pair and snapshot its initial state.

    Constructs encoder/decoder/generator from ``self.metadata`` and the
    source/target dictionaries, runs the configured initializer, and stores
    a flat state-dict snapshot in ``self._model_init_state`` (generator
    weights prefixed with ``"generator."``). Leaves the model in eval mode
    on the CPU and marks it as not yet loaded.
    """
    encoder = Models.Encoder(self.metadata, self.src_dict)
    decoder = Models.Decoder(self.metadata, self.trg_dict)
    nmt_model = Models.NMTModel(encoder, decoder)
    generator = nn.Sequential(
        nn.Linear(self.metadata.rnn_size, self.trg_dict.size()),
        nn.LogSoftmax(dim=1))

    # Both components are kept on the CPU at load time.
    nmt_model.cpu()
    generator.cpu()

    self._initializer(nmt_model, generator)

    nmt_model.generator = generator
    nmt_model.eval()
    self.model = nmt_model

    # Snapshot the freshly initialized parameters. Model entries whose key
    # mentions 'generator' are skipped so generator weights appear exactly
    # once, under the "generator." prefix.
    model_state_dict, generator_state_dict = self._get_state_dicts()
    init_state = {}
    for key, value in sorted(model_state_dict.items()):
        if 'generator' not in key:
            init_state[key] = value
    for key, value in sorted(generator_state_dict.items()):
        init_state['generator.' + key] = value
    self._model_init_state = init_state

    self._model_loaded = False
def get_encoder(self, enc, opt, dicts):
    """Construct and return the encoder selected by *enc*.

    NOTE(review): *opt* is mutated in place (``seq``, ``layers``,
    ``word_vec_size``, ``rnn_size``) as a side effect of selection —
    callers that reuse *opt* afterwards should be aware.
    Falls through to an implicit ``None`` for an unrecognized *enc*
    value — TODO confirm callers handle that.
    """
    opt.seq = 'encoder'
    if enc == 'nse':
        # NSE ties both word_vec_size and rnn_size to the width of the
        # existing input embedding.
        opt.layers = 2
        opt.word_vec_size = self.embed_in.weight.size(1)
        opt.rnn_size = self.embed_in.weight.size(1)
        return nse.NSE(opt)
    elif enc == 'n2n':
        opt.layers = 1
        # Memory-network style A/C embeddings over the source vocabulary.
        utt_emb_sz = (dicts['src'].size(), opt.word_vec_size)
        self.embed_A = nn.Embedding(*utt_emb_sz)
        self.embed_C = nn.Embedding(*utt_emb_sz)
        return n2n.N2N(opt)
    elif enc == 'dnc':
        # Only the dnc_lstm memory variant forces rnn_size to match the
        # embedding width.
        if opt.mem == 'dnc_lstm':
            opt.rnn_size = opt.word_vec_size
        return dnc.DNC(opt)
    elif enc == 'lstm':
        # For non-lstm_lstm memory configurations, use a 2-layer encoder
        # with rnn_size tied to the embedding width.
        if opt.mem != 'lstm_lstm':
            opt.layers = 2
            opt.rnn_size = opt.word_vec_size
        return Models.Encoder(opt, dicts['src'])
def load_from_checkpoint(checkpoint_path, using_cuda):
    """Rebuild an NMTEngine from a serialized training checkpoint.

    Loads the checkpoint onto the CPU (``map_location`` keeps tensors on
    their storage), reconstructs model, generator and optimizer from the
    stored options and state dicts, then moves everything to GPU or CPU
    according to *using_cuda*.

    :param checkpoint_path: path to a ``torch.save``-d checkpoint dict with
        keys 'opt', 'dicts', 'model', 'generator' and 'optim'.
    :param using_cuda: if True, place model and generator on the GPU.
    :return: a ready-to-use NMTEngine in eval mode.
    """
    # Load to CPU regardless of where the checkpoint was saved.
    checkpoint = torch.load(checkpoint_path,
                            map_location=lambda storage, loc: storage)

    model_opt = NMTEngine.Parameters()
    model_opt.__dict__.update(checkpoint['opt'])

    src_dict = checkpoint['dicts']['src']
    trg_dict = checkpoint['dicts']['tgt']

    encoder = Models.Encoder(model_opt, src_dict)
    decoder = Models.Decoder(model_opt, trg_dict)
    model = Models.NMTModel(encoder, decoder)
    model.load_state_dict(checkpoint['model'])

    # Fix: explicit dim=1 — bare nn.LogSoftmax() relies on the deprecated
    # implicit-dim behavior; normalize over the vocabulary axis of the
    # (batch, vocab) scores produced by the Linear layer.
    generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, trg_dict.size()),
        nn.LogSoftmax(dim=1))
    generator.load_state_dict(checkpoint['generator'])

    if using_cuda:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()

    model.generator = generator
    model.eval()

    # Re-bind the optimizer to the rebuilt parameters, then restore its
    # inner state (momentum buffers etc.) from the checkpoint.
    optim = checkpoint['optim']
    optim.set_parameters(model.parameters())
    optim.optimizer.load_state_dict(
        checkpoint['optim'].optimizer.state_dict())

    return NMTEngine(model_opt, src_dict, trg_dict, model, optim,
                     checkpoint, using_cuda)
def new_instance(src_dict, trg_dict, model_params=None, random_seed=None,
                 gpu_ids=None, init_value=0.1):
    """Create a fresh NMTEngineTrainer with uniformly initialized weights.

    :param src_dict: source-side vocabulary dictionary.
    :param trg_dict: target-side vocabulary dictionary.
    :param model_params: NMTEngine.Parameters; defaults to a new instance.
    :param random_seed: passed through to the trainer.
    :param gpu_ids: list of GPU device ids; None/empty keeps everything on CPU.
    :param init_value: all parameters are drawn uniformly from
        [-init_value, init_value].
    :return: an NMTEngineTrainer wrapping the new model and optimizer.
    """
    if model_params is None:
        from nmmt import NMTEngine
        model_params = NMTEngine.Parameters()

    # Hoisted: the same GPU-availability test was previously duplicated.
    using_gpu = gpu_ids is not None and len(gpu_ids) > 0
    if using_gpu:
        torch.cuda.set_device(gpu_ids[0])

    encoder = Models.Encoder(model_params, src_dict)
    decoder = Models.Decoder(model_params, trg_dict)
    # Fix: explicit dim=1 — bare nn.LogSoftmax() relies on the deprecated
    # implicit-dim behavior; normalize over the vocabulary axis.
    generator = nn.Sequential(
        nn.Linear(model_params.rnn_size, trg_dict.size()),
        nn.LogSoftmax(dim=1))
    model = Models.NMTModel(encoder, decoder)

    if using_gpu:
        model.cuda()
        generator.cuda()
        if len(gpu_ids) > 1:
            # NOTE(review): model is scattered along dim=1, generator along
            # dim=0 — presumably time-major batches vs. flattened
            # (N, vocab) scores; confirm against the training loop.
            model = nn.DataParallel(model, device_ids=gpu_ids, dim=1)
            generator = nn.DataParallel(generator, device_ids=gpu_ids, dim=0)
    else:
        model.cpu()
        generator.cpu()

    model.generator = generator

    # Initialize after attaching the generator so its parameters are
    # included in model.parameters() and initialized too.
    for p in model.parameters():
        p.data.uniform_(-init_value, init_value)

    optim = Optim(model_params.optim, model_params.learning_rate,
                  model_params.max_grad_norm,
                  lr_decay=model_params.learning_rate_decay,
                  start_decay_at=model_params.start_decay_at)
    optim.set_parameters(model.parameters())

    return NMTEngineTrainer(model, optim, src_dict, trg_dict,
                            model_params=model_params, gpu_ids=gpu_ids,
                            random_seed=random_seed)