def __load(self):
    """Build the NMT model and generator on CPU, initialize them, and
    snapshot the initial parameter state into ``self._model_init_state``
    so training can later be reset to this point.
    """
    encoder = Models.Encoder(self.metadata, self.src_dict)
    decoder = Models.Decoder(self.metadata, self.trg_dict)
    nmt_model = Models.NMTModel(encoder, decoder)
    generator = nn.Sequential(
        nn.Linear(self.metadata.rnn_size, self.trg_dict.size()),
        nn.LogSoftmax(dim=1))

    # Everything lives on CPU at load time.
    nmt_model.cpu()
    generator.cpu()

    self._initializer(nmt_model, generator)
    nmt_model.generator = generator
    nmt_model.eval()
    self.model = nmt_model

    # Compute initial state: model parameters (excluding any generator
    # entries) followed by the generator parameters under a
    # "generator." prefix, each group in sorted key order.
    model_state, generator_state = self._get_state_dicts()
    init_state = {}
    for key, value in sorted(model_state.items()):
        if 'generator' not in key:
            init_state[key] = value
    for key, value in sorted(generator_state.items()):
        init_state["generator." + key] = value
    self._model_init_state = init_state

    self._model_loaded = False
def get_encoder(self, enc, opt, dicts):
    """Build and return the encoder module selected by *enc*.

    :param enc: encoder type key: 'nse', 'n2n', 'dnc' or 'lstm'
    :param opt: options namespace; mutated in place (``seq``, ``layers``,
                ``word_vec_size``, ``rnn_size``) to configure the encoder
    :param dicts: vocabulary dict; ``dicts['src']`` supplies the source
                  vocabulary where embeddings are needed
    :raises ValueError: if *enc* is not a recognized encoder type
    """
    opt.seq = 'encoder'
    if enc == 'nse':
        opt.layers = 2
        # NSE ties both vector sizes to the input embedding width.
        opt.word_vec_size = self.embed_in.weight.size(1)
        opt.rnn_size = self.embed_in.weight.size(1)
        return nse.NSE(opt)
    elif enc == 'n2n':
        opt.layers = 1
        utt_emb_sz = (dicts['src'].size(), opt.word_vec_size)
        self.embed_A = nn.Embedding(*utt_emb_sz)
        self.embed_C = nn.Embedding(*utt_emb_sz)
        return n2n.N2N(opt)
    elif enc == 'dnc':
        if opt.mem == 'dnc_lstm':
            opt.rnn_size = opt.word_vec_size
        return dnc.DNC(opt)
    elif enc == 'lstm':
        if opt.mem != 'lstm_lstm':
            opt.layers = 2
        opt.rnn_size = opt.word_vec_size
        return Models.Encoder(opt, dicts['src'])
    # Previously an unrecognized key fell through and returned None,
    # deferring the failure to an opaque AttributeError at the call
    # site. Fail fast with a clear message instead.
    raise ValueError('unknown encoder type: %s' % enc)
def load_from_checkpoint(checkpoint_path, using_cuda):
    """Restore a complete NMTEngine from a training checkpoint file.

    :param checkpoint_path: path to a ``torch.save``-d checkpoint holding
        'opt', 'dicts', 'model', 'generator' and 'optim' entries
    :param using_cuda: if True, move model and generator to the GPU;
        otherwise keep them on CPU
    :return: an NMTEngine wrapping the restored model, dicts and optimizer
    """
    # map_location forces all saved tensors onto CPU regardless of the
    # device they were trained on; they are moved below if requested.
    checkpoint = torch.load(checkpoint_path,
                            map_location=lambda storage, loc: storage)

    model_opt = NMTEngine.Parameters()
    model_opt.__dict__.update(checkpoint['opt'])

    src_dict = checkpoint['dicts']['src']
    trg_dict = checkpoint['dicts']['tgt']

    encoder = Models.Encoder(model_opt, src_dict)
    decoder = Models.Decoder(model_opt, trg_dict)
    model = Models.NMTModel(encoder, decoder)
    model.load_state_dict(checkpoint['model'])

    # dim=1 scores along the vocabulary axis of (batch, vocab) logits.
    # The original omitted dim, relying on deprecated implicit-dim
    # behavior (which resolves to 1 for 2-D input); explicit dim is
    # consistent with the rest of the engine and silences the warning.
    generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, trg_dict.size()),
        nn.LogSoftmax(dim=1))
    generator.load_state_dict(checkpoint['generator'])

    if using_cuda:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()

    model.generator = generator
    model.eval()

    optim = checkpoint['optim']
    optim.set_parameters(model.parameters())
    # NOTE(review): optim IS checkpoint['optim'], so this reloads its own
    # state dict; presumably set_parameters resets internal state and this
    # restores it — confirm against the Optim implementation.
    optim.optimizer.load_state_dict(
        checkpoint['optim'].optimizer.state_dict())

    return NMTEngine(model_opt, src_dict, trg_dict, model, optim,
                     checkpoint, using_cuda)
def new_instance(src_dict, trg_dict, model_params=None, random_seed=None, gpu_ids=None, init_value=0.1):
    """Create an NMTEngineTrainer around a freshly built, randomly
    initialized model.

    :param src_dict: source vocabulary
    :param trg_dict: target vocabulary
    :param model_params: engine Parameters; defaults to NMTEngine.Parameters()
    :param random_seed: forwarded to the trainer
    :param gpu_ids: list of GPU ids; None/empty means CPU. With more than
        one id, model and generator are wrapped in nn.DataParallel.
    :param init_value: half-width of the uniform weight initialization range
    :return: a configured NMTEngineTrainer
    """
    if model_params is None:
        from nmmt import NMTEngine
        model_params = NMTEngine.Parameters()

    if gpu_ids is not None and len(gpu_ids) > 0:
        torch.cuda.set_device(gpu_ids[0])

    encoder = Models.Encoder(model_params, src_dict)
    decoder = Models.Decoder(model_params, trg_dict)
    # dim=1 scores along the vocabulary axis of (batch, vocab) logits.
    # The original omitted dim, relying on deprecated implicit-dim
    # behavior (which resolves to 1 for 2-D input); explicit dim matches
    # the rest of the engine and silences the deprecation warning.
    generator = nn.Sequential(
        nn.Linear(model_params.rnn_size, trg_dict.size()),
        nn.LogSoftmax(dim=1))

    model = Models.NMTModel(encoder, decoder)

    if gpu_ids is not None and len(gpu_ids) > 0:
        model.cuda()
        generator.cuda()
        if len(gpu_ids) > 1:
            # NOTE(review): model scatters on dim=1 while the generator
            # scatters on dim=0 — presumably the model's inputs are
            # time-major while generator inputs are batch-major; confirm.
            model = nn.DataParallel(model, device_ids=gpu_ids, dim=1)
            generator = nn.DataParallel(generator, device_ids=gpu_ids, dim=0)
    else:
        model.cpu()
        generator.cpu()

    model.generator = generator

    # Uniform initialization of every weight in [-init_value, init_value].
    for p in model.parameters():
        p.data.uniform_(-init_value, init_value)

    optim = Optim(model_params.optim, model_params.learning_rate,
                  model_params.max_grad_norm,
                  lr_decay=model_params.learning_rate_decay,
                  start_decay_at=model_params.start_decay_at)
    optim.set_parameters(model.parameters())

    return NMTEngineTrainer(model, optim, src_dict, trg_dict,
                            model_params=model_params, gpu_ids=gpu_ids,
                            random_seed=random_seed)
def __init__(self, embedding, hidden_size=300, num_layers=1, bidirectional=True, bridge=False):
    """Wrap an RNNEncoder ("GRU", dropout 0.3) around *embedding*.

    :param embedding: embedding module; must expose ``embedding_size``
    :param hidden_size: RNN hidden dimension
    :param num_layers: number of stacked RNN layers
    :param bidirectional: whether the RNN runs in both directions
    :param bridge: forwarded to RNNEncoder
    """
    super(TextEncoder, self).__init__()
    self.hidden_dim = hidden_size
    self.num_layers = num_layers
    self.embedding = embedding
    # NOTE(review): despite the name, this stores the embedding WIDTH,
    # not a layer count — confirm intent with downstream readers.
    self.enc_layers = self.embedding.embedding_size
    # Positional args: rnn_type, brnn, num_layers, rnn_size, dropout,
    # embeddings, bridge.
    self.encoder = Models.RNNEncoder("GRU", bidirectional, num_layers,
                                     self.hidden_dim, 0.3, embedding,
                                     bridge)
def get_decoder(self, dec, opt, dicts):
    """Build and return the decoder module selected by *dec*.

    :param dec: decoder type key: 'nse', 'n2n', 'dnc' or 'lstm'
    :param opt: options namespace; mutated in place (``seq``, ``layers``,
                ``rnn_size``) to configure the decoder
    :param dicts: vocabulary dict; ``dicts['tgt']`` supplies the target
                  vocabulary for the LSTM decoder
    :raises ValueError: if *dec* is not a recognized decoder type
    """
    opt.seq = 'decoder'
    if dec == 'nse':
        opt.layers = 2
        return nse.NSE(opt)
    elif dec == 'n2n':
        # implicit assumption encoder == nse
        self.embed_A = util.EmbMem(opt.word_vec_size, 'relu')
        self.embed_C = util.EmbMem(opt.word_vec_size, 'relu')
        return n2n.N2N(opt)
    elif dec == 'dnc':
        if opt.mem == 'lstm_dnc':
            opt.rnn_size = opt.word_vec_size
        return dnc.DNC(opt)
    elif dec == 'lstm':
        opt.rnn_size = opt.word_vec_size
        return Models.Decoder(opt, dicts['tgt'])
    # Previously an unrecognized key fell through and returned None,
    # deferring the failure to an opaque AttributeError at the call
    # site. Fail fast with a clear message instead.
    raise ValueError('unknown decoder type: %s' % dec)
def __init__(self, embeddings, num_layers=1, rnn_type="GRU", hidden_size=300, bidirectional=False):
    """Wrap a StdRNNDecoder with a fixed attention/dropout configuration.

    :param embeddings: embedding module passed through to the decoder
    :param num_layers: number of stacked RNN layers
    :param rnn_type: RNN cell type (default "GRU")
    :param hidden_size: RNN hidden dimension
    :param bidirectional: whether the matching encoder was bidirectional
    """
    super(TextDecoder, self).__init__()
    # Fixed configuration: "general" attention, no coverage attention,
    # no context gate, no copy attention, dropout 0.3.
    self.decoder = Models.StdRNNDecoder(
        rnn_type,
        bidirectional,
        num_layers,
        hidden_size,
        "general",  # global_attention
        False,      # coverage_attn
        None,       # context_gate
        False,      # copy_attn
        0.3,        # dropout
        embeddings,
        False)      # reuse_copy_attn

def forward(self, *input, **kargs):
    """Delegate directly to the wrapped StdRNNDecoder."""
    return self.decoder(*input, **kargs)