Example #1
    def __load(self):
        encoder = Models.Encoder(self.metadata, self.src_dict)
        decoder = Models.Decoder(self.metadata, self.trg_dict)
        model = Models.NMTModel(encoder, decoder)

        generator = nn.Sequential(
            nn.Linear(self.metadata.rnn_size, self.trg_dict.size()),
            nn.LogSoftmax(dim=1))

        model.cpu()
        generator.cpu()

        self._initializer(model, generator)

        model.generator = generator
        model.eval()

        self.model = model

        # Snapshot the initial weights so the model can later be restored;
        # generator weights are keyed under a "generator." prefix.
        model_state_dict, generator_state_dict = self._get_state_dicts()

        self._model_init_state = {
            k: v
            for k, v in sorted(model_state_dict.items())
            if 'generator' not in k
        }
        self._model_init_state.update({
            "generator." + k: v
            for k, v in sorted(generator_state_dict.items())
        })

        self._model_loaded = False
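
A minimal usage sketch for the snapshot built above: because its keys mirror model.state_dict() (generator weights under a "generator." prefix), the whole snapshot can be loaded back in one call. The reset helper name is hypothetical, not part of the original API.

    def reset_to_init_state(engine):
        # Restore encoder, decoder and generator weights in one shot.
        engine.model.load_state_dict(engine._model_init_state)
        engine.model.eval()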
Example #2
    def get_encoder(self, enc, opt, dicts):
        opt.seq = 'encoder'

        if enc == 'nse':
            opt.layers = 2
            opt.word_vec_size = self.embed_in.weight.size(1)
            opt.rnn_size = self.embed_in.weight.size(1)
            return nse.NSE(opt)

        elif enc == 'n2n':
            opt.layers = 1
            utt_emb_sz = (dicts['src'].size(), opt.word_vec_size)
            self.embed_A = nn.Embedding(*utt_emb_sz)
            self.embed_C = nn.Embedding(*utt_emb_sz)

            return n2n.N2N(opt)

        elif enc == 'dnc':
            if opt.mem == 'dnc_lstm':
                opt.rnn_size = opt.word_vec_size
            return dnc.DNC(opt)

        elif enc == 'lstm':
            if opt.mem != 'lstm_lstm':
                opt.layers = 2
            opt.rnn_size = opt.word_vec_size
            return Models.Encoder(opt, dicts['src'])

        # Fail loudly instead of silently returning None.
        raise ValueError('unknown encoder type: %r' % enc)
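
A hedged call sketch for get_encoder; the exact fields opt must carry depend on the encoder classes, so the values and the builder object below are illustrative assumptions.

    from argparse import Namespace

    opt = Namespace(word_vec_size=300, rnn_size=300, layers=2,
                    mem='lstm_lstm', brnn=True, dropout=0.3)
    encoder = builder.get_encoder('lstm', opt, dicts)  # reads dicts['src']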
Example #3
    @staticmethod
    def load_from_checkpoint(checkpoint_path, using_cuda):
        checkpoint = torch.load(checkpoint_path,
                                map_location=lambda storage, loc: storage)

        model_opt = NMTEngine.Parameters()
        model_opt.__dict__.update(checkpoint['opt'])

        src_dict = checkpoint['dicts']['src']
        trg_dict = checkpoint['dicts']['tgt']

        encoder = Models.Encoder(model_opt, src_dict)
        decoder = Models.Decoder(model_opt, trg_dict)

        model = Models.NMTModel(encoder, decoder)
        model.load_state_dict(checkpoint['model'])

        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, trg_dict.size()),
            nn.LogSoftmax(dim=-1))
        generator.load_state_dict(checkpoint['generator'])

        if using_cuda:
            model.cuda()
            generator.cuda()
        else:
            model.cpu()
            generator.cpu()

        model.generator = generator
        model.eval()

        optim = checkpoint['optim']
        # set_parameters() rebuilds the inner optimizer, so capture its state
        # before the call and restore it afterwards.
        optim_state_dict = optim.optimizer.state_dict()
        optim.set_parameters(model.parameters())
        optim.optimizer.load_state_dict(optim_state_dict)

        return NMTEngine(model_opt, src_dict, trg_dict, model, optim,
                         checkpoint, using_cuda)
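
With the static method above, loading reduces to a single call; the checkpoint filename is a placeholder.

    engine = NMTEngine.load_from_checkpoint(
        'checkpoint.pt', using_cuda=torch.cuda.is_available())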
Example #4
    @staticmethod
    def new_instance(src_dict, trg_dict, model_params=None, random_seed=None, gpu_ids=None, init_value=0.1):
        if model_params is None:
            from nmmt import NMTEngine
            model_params = NMTEngine.Parameters()

        if gpu_ids is not None and len(gpu_ids) > 0:
            torch.cuda.set_device(gpu_ids[0])

        encoder = Models.Encoder(model_params, src_dict)
        decoder = Models.Decoder(model_params, trg_dict)
        generator = nn.Sequential(nn.Linear(model_params.rnn_size, trg_dict.size()), nn.LogSoftmax(dim=-1))

        model = Models.NMTModel(encoder, decoder)

        if gpu_ids is not None and len(gpu_ids) > 0:
            model.cuda()
            generator.cuda()

            if len(gpu_ids) > 1:
                # Inputs are time-major (seq_len, batch, ...), so scatter the
                # model on dim=1; the generator sees flat (batch, features).
                model = nn.DataParallel(model, device_ids=gpu_ids, dim=1)
                generator = nn.DataParallel(generator, device_ids=gpu_ids, dim=0)
        else:
            model.cpu()
            generator.cpu()

        model.generator = generator

        # Uniformly initialize all weights (including the generator, which is
        # now registered on the model) in [-init_value, init_value].
        for p in model.parameters():
            p.data.uniform_(-init_value, init_value)

        optim = Optim(model_params.optim, model_params.learning_rate, model_params.max_grad_norm,
                      lr_decay=model_params.learning_rate_decay, start_decay_at=model_params.start_decay_at)
        optim.set_parameters(model.parameters())

        return NMTEngineTrainer(model, optim, src_dict, trg_dict,
                                model_params=model_params, gpu_ids=gpu_ids, random_seed=random_seed)
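
A hedged construction sketch; model parameters fall back to NMTEngine.Parameters() defaults, and the seed value is arbitrary.

    gpu_ids = [0] if torch.cuda.is_available() else None
    trainer = NMTEngineTrainer.new_instance(src_dict, trg_dict,
                                            gpu_ids=gpu_ids,
                                            random_seed=3435)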
Example #5
    def __init__(self,
                 embedding,
                 hidden_size=300,
                 num_layers=1,
                 bidirectional=True,
                 bridge=False):
        super(TextEncoder, self).__init__()

        self.hidden_dim = hidden_size
        self.num_layers = num_layers

        self.embedding = embedding
        self.enc_layers = self.embedding.embedding_size

        rnn_type = "GRU"
        brnn = bidirectional

        rnn_size = self.hidden_dim
        dropout = 0.3
        self.encoder = Models.RNNEncoder(rnn_type, brnn, num_layers, rnn_size,
                                         dropout, embedding, bridge)
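
A usage sketch under the assumption that embedding is an OpenNMT-py Embeddings module (it must expose embedding_size, as the constructor reads); vocab_size and pad_idx are placeholders.

    from onmt.modules import Embeddings

    embedding = Embeddings(word_vec_size=300,
                           word_vocab_size=vocab_size,
                           word_padding_idx=pad_idx)
    encoder = TextEncoder(embedding, hidden_size=300,
                          num_layers=2, bidirectional=True)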
Example #6
    def get_decoder(self, dec, opt, dicts):
        opt.seq = 'decoder'

        if dec == 'nse':
            opt.layers = 2
            return nse.NSE(opt)

        elif dec == 'n2n':  # implicit assumption encoder == nse
            self.embed_A = util.EmbMem(opt.word_vec_size, 'relu')
            self.embed_C = util.EmbMem(opt.word_vec_size, 'relu')

            return n2n.N2N(opt)

        elif dec == 'dnc':
            if opt.mem == 'lstm_dnc':
                opt.rnn_size = opt.word_vec_size
            return dnc.DNC(opt)

        elif dec == 'lstm':
            opt.rnn_size = opt.word_vec_size
            return Models.Decoder(opt, dicts['tgt'])

        # Fail loudly instead of silently returning None.
        raise ValueError('unknown decoder type: %r' % dec)
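
Since get_decoder mirrors get_encoder from Example #2, the two are typically invoked as a pair on the same opt; a hedged sketch reusing the builder assumption from above:

    encoder = builder.get_encoder('lstm', opt, dicts)  # reads dicts['src']
    decoder = builder.get_decoder('lstm', opt, dicts)  # reads dicts['tgt']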
Example #7
    def __init__(self,
                 embeddings,
                 num_layers=1,
                 rnn_type="GRU",
                 hidden_size=300,
                 bidirectional=False):
        super(TextDecoder, self).__init__()

        global_attention = "general"
        coverage_attn = False
        context_gate = None
        copy_attn = False
        reuse_copy_attn = False
        dropout = 0.3

        self.decoder = Models.StdRNNDecoder(rnn_type, bidirectional,
                                            num_layers, hidden_size,
                                            global_attention, coverage_attn,
                                            context_gate, copy_attn, dropout,
                                            embeddings, reuse_copy_attn)

    def forward(self, *inputs, **kwargs):
        # Delegate directly to the wrapped OpenNMT decoder.
        return self.decoder(*inputs, **kwargs)
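
A call sketch following the (tgt, memory_bank, state) convention of OpenNMT's StdRNNDecoder; the tensor names are assumptions for illustration.

    decoder = TextDecoder(embeddings, num_layers=2, hidden_size=300)
    outputs, state, attns = decoder(tgt, memory_bank, state)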