Example #1
    def __load(self):
        # Build the sequence-to-sequence model from the stored options and vocabularies
        encoder = Models.Encoder(self.metadata, self.src_dict)
        decoder = Models.Decoder(self.metadata, self.trg_dict)
        model = Models.NMTModel(encoder, decoder)

        # The generator projects decoder hidden states onto the target vocabulary
        generator = nn.Sequential(
            nn.Linear(self.metadata.rnn_size, self.trg_dict.size()),
            nn.LogSoftmax(dim=1))

        model.cpu()
        generator.cpu()

        self._initializer(model, generator)

        model.generator = generator
        model.eval()

        self.model = model

        # Snapshot the initial parameters of model and generator into one flat
        # dict; generator weights are keyed with a "generator." prefix so the
        # combined dict mirrors model.state_dict() with the generator attached
        model_state_dict, generator_state_dict = self._get_state_dicts()

        self._model_init_state = {
            k: v
            for k, v in sorted(model_state_dict.items())
            if 'generator' not in k
        }
        self._model_init_state.update({
            "generator." + k: v
            for k, v in sorted(generator_state_dict.items())
        })

        self._model_loaded = False
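
A minimal follow-up sketch of how such a combined snapshot can be consumed; the helper name below is hypothetical and only assumes the engine keeps self.model and self._model_init_state exactly as built above:

    # Hypothetical reset helper (not part of the original example). Because the
    # generator is attached as model.generator, its parameters live in
    # model.state_dict() under the "generator." prefix, so a single
    # load_state_dict call restores both the seq2seq model and the generator.
    def _reset_to_initial_state(self):
        self.model.load_state_dict(self._model_init_state)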
Example #2
    def get_decoder(self, dec, opt, dicts):
        opt.seq = 'decoder'

        if dec == 'nse':
            # Neural Semantic Encoder decoder; forced to two layers
            opt.layers = 2
            return nse.NSE(opt)

        elif dec == 'n2n':  # implicit assumption: encoder == nse
            # End-to-end memory network decoder; A and C are its input and
            # output memory embeddings
            self.embed_A = util.EmbMem(opt.word_vec_size, 'relu')
            self.embed_C = util.EmbMem(opt.word_vec_size, 'relu')

            return n2n.N2N(opt)

        elif dec == 'dnc':
            # Differentiable Neural Computer decoder
            if opt.mem == 'lstm_dnc':
                opt.rnn_size = opt.word_vec_size
            return dnc.DNC(opt)

        elif dec == 'lstm':
            # Standard recurrent decoder over the target dictionary;
            # tie the RNN size to the embedding size
            opt.rnn_size = opt.word_vec_size
            return Models.Decoder(opt, dicts['tgt'])

        else:
            raise ValueError('unknown decoder type: %s' % dec)
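
Note that get_decoder mutates opt in place (opt.seq, and for some decoder types opt.layers or opt.rnn_size). A hypothetical call site, assuming self is a model builder that has already loaded opt and dicts; the decoder_type attribute is illustrative, not from the source:

        # Illustrative only: pick the decoder implementation from a config flag
        self.decoder = self.get_decoder(opt.decoder_type, opt, dicts)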
Example #3
    def load_from_checkpoint(checkpoint_path, using_cuda):
        # Load the checkpoint onto the CPU regardless of the device it was saved from
        checkpoint = torch.load(checkpoint_path,
                                map_location=lambda storage, loc: storage)

        model_opt = NMTEngine.Parameters()
        model_opt.__dict__.update(checkpoint['opt'])

        src_dict = checkpoint['dicts']['src']
        trg_dict = checkpoint['dicts']['tgt']

        encoder = Models.Encoder(model_opt, src_dict)
        decoder = Models.Decoder(model_opt, trg_dict)

        model = Models.NMTModel(encoder, decoder)
        model.load_state_dict(checkpoint['model'])

        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, trg_dict.size()),
            nn.LogSoftmax(dim=1))  # explicit dim avoids the implicit-dim deprecation warning
        generator.load_state_dict(checkpoint['generator'])

        if using_cuda:
            model.cuda()
            generator.cuda()
        else:
            model.cpu()
            generator.cpu()

        model.generator = generator
        model.eval()

        # Restore the optimizer: capture the saved inner optimizer state before
        # set_parameters() rebuilds it, then load that state back
        optim = checkpoint['optim']
        saved_optim_state = optim.optimizer.state_dict()
        optim.set_parameters(model.parameters())
        optim.optimizer.load_state_dict(saved_optim_state)

        return NMTEngine(model_opt, src_dict, trg_dict, model, optim,
                         checkpoint, using_cuda)
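
A hypothetical call, assuming load_from_checkpoint is exposed as a static method of NMTEngine (any decorator sits outside the excerpt) and that a checkpoint file exists at the given path:

import torch
from nmmt import NMTEngine  # package name taken from Example #4

engine = NMTEngine.load_from_checkpoint('checkpoint.pt',
                                        using_cuda=torch.cuda.is_available())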
Example #4
    def new_instance(src_dict, trg_dict, model_params=None, random_seed=None,
                     gpu_ids=None, init_value=0.1):
        if model_params is None:
            from nmmt import NMTEngine
            model_params = NMTEngine.Parameters()

        # Make the first listed GPU the default CUDA device when GPUs are requested
        if gpu_ids is not None and len(gpu_ids) > 0:
            torch.cuda.set_device(gpu_ids[0])

        encoder = Models.Encoder(model_params, src_dict)
        decoder = Models.Decoder(model_params, trg_dict)
        generator = nn.Sequential(
            nn.Linear(model_params.rnn_size, trg_dict.size()),
            nn.LogSoftmax(dim=1))  # explicit dim avoids the implicit-dim deprecation warning

        model = Models.NMTModel(encoder, decoder)

        if gpu_ids is not None and len(gpu_ids) > 0:
            model.cuda()
            generator.cuda()

            if len(gpu_ids) > 1:
                # Data-parallel replicas: the model splits batches along dim 1
                # (time-major input), the generator along dim 0 (flattened tokens)
                model = nn.DataParallel(model, device_ids=gpu_ids, dim=1)
                generator = nn.DataParallel(generator, device_ids=gpu_ids, dim=0)
        else:
            model.cpu()
            generator.cpu()

        model.generator = generator

        # Initialize every parameter (encoder, decoder and generator) uniformly
        # in [-init_value, init_value]
        for p in model.parameters():
            p.data.uniform_(-init_value, init_value)

        optim = Optim(model_params.optim, model_params.learning_rate, model_params.max_grad_norm,
                      lr_decay=model_params.learning_rate_decay, start_decay_at=model_params.start_decay_at)
        optim.set_parameters(model.parameters())

        return NMTEngineTrainer(model, optim, src_dict, trg_dict,
                                model_params=model_params, gpu_ids=gpu_ids, random_seed=random_seed)
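
A hypothetical way to build a fresh trainer with this factory, assuming new_instance is a static method of NMTEngineTrainer and that the source/target dictionaries come from a preprocessed data file (the file name and key layout below are assumptions, not from the source):

import torch
from nmmt import NMTEngineTrainer  # import path assumed

dicts = torch.load('train.data.pt')['dicts']  # assumed preprocessing output
trainer = NMTEngineTrainer.new_instance(dicts['src'], dicts['tgt'],
                                        random_seed=1234, gpu_ids=[0])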