Example #1

    def _load_model(self):
        """Build the configured seq2seq variant and load its pretrained weights."""
        print('Loading pretrained model')
        if self.config['model']['seq2seq'] == 'vanilla':
            print('Loading Seq2Seq Vanilla model')

            self.model = Seq2Seq(
                src_emb_dim=self.config['model']['dim_word_src'],
                trg_emb_dim=self.config['model']['dim_word_trg'],
                src_vocab_size=len(self.src_dict),
                trg_vocab_size=len(self.tgt_dict),
                src_hidden_dim=self.config['model']['dim'],
                trg_hidden_dim=self.config['model']['dim'],
                batch_size=self.config['data']['batch_size'],
                bidirectional=self.config['model']['bidirectional'],
                pad_token_src=self.src_dict['<pad>'],
                pad_token_trg=self.tgt_dict['<pad>'],
                nlayers=self.config['model']['n_layers_src'],
                nlayers_trg=self.config['model']['n_layers_trg'],
                dropout=0.
            ).cuda()

        elif self.config['model']['seq2seq'] == 'dialog':
            # Dialog variant: attention decoder with one embedding matrix
            # shared between source and target vocabularies.

            self.model = Seq2SeqAttentionSharedEmbedding(
                emb_dim=self.config['model']['dim_word_src'],
                type_emb_dim=self.config['model']['dim_type_emb'],
                vocab_size=len(self.src['word2id']),
                type_size=self.config['model']["type_size"],
                src_hidden_dim=self.config['model']['dim'],
                trg_hidden_dim=self.config['model']['dim'],
                ctx_hidden_dim=self.config['model']['dim'],
                attention_mode='dot',
                batch_size=self.config['data']['batch_size'],
                bidirectional=self.config['model']['bidirectional'],
                pad_token_src=self.src['word2id']['<pad>'],
                pad_token_trg=self.trg['word2id']['<pad>'],
                nlayers=self.config['model']['n_layers_src'],
                nlayers_trg=self.config['model']['n_layers_trg'],
                dropout=0.05
            ).cuda()

        # torch.load accepts the checkpoint path directly; opening the file in
        # text mode here would corrupt the binary data.
        self.model.load_state_dict(torch.load(self.model_weights))
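
As a side note, here is a minimal sketch of the same weight-loading step on a machine without a GPU, assuming the checkpoint was saved with torch.save(model.state_dict(), path); map_location is a standard torch.load argument, and 'checkpoint.pt' is only a placeholder path:

import torch

state_dict = torch.load('checkpoint.pt', map_location='cpu')  # placeholder path
model.load_state_dict(state_dict)  # 'model' is any of the seq2seq variants built above
model.eval()  # disable dropout for inference
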
Example #2

import logging

import torch
import torch.nn as nn

# config, src, trg, vocab_size, batch_size and load_dir, as well as the
# Seq2SeqAttentionSharedEmbedding class, are defined earlier in the training
# script this snippet is taken from.

logging.info('Learning rate: %f', config['training']['lrate'])

logging.info('Found %d words', vocab_size)

# Zero out the <pad> class weight so padded target positions contribute
# nothing to the cross-entropy loss.
weight_mask = torch.ones(vocab_size).cuda()
weight_mask[trg['word2id']['<pad>']] = 0
loss_criterion = nn.CrossEntropyLoss(weight=weight_mask).cuda()

model = Seq2SeqAttentionSharedEmbedding(
    emb_dim=config['model']['dim_word_src'],
    vocab_size=vocab_size,
    src_hidden_dim=config['model']['dim'],
    trg_hidden_dim=config['model']['dim'],
    ctx_hidden_dim=config['model']['dim'],
    attention_mode='dot',
    batch_size=batch_size,
    bidirectional=config['model']['bidirectional'],
    pad_token_src=src['word2id']['<pad>'],
    pad_token_trg=trg['word2id']['<pad>'],
    nlayers=config['model']['n_layers_src'],
    nlayers_trg=config['model']['n_layers_trg'],
    dropout=0.,
).cuda()

if load_dir:
    # Resume from an existing checkpoint; torch.load takes the path directly.
    model.load_state_dict(torch.load(load_dir))

# __TODO__ Make this more flexible for other learning methods.
if config['training']['optimizer'] == 'adam':
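
A minimal sketch of how an adam branch is commonly completed with torch.optim, using the lrate value logged at the top of this snippet; this is an illustration under that assumption, not the project's actual continuation:

import torch.optim as optim

# Assumed continuation: map the 'adam' setting to torch.optim.Adam with the
# configured learning rate; other optimizers (per the TODO above) would get
# their own branches.
optimizer = optim.Adam(model.parameters(), lr=config['training']['lrate'])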