예제 #1
0
    def nmtmodel_forward(self, opt, source_l=3, bsize=1):
        """
        Build an NMTModel from ``opt``, forward a test batch through it
        and check the size and type of the decoder output.

        The encoder and the decoder each get their own embeddings, both
        built from the vocabulary returned by ``self.get_vocab()`` and an
        empty list of feature dictionaries.

        Args:
            opt: Namespace with options
            source_l: length of input sequence
            bsize: batchsize
        """
        word_dict = self.get_vocab()
        feature_dicts = []

        enc_embeddings = make_embeddings(opt, word_dict, feature_dicts)
        enc = make_encoder(opt, enc_embeddings)

        dec_embeddings = make_embeddings(opt,
                                         word_dict,
                                         feature_dicts,
                                         for_encoder=False)
        dec = make_decoder(opt, dec_embeddings)

        model = onmt.models.model.NMTModel(enc, dec)

        test_src, test_tgt, test_length = self.get_batch(source_l=source_l,
                                                         bsize=bsize)
        # Only the first returned value is inspected by this test.
        outputs, _attn, _ = model(test_src, test_tgt, test_length)

        # NOTE(review): the expected length is source_l - 1; presumably the
        # model consumes one target step fewer than it is given -- confirm.
        outputsize = torch.zeros(source_l - 1, bsize, opt.rnn_size)

        # Make sure that output has the correct size and type.
        self.assertEqual(outputs.size(), outputsize.size())
        # isinstance-based checks instead of exact type() comparison: since
        # PyTorch 0.4 type() of a tensor is always torch.Tensor, so comparing
        # it to Variable / FloatTensor with == fails even for valid outputs.
        self.assertIsInstance(outputs, torch.autograd.Variable)
        self.assertIsInstance(outputs.data, torch.FloatTensor)
예제 #2
0
    def encoder_forward(self, opt, source_l=3, bsize=1):
        """
        Tests if the encoder works as expected.

        Builds an encoder from ``opt``, runs a generated source batch
        through it and checks the sizes of both final hidden tensors and of
        the per-step outputs, as well as the output type.

        Args:
            opt: set of options
            source_l: Length of generated input sentence
            bsize: Batchsize of generated input
        """
        word_dict = self.get_vocab()
        feature_dicts = []
        embeddings = make_embeddings(opt, word_dict, feature_dicts)
        enc = make_encoder(opt, embeddings)

        # The target part of the batch is not needed for an encoder-only run.
        test_src, _test_tgt, test_length = self.get_batch(source_l=source_l,
                                                          bsize=bsize)

        hidden_t, outputs = enc(test_src, test_length)

        # Initialize vectors to compare size with. The layer count is read
        # from the ``opt`` the encoder was built with, not from ``self.opt``,
        # so a caller-supplied override of enc_layers is honoured.
        test_hid = torch.zeros(opt.enc_layers, bsize, opt.rnn_size)
        test_out = torch.zeros(source_l, bsize, opt.rnn_size)

        # Ensure correct sizes and types. Each hidden tensor gets its own
        # assertion: the third positional argument of assertEqual is the
        # failure message, so passing hidden_t[1].size() there never
        # compared it against anything.
        self.assertEqual(test_hid.size(), hidden_t[0].size())
        self.assertEqual(test_hid.size(), hidden_t[1].size())
        self.assertEqual(test_out.size(), outputs.size())
        # isinstance-based checks instead of exact type() comparison: since
        # PyTorch 0.4 type() of a tensor is always torch.Tensor, so comparing
        # it to Variable / FloatTensor with == fails even for valid outputs.
        self.assertIsInstance(outputs, torch.autograd.Variable)
        self.assertIsInstance(outputs.data, torch.FloatTensor)
예제 #3
0
    def audiomodel_forward(self, opt, tgt_l=2, bsize=1, t=37):
        """
        Creates a speech-to-text nmtmodel from ``opt``.
        Forwards a testbatch and checks output size and type.

        Returns immediately, without checking anything, when
        ``opt.encoder_type`` is 'transformer' or 'cnn'.

        Args:
            opt: Namespace with options
            tgt_l: length of the target sequence
            bsize: batchsize
            t: forwarded unchanged to ``self.get_batch_audio`` (presumably
                the number of time steps of the generated audio input --
                TODO confirm against get_batch_audio)
        """
        if opt.encoder_type in ('transformer', 'cnn'):
            return

        word_dict = self.get_vocab()
        feature_dicts = []

        enc = AudioEncoder(opt.enc_layers, opt.brnn, opt.rnn_size, opt.dropout,
                           opt.sample_rate, opt.window_size)

        # Only the decoder needs word embeddings; the encoder is the
        # AudioEncoder built above.
        embeddings = make_embeddings(opt,
                                     word_dict,
                                     feature_dicts,
                                     for_encoder=False)
        dec = make_decoder(opt, embeddings)

        model = onmt.models.model.NMTModel(enc, dec)

        test_src, test_tgt, test_length = self.get_batch_audio(
            bsize=bsize,
            sample_rate=opt.sample_rate,
            window_size=opt.window_size,
            t=t,
            tgt_l=tgt_l)
        # Only the first returned value is inspected by this test.
        outputs, _attn, _ = model(test_src, test_tgt, test_length)

        # NOTE(review): the expected length is tgt_l - 1; presumably the
        # model consumes one target step fewer than it is given -- confirm.
        outputsize = torch.zeros(tgt_l - 1, bsize, opt.rnn_size)

        # Make sure that output has the correct size and type.
        self.assertEqual(outputs.size(), outputsize.size())
        # isinstance-based checks instead of exact type() comparison: since
        # PyTorch 0.4 type() of a tensor is always torch.Tensor, so comparing
        # it to Variable / FloatTensor with == fails even for valid outputs.
        self.assertIsInstance(outputs, torch.autograd.Variable)
        self.assertIsInstance(outputs.data, torch.FloatTensor)
예제 #4
0
    def embeddings_forward(self, opt, source_l=3, bsize=1):
        """
        Run a generated source batch through the embeddings and check
        the size of the result.

        When ``opt.decoder_type`` is 'transformer' the source batch is
        concatenated with itself along dimension 0 first, so the expected
        leading size doubles.

        Args:
            opt: set of options
            source_l: Length of generated input sentence
            bsize: Batchsize of generated input
        """
        vocab = self.get_vocab()
        emb = make_embeddings(opt, vocab, [])
        src_batch, _tgt, _lengths = self.get_batch(source_l=source_l,
                                                   bsize=bsize)

        # Leading dimension the embedded batch is expected to have.
        expected_len = source_l
        if opt.decoder_type == 'transformer':
            src_batch = torch.cat([src_batch, src_batch], 0)
            expected_len = source_l * 2

        embedded = emb(src_batch)
        reference = torch.zeros(expected_len, bsize, opt.src_word_vec_size)

        self.assertEqual(embedded.size(), reference.size())