Example #1
0
    def initialize(self, batch):
        """Build the inference graph for one batch and expose its tensors.

        :param batch: Batch object supplying inputs, lengths, and targets
        """
        with tf.variable_scope('inference') as scope:
            # Training mode is signalled by the presence of linear targets.
            lin_targets = batch._lin_targets
            training = lin_targets is not None
            n_examples = batch.get_size()

            # Encode the input embeddings.
            enc = Encoder(is_training=training)
            enc_outputs = enc.encode(batch.get_embedds(),
                                     batch.get_input_lengths())

            # Teacher-forced helper when training, free-running helper otherwise.
            if training:
                dec_helper = TrainingHelper(batch.get_inputs(),
                                            batch.get_mel_targets(),
                                            self._hparams.num_mels,
                                            self._hparams.outputs_per_step)
            else:
                dec_helper = TestingHelper(n_examples,
                                           self._hparams.num_mels,
                                           self._hparams.outputs_per_step)
            dec = Decoder(dec_helper, is_training=training)
            mels, linears, final_state = dec.decode(enc_outputs, n_examples)

            # Stack the attention history and transpose axes [1, 2, 0]
            # (presumably to (batch, enc_steps, dec_steps) — confirm against
            # alignment_history's layout).
            attn = tf.transpose(final_state[0].alignment_history.stack(),
                                [1, 2, 0])

            # Publish graph tensors on the instance for training/eval code.
            (self.inputs, self.input_lengths,
             self.mel_targets, self.linear_targets) = batch.get_all()
            self.mel_outputs = mels
            self.linear_outputs = linears
            self.alignments = attn
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
class Seq2Seq(nn.Module):
    """Encoder-decoder sequence-to-sequence model with attention.

    A shared embedding layer feeds both the encoder (via :meth:`encode`)
    and the decoder (passed in at construction).
    """

    def __init__(self,
                 vocab_size,
                 embed_size,
                 hidden_size,
                 rnn_type='LSTM',
                 num_layers=1,
                 bidirectional=False,
                 attention_type='Bilinear',
                 dropout=0):
        """
        :param vocab_size: size of the (shared source/target) vocabulary
        :param embed_size: dimension of the word embeddings
        :param hidden_size: RNN hidden state dimension
        :param rnn_type: RNN cell type passed to Encoder/Decoder (default 'LSTM')
        :param num_layers: number of stacked RNN layers
        :param bidirectional: whether the encoder RNN is bidirectional
        :param attention_type: attention mechanism name passed to the Decoder
        :param dropout: dropout probability for encoder and decoder
        """
        super(Seq2Seq, self).__init__()
        # Single embedding table shared between encoder input and decoder.
        self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                      embedding_dim=embed_size)
        self.encoder = Encoder(embed_size=embed_size,
                               hidden_size=hidden_size,
                               rnn_type=rnn_type,
                               num_layers=num_layers,
                               bidirectional=bidirectional,
                               dropout=dropout)
        self.decoder = Decoder(embedding=self.embedding,
                               hidden_size=hidden_size,
                               rnn_type=rnn_type,
                               num_layers=num_layers,
                               attention_type=attention_type,
                               dropout=dropout)

    def load_pretrained_embeddings(self, path):
        """Load pretrained word vectors from a ``.npy`` file and freeze them.

        :param path: path to a numpy array of shape (vocab_size, embed_size)
        """
        self.embedding.weight.data.copy_(torch.from_numpy(np.load(path)))
        # Freeze: don't update word vectors during training.
        self.embedding.weight.requires_grad = False

    def _prepare(self, src):
        """Encode *src* and compute the decoder's initial output.

        Shared preamble of :meth:`forward`, :meth:`decode` and
        :meth:`beam_decode`.

        :param src: LongTensor (batch_size, src_time_step)
        :return: (src_memory, src_mask, init_states, init_output)
        """
        src_memory, src_mask, src_lens, init_states = self.encode(src)
        init_output = self.decoder.get_init_output(src_memory, src_lens,
                                                   init_states)
        return src_memory, src_mask, init_states, init_output

    def forward(self, src, trg):
        """Run teacher-forced decoding for training.

        :param src: LongTensor (batch_size, src_time_step)
        :param trg: LongTensor (batch_size, trg_time_step)
        :return: decoder output (see Decoder.__call__)
        """
        src_memory, src_mask, init_states, init_output = self._prepare(src)
        return self.decoder(src_memory, src_mask, init_states, init_output,
                            trg)

    def encode(self, src):
        """Embed and encode a padded source batch.

        :param src: LongTensor (batch_size, time_step), PAD_INDEX-padded
        :return: (src_memory, src_mask, src_lens, final_states)
        """
        # Clip trailing all-pad columns before encoding.
        src = sentence_clip(src)
        src_mask = (src != PAD_INDEX)
        src_lens = src_mask.long().sum(dim=1, keepdim=False)
        # Tensor(batch_size, time_step, embed_size)
        src_embedding = self.embedding(src)
        src_memory, final_states = self.encoder(src_embedding, src_lens)
        return src_memory, src_mask, src_lens, final_states

    def decode(self, src, max_len):
        """Greedy decoding up to *max_len* steps.

        :param src: LongTensor (batch_size, src_time_step)
        :param max_len: int, maximum number of decoding steps
        :return: decoder outputs (see Decoder.decode)
        """
        src_memory, src_mask, init_states, init_output = self._prepare(src)
        return self.decoder.decode(src_memory, src_mask, init_states,
                                   init_output, max_len)

    def beam_decode(self, src, max_len, beam_size):
        """Beam-search decoding up to *max_len* steps.

        :param src: LongTensor (batch_size, src_time_step)
        :param max_len: int, maximum number of decoding steps
        :param beam_size: int, beam width
        :return: decoder outputs (see Decoder.beam_decode)
        """
        src_memory, src_mask, init_states, init_output = self._prepare(src)
        return self.decoder.beam_decode(src_memory, src_mask, init_states,
                                        init_output, max_len, beam_size)