コード例 #1
    def forward(self, decoder_input, z, drop_prob, initial_state=None):
        """
        Decode a batch of sequences, conditioning every time step on the context ``z``.

        :param decoder_input: tensor with shape of [batch_size, seq_len, embed_size]
        :param z: sequence context with shape of [batch_size, latent_variable_size]
        :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout
        :param initial_state: initial state of decoder rnn

        :return: unnormalized logits of sentence words distribution probabilities
                    with shape of [batch_size, seq_len, word_vocab_size]
                 final rnn state with shape of [num_layers, batch_size, decoder_rnn_size]
        """

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'

        [batch_size, seq_len, _] = decoder_input.size()

        # decoder rnn is conditioned on context via additional bias = W_cond * z to every input token
        # NOTE(review): F.dropout is called without a `training` argument, so it does not
        # follow the module's train/eval mode - confirm callers pass drop_prob=0 at inference.
        decoder_input = F.dropout(decoder_input, drop_prob)

        # Repeat z seq_len times and reshape so each time step carries its own copy of z.
        z = t.cat([z] * seq_len, 1).view(batch_size, seq_len, self.params.latent_variable_size)
        decoder_input = t.cat([decoder_input, z], 2)

        rnn_out, final_state = self.rnn(decoder_input, initial_state)
        # Flatten batch and time so the output layer is applied to every step at once.
        rnn_out = rnn_out.contiguous().view(-1, self.params.decoder_rnn_size)

        result = self.fc(rnn_out)
        result = result.view(batch_size, seq_len, self.params.word_vocab_size)

        return result, final_state
コード例 #2
    def forward(self, input):
        """
        Feed the input to the encoder rnn one time step at a time and sample a latent vector.

        :param input: [batch_size, seq_len, embed_size] tensor
        :return: context of input sentences with shape of [batch_size, latent_variable_size]
                 (the output of ``self.reparameterize`` at the last processed step)
        :raises ValueError: if ``seq_len`` is less than 2, because no step would be processed
        """

        [batch_size, seq_len, embed_size] = input.size()
        if seq_len < 2:
            # The loop below covers seq_len - 1 steps; with none of them there is nothing
            # to return (this used to surface as an UnboundLocalError on `h`).
            raise ValueError('seq_len must be at least 2, got {}'.format(seq_len))

        # Make the tensor time-major. A plain view(seq_len, batch_size, embed_size) only
        # relabels the axes and mixes different sentences and time steps together, so the
        # first two axes are swapped instead.
        input = input.transpose(0, 1)

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'

        # Unfold rnn with zero initial state and get its final state from the last layer
        hx = None
        # NOTE(review): only the first seq_len - 1 steps are fed to the rnn; the last
        # time step is never used - confirm this is intended.
        for step_input in input[:seq_len - 1]:
            # step_input is [batch_size, embed_size]; a length-1 axis is inserted at
            # position 1 before the rnn call.
            _, hx = self.rnn(step_input.unsqueeze(1), hx)
            h = self.ziphidden(*hx)
            mu = self.linear_mu(h)
            logvar = self.linear_var(h)
            h = self.reparameterize(mu, logvar)

        return h
コード例 #3
    def only_decoder_beam(self, decoder_input, z, drop_prob, initial_state=None):
        """
        Run the decoder rnn over a beam of candidate inputs that share one context.

        ``z`` gains a leading axis and is copied once per beam entry (the size of the
        first axis of ``decoder_input``) before being concatenated onto the inputs
        along the last axis.

        :param decoder_input: 3-d tensor whose first axis indexes the beam entries
        :param z: context tensor shared by all beam entries
        :param drop_prob: dropout probability applied to ``decoder_input``
        :param initial_state: optional initial state forwarded to ``self.rnn``
        :return: rnn output and final rnn state, exactly as returned by ``self.rnn``
        """
        assert parameters_allocation_check(self)

        beam_size, _, _ = decoder_input.size()

        dropped_input = F.dropout(decoder_input, drop_prob)

        # One copy of z per beam entry, stacked along a new leading axis.
        shared_context = z.unsqueeze(0)
        tiled_context = t.cat([shared_context for _ in range(beam_size)], 0)

        rnn_input = t.cat([dropped_input, tiled_context], 2)

        output, state = self.rnn(rnn_input, initial_state)
        return output, state
コード例 #4
    def forward(self, input):
        """
        Encode a batch of embedded sentences into one context vector per sentence.

        :param input: [batch_size, seq_len, embed_size] tensor
        :return: context of input sentences with shape of [batch_size, latent_variable_size]
                 (the two halves of the last layer's state concatenated along axis 1)
        """

        [batch_size, seq_len, embed_size] = input.size()

        # hw1 is applied to every token independently: flatten, transform, restore the shape.
        input = input.view(-1, embed_size)
        input = self.hw1(input)
        input = input.view(batch_size, seq_len, embed_size)

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'

        # Unfold rnn with zero initial state and get its final state from the last layer
        # NOTE(review): this keeps the SECOND element of the state tuple; if self.rnn is
        # an nn.LSTM that is the cell state c_n rather than h_n - confirm this is intended.
        _, (_, final_state) = self.rnn(input)

        # Split the stacked state into (layers, 2, batch, size) and keep the last layer.
        final_state = final_state.view(self.params.encoder_num_layers, 2, batch_size, self.params.encoder_rnn_size)
        last_layer = final_state[-1]
        h_1, h_2 = last_layer[0], last_layer[1]

        return t.cat([h_1, h_2], 1)
コード例 #5
    def forward(self, input):
        """
        Encode a batch of embedded sentences into one context vector per sentence.

        :param input: [batch_size, seq_len, embed_size] tensor
        :return: context of input sentences with shape of [batch_size, latent_variable_size]
                 (the two halves of the last layer's state, concatenated and passed
                 through ``self.hw2``)
        """

        [batch_size, seq_len, embed_size] = input.size()

        # hw1 is applied to every token independently: flatten, transform, restore the shape.
        input = input.view(-1, embed_size)
        input = self.hw1(input)
        input = input.view(batch_size, seq_len, embed_size)

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'

        # Unfold rnn with zero initial state and get its final state from the last layer.
        # The first element of the returned state tuple is the one kept here.
        _, (final_state, _) = self.rnn(input)

        # Split the stacked state into (layers, 2, batch, size) and keep the last layer.
        final_state = final_state.view(self.params.encoder_num_layers, 2, batch_size, self.params.encoder_rnn_size)
        last_layer = final_state[-1]
        h_1, h_2 = last_layer[0], last_layer[1]
        final_state = t.cat([h_1, h_2], 1)

        return self.hw2(final_state)
コード例 #6
    def forward(self,
        :param encoder_word_input: An tensor with shape of [batch_size, seq_len] of Long type
        :param encoder_character_input: An tensor with shape of [batch_size, seq_len, max_word_len] of Long type
        :param decoder_word_input: An tensor with shape of [batch_size, max_seq_len + 1] of Long type

        :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout

        :param z: context if sampling is performing

        :return: unnormalized logits of sentence words distribution probabilities
                    with shape of [batch_size, seq_len, word_vocab_size]
                 kld loss estimation

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                  [encoder_word_input, encoder_character_input, decoder_word_input],
                                  True) \
               or (z is not None and decoder_word_input is not None), \
            "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

        if z is None:
            ''' Get context from encoder and sample z ~ N(mu, std)
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input,

            context = self.encoder(encoder_input)

            mu = self.context_to_mu(context)
            logvar = self.context_to_logvar(context)
            std = t.exp(0.5 * logvar)

            z = Variable(
                t.randn([batch_size, self.params.latent_variable_size]))
            if use_cuda:
                z = z.cuda()

            z = z * std + mu

            kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,
            kld = None

        decoder_input = self.embedding.word_embed(decoder_word_input)
        out = self.decoder(decoder_input, z, drop_prob)

        return out, kld
コード例 #7
ファイル: decoder_gru.py プロジェクト: xushenkun/vae
    def forward(self, decoder_input, z, drop_prob, initial_state=None):
        :param decoder_input: tensor with shape of [batch_size, max_seq_len + 1, word_embed_size]
        :param z: latent variable with shape of [batch_size, latent_variable_size]
        :param initial_state: initial state of generator rnn
        :return: unnormalized logits of sentense words distribution probabilities
                    with shape of [batch_size, max_seq_len + 1, word_embed_size]
                 final rnn state with shape of [num_layers, batch_size, decoder_rnn_size]

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'

        [batch_size, seq_len, _] = decoder_input.size()
        '''decoder rnn is conditioned on context via additional bias = W_cond * z applied to every input token'''
        z = z.unsqueeze(1).repeat(1, seq_len, 1)
        decoder_input = t.cat([decoder_input, z], 2)
        decoder_input = F.dropout(decoder_input, drop_prob, training=z is None)

        result, final_state = self.rnn(decoder_input, initial_state)

        result = result.contiguous().view(-1, self.params.decoder_rnn_size)
        if self.params.use_highway:
            result = self.highway(result)
        result = self.fc(result)
        if self.params.decoder_type == 'gru_emb':
            result = result.view(batch_size, seq_len,
            result = result.view(batch_size, seq_len,

        return result, final_state
コード例 #8
    def forward(self, input, State):
        :param input: [batch_size, seq_len, embed_size] tensor
        :return: context of input sentenses with shape of [batch_size, latent_variable_size]

        # print "Three"
        [batch_size, seq_len, embed_size] = input.size()
        # input shape   32    ,    26     ,    825

        input = input.view(-1, embed_size)
        # input shape   832(=32*26),825

        input = self.hw1(input)
        # input shape 832(=32*26),825

        input = input.view(batch_size, seq_len, embed_size)
        # input shape 32    ,    26     ,    825

        assert parameters_allocation_check(
        ), "Invalid CUDA options. Parameters should be allocated in the same memory"
        """ Unfold rnn with zero initial state and get its final state from the last layer

        context_ = []
        for word_id in range(seq_len):
            encoder_outputs, (h_0, final_state) = self.rnn(
                input[:, word_id].unsqueeze(1), State)
            """Inputs: input, (h_0, c_0)
            - **input** of shape `(seq_len, batch, input_size)`: tensor containing the features
            of the input sequence.
            The input can also be a packed variable length sequence.
            Outputs: output, (h_n, c_n)
            - **output** of shape `(seq_len, batch, num_directions * hidden_size)`: tensor
            containing the output features `(h_t)` from the last layer of the LSTM,
            for each `t`. If a :class:`torch.nn.utils.rnn.PackedSequence` has been
            given as the input, the output will also be a packed sequence.
            State = (h_0, final_state)

            c_0 = final_state

            final_state = final_state.view(self.params.encoder_num_layers, 2,
            final_state = final_state[-1]
            h_1, h_2 = final_state[0], final_state[1]
            final_state = t.cat([h_1, h_2], 1)


        return encoder_outputs, final_state, h_0, c_0, context_
コード例 #9
    def only_decoder_beam(self, decoder_input, z, drop_prob, encoder_outputs, initial_state=None):
        """
        Run one decoding pass over a beam of candidate inputs that share one context.

        :param decoder_input: 3-d tensor whose first axis indexes the beam entries
        :param z: context tensor shared by all beam entries; it gains a leading axis
                  and is copied once per beam entry
        :param drop_prob: dropout probability applied to ``decoder_input``
        :param encoder_outputs: accepted for interface compatibility; not used by this method
        :param initial_state: optional initial state forwarded to ``self.batch_unrolling``
        :return: output and final state as returned by ``self.batch_unrolling``
        """

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'
        [beam_batch_size, _, _] = decoder_input.size()

        # decoder rnn is conditioned on context via additional bias = W_cond * z to every input token
        decoder_input = F.dropout(decoder_input, drop_prob)

        # One copy of z per beam entry, stacked along a new leading axis.
        z = z.unsqueeze(0)
        z = t.cat([z] * beam_batch_size, 0)
        decoder_input = t.cat([decoder_input, z], 2)

        rnn_out, final_state = self.batch_unrolling(decoder_input, initial_state, False)

        return rnn_out, final_state
コード例 #10
    def forward(self, decoder_input, z, drop_prob, initial_state=None):
        """
        Decode a batch of sequences, attaching the context ``z`` to every time step.

        :param decoder_input: 3-d tensor unpacked as [batch_size, seq_len, feature size]
        :param z: context tensor; repeated seq_len times and reshaped to
                  [batch_size, seq_len, latent_variable_size]
        :param drop_prob: dropout probability applied to ``decoder_input``
        :param initial_state: optional initial state forwarded to ``self.rnn``
        :return: output of ``self.fc`` reshaped to [batch_size, seq_len, word_vocab_size],
                 and the final state returned by ``self.rnn``
        """
        assert parameters_allocation_check(self)
        batch_size, seq_len, _ = decoder_input.size()

        dropped_input = F.dropout(decoder_input, drop_prob)

        # Repeat z along axis 1, then fold the copies into a per-step axis.
        repeated = t.cat([z for _ in range(seq_len)], 1)
        per_step_z = repeated.view(batch_size, seq_len, self.params.latent_variable_size)
        rnn_input = t.cat([dropped_input, per_step_z], 2)

        rnn_out, final_state = self.rnn(rnn_input, initial_state)

        # Flatten batch and time so the output layer sees one row per step.
        flat_out = rnn_out.contiguous().view(-1, self.params.decoder_rnn_size)
        logits = self.fc(flat_out).view(batch_size, seq_len, self.params.word_vocab_size)

        return logits, final_state
コード例 #11
    def forward(self, decoder_input, z, drop_prob):
        :param decoder_input: tensor with shape of [batch_size, seq_len, embed_size]
        :param z: sequence latent variable with shape of [batch_size, latent_variable_size]
        :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout

        :return: unnormalized logits of sentense words distribution probabilities
                 with shape of [batch_size, seq_len, word_vocab_size]

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'

        [batch_size, seq_len, _] = decoder_input.size()
            decoder is conditioned on context via additional bias = W_cond * z to every input token

        z = t.cat([z] * seq_len, 1).view(batch_size, seq_len,
        decoder_input = t.cat([decoder_input, z], 2)
        decoder_input = F.dropout(decoder_input, drop_prob)

        # x is tensor with shape [batch_size, input_size=in_channels, seq_len=input_width]
        x = decoder_input.transpose(1, 2).contiguous()

        for layer, kernel in enumerate(self.kernels):
            # apply conv layer with non-linearity and drop last elements of sequence to perfrom input shifting
            x = F.conv1d(x,

            x_width = x.size()[2]
            x = x[:, :, :(x_width -

            x = F.relu(x)

        x = x.transpose(1, 2).contiguous()
        x = x.view(-1, self.out_size)
        x = self.fc(x)
        result = x.view(-1, seq_len, self.params.word_vocab_size)

        return result
コード例 #12
    def forward(self, decoder_input, z, drop_prob):
        :param decoder_input: tensor with shape of [batch_size, seq_len, embed_size]
        :param z: sequence latent variable with shape of [batch_size, latent_variable_size]
        :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout

        :return: unnormalized logits of sentense words distribution probabilities
                 with shape of [batch_size, seq_len, word_vocab_size]

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'

        [batch_size, seq_len, _] = decoder_input.size()

            decoder is conditioned on context via additional bias = W_cond * z to every input token

        z = t.cat([z] * seq_len, 1).view(batch_size, seq_len, self.params.latent_variable_size)
        decoder_input = t.cat([decoder_input, z], 2)
        decoder_input = F.dropout(decoder_input, drop_prob)

        # x is tensor with shape [batch_size, input_size=in_channels, seq_len=input_width]
        x = decoder_input.transpose(1, 2).contiguous()

        for layer, kernel in enumerate(self.kernels):
            # apply conv layer with non-linearity and drop last elements of sequence to perfrom input shifting
            x = F.conv1d(x, kernel,

            x_width = x.size()[2]
            x = x[:, :, :(x_width - self.params.decoder_paddings[layer])].contiguous()

            x = F.relu(x)

        x = x.transpose(1, 2).contiguous()
        x = x.view(-1, self.out_size)
        x = self.fc(x)
        result = x.view(-1, seq_len, self.params.word_vocab_size)

        return result
コード例 #13
ファイル: decoder.py プロジェクト: Joise1/Homework
    def only_decoder_beam(self,

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'

        #         print decoder_input.size()

        [beam_batch_size, _, _] = decoder_input.size()
            decoder rnn is conditioned on context via additional bias = W_cond * z to every input token
        decoder_input = F.dropout(decoder_input, drop_prob)

        z = z.unsqueeze(0)

        #         print z.size()

        z = t.cat([z] * beam_batch_size, 0)

        #         print z.size()
        #         z = z.contiguous().view(1, -1)

        #         z = z.view(beam_batch_size, self.params.latent_variable_size)

        #         print z.size()

        decoder_input = t.cat([decoder_input, z], 2)

        #         print "decoder_input:",decoder_input.size()

        rnn_out, final_state = self.rnn(decoder_input, initial_state)

        #         print "rnn_out:",rnn_out.size()
        #         print "final_state_1:",final_state[0].size()
        #         print "final_state_1:",final_state[1].size()

        return rnn_out, final_state
コード例 #14
    def forward(self, input_labes, out_labels, num_sampled):
        """
        Compute the negative-sampling loss for a batch of (input, output) label pairs.

        :param input_labes: Tensor with shape of [batch_size] of Long type
        :param out_labels: Tensor with shape of [batch_size] of Long type
        :param num_sampled: An int. The number of sampled from noise examples
        :return: Loss estimation with shape of [batch_size]
            loss defined in Mikolov et al. Distributed Representations of Words and Phrases and their Compositionality
        """

        assert parameters_allocation_check(self), \
            """
            Invalid CUDA options. out_embed and in_embed parameters both should be stored in the same memory
            got out_embed.is_cuda = {}, in_embed.is_cuda = {}
            """.format(self.out_embed.weight.is_cuda, self.in_embed.weight.is_cuda)

        use_cuda = self.out_embed.weight.is_cuda

        [batch_size] = input_labes.size()

        input = self.in_embed(input_labes)
        output = self.out_embed(out_labels)

        # Draw num_sampled noise labels per example.
        # NOTE(review): the tensor constructor was reconstructed as t.Tensor(batch_size,
        # num_sampled); the [batch_size, num_sampled] shape follows from the bmm shapes
        # below - confirm against the project history.
        noise = Variable(
            t.Tensor(batch_size,
                     num_sampled).uniform_(0, self.num_classes - 1).long())
        if use_cuda:
            noise = noise.cuda()
        # Negated so the sigmoid below scores the sampled pairs as negatives.
        noise = self.out_embed(noise).neg()

        # view(batch_size) instead of a bare squeeze(): squeeze() would also drop the
        # batch axis when batch_size == 1 and return a 0-d tensor.
        log_target = (input * output).sum(1).view(batch_size).sigmoid().log()

        # sum([batch_size, num_sampled, embed_size] * [batch_size, embed_size, 1]) ->
        # sum([batch_size, num_sampled]) -> [batch_size]
        sum_log_sampled = t.bmm(
            noise, input.unsqueeze(2)).sigmoid().log().sum(1).view(batch_size)

        loss = log_target + sum_log_sampled

        return -loss
コード例 #15
    def forward(self,

        assert parameters_allocation_check(self)
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                  [encoder_word_input, encoder_character_input, decoder_word_input_2],
                                  True) \
               or (z is not None and decoder_word_input_2 is not None)

        if z is None:
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input,

            [batch_size_2, _] = encoder_word_input_2.size()

            encoder_input_2 = self.embedding_2(encoder_word_input_2,

            context, h_0, c_0 = self.encoder(encoder_input, None)

            State = (h_0, c_0)
            context_2, _, _ = self.encoder_2(encoder_input_2, State)

            mu = self.context_to_mu(context_2)
            logvar = self.context_to_logvar(context_2)
            std = t.exp(0.5 * logvar)

            z = Variable(
                t.randn([batch_size, self.params.latent_variable_size]))
            if use_cuda:
                z = z.cuda()

            z = z * std + mu

            kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,

            # encoder_input = self.embedding(encoder_word_input, encoder_character_input)
            # _ , h_0 , c_0 = self.encoder_3(encoder_input, None)
            initial_state = State

            kld = None
            mu = None
            std = None

        decoder_input_2 = self.embedding_2.word_embed(decoder_word_input_2)
        out, final_state = self.decoder(decoder_input_2, z, drop_prob,

        return out, final_state, kld, mu, std
コード例 #16
    def forward(self,

        #Modified the parameters of forward function according to Encoder-2
        :param encoder_word_input: An tensor with shape of [batch_size, seq_len] of Long type
        :param encoder_character_input: An tensor with shape of [batch_size, seq_len, max_word_len] of Long type
        :param decoder_word_input: An tensor with shape of [batch_size, max_seq_len + 1] of Long type
        :param initial_state: initial state of decoder rnn in order to perform sampling

        :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout

        :param z: context if sampling is performing

        :return: unnormalized logits of sentence words distribution probabilities
                    with shape of [batch_size, seq_len, word_vocab_size]
                 final rnn state with shape of [num_layers, batch_size, decoder_rnn_size]

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                  [encoder_word_input, encoder_character_input, decoder_word_input_2],
                                  True) \
            or (z is not None and decoder_word_input_2 is not None), \
            "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

        if z is None:
            ''' Get context from encoder and sample z ~ N(mu, std)
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input,
            ''' ===================================================Doing the same for encoder-2===================================================
            [batch_size_2, _] = encoder_word_input_2.size()

            encoder_input_2 = self.embedding_2(encoder_word_input_2,
            ''' ==================================================================================================================================

            context, h_0, c_0 = self.encoder(encoder_input, None)

            State = (h_0, c_0)  #Final state of Encoder-1
            context_2, _, _ = self.encoder_2(encoder_input_2,
                                             State)  #Encoder_2 for Ques_2

            mu = self.context_to_mu(context_2)
            logvar = self.context_to_logvar(context_2)
            std = t.exp(0.5 * logvar)

            z = Variable(
                t.randn([batch_size, self.params.latent_variable_size]))
            if use_cuda:
                z = z.cuda()

            z = z * std + mu

            kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,

            # encoder_input = self.embedding(encoder_word_input, encoder_character_input)
            # _ , h_0 , c_0 = self.encoder_3(encoder_input, None)
            initial_state = State  #Final state of Encoder-1

            kld = None
            mu = None
            std = None

        decoder_input_2 = self.embedding_2.word_embed(
        )  # What to do with this decoder input ? --> Slightly resolved
        out, final_state = self.decoder(
            decoder_input_2, z, drop_prob,
            initial_state)  # Take a look at the decoder

        return out, final_state, kld, mu, std
コード例 #17
    def forward(self,
                unk_idx: int,
                drop_prob: float,
                encoder_word_input: object = None,
                encoder_character_input: object = None,
                encoder_word_input_2: object = None,
                encoder_character_input_2: object = None,
                decoder_word_input_2: object = None,
                decoder_character_input_2: object = None,
                z: object = None,
                initial_state: tuple = None) -> tuple:
        :param encoder_word_input: An tensor with shape of [batch_size, seq_len] of Long type
        :param encoder_character_input: An tensor with shape of [batch_size, seq_len, max_word_len] of Long type
        :param decoder_word_input: An tensor with shape of [batch_size, max_seq_len + 1] of Long type
        :param initial_state: initial state of decoder rnn in order to perform sampling

        :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout

        :param z: context if sampling is performing

        :return: unnormalized logits of sentence words distribution probabilities
                    with shape of [batch_size, seq_len, word_vocab_size]
                 final rnn state with shape of [num_layers, batch_size, decoder_rnn_size]

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                  [encoder_word_input, encoder_character_input, decoder_word_input_2],
                                  True) \
            or (z is not None and decoder_word_input_2 is not None), \
            "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

        if z is None:
            ''' Get context from encoder and sample z ~ N(mu, std) '''
            [batch_size, _] = encoder_word_input.size()
            encoder_input = self.embedding(encoder_word_input,
                                           encoder_character_input, unk_idx,
            ''' ===================================================Doing the same for encoder-2=================================================== '''
            [batch_size_2, _] = encoder_word_input_2.size()
            encoder_input_2 = self.embedding_2(encoder_word_input_2,
                                               unk_idx, drop_prob)
            ''' ================================================================================================================================== '''
            enc_out_original, context, h_0, c_0, _ = self.encoder_original(
                encoder_input, None)
            state_original = (h_0, c_0)  # Final state of Encoder-1
            enc_out_paraphrase, context_2, h_0, c_0, context_ = self.encoder_paraphrase(
                encoder_input_2, state_original)  # Encoder_2 for Ques_2
            state_paraphrase = (h_0, c_0)  # Final state of Encoder-2

            if context_ is not None:

                mu_ = []
                logvar_ = []
                for entry in context_:

                std = t.exp(0.5 * logvar_[-1])

                z = Variable(
                    t.randn([batch_size, self.params.latent_variable_size]))
                if use_cuda:
                    z = z.cuda()

                z = z * std + mu_[-1]

                mu = t.stack(mu_)
                logvar = t.stack(logvar_)

                kld = -0.5 * t.sum(1 + logvar - mu.pow(2) - logvar.exp())
                kld = kld / mu.shape[0]


                mu = self.context_to_mu(context_2)
                logvar = self.context_to_logvar(context_2)
                std = t.exp(0.5 * logvar)

                z = Variable(
                    t.randn([batch_size, self.params.latent_variable_size]))
                if use_cuda:
                    z = z.cuda()

                z = z * std + mu

                kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,

            kld = None
            mu = None
            std = None

        decoder_input_2 = self.embedding_2.word_embed(decoder_word_input_2)
        out, final_state = self.decoder(decoder_input_2, z, drop_prob,
                                        enc_out_paraphrase, state_original)

        return out, final_state, kld, mu, std
コード例 #18
    def forward(

        # Modified the parameters of forward function according to Encoder-2
        :param encoder_word_input: An tensor with shape of [batch_size, seq_len] of Long type
        :param encoder_character_input: An tensor with shape of [batch_size, seq_len, max_word_len] of Long type
        :param decoder_word_input: An tensor with shape of [batch_size, max_seq_len + 1] of Long type
        :param initial_state: initial state of decoder rnn in order to perform sampling

        :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout

        :param z: context if sampling is performing

        :return: unnormalized logits of sentence words distribution probabilities
                    with shape of [batch_size, seq_len, word_vocab_size]
                 final rnn state with shape of [num_layers, batch_size, decoder_rnn_size]

        assert parameters_allocation_check(
        ), "Invalid CUDA options. Parameters should be allocated in the same memory"
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert (
            z is None and fold(
                lambda acc, parameter: acc and parameter is not None,
                    encoder_word_input, encoder_character_input,
            ) or (z is not None and decoder_word_input_2 is not None)
        ), "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

        if z is None:
            """Get context from encoder and sample z ~ N(mu, std)"""  # 把word和character拼接成一个向量
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input,
                                           encoder_character_input, unk_idx,
            """ ===================================================Doing the same for encoder-2===================================================
            [batch_size_2, _] = encoder_word_input_2.size()

            encoder_input_2 = self.embedding_2(encoder_word_input_2,
                                               unk_idx, drop_prob)
            """ ==================================================================================================================================

            enc_out_original, context, h_0, c_0, _ = self.encoder_original(
                encoder_input, None)
            state_original = (h_0, c_0)  # Final state of Encoder-1 原始句子编码
            # state_original = context
            enc_out_paraphrase, context_2, h_0, c_0, context_ = self.encoder_paraphrase(
                state_original)  # Encoder_2 for Ques_2  接下去跟释义句编码
            state_paraphrase = (h_0, c_0)  # Final state of Encoder-2 原始句子编码
            # state_paraphrase = context_2

            if context_ is not None:

                mu_ = []
                logvar_ = []
                for entry in context_:

                z_sampled = self.sample_gaussian(batch_size)
                if use_cuda:
                    z_sampled = z_sampled.cuda()

                mu = t.stack(mu_)
                logvar = t.stack(logvar_)

                if self.params.wae:
                    z_tilda = self.sample_z_tilda_from_posterior(
                        z_sampled, logvar_[-1], mu_[-1], 1).cuda()
                    p = t.distributions.Normal(mu, t.exp(logvar))
                    q = t.distributions.Normal(mu,
                    kld = t.sum(t.distributions.kl_divergence(p, q))
                    kld = kld / mu.shape[0]
                    kld = 0
                    for i in range(len(mu_)):
                        p = t.distributions.Normal(mu_[i], t.exp(logvar_[i]))
                        q = t.distributions.Normal(
                        kld += t.sum(t.distributions.kl_divergence(p, q))
                    kld = kld / len(mu_)
                    wasserstein_loss = self.imq_kernel(
                        z_sampled, z_tilda, self.params.latent_variable_size)
                    kld = 0.01 * kld + 10 * wasserstein_loss
                    z_tilda = self.sample_z_tilda_from_posterior(
                        z_sampled, logvar_[-1], mu_[-1], 0.5).cuda()
                    kld = 0
                    for i in range(len(mu_)):
                        kld += (-0.5 * t.sum(
                            logvar_[i] - t.pow(mu_[i], 2) - t.exp(logvar_[i]) +
                            1, 1)).mean().squeeze()
                    kld = kld / len(mu_)


                mu = self.context_to_mu(context_2)
                logvar = self.context_to_logvar(context_2)

                z_sampled = self.sample_gaussian(batch_size)
                if use_cuda:
                    z_sampled = z_sampled.cuda()

                if self.params.wae:
                    z_tilda = self.sample_z_tilda_from_posterior(
                        z_sampled, logvar, mu, 1).cuda()
                    p = t.distributions.Normal(mu, t.exp(logvar))
                    q = t.distributions.Normal(mu,
                    kld = t.sum(t.distributions.kl_divergence(p, q))
                    wasserstein_loss = self.imq_kernel(
                        z_sampled, z_tilda, self.params.latent_variable_size)
                    kld = 0.01 * kld + 10 * wasserstein_loss
                    z_tilda = self.sample_z_tilda_from_posterior(
                        z_sampled, logvar, mu, 0.5).cuda()
                    kld = (-0.5 *
                           t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,
            kld = None
            mu = None
            std = None

        # What to do with this decoder input ? --> Slightly resolved
        decoder_input_2 = self.embedding_2.word_embed(decoder_word_input_2)
        # if context_ is not None:
        #     decoder_input_2 = t.ones(decoder_input_2.size()).cuda()
        out, final_state = self.decoder(decoder_input_2, z_tilda, drop_prob,
                                        enc_out_paraphrase, state_original)

        return out, final_state, kld, mu, None