Code example #1
    def forward(self, decoder_input, z, drop_prob, initial_state=None):
        """
        :param decoder_input: tensor with shape of [batch_size, seq_len, embed_size]
        :param z: sequence context with shape of [batch_size, latent_variable_size]
        :param drop_prob: dropout probability applied to the decoder input
        :param initial_state: initial state of decoder rnn

        :return: unnormalized logits of the sentence word distribution
                    with shape of [batch_size, seq_len, word_vocab_size]
                 final rnn state with shape of [num_layers, batch_size, decoder_rnn_size]
        """

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'

        [batch_size, seq_len, _] = decoder_input.size()
        '''
            decoder rnn is conditioned on context via an additional bias = W_cond * z added to every input token
        '''
        decoder_input = F.dropout(decoder_input, drop_prob)

        z = t.cat([z] * seq_len, 1).view(batch_size, seq_len,
                                         self.params.latent_variable_size)
        decoder_input = t.cat([decoder_input, z], 2)

        rnn_out, final_state = self.rnn(decoder_input, initial_state)
        rnn_out = rnn_out.contiguous().view(-1, self.params.decoder_rnn_size)

        result = self.fc(rnn_out)
        result = result.view(batch_size, seq_len, self.params.word_vocab_size)

        return result, final_state
Code example #2
    def forward(self, input):
        """
        :param input: [batch_size, seq_len, embed_size] tensor
        :return: context of input sentences with shape of [batch_size, latent_variable_size]
        """

        [batch_size, seq_len, embed_size] = input.size()
        # reshape to [seq_len, batch_size, embed_size] for the stepwise rnn below;
        # transpose (not view) is required here, since view would interleave
        # batch and time instead of swapping the dimensions
        input = input.transpose(0, 1)
        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'
        ''' Unfold rnn with zero initial state and get its final state from the last layer
        '''

        mux, logvarx = [], []
        hx = None
        for i in range(seq_len - 1):
            _, hx = self.rnn(input[i].unsqueeze(1), hx)
            h = self.ziphidden(*hx)
            mu = self.linear_mu(h)
            logvar = self.linear_var(h)
            h = self.reparameterize(mu, logvar)
            mux.append(mu)
            logvarx.append(logvar)

        return h
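The reparameterize call above is not part of this snippet; a minimal sketch of the standard implementation (an assumption, not this project's code) draws z = mu + eps * std with eps ~ N(0, I). Note also that mux and logvarx are accumulated above but never returned; only the last sample h is.

import torch

def reparameterize(mu, logvar):
    """Sample z ~ N(mu, exp(logvar)) in a differentiable way."""
    std = torch.exp(0.5 * logvar)   # logvar = log(sigma^2), so std = exp(logvar / 2)
    eps = torch.randn_like(std)     # the noise carries no gradient w.r.t. mu / logvar
    return mu + eps * std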
Code example #3
    def only_decoder_beam(self, decoder_input, z, drop_prob, initial_state=None):
        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'

        [beam_batch_size, _, _] = decoder_input.size()

        decoder_input = F.dropout(decoder_input, drop_prob)

        # tile the latent vector across the beam batch before concatenation
        z = z.unsqueeze(0)
        z = t.cat([z] * beam_batch_size, 0)

        decoder_input = t.cat([decoder_input, z], 2)

        rnn_out, final_state = self.rnn(decoder_input, initial_state)

        return rnn_out, final_state
Code example #4
    def forward(self, input):
        """
        :param input: [batch_size, seq_len, embed_size] tensor
        :return: context of input sentences with shape of [batch_size, latent_variable_size]
        """

        [batch_size, seq_len, embed_size] = input.size()

        input = input.view(-1, embed_size)
        input = self.hw1(input)
        input = input.view(batch_size, seq_len, embed_size)

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'

        ''' Unfold rnn with zero initial state and get its final state from the last layer
        '''
        _, (_, final_state) = self.rnn(input)  # note: this reads the LSTM cell state c_n

        final_state = final_state.view(self.params.encoder_num_layers, 2, batch_size, self.params.encoder_rnn_size)
        final_state = final_state[-1]
        h_1, h_2 = final_state[0], final_state[1]
        final_state = t.cat([h_1, h_2], 1)

        return final_state
Code example #5
    def forward(self, input):
        """
        :param input: [batch_size, seq_len, embed_size] tensor
        :return: context of input sentences with shape of [batch_size, latent_variable_size]
        """

        [batch_size, seq_len, embed_size] = input.size()

        input = input.view(-1, embed_size)
        input = self.hw1(input)
        input = input.view(batch_size, seq_len, embed_size)

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'

        ''' Unfold rnn with zero initial state and get its final state from the last layer
        '''
        _, (final_state, _) = self.rnn(input)  # note: this reads the LSTM hidden state h_n

        final_state = final_state.view(self.params.encoder_num_layers, 2, batch_size, self.params.encoder_rnn_size)
        final_state = final_state[-1]
        h_1, h_2 = final_state[0], final_state[1]
        final_state = t.cat([h_1, h_2], 1)

        final_state = self.hw2(final_state)

        return final_state
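The view/index/cat pattern shared by the two encoders above unpacks the final state of a bidirectional LSTM (#4 reads the cell state c_n, #5 the hidden state h_n) and concatenates the two directions of the last layer. A minimal standalone check of the shapes, with illustrative sizes:

import torch
import torch.nn as nn

num_layers, hidden, batch = 2, 8, 4
rnn = nn.LSTM(input_size=5, hidden_size=hidden, num_layers=num_layers,
              bidirectional=True, batch_first=True)
x = torch.randn(batch, 7, 5)                        # [batch, seq_len, embed]
_, (h_n, _) = rnn(x)                                # h_n: [num_layers * 2, batch, hidden]
h_n = h_n.view(num_layers, 2, batch, hidden)[-1]    # last layer, both directions
context = torch.cat([h_n[0], h_n[1]], 1)            # forward ++ backward
print(context.size())                               # torch.Size([4, 16])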
Code example #6
    def forward(self,
                drop_prob,
                encoder_word_input=None,
                encoder_character_input=None,
                decoder_word_input=None,
                z=None):
        """
        :param encoder_word_input: A tensor with shape of [batch_size, seq_len] of Long type
        :param encoder_character_input: A tensor with shape of [batch_size, seq_len, max_word_len] of Long type
        :param decoder_word_input: A tensor with shape of [batch_size, max_seq_len + 1] of Long type

        :param drop_prob: dropout probability applied to the decoder input

        :param z: latent context, provided when sampling

        :return: unnormalized logits of sentence words distribution probabilities
                    with shape of [batch_size, seq_len, word_vocab_size]
                 kld loss estimation
        """

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                  [encoder_word_input, encoder_character_input, decoder_word_input],
                                  True) \
               or (z is not None and decoder_word_input is not None), \
            "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

        if z is None:
            ''' Get context from encoder and sample z ~ N(mu, std)
            '''
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input,
                                           encoder_character_input)

            context = self.encoder(encoder_input)

            mu = self.context_to_mu(context)
            logvar = self.context_to_logvar(context)
            std = t.exp(0.5 * logvar)

            z = Variable(
                t.randn([batch_size, self.params.latent_variable_size]))
            if use_cuda:
                z = z.cuda()

            z = z * std + mu

            kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,
                                1)).mean().squeeze()
        else:
            kld = None

        decoder_input = self.embedding.word_embed(decoder_word_input)
        out = self.decoder(decoder_input, z, drop_prob)

        return out, kld
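The sampling and kld lines above are the standard reparameterization trick and the closed-form KL divergence between N(mu, std^2) and N(0, I). A standalone sanity check against torch.distributions, independent of this project:

import torch

mu, logvar = torch.randn(3, 5), torch.randn(3, 5)
std = torch.exp(0.5 * logvar)
z = torch.randn_like(mu) * std + mu                 # z ~ N(mu, std^2), differentiable in mu and logvar

kld = (-0.5 * torch.sum(logvar - mu.pow(2) - logvar.exp() + 1, 1)).mean()
p = torch.distributions.Normal(mu, std)
q = torch.distributions.Normal(torch.zeros_like(mu), torch.ones_like(std))
assert torch.allclose(kld, torch.distributions.kl_divergence(p, q).sum(1).mean(), atol=1e-6)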
Code example #7
File: decoder_gru.py  Project: xushenkun/vae
    def forward(self, decoder_input, z, drop_prob, initial_state=None):
        """
        :param decoder_input: tensor with shape of [batch_size, max_seq_len + 1, word_embed_size]
        :param z: latent variable with shape of [batch_size, latent_variable_size]
        :param initial_state: initial state of generator rnn
        :return: unnormalized logits of the sentence word distribution
                    with shape of [batch_size, max_seq_len + 1, word_vocab_size]
                    (or word_embed_size when decoder_type == 'gru_emb')
                 final rnn state with shape of [num_layers, batch_size, decoder_rnn_size]
        """

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'

        [batch_size, seq_len, _] = decoder_input.size()
        '''decoder rnn is conditioned on context via additional bias = W_cond * z applied to every input token'''
        z = z.unsqueeze(1).repeat(1, seq_len, 1)
        decoder_input = t.cat([decoder_input, z], 2)
        # z can no longer be None at this point, so `training=z is None` would
        # disable dropout entirely; gate on the module's training flag instead
        decoder_input = F.dropout(decoder_input, drop_prob, training=self.training)

        result, final_state = self.rnn(decoder_input, initial_state)

        result = result.contiguous().view(-1, self.params.decoder_rnn_size)
        if self.params.use_highway:
            result = self.highway(result)
        result = self.fc(result)
        if self.params.decoder_type == 'gru_emb':
            result = result.view(batch_size, seq_len,
                                 self.params.word_embed_size)
        else:
            result = result.view(batch_size, seq_len,
                                 self.params.word_vocab_size)

        return result, final_state
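z.unsqueeze(1).repeat(1, seq_len, 1) here and t.cat([z] * seq_len, 1).view(...) in examples #1, #10 and #11 are two spellings of the same tiling of z over the time dimension; a quick standalone check:

import torch

batch, seq_len, latent = 4, 7, 16
z = torch.randn(batch, latent)
a = z.unsqueeze(1).repeat(1, seq_len, 1)                       # this example
b = torch.cat([z] * seq_len, 1).view(batch, seq_len, latent)   # examples #1, #10, #11
assert torch.equal(a, b)                                       # both are [batch, seq_len, latent]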
Code example #8
    def forward(self, input, State):
        """
        :param input: [batch_size, seq_len, embed_size] tensor
        :return: context of input sentenses with shape of [batch_size, latent_variable_size]
        """

        # print "Three"
        [batch_size, seq_len, embed_size] = input.size()
        # input shape   32    ,    26     ,    825

        input = input.view(-1, embed_size)
        # input shape   832(=32*26),825

        input = self.hw1(input)
        # input shape 832(=32*26),825

        input = input.view(batch_size, seq_len, embed_size)
        # input shape 32    ,    26     ,    825

        assert parameters_allocation_check(
            self
        ), "Invalid CUDA options. Parameters should be allocated in the same memory"
        """ Unfold rnn with zero initial state and get its final state from the last layer
        """

        context_ = []
        for word_id in range(seq_len):
            encoder_outputs, (h_0, final_state) = self.rnn(
                input[:, word_id].unsqueeze(1), State)
            """Inputs: input, (h_0, c_0)
            - **input** of shape `(seq_len, batch, input_size)`: tensor containing the features
            of the input sequence.
            The input can also be a packed variable length sequence.
            Outputs: output, (h_n, c_n)
            - **output** of shape `(seq_len, batch, num_directions * hidden_size)`: tensor
            containing the output features `(h_t)` from the last layer of the LSTM,
            for each `t`. If a :class:`torch.nn.utils.rnn.PackedSequence` has been
            given as the input, the output will also be a packed sequence.
            """
            State = (h_0, final_state)

            c_0 = final_state

            final_state = final_state.view(self.params.encoder_num_layers, 2,
                                           batch_size,
                                           self.params.encoder_rnn_size)
            final_state = final_state[-1]
            h_1, h_2 = final_state[0], final_state[1]
            final_state = t.cat([h_1, h_2], 1)

            context_.append(final_state)

        return encoder_outputs, final_state, h_0, c_0, context_
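Feeding the LSTM one timestep at a time while threading the state through, as the loop above does, produces the same outputs as a single full-sequence call; only the per-step access to the hidden state differs. A standalone check with illustrative sizes:

import torch
import torch.nn as nn

rnn = nn.LSTM(input_size=5, hidden_size=6, batch_first=True)
x = torch.randn(2, 4, 5)                            # [batch, seq_len, embed]
full_out, _ = rnn(x)

state, steps = None, []
for i in range(x.size(1)):
    out, state = rnn(x[:, i].unsqueeze(1), state)   # one timestep, carry (h, c)
    steps.append(out)
assert torch.allclose(torch.cat(steps, 1), full_out, atol=1e-6)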
Code example #9
    def only_decoder_beam(self, decoder_input, z, drop_prob, encoder_outputs, initial_state=None):

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'
        [beam_batch_size, _, _] = decoder_input.size()
        '''
            decoder rnn is conditioned on context via an additional bias = W_cond * z added to every input token
        '''
        decoder_input = F.dropout(decoder_input, drop_prob)
        z = z.unsqueeze(0)
        z = t.cat([z] * beam_batch_size, 0)
        decoder_input = t.cat([decoder_input, z], 2)
        rnn_out, final_state = self.batch_unrolling(decoder_input, initial_state, False)

        return rnn_out, final_state
Code example #10
    def forward(self, decoder_input, z, drop_prob, initial_state=None):
        assert parameters_allocation_check(self)
        [batch_size, seq_len, _] = decoder_input.size()

        decoder_input = F.dropout(decoder_input, drop_prob)

        z = t.cat([z] * seq_len, 1).view(batch_size, seq_len, self.params.latent_variable_size)
        decoder_input = t.cat([decoder_input, z], 2)

        rnn_out, final_state = self.rnn(decoder_input, initial_state)
        rnn_out = rnn_out.contiguous().view(-1, self.params.decoder_rnn_size)

        result = self.fc(rnn_out)
        result = result.view(batch_size, seq_len, self.params.word_vocab_size)

        return result, final_state
Code example #11
    def forward(self, decoder_input, z, drop_prob):
        """
        :param decoder_input: tensor with shape of [batch_size, seq_len, embed_size]
        :param z: sequence latent variable with shape of [batch_size, latent_variable_size]
        :param drop_prob: dropout probability applied to the decoder input

        :return: unnormalized logits of the sentence word distribution
                 with shape of [batch_size, seq_len, word_vocab_size]
        """

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'

        [batch_size, seq_len, _] = decoder_input.size()
        '''
            decoder is conditioned on context via an additional bias = W_cond * z added to every input token
        '''

        z = t.cat([z] * seq_len, 1).view(batch_size, seq_len,
                                         self.params.latent_variable_size)
        decoder_input = t.cat([decoder_input, z], 2)
        decoder_input = F.dropout(decoder_input, drop_prob)

        # x is tensor with shape [batch_size, input_size=in_channels, seq_len=input_width]
        x = decoder_input.transpose(1, 2).contiguous()

        for layer, kernel in enumerate(self.kernels):
            # apply conv layer with non-linearity and drop last elements of sequence to perform input shifting
            x = F.conv1d(x,
                         kernel,
                         bias=self.biases[layer],
                         dilation=self.params.decoder_dilations[layer],
                         padding=self.params.decoder_paddings[layer])

            x_width = x.size()[2]
            x = x[:, :, :(x_width -
                          self.params.decoder_paddings[layer])].contiguous()

            x = F.relu(x)

        x = x.transpose(1, 2).contiguous()
        x = x.view(-1, self.out_size)
        x = self.fc(x)
        result = x.view(-1, seq_len, self.params.word_vocab_size)

        return result
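The pad-then-truncate step in the loop above is the usual way to make a dilated 1-D convolution causal: with padding = dilation * (kernel_width - 1), dropping that many trailing positions leaves each output depending only on the current and earlier inputs, so the decoder cannot peek at future tokens. A standalone sketch with illustrative sizes:

import torch
import torch.nn.functional as F

in_ch, out_ch, width, dilation = 3, 5, 10, 2
kernel = torch.randn(out_ch, in_ch, 3)      # kernel width 3
padding = dilation * (3 - 1)                # = 4

x = torch.randn(1, in_ch, width)
y = F.conv1d(x, kernel, dilation=dilation, padding=padding)
y = y[:, :, :y.size(2) - padding]           # drop the positions that saw "future" inputs
assert y.size(2) == width                   # output stays aligned with the input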
Code example #12
File: decoder.py  Project: Joise1/Homework
    def only_decoder_beam(self,
                          decoder_input,
                          z,
                          drop_prob,
                          initial_state=None):

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'

        [beam_batch_size, _, _] = decoder_input.size()
        '''
            decoder rnn is conditioned on context via an additional bias = W_cond * z added to every input token
        '''
        decoder_input = F.dropout(decoder_input, drop_prob)

        # tile the latent vector across the beam batch before concatenation
        z = z.unsqueeze(0)
        z = t.cat([z] * beam_batch_size, 0)

        decoder_input = t.cat([decoder_input, z], 2)

        rnn_out, final_state = self.rnn(decoder_input, initial_state)

        return rnn_out, final_state
Code example #13
    def forward(self, input_labels, out_labels, num_sampled):
        """
        :param input_labels: Tensor with shape of [batch_size] of Long type
        :param out_labels: Tensor with shape of [batch_size] of Long type
        :param num_sampled: An int, the number of noise samples drawn per example
        :return: Loss estimation with shape of [batch_size]
            loss defined in Mikolov et al. Distributed Representations of Words and Phrases and their Compositionality
            papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf
        """

        assert parameters_allocation_check(self), \
            """
            Invalid CUDA options. out_embed and in_embed parameters both should be stored in the same memory
            got out_embed.is_cuda = {}, in_embed.is_cuda = {}
            """.format(self.out_embed.weight.is_cuda, self.in_embed.weight.is_cuda)

        use_cuda = self.out_embed.weight.is_cuda

        [batch_size] = input_labels.size()

        input = self.in_embed(input_labels)
        output = self.out_embed(out_labels)

        noise = Variable(
            t.Tensor(batch_size,
                     num_sampled).uniform_(0, self.num_classes - 1).long())
        if use_cuda:
            noise = noise.cuda()
        noise = self.out_embed(noise).neg()

        log_target = (input * output).sum(1).squeeze().sigmoid().log()
        ''' ∑[batch_size, num_sampled, embed_size] * [batch_size, embed_size, 1] ->
            ∑[batch_size, num_sampled] -> [batch_size] '''
        sum_log_sampled = t.bmm(
            noise, input.unsqueeze(2)).sigmoid().log().sum(1).squeeze()

        loss = log_target + sum_log_sampled

        return -loss
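For reference, the objective computed above is Mikolov et al.'s negative-sampling loss, log σ(u_o·v_c) + Σ_k log σ(−u_k·v_c), negated so it can be minimized. A standalone illustration, with random tensors standing in for the in_embed / out_embed lookups:

import torch

v_c = torch.randn(4, 8)        # "input" (center) embeddings   [batch, embed]
u_o = torch.randn(4, 8)        # "output" (context) embeddings [batch, embed]
u_k = torch.randn(4, 5, 8)     # 5 noise embeddings per row    [batch, k, embed]

log_target = (v_c * u_o).sum(1).sigmoid().log()
sum_log_sampled = torch.bmm(u_k.neg(), v_c.unsqueeze(2)).sigmoid().log().sum(1).squeeze()
loss = -(log_target + sum_log_sampled)   # [batch], as in the snippet above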
Code example #14
    def forward(self,
                drop_prob,
                encoder_word_input=None,
                encoder_character_input=None,
                encoder_word_input_2=None,
                encoder_character_input_2=None,
                decoder_word_input_2=None,
                decoder_character_input_2=None,
                z=None,
                initial_state=None):

        assert parameters_allocation_check(self)
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                  [encoder_word_input, encoder_character_input, decoder_word_input_2],
                                  True) \
               or (z is not None and decoder_word_input_2 is not None)

        if z is None:
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input,
                                           encoder_character_input)

            [batch_size_2, _] = encoder_word_input_2.size()

            encoder_input_2 = self.embedding_2(encoder_word_input_2,
                                               encoder_character_input_2)

            context, h_0, c_0 = self.encoder(encoder_input, None)

            State = (h_0, c_0)
            context_2, _, _ = self.encoder_2(encoder_input_2, State)

            mu = self.context_to_mu(context_2)
            logvar = self.context_to_logvar(context_2)
            std = t.exp(0.5 * logvar)

            z = Variable(
                t.randn([batch_size, self.params.latent_variable_size]))
            if use_cuda:
                z = z.cuda()

            z = z * std + mu

            kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,
                                1)).mean().squeeze()

            # encoder_input = self.embedding(encoder_word_input, encoder_character_input)
            # _ , h_0 , c_0 = self.encoder_3(encoder_input, None)
            initial_state = State

        else:
            kld = None
            mu = None
            std = None

        decoder_input_2 = self.embedding_2.word_embed(decoder_word_input_2)
        out, final_state = self.decoder(decoder_input_2, z, drop_prob,
                                        initial_state)

        return out, final_state, kld, mu, std
Code example #15
    def forward(self,
                drop_prob,
                encoder_word_input=None,
                encoder_character_input=None,
                encoder_word_input_2=None,
                encoder_character_input_2=None,
                decoder_word_input_2=None,
                decoder_character_input_2=None,
                z=None,
                initial_state=None):

        #Modified the parameters of forward function according to Encoder-2
        """
        :param encoder_word_input: A tensor with shape of [batch_size, seq_len] of Long type
        :param encoder_character_input: A tensor with shape of [batch_size, seq_len, max_word_len] of Long type
        :param decoder_word_input: A tensor with shape of [batch_size, max_seq_len + 1] of Long type
        :param initial_state: initial state of decoder rnn in order to perform sampling

        :param drop_prob: dropout probability applied to the decoder input

        :param z: latent context, provided when sampling

        :return: unnormalized logits of sentence words distribution probabilities
                    with shape of [batch_size, seq_len, word_vocab_size]
                 final rnn state with shape of [num_layers, batch_size, decoder_rnn_size]
        """

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                  [encoder_word_input, encoder_character_input, decoder_word_input_2],
                                  True) \
            or (z is not None and decoder_word_input_2 is not None), \
            "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

        if z is None:
            ''' Get context from encoder and sample z ~ N(mu, std)
            '''
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input,
                                           encoder_character_input)
            ''' ===================================================Doing the same for encoder-2===================================================
            '''
            [batch_size_2, _] = encoder_word_input_2.size()

            encoder_input_2 = self.embedding_2(encoder_word_input_2,
                                               encoder_character_input_2)
            ''' ==================================================================================================================================
            '''

            context, h_0, c_0 = self.encoder(encoder_input, None)

            State = (h_0, c_0)  #Final state of Encoder-1
            context_2, _, _ = self.encoder_2(encoder_input_2,
                                             State)  #Encoder_2 for Ques_2

            mu = self.context_to_mu(context_2)
            logvar = self.context_to_logvar(context_2)
            std = t.exp(0.5 * logvar)

            z = Variable(
                t.randn([batch_size, self.params.latent_variable_size]))
            if use_cuda:
                z = z.cuda()

            z = z * std + mu

            kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,
                                1)).mean().squeeze()

            # encoder_input = self.embedding(encoder_word_input, encoder_character_input)
            # _ , h_0 , c_0 = self.encoder_3(encoder_input, None)
            initial_state = State  #Final state of Encoder-1

        else:
            kld = None
            mu = None
            std = None

        decoder_input_2 = self.embedding_2.word_embed(
            decoder_word_input_2
        )  # What to do with this decoder input ? --> Slightly resolved
        out, final_state = self.decoder(
            decoder_input_2, z, drop_prob,
            initial_state)  # Take a look at the decoder

        return out, final_state, kld, mu, std
Code example #16
    def forward(self,
                unk_idx: int,
                drop_prob: float,
                encoder_word_input: object = None,
                encoder_character_input: object = None,
                encoder_word_input_2: object = None,
                encoder_character_input_2: object = None,
                decoder_word_input_2: object = None,
                decoder_character_input_2: object = None,
                z: object = None,
                initial_state: tuple = None) -> tuple:
        """
        :param encoder_word_input: A tensor with shape of [batch_size, seq_len] of Long type
        :param encoder_character_input: A tensor with shape of [batch_size, seq_len, max_word_len] of Long type
        :param decoder_word_input: A tensor with shape of [batch_size, max_seq_len + 1] of Long type
        :param initial_state: initial state of decoder rnn in order to perform sampling

        :param drop_prob: dropout probability applied to the decoder input

        :param z: latent context, provided when sampling

        :return: unnormalized logits of sentence words distribution probabilities
                    with shape of [batch_size, seq_len, word_vocab_size]
                 final rnn state with shape of [num_layers, batch_size, decoder_rnn_size]
        """

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                  [encoder_word_input, encoder_character_input, decoder_word_input_2],
                                  True) \
            or (z is not None and decoder_word_input_2 is not None), \
            "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

        if z is None:
            ''' Get context from encoder and sample z ~ N(mu, std) '''
            [batch_size, _] = encoder_word_input.size()
            encoder_input = self.embedding(encoder_word_input,
                                           encoder_character_input, unk_idx,
                                           drop_prob)
            ''' ===================================================Doing the same for encoder-2=================================================== '''
            [batch_size_2, _] = encoder_word_input_2.size()
            encoder_input_2 = self.embedding_2(encoder_word_input_2,
                                               encoder_character_input_2,
                                               unk_idx, drop_prob)
            ''' ================================================================================================================================== '''
            enc_out_original, context, h_0, c_0, _ = self.encoder_original(
                encoder_input, None)
            state_original = (h_0, c_0)  # Final state of Encoder-1
            enc_out_paraphrase, context_2, h_0, c_0, context_ = self.encoder_paraphrase(
                encoder_input_2, state_original)  # Encoder_2 for Ques_2
            state_paraphrase = (h_0, c_0)  # Final state of Encoder-2

            if context_ is not None:

                mu_ = []
                logvar_ = []
                for entry in context_:
                    mu_.append(self.context_to_mu(entry))
                    logvar_.append(self.context_to_logvar(entry))

                std = t.exp(0.5 * logvar_[-1])

                z = Variable(
                    t.randn([batch_size, self.params.latent_variable_size]))
                if use_cuda:
                    z = z.cuda()

                z = z * std + mu_[-1]

                mu = t.stack(mu_)
                logvar = t.stack(logvar_)

                kld = -0.5 * t.sum(1 + logvar - mu.pow(2) - logvar.exp())
                kld = kld / mu.shape[0]

            else:

                mu = self.context_to_mu(context_2)
                logvar = self.context_to_logvar(context_2)
                std = t.exp(0.5 * logvar)

                z = Variable(
                    t.randn([batch_size, self.params.latent_variable_size]))
                if use_cuda:
                    z = z.cuda()

                z = z * std + mu

                kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,
                                    1)).mean().squeeze()

        else:
            kld = None
            mu = None
            std = None

        decoder_input_2 = self.embedding_2.word_embed(decoder_word_input_2)
        out, final_state = self.decoder(decoder_input_2, z, drop_prob,
                                        enc_out_paraphrase, state_original)

        return out, final_state, kld, mu, std
Code example #17
    def forward(
        self,
        unk_idx,
        drop_prob,
        encoder_word_input=None,
        encoder_character_input=None,
        encoder_word_input_2=None,
        encoder_character_input_2=None,
        decoder_word_input_2=None,
        decoder_character_input_2=None,
        z=None,
        initial_state=None,
    ):

        # Modified the parameters of forward function according to Encoder-2
        """
        :param encoder_word_input: A tensor with shape of [batch_size, seq_len] of Long type
        :param encoder_character_input: A tensor with shape of [batch_size, seq_len, max_word_len] of Long type
        :param decoder_word_input: A tensor with shape of [batch_size, max_seq_len + 1] of Long type
        :param initial_state: initial state of decoder rnn in order to perform sampling

        :param drop_prob: dropout probability applied to the decoder input

        :param z: latent context, provided when sampling

        :return: unnormalized logits of sentence words distribution probabilities
                    with shape of [batch_size, seq_len, word_vocab_size]
                 final rnn state with shape of [num_layers, batch_size, decoder_rnn_size]
        """

        assert parameters_allocation_check(
            self
        ), "Invalid CUDA options. Parameters should be allocated in the same memory"
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert (
            z is None and fold(
                lambda acc, parameter: acc and parameter is not None,
                [
                    encoder_word_input, encoder_character_input,
                    decoder_word_input_2
                ],
                True,
            ) or (z is not None and decoder_word_input_2 is not None)
        ), "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

        if z is None:
            """Get context from encoder and sample z ~ N(mu, std)"""  # 把word和character拼接成一个向量
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input,
                                           encoder_character_input, unk_idx,
                                           drop_prob)
            """ ===================================================Doing the same for encoder-2===================================================
            """
            [batch_size_2, _] = encoder_word_input_2.size()

            encoder_input_2 = self.embedding_2(encoder_word_input_2,
                                               encoder_character_input_2,
                                               unk_idx, drop_prob)
            """ ==================================================================================================================================
            """

            enc_out_original, context, h_0, c_0, _ = self.encoder_original(
                encoder_input, None)
            state_original = (h_0, c_0)  # Final state of Encoder-1 (original sentence encoding)
            # state_original = context
            enc_out_paraphrase, context_2, h_0, c_0, context_ = self.encoder_paraphrase(
                encoder_input_2,
                state_original)  # Encoder_2 for Ques_2: continue with the paraphrase sentence
            state_paraphrase = (h_0, c_0)  # Final state of Encoder-2 (paraphrase sentence encoding)
            # state_paraphrase = context_2

            if context_ is not None:

                mu_ = []
                logvar_ = []
                for entry in context_:
                    mu_.append(self.context_to_mu(entry))
                    logvar_.append(self.context_to_logvar(entry))

                z_sampled = self.sample_gaussian(batch_size)
                if use_cuda:
                    z_sampled = z_sampled.cuda()

                mu = t.stack(mu_)
                logvar = t.stack(logvar_)

                if self.params.wae:
                    z_tilda = self.sample_z_tilda_from_posterior(
                        z_sampled, logvar_[-1], mu_[-1], 1).cuda()
                    # accumulate the KL term over the per-step (mu, logvar) pairs
                    kld = 0
                    for i in range(len(mu_)):
                        p = t.distributions.Normal(mu_[i], t.exp(logvar_[i]))
                        q = t.distributions.Normal(
                            mu_[i],
                            t.ones(logvar.size()).cuda())
                        kld += t.sum(t.distributions.kl_divergence(p, q))
                    kld = kld / len(mu_)
                    wasserstein_loss = self.imq_kernel(
                        z_sampled, z_tilda, self.params.latent_variable_size)
                    kld = 0.01 * kld + 10 * wasserstein_loss
                else:
                    z_tilda = self.sample_z_tilda_from_posterior(
                        z_sampled, logvar_[-1], mu_[-1], 0.5).cuda()
                    kld = 0
                    for i in range(len(mu_)):
                        kld += (-0.5 * t.sum(
                            logvar_[i] - t.pow(mu_[i], 2) - t.exp(logvar_[i]) +
                            1, 1)).mean().squeeze()
                    kld = kld / len(mu_)

            else:

                mu = self.context_to_mu(context_2)
                logvar = self.context_to_logvar(context_2)

                z_sampled = self.sample_gaussian(batch_size)
                if use_cuda:
                    z_sampled = z_sampled.cuda()

                if self.params.wae:
                    z_tilda = self.sample_z_tilda_from_posterior(
                        z_sampled, logvar, mu, 1).cuda()
                    p = t.distributions.Normal(mu, t.exp(logvar))
                    q = t.distributions.Normal(mu,
                                               t.ones(logvar.size()).cuda())
                    kld = t.sum(t.distributions.kl_divergence(p, q))
                    wasserstein_loss = self.imq_kernel(
                        z_sampled, z_tilda, self.params.latent_variable_size)
                    kld = 0.01 * kld + 10 * wasserstein_loss
                else:
                    z_tilda = self.sample_z_tilda_from_posterior(
                        z_sampled, logvar, mu, 0.5).cuda()
                    kld = (-0.5 *
                           t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,
                                 1)).mean().squeeze()
        else:
            kld = None
            mu = None
            std = None
            z_tilda = z  # a latent passed in by the caller is used directly below

        # What to do with this decoder input ? --> Slightly resolved
        decoder_input_2 = self.embedding_2.word_embed(decoder_word_input_2)
        # if context_ is not None:
        #     decoder_input_2 = t.ones(decoder_input_2.size()).cuda()
        out, final_state = self.decoder(decoder_input_2, z_tilda, drop_prob,
                                        enc_out_paraphrase, state_original)

        return out, final_state, kld, mu, None
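The imq_kernel call above is not shown in this snippet. A common formulation in the WAE literature (Tolstikhin et al., 2018) is an inverse-multiquadratics-kernel MMD between prior samples and posterior samples; the sketch below is that standard estimator under stated assumptions, not this project's implementation:

import torch

def imq_mmd(z_prior, z_post, latent_size, scales=(0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0)):
    """Unbiased IMQ-kernel MMD estimate between two equal-sized sample sets."""
    n = z_prior.size(0)
    c_base = 2.0 * latent_size          # common heuristic for a unit-variance Gaussian prior
    def sqdist(a, b):
        return (a.unsqueeze(1) - b.unsqueeze(0)).pow(2).sum(2)
    mmd = 0.0
    for s in scales:
        c = s * c_base
        k_pp = c / (c + sqdist(z_prior, z_prior))
        k_qq = c / (c + sqdist(z_post, z_post))
        k_pq = c / (c + sqdist(z_prior, z_post))
        # drop diagonal terms of the within-sample kernels for an unbiased estimate
        mmd = mmd + (k_pp.sum() - k_pp.diagonal().sum()
                     + k_qq.sum() - k_qq.diagonal().sum()) / (n * (n - 1)) \
                  - 2.0 * k_pq.mean()
    return mmd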