Example #1
    def representation(self) -> tf.Tensor:
        """
        Computes the hidden representation of the input sequences.

        The hidden representation of an input sequence is computed by applying a linear transformation with hyperbolic
        tangent activation to the final state of the encoder. The output size of the linear transformation matches the
        state vector size of the decoder.

        Returns
        -------
        tf.Tensor
            The hidden representation of the input sequences, of shape [max_time * batch_size, decoder_state_size].
            A copy reshaped to [batch_size, max_time, decoder_state_size] is added to the "representation" collection.
        """

        # shape: [max_time * batch_size, decoder.state_size]
        internal_rep = tf.tanh(
            linear(self.encoder.final_state,
                   self.decoder_architecture.state_size))

        # shape: [max_time, batch_size, decoder.state_size]
        rep = tf.reshape(internal_rep, [
            self.max_time, self.batch_size,
            self.decoder_architecture.state_size
        ])
        # shape: [batch_size, max_time, decoder.state_size]
        rep = tf.transpose(rep, perm=[1, 0, 2])

        tf.add_to_collection("representation", rep)
        summaries.variable_summaries(rep)

        return internal_rep
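
The `linear` helper is not shown in this listing. Purely as a sketch, assuming a TF1-style helper that applies an affine projection to the innermost dimension (the parameter names follow the call sites above; the variable scoping and initializers are assumptions):

import tensorflow as tf


def linear(input: tf.Tensor, output_size: int, scope: str = "linear") -> tf.Tensor:
    # Affine projection y = xW + b over the innermost dimension.
    input_size = input.shape.as_list()[-1]

    with tf.variable_scope(scope):
        weights = tf.get_variable("weights", shape=[input_size, output_size],
                                  initializer=tf.glorot_uniform_initializer())
        bias = tf.get_variable("bias", shape=[output_size],
                               initializer=tf.zeros_initializer())

    return tf.matmul(input, weights) + bias
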
Example #2
    def reconstruction(self) -> tf.Tensor:
        """
        Computes the reconstruction of the input sequence.
        
        Although the decoder RNN implicitly adds an output projection, another linear transformation is applied to the
        decoder output. This keeps the model structure consistent between the unidirectional and bidirectional cases,
        since in the bidirectional case the decoder output sequence has twice as many features as the input sequence
        and therefore requires an additional linear projection anyway.
        
        Returns
        -------
        tf.Tensor
            The reconstruction of the input sequence, of shape [max_time, batch_size, num_features]
        """

        # shape: [num_windows, max_time * batch_size, decoder_frequency.output_size]
        decoder_output = self.decoder.output
        # num_windows must be known at graph-construction time
        num_windows = decoder_output.shape.as_list()[0]

        # shape: [max_time * batch_size, num_windows, decoder_frequency.output_size]
        decoder_output = tf.transpose(decoder_output, [1, 0, 2])
        # shape: [max_time * batch_size, num_windows * decoder_frequency.output_size]
        decoder_output = tf.reshape(decoder_output, [
            self.max_time * self.batch_size,
            num_windows * self.decoder.output_size
        ])

        # shape: [max_time * batch_size, num_features]
        reconstruction = tf.tanh(
            linear(decoder_output, output_size=self.num_features))

        reconstruction = restore_time(inputs=reconstruction,
                                      max_time=self.max_time,
                                      batch_size=self.batch_size,
                                      num_features=self.num_features)

        tf.add_to_collection("reconstruction", reconstruction)
        summaries.reconstruction_summaries(reconstruction, self.targets)

        return reconstruction
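
Neither `flatten_time` nor `restore_time` is defined in the listing. Judging from the shape comments, they are plausibly thin reshape wrappers; a minimal sketch consistent with the call sites:

import tensorflow as tf


def flatten_time(inputs: tf.Tensor) -> tf.Tensor:
    # [max_time, batch_size, num_features] -> [max_time * batch_size, num_features]
    num_features = inputs.shape.as_list()[-1]
    return tf.reshape(inputs, [-1, num_features])


def restore_time(inputs: tf.Tensor, max_time: int, batch_size: int,
                 num_features: int) -> tf.Tensor:
    # [max_time * batch_size, num_features] -> [max_time, batch_size, num_features]
    return tf.reshape(inputs, [max_time, batch_size, num_features])
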
    def decoder_frequency_initial_state(self) -> tf.Tensor:
        """
        The initial states of the frequency decoder RNN.
        
        The outputs of the time decoder RNN at each time step are passed through a linear transformation layer with
        hyperbolic tangent activation, and used as the initial states of the frequency decoder RNN.
        
        Returns
        -------
        tf.Tensor
            The initial states of the frequency decoder RNN
        """
        # shape: [max_time, batch_size, decoder_time.output_size]
        decoder_frequency_initial_state = self.decoder_time.output
        # shape: [max_time * batch_size, decoder_time.output_size]
        decoder_frequency_initial_state = flatten_time(decoder_frequency_initial_state)

        # shape: [max_time * batch_size, decoder_frequency.state_size]
        decoder_frequency_initial_state = tf.tanh(linear(decoder_frequency_initial_state,
                                                         output_size=self.f_decoder_architecture.state_size))

        return decoder_frequency_initial_state
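
The listing does not show how this initial state is consumed. As a hypothetical wiring with assumed sizes, it could seed a GRU-based frequency decoder via `tf.nn.dynamic_rnn`:

import tensorflow as tf

max_time, batch_size, state_size, num_freq = 10, 4, 64, 32  # assumed sizes

# Stand-in for the tensor returned above: [max_time * batch_size, state_size]
initial_state = tf.zeros([max_time * batch_size, state_size])
# Zero decoder inputs; the real model would feed (shifted) target windows here.
decoder_inputs = tf.zeros([max_time * batch_size, num_freq, 1])

cell = tf.nn.rnn_cell.GRUCell(num_units=state_size)
decoder_output, _ = tf.nn.dynamic_rnn(cell, inputs=decoder_inputs,
                                      initial_state=initial_state)
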
    def representation(self) -> tf.Tensor:
        """
        Computes the hidden representation of the input sequences.
        
        The hidden representation of an input sequence is computed by applying a linear transformation with hyperbolic
        tangent activation to the final state of the time encoder. The output size of the linear transformation matches 
        the state vector size of the time decoder.
        
        Returns
        -------
        tf.Tensor
            The hidden representation of the input sequences, of shape [batch_size, time_decoder_state_size]
        """

        representation = tf.tanh(linear(input=self.encoder_time.final_state,
                                        output_size=self.t_decoder_architecture.state_size))

        tf.add_to_collection("representation", representation)
        summaries.variable_summaries(representation)

        return representation
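
Both `representation` variants register their result with `tf.add_to_collection("representation", ...)`, so downstream code can recover the tensor from the graph collection once the model is built. A minimal sketch (actual model inputs, fed via `feed_dict`, are elided):

import tensorflow as tf

representation = tf.get_collection("representation")[0]

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    # In practice, model inputs would be supplied via feed_dict here.
    hidden = session.run(representation)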