def decoder_frequency_inputs(self) -> tf.Tensor:
        """
        Build the input sequences fed to the frequency decoder RNN.

        The targets are flattened over time and split into windows along the frequency axis. The resulting window
        sequence is then shifted by one step: the last window is dropped and one constant-padded window is inserted
        at the front, so that the decoder input at each frequency step is the target window of the previous
        frequency step.

        Returns
        -------
        tf.Tensor
           The frequency decoder RNN input sequences, of shape [num_windows, batch_size*max_time, window_width]
        """
        # shape: [max_time * batch_size, num_features]
        flat_targets = flatten_time(self.targets)

        # shape: [num_windows, max_time * batch_size, window_width]
        target_windows = window_features(inputs=flat_targets,
                                         window_width=self.frequency_window_width,
                                         window_overlap=self.frequency_window_overlap)

        # number of windows is read from the static shape of the windowed tensor
        num_windows = target_windows.shape.as_list()[0]

        # drop the last window along the first axis ...
        shifted_windows = target_windows[:num_windows - 1, :, :]

        # ... and pad one window in front of it, leaving the other two axes untouched
        front_padding = [[1, 0], [0, 0], [0, 0]]

        return tf.pad(shifted_windows, paddings=front_padding, mode="constant")
Example #2
0
    def encoder_inputs(self) -> tf.Tensor:
        """
        Build the input sequences for the encoder.

        The inputs are first flattened over time and then cut into windows of width `frequency_window_width` with
        overlap `frequency_window_overlap`. The windowed tensor is returned as the encoder input sequence.

        Returns
        -------
        tf.Tensor
            The input sequences for the encoder
        """
        # shape: [max_time * batch_size, num_features]
        flat_inputs = flatten_time(self.inputs)

        # shape: [num_windows, max_time * batch_size, window_width]
        encoder_windows = window_features(inputs=flat_inputs,
                                          window_width=self.frequency_window_width,
                                          window_overlap=self.frequency_window_overlap)

        return encoder_windows
    def decoder_frequency_initial_state(self) -> tf.Tensor:
        """
        Compute the initial states of the frequency decoder RNN.

        The output of the time decoder RNN is flattened over time, passed through the `linear` layer with an output
        size equal to the state size of the frequency decoder architecture, and squashed with a hyperbolic tangent.

        Returns
        -------
        tf.Tensor
            The initial states of the frequency decoder RNN
        """
        # shape: [max_time * batch_size, decoder_time.output_size]
        time_outputs_flat = flatten_time(self.decoder_time.output)

        # project onto the state size expected by the frequency decoder
        state_size = self.f_decoder_architecture.state_size
        projected = linear(time_outputs_flat, output_size=state_size)

        return tf.tanh(projected)
Example #4
0
    def _feed_previous_rnn(self, cell, initial_state,
                           reverse: bool) -> (tf.Tensor, Any):
        """
        Implements a unidirectional RNN with stochastic feeding of previous RNN outputs.
        
        Since the required functionality is not directly supported by the `tf.contrib.rnn` module, this method
        implements a custom loop function used with the `raw_rnn` function.

        At step 0 the RNN receives an all-zero input. At every later step `t`, the input for each batch element is 
        either the projected RNN output of the previous step (where the feed mask is set) or element `t - 1` of the 
        (possibly reversed) input sequence. The feed mask is drawn once per call, with one entry per step and batch 
        element; if `feed_previous_prob` is None, previous outputs are never fed back.

        The same output projection (`weights`, `bias`, followed by tanh) is used both for the values fed back inside 
        the loop and for the returned output sequence.
        
        Parameters
        ----------
        cell
            The RNN cell to use
        initial_state
            A possibly nested tuple of initial states for the RNN cells
        reverse: bool
            Whether to reverse the input sequence (along its first axis)

        Returns
        -------
        output: tf.Tensor
            The output sequence of the RNN after applying the output projection
        final_state: Any
            A possible nested tuple of final states of the RNN cells, with the same structure as the `initial_state`
            tuple
        """
        # input sequence, optionally reversed along axis 0 (the axis indexed by the loop step below)
        if reverse:
            input_sequence = tf.reverse(self.inputs, axis=[0])
        else:
            input_sequence = self.inputs

        # output projection: maps cell outputs (num_units) to feature vectors (num_features)
        weights = tf.get_variable(name="weights",
                                  shape=[self.num_units, self.num_features],
                                  dtype=tf.float32)
        bias = tf.get_variable(name="bias", shape=[self.num_features])

        # feed mask of shape [max_step, batch_size]; True means "feed the previous projected output"
        if self.feed_previous_prob is None:
            # no probability configured: always use the input sequence
            feed_mask = tf.fill(dims=[self.max_step, self.batch_size],
                                value=False)
        else:
            # an entry is True when its uniform sample in [0, 1) falls below feed_previous_prob
            feed_mask = tf.random_uniform([self.max_step, self.batch_size],
                                          minval=0,
                                          maxval=1)
            feed_mask = feed_mask < self.feed_previous_prob

        def loop_fn_initial():
            # set up step 0: zero input and the caller-supplied initial state
            # NOTE(review): presumably self.sequence_length holds one length per batch element -- confirm
            initial_elements_finished = (0 >= self.sequence_length)

            initial_input = tf.zeros([self.batch_size, self.num_features],
                                     dtype=tf.float32)
            initial_cell_state = initial_state
            initial_cell_output = None
            initial_loop_state = None  # we don't need to pass any additional information

            return (initial_elements_finished, initial_input,
                    initial_cell_state, initial_cell_output,
                    initial_loop_state)

        def loop_fn_transition(time, cell_output, cell_state,
                               previous_loop_state):
            # set up step `time` (> 0) from the cell output and state of the previous step
            elements_finished = (time >= self.sequence_length)

            # feed_mask[time - 1] has one entry per batch element, so the choice between the projected previous
            # output (x) and the input sequence element (y) is made separately for each example
            next_input = tf.where(
                feed_mask[time - 1],
                x=tf.tanh(tf.matmul(cell_output, weights) + bias),
                y=input_sequence[time - 1])
            loop_state = None

            # the raw cell output is emitted; the projection is applied to the stacked outputs after the loop
            return (elements_finished, next_input, cell_state, cell_output,
                    loop_state)

        def loop_fn(time, previous_output, previous_state,
                    previous_loop_state):
            # dispatch between the initial call (no previous state yet) and all subsequent calls
            if previous_state is None:  # time == 0
                assert previous_output is None and previous_state is None
                return loop_fn_initial()
            else:
                return loop_fn_transition(time, previous_output,
                                          previous_state, previous_loop_state)

        outputs_ta, final_state, _ = tf.nn.raw_rnn(cell=cell,
                                                   loop_fn=loop_fn,
                                                   swap_memory=True)
        # stack the per-step outputs into one tensor, then merge the time axis so the projection is a single matmul
        outputs = outputs_ta.stack()
        outputs = flatten_time(outputs)
        outputs = tf.tanh(tf.matmul(outputs, weights) + bias)
        # split the merged axis back into separate time and batch axes
        outputs = restore_time(outputs,
                               max_time=self.max_step,
                               batch_size=self.batch_size,
                               num_features=self.num_features)
        # re-attach static shape information taken from the inputs
        outputs.set_shape(
            [self.inputs.shape[0], self.inputs.shape[1], self.num_features])

        return outputs, final_state