def decoder_frequency_inputs(self) -> tf.Tensor:
        """
        Computes the frequency decoder RNN input sequences.

        At each frequency step, the input to the frequency decoder RNN is the expected output at the previous
        frequency step. The sequences are therefore built from `self.targets`: the targets are split into windows
        along the frequency axis, the last window is dropped, and an all-zero window is prepended, which shifts the
        windows by one step along the frequency axis.

        Returns
        -------
        tf.Tensor
           The frequency decoder RNN input sequences, of shape [num_windows, batch_size*max_time, window_width]
        """
        # shape: [max_time * batch_size, num_features]
        targets_flat = flatten_time(self.targets)
        # shape: [num_windows, max_time * batch_size, window_width]
        target_windows = window_features(inputs=targets_flat,
                                         window_width=self.frequency_window_width,
                                         window_overlap=self.frequency_window_overlap)

        # Drop the last window. A negative slice bound is used instead of reading the static shape, because
        # `shape.as_list()[0]` is None when the number of windows is not statically known, and `None - 1` would
        # raise a TypeError.
        shifted_windows = target_windows[:-1, :, :]

        # Prepend one zero-filled window so that the first decoder step receives an all-zero input and the
        # number of windows is restored.
        return tf.pad(shifted_windows, paddings=[[1, 0], [0, 0], [0, 0]], mode="constant")
Exemple #2
0
    def encoder_inputs(self) -> tf.Tensor:
        """
        Builds the input sequences consumed by the encoder RNN.

        The input spectrograms are first flattened along the time axis, then cut along the frequency axis into
        windows of width `frequency_window_width` with overlap `frequency_window_overlap`. The resulting windows
        are the encoder RNN inputs, in window order.

        Returns
        -------
        tf.Tensor
            The input sequences for the encoder
        """
        # shape: [max_time * batch_size, num_features]
        flattened = flatten_time(self.inputs)

        width = self.frequency_window_width
        overlap = self.frequency_window_overlap

        # shape: [num_windows, max_time * batch_size, window_width]
        windows = window_features(inputs=flattened, window_width=width, window_overlap=overlap)
        return windows