def decoder_frequency_inputs(self) -> tf.Tensor:
    """
    Computes the frequency decoder RNN input sequences.

    At each frequency step, the input to the frequency decoder RNN is the expected output at the previous
    frequency step. The sequences are therefore built from the windowed target spectrograms (`self.targets`),
    shifted by one step along the window axis: the last window is dropped and an all-zero window is prepended.

    Returns
    -------
    tf.Tensor
        The frequency decoder RNN input sequences, of shape [num_windows, batch_size*max_time, window_width]
    """
    # expected shape: [max_time * batch_size, num_features]
    targets_flat = flatten_time(self.targets)

    # expected shape: [num_windows, max_time * batch_size, window_width]
    target_windows = window_features(inputs=targets_flat,
                                     window_width=self.frequency_window_width,
                                     window_overlap=self.frequency_window_overlap)

    # Drop the last window with a negative slice bound rather than reading the static shape: the static
    # leading dimension may be unknown (None), in which case `num_windows - 1` would raise a TypeError.
    shifted_windows = target_windows[:-1, :, :]

    # Prepend one zero-filled window so that the number of windows is unchanged after the shift.
    return tf.pad(shifted_windows,
                  paddings=[[1, 0], [0, 0], [0, 0]],
                  mode="constant")
def encoder_inputs(self) -> tf.Tensor:
    """
    Builds the input sequences consumed by the encoder.

    The input spectrograms (`self.inputs`) are first flattened with `flatten_time` and then cut into windows
    along the feature (frequency) axis by `window_features`, using a window width of `frequency_window_width`
    and an overlap of `frequency_window_overlap`. The resulting windows are the encoder RNN inputs, in order.

    Returns
    -------
    tf.Tensor
        The input sequences for the encoder
    """
    # flattened inputs, expected shape: [max_time * batch_size, num_features]
    # windowed result, expected shape: [num_windows, max_time * batch_size, window_width]
    return window_features(
        inputs=flatten_time(self.inputs),
        window_width=self.frequency_window_width,
        window_overlap=self.frequency_window_overlap,
    )
def decoder_frequency_initial_state(self) -> tf.Tensor:
    """
    Computes the initial states of the frequency decoder RNN.

    The output of the time decoder RNN (`self.decoder_time.output`) is flattened over time, passed through a
    `linear` layer of size `f_decoder_architecture.state_size`, and squashed with a hyperbolic tangent. The
    result is used as the initial state of the frequency decoder RNN.

    Returns
    -------
    tf.Tensor
        The initial states of the frequency decoder RNN
    """
    # time decoder output, expected shape: [max_time, batch_size, decoder_time.output_size]
    # after flattening, expected shape: [max_time * batch_size, decoder_time.output_size]
    time_outputs_flat = flatten_time(self.decoder_time.output)

    state_projection = linear(time_outputs_flat,
                              output_size=self.f_decoder_architecture.state_size)

    return tf.tanh(state_projection)
def _feed_previous_rnn(self, cell, initial_state, reverse: bool) -> (tf.Tensor, Any):
    """
    Runs a unidirectional RNN which stochastically feeds back its own previous outputs.

    The RNN is driven by `tf.nn.raw_rnn` with a custom loop function. The first input is all zeros. At every
    later step, and independently for each batch entry, the next input is either the projected cell output of
    the previous step (where the feed mask is set) or the previous element of the input sequence (elsewhere).
    The feed mask is drawn once, up front, by comparing uniform random numbers against `feed_previous_prob`;
    if `feed_previous_prob` is None the mask is all False, so the input sequence is always used.

    Parameters
    ----------
    cell
        The RNN cell to use
    initial_state
        A possibly nested tuple of initial states for the RNN cells
    reverse: bool
        Whether to reverse the input sequence along its first axis

    Returns
    -------
    output: tf.Tensor
        The output sequence of the RNN after applying the output projection
    final_state: Any
        The final state returned by `tf.nn.raw_rnn`
    """
    # input sequence, presumably time-major [max_step, batch_size, num_features] -- TODO confirm
    step_inputs = tf.reverse(self.inputs, axis=[0]) if reverse else self.inputs

    # output projection parameters, shared between the feedback path and the final output computation
    weights = tf.get_variable(name="weights",
                              shape=[self.num_units, self.num_features],
                              dtype=tf.float32)
    bias = tf.get_variable(name="bias", shape=[self.num_features])

    def project(cell_outputs):
        # maps cell outputs of width num_units to num_features values with a tanh non-linearity
        return tf.tanh(tf.matmul(cell_outputs, weights) + bias)

    # feed mask: True where the previous (projected) RNN output should be used as the next input
    mask_shape = [self.max_step, self.batch_size]
    if self.feed_previous_prob is None:
        feed_mask = tf.fill(dims=mask_shape, value=False)
    else:
        uniform_samples = tf.random_uniform(mask_shape, minval=0, maxval=1)
        feed_mask = uniform_samples < self.feed_previous_prob

    def first_step():
        # state of the loop before any RNN step has been executed
        finished = (0 >= self.sequence_length)
        zero_input = tf.zeros([self.batch_size, self.num_features], dtype=tf.float32)

        # no emit structure and no additional loop state are required
        return finished, zero_input, initial_state, None, None

    def later_step(time, cell_output, cell_state):
        finished = (time >= self.sequence_length)

        use_previous_output = feed_mask[time - 1]
        fed_back_input = project(cell_output)
        sequence_input = step_inputs[time - 1]

        next_input = tf.where(use_previous_output, x=fed_back_input, y=sequence_input)

        return finished, next_input, cell_state, cell_output, None

    def loop_fn(time, previous_output, previous_state, previous_loop_state):
        if previous_state is not None:
            return later_step(time, previous_output, previous_state)

        # raw_rnn passes None for both output and state on the very first call (time == 0)
        assert previous_output is None
        return first_step()

    emitted_ta, final_state, _ = tf.nn.raw_rnn(cell=cell,
                                               loop_fn=loop_fn,
                                               swap_memory=True)

    # apply the output projection to all emitted cell outputs at once
    raw_outputs_flat = flatten_time(emitted_ta.stack())
    projected_flat = project(raw_outputs_flat)

    outputs = restore_time(projected_flat,
                           max_time=self.max_step,
                           batch_size=self.batch_size,
                           num_features=self.num_features)

    # re-attach the static shape information taken from the first two dimensions of the inputs
    outputs.set_shape([self.inputs.shape[0], self.inputs.shape[1], self.num_features])

    return outputs, final_state