def _create_single_cell(cell_fn, num_units, is_residual=False, is_dropout=False, keep_prob=None):
    """Create a single RNN cell based on cell_fn, optionally wrapped with dropout and a residual connection."""
    cell = cell_fn(num_units)
    if is_dropout:
        cell = DropoutWrapper(cell, input_keep_prob=keep_prob)
    if is_residual:
        cell = ResidualWrapper(cell)
    return cell
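

def _example_stacked_cells(num_layers=2, num_units=256, keep_prob=0.8):
    """Illustrative sketch (not part of the original pipeline): how _create_single_cell
    could be used to stack several GRU layers into a MultiRNNCell. The layer count,
    unit size, and keep_prob here are placeholder values, not values from this repo."""
    cells = [
        _create_single_cell(tf.nn.rnn_cell.GRUCell, num_units,
                            is_residual=(i > 0),   # residual connections after the first layer
                            is_dropout=True,
                            keep_prob=keep_prob)
        for i in range(num_layers)
    ]
    return MultiRNNCell(cells, state_is_tuple=True)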
def attention_decoder(inputs, memory, num_units=None, batch_size=1, inputs_length=None,
                      n_mels=80, reduction=1, default_max_iters=200, is_training=True,
                      scope='attention_decoder', reuse=None):
    """
    Applies an attention-based GRU decoder to `inputs`, attending over `memory`.

    :param inputs: A 3d tensor with shape of [N, T', C']. Decoder inputs.
    :param memory: A 3d tensor with shape of [N, T, C]. Outputs of the encoder network.
    :param num_units: An int. Attention size.
    :param batch_size: An int. Batch size.
    :param inputs_length: A 1-D int tensor. Lengths of the decoder input sequences (used by the training helper).
    :param n_mels: An int. Number of Mel banks to generate.
    :param reduction: An int. Reduction factor. Paper => 2, 3, 5.
    :param default_max_iters: An int. Maximum number of decoding iterations at inference time.
    :param is_training: A boolean. Whether the graph is built for training or inference.
    :param scope: Optional scope for `variable_scope`.
    :param reuse: Boolean, whether to reuse the weights of a previous layer by the same name.
    :return: A tuple (decoder_outputs, final_decoder_state), where decoder_outputs is a
             3d tensor with shape of [N, T_out/r, n_mels*r].
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Parameter setting: during training the helper stops on `inputs_length`,
        # so no iteration cap is needed; at inference the decoder is capped.
        if is_training:
            max_iters = None
        else:
            max_iters = default_max_iters

        if num_units is None:
            num_units = inputs.get_shape().as_list()[-1]

        # Decoder cell
        decoder_cell = tf.nn.rnn_cell.GRUCell(num_units)

        # Attention
        # [N, T_in, attention_depth]
        attention_cell = AttentionWrapper(decoder_cell,
                                          BahdanauAttention(num_units, memory),
                                          alignment_history=True)

        # Concatenate attention context vector and RNN cell output into a 2*attention_depth vector.
        # [N, T_in, 2*attention_depth]
        concat_cell = ConcatOutputAndAttentionWrapper(attention_cell)

        # Decoder (layers specified bottom to top):
        # [N, T_in, decoder_depth]
        decoder_cell = MultiRNNCell([
            OutputProjectionWrapper(concat_cell, num_units),
            ResidualWrapper(GRUCell(num_units)),
            ResidualWrapper(GRUCell(num_units))
        ], state_is_tuple=True)

        # Project onto r mel spectrogram frames (predict r outputs at each RNN step):
        output_cell = OutputProjectionWrapper(decoder_cell, n_mels * reduction)

        decoder_init_state = output_cell.zero_state(batch_size=batch_size, dtype=tf.float32)

        if is_training:
            # helper = TacotronTrainingHelper(batch_size, n_mels, reduction, inputs)
            helper = tf.contrib.seq2seq.TrainingHelper(inputs=inputs,
                                                       sequence_length=inputs_length,
                                                       time_major=False)
        else:
            helper = TacotronInferenceHelper(batch_size, n_mels, reduction)

        decoder = BasicDecoder(output_cell, helper, decoder_init_state)

        # [N, T_out/r, M*r]
        (decoder_outputs, _), final_decoder_state, _ = dynamic_decode(decoder,
                                                                      maximum_iterations=max_iters)

        return decoder_outputs, final_decoder_state
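

def _example_build_decoder():
    """Illustrative sketch (not in the original repo): wiring attention_decoder into a
    graph with placeholder tensors. All shapes and hyper-parameters below are assumptions
    chosen for illustration, not values taken from this project."""
    memory = tf.placeholder(tf.float32, [None, None, 256])         # encoder outputs [N, T, C]
    decoder_inputs = tf.placeholder(tf.float32, [None, None, 80])  # previous mel frames [N, T', n_mels]
    lengths = tf.placeholder(tf.int32, [None])                     # per-example decoder lengths
    mel_outputs, final_state = attention_decoder(
        decoder_inputs, memory,
        num_units=256, batch_size=32, inputs_length=lengths,
        n_mels=80, reduction=2, is_training=True)
    return mel_outputs, final_state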