def test_dynamic_bigru_state_consumed_only(self):
    units = 5
    batch_size = 1
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.]], dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)

    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
    initializer = init_ops.constant_initializer(0.5)

    # bigru, no scope
    cell1 = cudnn_rnn.CudnnCompatibleGRUCell(units, kernel_initializer=initializer)
    cell2 = cudnn_rnn.CudnnCompatibleGRUCell(units, kernel_initializer=initializer)
    _, cell_state = tf.nn.bidirectional_dynamic_rnn(cell1, cell2, x, dtype=tf.float32)
    _ = tf.identity(cell_state, name="cell_state")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-3, atol=1e-06,
                       graph_validator=lambda g: check_gru_count(g, 1))
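The converter tests in this section validate the transformed graph with a check_gru_count helper defined elsewhere in the test suite. A minimal sketch of what such a validator could look like, assuming the graph object exposes its nodes through get_nodes() and converted GRUs surface as nodes whose type is "GRU" (both assumptions; the real helper lives in the suite's shared module):

def check_gru_count(graph, expected_count):
    # Count GRU nodes in the converted graph and compare against the expectation.
    return len([n for n in graph.get_nodes() if n.type == "GRU"]) == expected_count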
def __init__(self, config):
    self.config = config

    h_dim = self.config["gnn_h_size"]
    num_edge_types = self.config["num_edge_types"]

    self.weights = {}

    edge_weights = tf.Variable(glorot_init([num_edge_types * h_dim, h_dim]),
                               name="edge_weights")
    self.weights["edge_weights"] = tf.reshape(edge_weights,
                                              [num_edge_types, h_dim, h_dim])

    if self.config["use_edge_bias"] == 1:
        self.weights["edge_biases"] = tf.Variable(
            np.zeros([num_edge_types, h_dim], dtype=np.float32),
            name="gnn_edge_biases",
        )

    cell_type = config["graph_rnn_cell"]
    activation_fun = tf.nn.tanh
    if cell_type == "gru":
        cell = tf.compat.v1.keras.layers.GRUCell(h_dim, activation=activation_fun)
    elif cell_type == "cudnncompatiblegrucell":
        import tensorflow.contrib.cudnn_rnn as cudnn_rnn
        cell = cudnn_rnn.CudnnCompatibleGRUCell(h_dim)
    elif cell_type == "rnn":
        cell = tf.nn.rnn_cell.BasicRNNCell(h_dim, activation=activation_fun)
    else:
        raise Exception("Unknown RNN cell type '%s'." % cell_type)
    self.weights["rnn_cells"] = cell
def __init__(self, config):
    self.config = config

    h_dim = self.config['gnn_h_size']
    num_edge_types = self.config['num_edge_types']

    self.weights = {}

    edge_weights = tf.Variable(glorot_init([num_edge_types * h_dim, h_dim]),
                               name='edge_weights')
    self.weights['edge_weights'] = tf.reshape(edge_weights,
                                              [num_edge_types, h_dim, h_dim])

    if self.config['use_edge_bias'] == 1:
        self.weights['edge_biases'] = tf.Variable(
            np.zeros([num_edge_types, h_dim], dtype=np.float32),
            name='gnn_edge_biases')

    cell_type = self.config['graph_rnn_cell'].lower()
    activation_fun = tf.nn.tanh
    if cell_type == 'gru':
        cell = tf.nn.rnn_cell.GRUCell(h_dim, activation=activation_fun)
    elif cell_type == 'cudnncompatiblegrucell':
        import tensorflow.contrib.cudnn_rnn as cudnn_rnn
        cell = cudnn_rnn.CudnnCompatibleGRUCell(h_dim)
    elif cell_type == 'rnn':
        cell = tf.nn.rnn_cell.BasicRNNCell(h_dim, activation=activation_fun)
    else:
        raise Exception("Unknown RNN cell type '%s'." % cell_type)
    self.weights['rnn_cells'] = cell
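The two constructors above share the same cell-selection chain; pulled out as a standalone factory it reads as below (a sketch assuming TF 1.x with tensorflow.contrib available; make_graph_rnn_cell is a hypothetical name). Note that CudnnCompatibleGRUCell accepts no activation argument and is fixed to tanh, which is why several callers in this section assert the activation before choosing it:

import tensorflow as tf

def make_graph_rnn_cell(cell_type, h_dim, activation_fun=tf.nn.tanh):
    """Hypothetical helper: build the graph RNN cell named by `cell_type`."""
    cell_type = cell_type.lower()
    if cell_type == 'gru':
        return tf.nn.rnn_cell.GRUCell(h_dim, activation=activation_fun)
    elif cell_type == 'cudnncompatiblegrucell':
        # Fixed tanh activation; weight layout is restorable from CudnnGRU checkpoints.
        import tensorflow.contrib.cudnn_rnn as cudnn_rnn
        return cudnn_rnn.CudnnCompatibleGRUCell(h_dim)
    elif cell_type == 'rnn':
        return tf.nn.rnn_cell.BasicRNNCell(h_dim, activation=activation_fun)
    raise Exception("Unknown RNN cell type '%s'." % cell_type)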
def test_single_dynamic_gru_seq_length_is_const(self):
    units = 5
    batch_size = 1
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]],
                     dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)

    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
    initializer = init_ops.constant_initializer(0.5)

    # no scope
    cell = cudnn_rnn.CudnnCompatibleGRUCell(units, kernel_initializer=initializer)
    outputs, cell_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32,
                                            sequence_length=[5])
    _ = tf.identity(outputs, name="output")
    _ = tf.identity(cell_state, name="cell_state")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-3, atol=1e-06,
                       graph_validator=lambda g: check_gru_count(g, 1))
def test_dynamic_bidirectional_but_one_gru_and_output_consumed_only(self):
    units = 5
    batch_size = 1
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.]], dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)

    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

    # bigru, no scope
    cell = cudnn_rnn.CudnnCompatibleGRUCell(units)
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell, cell, x, dtype=tf.float32)
    _ = tf.identity(outputs, name="output")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-3, atol=1e-06,
                       graph_validator=lambda g: check_gru_count(g, 1))
def test_single_dynamic_gru_random_weights2(self):
    hidden_size = 128
    batch_size = 1
    x_val = np.random.randn(1, 133).astype('f')
    x_val = np.stack([x_val] * batch_size)

    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
    initializer = tf.random_uniform_initializer(0.0, 1.0)

    # no scope
    cell = cudnn_rnn.CudnnCompatibleGRUCell(hidden_size,
                                            kernel_initializer=initializer)
    outputs, cell_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
    _ = tf.identity(outputs, name="output")
    _ = tf.identity(cell_state, name="cell_state")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port, 0.01,
                       graph_validator=lambda g: check_gru_count(g, 1))
def test_single_dynamic_gru_placeholder_input(self):
    units = 5
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]], dtype=np.float32)
    x_val = np.stack([x_val] * 1)

    x = tf.placeholder(tf.float32, shape=(None, 4, 2), name="input_1")
    initializer = init_ops.constant_initializer(0.5)

    # no scope
    cell = cudnn_rnn.CudnnCompatibleGRUCell(units, kernel_initializer=initializer)
    outputs, cell_state = tf.nn.dynamic_rnn(
        cell, x, dtype=tf.float32)  # by default zero initializer is used
    _ = tf.identity(outputs, name="output")
    _ = tf.identity(cell_state, name="cell_state")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-03, atol=1e-06,
                       graph_validator=lambda g: check_gru_count(g, 1))
def bigru(self, x, seq_len, lstm_output_dims=None, lstm_layer_count=1,
          keep_prob=1.0, name="bigru"):
    x_shape = x.get_shape()
    input_dims = int(x_shape[-1])
    max_seq_len = int(x_shape[-2])
    # Default the per-direction width to half the input size so the
    # forward/backward concat preserves the input dimensionality.
    u = int(input_dims / 2) if lstm_output_dims is None else lstm_output_dims

    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        # Flatten any extra leading dimensions into the batch axis.
        if len(x_shape) >= 4:
            x = tf.reshape(x, [-1, max_seq_len, input_dims])
            seq_len = tf.reshape(seq_len, [-1])

        for i in range(lstm_layer_count):
            with tf.variable_scope("lstm_layer_" + str(i + 1), reuse=tf.AUTO_REUSE):
                cell_fw = cudnn_rnn.CudnnCompatibleGRUCell(num_units=u)
                cell_bw = cudnn_rnn.CudnnCompatibleGRUCell(num_units=u)
                if keep_prob < 1.0 and self.is_training:
                    cell_fw = tf.nn.rnn_cell.DropoutWrapper(
                        cell_fw, output_keep_prob=keep_prob)
                    cell_bw = tf.nn.rnn_cell.DropoutWrapper(
                        cell_bw, output_keep_prob=keep_prob)
                outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, x, sequence_length=seq_len, dtype=tf.float32)
                x = tf.concat(outputs, axis=-1)

        # Restore the original leading dimensions if the input was rank >= 4.
        if len(x_shape) >= 4:
            return tf.reshape(
                x,
                [-1 if s is None else s for s in x_shape.as_list()[:-2]]
                + [max_seq_len, u * 2])
        else:
            return x
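A hypothetical call site for the bigru helper above, assuming model is an instance of the enclosing class with is_training set; with no lstm_output_dims the per-direction width defaults to half the input feature size, so the concatenated outputs match the input dimensionality:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 20, 64], name="features")  # [batch, time, dims]
seq_len = tf.placeholder(tf.int32, [None], name="seq_len")       # valid length per row
# u defaults to 64 / 2 = 32, so the bidirectional concat restores 64 features.
outputs = model.bigru(x, seq_len, lstm_layer_count=2, keep_prob=0.9)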
def make_encoder(time_inputs, encoder_features_depth, is_train, hparams, seed,
                 transpose_output=True):
    """
    Builds encoder, using CUDA RNN.
    :param time_inputs: Input tensor, shape [batch, time, features]
    :param encoder_features_depth: Static size of the features dimension
    :param is_train: Whether to apply encoder dropout (training mode)
    :param hparams: Hyperparameters (encoder_rnn_layers, rnn_depth, encoder_dropout)
    :param seed: Random seed for the kernel initializers
    :param transpose_output: Transform RNN output to batch-first shape
    :return: (rnn_out, rnn_state, c_state); c_state is None for GRU encoders
    """
    def build_rnn():
        return RNN(num_layers=hparams.encoder_rnn_layers,
                   num_units=hparams.rnn_depth,
                   # input_size=encoder_features_depth,
                   kernel_initializer=tf.initializers.random_uniform(
                       minval=-0.05, maxval=0.05, seed=seed + 1 if seed else None),
                   direction='unidirectional',
                   dropout=hparams.encoder_dropout if is_train else 0,
                   seed=seed)

    cuda_model = build_rnn()

    # [batch, time, features] -> [time, batch, features]
    time_first = tf.transpose(time_inputs, [1, 0, 2])
    rnn_time_input = time_first
    if RNN == tf.contrib.cudnn_rnn.CudnnLSTM:
        rnn_out, (rnn_state, c_state) = cuda_model(inputs=rnn_time_input)
    else:
        if USE_COMPATIBLE:
            with tf.variable_scope('cudnn_gru'):
                single_cell = lambda: cudnn_rnn.CudnnCompatibleGRUCell(
                    num_units=hparams.rnn_depth,
                    reuse=None,
                    kernel_initializer=tf.initializers.random_uniform(
                        minval=-0.05, maxval=0.05, seed=seed + 1 if seed else None))
                cell = tf.nn.rnn_cell.MultiRNNCell(
                    [single_cell() for _ in range(hparams.encoder_rnn_layers)])
                # dynamic_rnn consumes batch-first input, so feed time_inputs directly.
                # Unpacking a single-element state tuple assumes encoder_rnn_layers == 1.
                rnn_out_wrong_order, (rnn_state_wrong_order,) = tf.nn.dynamic_rnn(
                    cell, time_inputs, dtype=tf.float32)
                rnn_out = tf.transpose(rnn_out_wrong_order, [1, 0, 2])
                rnn_state = tf.expand_dims(rnn_state_wrong_order, 0)
        else:
            rnn_out, (rnn_state,) = cuda_model(inputs=rnn_time_input)
        c_state = None

    if transpose_output:
        rnn_out = tf.transpose(rnn_out, [1, 0, 2])
    return rnn_out, rnn_state, c_state
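A hypothetical invocation of make_encoder, assuming the module-level RNN and USE_COMPATIBLE switches select the CudnnCompatibleGRUCell path and that hparams carries the three fields read above; everything here is illustrative, not part of the original module:

from collections import namedtuple
import tensorflow as tf

HParams = namedtuple('HParams', ['encoder_rnn_layers', 'rnn_depth', 'encoder_dropout'])
hparams = HParams(encoder_rnn_layers=1, rnn_depth=128, encoder_dropout=0.1)

time_inputs = tf.placeholder(tf.float32, [None, 30, 16])  # [batch, time, features]
rnn_out, rnn_state, c_state = make_encoder(
    time_inputs, encoder_features_depth=16, is_train=False,
    hparams=hparams, seed=None, transpose_output=True)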
def prepare_specific_graph_model(self) -> None:
    h_dim = self.params['hidden_size_node']

    activation_name = self.params['graph_rnn_activation'].lower()
    if activation_name == 'tanh':
        activation_fun = tf.nn.tanh
    elif activation_name == 'relu':
        activation_fun = tf.nn.relu
    else:
        raise Exception("Unknown activation function type '%s'." % activation_name)

    self.GGNNWeights = collections.namedtuple(
        'GGNNWeights',
        ['edge_weights', 'edge_biases', 'edge_type_attention_weights', 'rnn_cells'])

    # Generate per-layer values for edge weights, biases and gated units:
    self.weights = {}  # Used by super-class to place generic things
    self.gnn_weights = {'edge_weights': [], 'edge_biases': [], 'rnn_cells': []}
    for layer_idx in range(len(self.params['layer_timesteps'])):
        with tf.variable_scope('gnn_layer_%i' % layer_idx):
            edge_weights = tf.Variable(
                utils.glorot_init([self.params['num_edge_types'] * h_dim, h_dim]),
                name='gnn_edge_weights_%i' % layer_idx)
            edge_weights = tf.reshape(edge_weights,
                                      [self.params['num_edge_types'], h_dim, h_dim])
            edge_weights = tf.nn.dropout(
                edge_weights,
                keep_prob=self.placeholders['edge_weight_dropout_keep_prob'])
            self.gnn_weights['edge_weights'].append(edge_weights)

            if self.params['use_propagation_attention']:
                self.setup_attention_weights(layer_idx)

            if self.params['use_edge_bias']:
                self.gnn_weights['edge_biases'].append(
                    tf.Variable(np.zeros([self.params['num_edge_types'], h_dim],
                                         dtype=np.float32),
                                name='gnn_edge_biases_%i' % layer_idx))

            cell_type = self.params['graph_rnn_cell'].lower()
            if cell_type == 'gru':
                cell = tf.nn.rnn_cell.GRUCell(h_dim, activation=activation_fun)
            elif cell_type == 'cudnncompatiblegrucell':
                assert activation_name == 'tanh'
                import tensorflow.contrib.cudnn_rnn as cudnn_rnn
                cell = cudnn_rnn.CudnnCompatibleGRUCell(h_dim)
            elif cell_type == 'rnn':
                cell = tf.nn.rnn_cell.BasicRNNCell(h_dim, activation=activation_fun)
            else:
                raise Exception("Unknown RNN cell type '%s'." % cell_type)
            cell = tf.nn.rnn_cell.DropoutWrapper(
                cell, state_keep_prob=self.placeholders['graph_state_keep_prob'])
            self.gnn_weights['rnn_cells'].append(cell)
def test_multiple_dynamic_gru(self):
    units = 5
    batch_size = 1
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]], dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)

    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
    _ = tf.placeholder(tf.float32, x_val.shape, name="input_2")
    gru_output_list = []
    gru_cell_state_list = []

    # no scope
    cell = cudnn_rnn.CudnnCompatibleGRUCell(units)
    outputs, cell_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
    gru_output_list.append(outputs)
    gru_cell_state_list.append(cell_state)

    # given scope
    cell = cudnn_rnn.CudnnCompatibleGRUCell(units)
    with variable_scope.variable_scope("root1") as scope:
        outputs, cell_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32,
                                                sequence_length=[4], scope=scope)
    gru_output_list.append(outputs)
    gru_cell_state_list.append(cell_state)

    _ = tf.identity(gru_output_list, name="output")
    _ = tf.identity(gru_cell_state_list, name="cell_state")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-3, atol=1e-06,
                       graph_validator=lambda g: check_gru_count(g, 2))
def test_dynamic_gru_output_consumed_only(self):
    units = 5
    batch_size = 6
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.]], dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)

    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
    initializer = tf.random_uniform_initializer(-1.0, 1.0)
    cell1 = cudnn_rnn.CudnnCompatibleGRUCell(units, kernel_initializer=initializer)
    outputs, _ = tf.nn.dynamic_rnn(cell1, x, dtype=tf.float32)
    _ = tf.identity(outputs, name="output")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port, 0.0001,
                       graph_validator=lambda g: check_gru_count(g, 1))
def prepare_specific_graph_model(self) -> None: """ Prepare specific GGNN model. :return: none. """ h_dim = self.params['hidden_size'] self.placeholders['initial_node_representation'] = tf.placeholder( tf.float32, [None, h_dim], name='node_features') self.placeholders['adjacency_lists'] = [ tf.placeholder(tf.int32, [None, 2], name='adjacency_e%s' % e) for e in range(self.num_edge_types) ] self.placeholders['num_incoming_edges_per_type'] = tf.placeholder( tf.float32, [None, self.num_edge_types], name='num_incoming_edges_per_type') self.placeholders['graph_nodes_list'] = tf.placeholder( tf.int32, [None], name='graph_nodes_list') self.placeholders['graph_state_keep_prob'] = tf.placeholder( tf.float32, None, name='graph_state_keep_prob') self.placeholders['edge_weight_dropout_keep_prob'] = tf.placeholder( tf.float32, None, name='edge_weight_dropout_keep_prob') activation_name = self.params['graph_rnn_activation'].lower() if activation_name == 'tanh': activation_fun = tf.nn.tanh elif activation_name == 'relu': activation_fun = tf.nn.relu else: raise Exception("Unknown activation function type '%s'." % activation_name) # Generate per-layer values for edge weights, biases and gated units: self.weights = {} # Used by super-class to place generic things self.gnn_weights = GGNNWeights([], [], [], []) for layer_idx in range(len(self.params['layer_timesteps'])): with tf.variable_scope('gnn_layer_%i' % layer_idx): edge_weights = tf.Variable( glorot_init([self.num_edge_types * h_dim, h_dim]), name='gnn_edge_weights_%i' % layer_idx) edge_weights = tf.reshape(edge_weights, [self.num_edge_types, h_dim, h_dim]) edge_weights = tf.nn.dropout( edge_weights, keep_prob=self. placeholders['edge_weight_dropout_keep_prob']) self.gnn_weights.edge_weights.append(edge_weights) # Note we did not use propagation attention. if self.params['use_propagation_attention']: self.gnn_weights.edge_type_attention_weights.append( tf.Variable(np.ones([self.num_edge_types], dtype=np.float32), name='edge_type_attention_weights_%i' % layer_idx)) # Note we did not use edge biases. if self.params['use_edge_bias']: self.gnn_weights.edge_biases.append( tf.Variable(np.zeros([self.num_edge_types, h_dim], dtype=np.float32), name='gnn_edge_biases_%i' % layer_idx)) cell_type = self.params['graph_rnn_cell'].lower() if cell_type == 'gru': cell = tf.nn.rnn_cell.GRUCell(h_dim, activation=activation_fun) elif cell_type == 'cudnncompatiblegrucell': assert (activation_name == 'tanh') import tensorflow.contrib.cudnn_rnn as cudnn_rnn cell = cudnn_rnn.CudnnCompatibleGRUCell(h_dim) elif cell_type == 'rnn': cell = tf.nn.rnn_cell.BasicRNNCell( h_dim, activation=activation_fun) else: raise Exception("Unknown RNN cell type '%s'." % cell_type) cell = tf.nn.rnn_cell.DropoutWrapper( cell, state_keep_prob=self.placeholders['graph_state_keep_prob']) self.gnn_weights.rnn_cells.append(cell)
def prepare_specific_graph_model(self) -> None:
    word_dim = self.params['word_embedding_size']
    type_dim = self.params['type_embedding_size']
    h_dim = self.params['hidden_size']
    self.placeholders['initial_word_ids'] = tf.placeholder(
        tf.int32, [None, self.params['max_node_length']], name='word_ids')
    self.placeholders['initial_type_ids'] = tf.placeholder(
        tf.int32, [None, self.params['max_node_length']], name='type_ids')
    self.placeholders['candidates'] = tf.placeholder(tf.float32, [None, 1],
                                                     name='candidates')
    self.placeholders['slots'] = tf.placeholder(tf.int32, [None], name='slots')
    self.placeholders['num_candidates_per_graph'] = tf.placeholder(
        tf.int32, [self.params['batch_size']], name='num_candidates')
    self.placeholders['adjacency_lists'] = [
        tf.placeholder(tf.int32, [None, 2], name='adjacency_e%s' % e)
        for e in range(self.num_edge_types)
    ]
    self.placeholders['num_incoming_edges_per_type'] = tf.placeholder(
        tf.float32, [None, self.num_edge_types], name='num_incoming_edges_per_type')
    self.placeholders['graph_nodes_list'] = tf.placeholder(
        tf.int32, [None], name='graph_nodes_list')
    self.placeholders['graph_state_keep_prob'] = tf.placeholder(
        tf.float32, None, name='graph_state_keep_prob')
    self.placeholders['edge_weight_dropout_keep_prob'] = tf.placeholder(
        tf.float32, None, name='edge_weight_dropout_keep_prob')

    activation_name = self.params['graph_rnn_activation'].lower()
    if activation_name == 'tanh':
        activation_fun = tf.nn.tanh
    elif activation_name == 'relu':
        activation_fun = tf.nn.relu
    else:
        raise Exception("Unknown activation function type '%s'." % activation_name)

    # create embeddings
    with tf.variable_scope('embedding_layers'):
        self.word_embedding = tf.Variable(
            glorot_init([len(self.vocabs) + 2, word_dim]), name='word_embed')
        self.type_embedding = tf.Variable(
            glorot_init([len(self.type_hierarchy[0]['types']) + 2, type_dim]),
            name='type_embed')
        self.init_node_weights = tf.Variable(
            glorot_init([word_dim + type_dim + 1, h_dim]), name='embed_layer_w')
        self.init_node_bias = tf.Variable(np.zeros([h_dim]), name='embed_layer_b',
                                          dtype=tf.float32)

    # Generate per-layer values for edge weights, biases and gated units:
    self.weights = {}  # Used by super-class to place generic things
    self.gnn_weights = GGNNWeights([], [], [], [])
    for layer_idx in range(len(self.params['layer_timesteps'])):
        with tf.variable_scope('gnn_layer_%i' % layer_idx):
            edge_weights = tf.Variable(
                glorot_init([self.num_edge_types * h_dim, h_dim]),
                name='gnn_edge_weights_%i' % layer_idx)
            edge_weights = tf.reshape(edge_weights,
                                      [self.num_edge_types, h_dim, h_dim])
            edge_weights = tf.nn.dropout(
                edge_weights,
                keep_prob=self.placeholders['edge_weight_dropout_keep_prob'])
            self.gnn_weights.edge_weights.append(edge_weights)

            if self.params['use_propagation_attention']:
                self.gnn_weights.edge_type_attention_weights.append(
                    tf.Variable(np.ones([self.num_edge_types], dtype=np.float32),
                                name='edge_type_attention_weights_%i' % layer_idx))

            if self.params['use_edge_bias']:
                self.gnn_weights.edge_biases.append(
                    tf.Variable(np.zeros([self.num_edge_types, h_dim],
                                         dtype=np.float32),
                                name='gnn_edge_biases_%i' % layer_idx))

            cell_type = self.params['graph_rnn_cell'].lower()
            if cell_type == 'gru':
                cell = tf.nn.rnn_cell.GRUCell(h_dim, activation=activation_fun)
            elif cell_type == 'cudnncompatiblegrucell':
                assert activation_name == 'tanh'
                import tensorflow.contrib.cudnn_rnn as cudnn_rnn
                cell = cudnn_rnn.CudnnCompatibleGRUCell(h_dim)
            elif cell_type == 'rnn':
                cell = tf.nn.rnn_cell.BasicRNNCell(h_dim, activation=activation_fun)
            else:
                raise Exception("Unknown RNN cell type '%s'." % cell_type)
            cell = tf.nn.rnn_cell.DropoutWrapper(
                cell, state_keep_prob=self.placeholders['graph_state_keep_prob'])
            self.gnn_weights.rnn_cells.append(cell)
def decoder(self, memory):
    """
    Implementation of the Tacotron decoder network.

    Arguments:
        memory (tf.Tensor):
            The output states of the encoder RNN concatenated over time. Its shape
            is expected to be shape=(B, T_sent, 2 * encoder.n_gru_units), with B
            being the batch size and T_sent being the number of tokens in the
            sentence including the EOS token.

    Returns:
        tf.Tensor:
            Generated reduced Mel. spectrogram. The shape is
            shape=(B, T_spec // r, n_mels * r), with B being the batch size, T_spec
            being the number of frames in the spectrogram and r being the reduction
            factor.
    """
    with tf.variable_scope('decoder2'):
        # Query the current batch size.
        batch_size = tf.shape(memory)[0]

        # Query the number of layers for the decoder RNN.
        n_decoder_layers = self.hparams.decoder.n_gru_layers

        # Query the number of units for the decoder cells.
        n_decoder_units = self.hparams.decoder.n_decoder_gru_units

        # Query the number of units for the attention cell.
        n_attention_units = self.hparams.decoder.n_attention_units

        # General attention mechanism parameters that are the same for all mechanisms.
        mechanism_params = {
            'num_units': n_attention_units,
            'memory': memory,
        }

        if model_params.attention.mechanism == LocalLuongAttention:
            # Update the parameters with additional parameters for the local
            # attention case.
            mechanism_params.update({
                'attention_mode': model_params.attention.luong_local_mode,
                'score_mode': model_params.attention.luong_local_score,
                'd': model_params.attention.luong_local_window_D,
                'force_gaussian': model_params.attention.luong_force_gaussian,
                'const_batch_size': 16
            })

        # Create the attention mechanism.
        attention_mechanism = model_params.attention.mechanism(**mechanism_params)

        # Create the attention RNN cell.
        if model_params.force_cudnn:
            attention_cell = tfcrnn.CudnnCompatibleGRUCell(num_units=n_attention_units)
        else:
            attention_cell = tf.nn.rnn_cell.GRUCell(num_units=n_attention_units)

        # Apply the pre-net to each decoder input as shown in [1], figure 1.
        attention_cell = PrenetWrapper(attention_cell,
                                       self.hparams.decoder.pre_net_layers,
                                       self.is_training())

        # Select the attention wrapper needed for the current attention mechanism.
        if model_params.attention.mechanism == LocalLuongAttention:
            wrapper = AdvancedAttentionWrapper
        else:
            wrapper = tfc.seq2seq.AttentionWrapper

        # Connect the attention cell with the attention mechanism.
        wrapped_attention_cell = wrapper(
            cell=attention_cell,
            attention_mechanism=attention_mechanism,
            attention_layer_size=n_attention_units,
            alignment_history=True,
            output_attention=True,
            initial_cell_state=None
        )  # => (B, T_sent, n_attention_units) = (B, T_sent, 256)

        # Stack several GRU cells and apply a residual connection after each cell.
        # Before the input reaches the decoder RNN it passes through the attention cell.
        cells = [wrapped_attention_cell]
        for i in range(n_decoder_layers):
            # Create a decoder GRU cell.
            if model_params.force_cudnn:
                # => (B, T_spec, n_decoder_units) = (B, T_spec, 256)
                cell = tfcrnn.CudnnCompatibleGRUCell(num_units=n_decoder_units)
            else:
                # => (B, T_spec, n_decoder_units) = (B, T_spec, 256)
                cell = tf.nn.rnn_cell.GRUCell(num_units=n_decoder_units)

            # => (B, T_spec, n_decoder_units) = (B, T_spec, 256)
            cell = tf.nn.rnn_cell.ResidualWrapper(cell)
            cells.append(cell)

        # => (B, T_spec, n_decoder_units) = (B, T_spec, 256)
        decoder_cell = tf.nn.rnn_cell.MultiRNNCell(cells, state_is_tuple=True)

        # Project the final cells output to the decoder target size.
        # => (B, T_spec, target_size * reduction) = (B, T_spec, 80 * reduction)
        output_cell = tfc.rnn.OutputProjectionWrapper(
            cell=decoder_cell,
            output_size=self.hparams.decoder.target_size * self.hparams.reduction,
            # activation=tf.nn.sigmoid
        )

        decoder_initial_state = output_cell.zero_state(batch_size=batch_size,
                                                       dtype=tf.float32)

        if self.is_training():
            # During training we do not stop decoding manually. The decoder
            # automatically decodes as many time steps as are contained in the
            # ground truth data.
            maximum_iterations = None

            # Unfold the reduced spectrogram in order to grab the r'th ground
            # truth frames.
            mel_targets = tf.reshape(self.inp_mel_spec,
                                     [batch_size, -1, self.hparams.n_mels])

            # Create a custom training helper for feeding ground truth frames
            # during training.
            helper = TacotronTrainingHelper(
                batch_size=batch_size,
                outputs=mel_targets,
                input_size=self.hparams.decoder.target_size,
                reduction_factor=self.hparams.reduction,
            )
        elif self._mode == Mode.EVAL:
            # During evaluation we stop decoding after the same number of frames
            # the ground truth has.
            maximum_iterations = tf.shape(self.inp_mel_spec)[1]

            # Create a custom inference helper that handles proper evaluation
            # data feeding.
            helper = TacotronInferenceHelper(
                batch_size=batch_size,
                input_size=self.hparams.decoder.target_size)
        else:
            # During inference we stop decoding after `maximum_iterations` frames.
            maximum_iterations = \
                self.hparams.decoder.maximum_iterations // self.hparams.reduction

            # Create a custom inference helper that handles proper inference
            # data feeding.
            helper = TacotronInferenceHelper(
                batch_size=batch_size,
                input_size=self.hparams.decoder.target_size)

        decoder = seq2seq.BasicDecoder(cell=output_cell,
                                       helper=helper,
                                       initial_state=decoder_initial_state)

        # Start decoding.
        decoder_outputs, final_state, final_sequence_lengths = seq2seq.dynamic_decode(
            decoder=decoder,
            output_time_major=False,
            impute_finished=False,
            maximum_iterations=maximum_iterations)

        # decoder_outputs => type=BasicDecoderOutput, (rnn_output, _)
        # final_state => type=AttentionWrapperState, (attention_wrapper_state, _, _)
        # final_sequence_lengths.shape = (B)

        # Create an attention alignment summary image.
        self.alignment_history = final_state[0].alignment_history.stack()

    # shape => (B, T_spec // r, n_mels * r)
    return decoder_outputs.rnn_output