def _build_train(self, config):
    """Build the training-time logits for the configured model.

    For the flat models ("fasttext_flat" and "RCNN_flat") one linear layer
    maps a feature tensor to ``config.fn_classes`` outputs, which are then
    reshaped to ``[-1, config.fn_classes]``. For any other model name a
    decoder driven by ``TrainingHelper`` is unrolled with ``dynamic_decode``;
    when ``config.use_att`` is set the LSTM cell is wrapped with Bahdanau
    attention first.

    The result is stored on ``self.logits`` (the decoder path also sets
    ``self.decoder_outputs_train``). Nothing is returned.
    """
    model_name = config.model_name

    # Flat classifiers: the two variants differ only in the feature tensor.
    if model_name == "fasttext_flat" or model_name == "RCNN_flat":
        if model_name == "fasttext_flat":
            features = self.first_attention
        else:
            features = self.xx_final
        self.logits = tf.contrib.layers.fully_connected(
            features, config.fn_classes, activation_fn=None)
        print("logits:", self.logits.get_shape())
        self.logits = tf.reshape(self.logits, [-1, config.fn_classes])
        return

    # Sequence decoder: the same tensor seeds both halves of the LSTM state.
    encoder_state = rnn.LSTMStateTuple(self.xx_final, self.xx_final)

    if config.use_att:
        attention_mechanism = BahdanauAttention(
            config.decode_size,
            memory=self.xx_context,
            memory_sequence_length=self.x_seq_length)
        cell = AttentionWrapper(
            self.lstm, attention_mechanism, output_attention=False)
        # Start from the wrapper's zero state, then inject the encoder state
        # and the precomputed first attention vector.
        zero_state = cell.zero_state(
            dtype=tf.float32, batch_size=config.batch_size)
        initial_state = zero_state.clone(
            cell_state=encoder_state, attention=self.first_attention)
    else:
        cell = self.lstm
        initial_state = encoder_state

    train_helper = TrainingHelper(self.yy, self.y_seq_length)
    train_decoder = BasicDecoder(
        cell, train_helper, initial_state, output_layer=self.output_l)
    self.decoder_outputs_train, _, _ = dynamic_decode(
        train_decoder, impute_finished=True)
    self.logits = self.decoder_outputs_train.rnn_output
    print("logits:", self.logits.get_shape())
예제 #2
0
    def call(self, source, is_training=None, is_validation=None, teacher_forcing=False, memory_sequence_length=None,
             target=None):
        """Decode ``source`` and return the mel output, stop tokens and final state.

        Args:
            source: Tensor handed to ``AttentionRNNV1``; its first dimension
                is used as the batch size.
            is_training: Must not be None. Truthy selects ``TrainingHelper``.
            is_validation: Consulted only when ``is_training`` is falsy;
                truthy selects ``ValidationHelper``, otherwise
                ``StopTokenBasedInferenceHelper`` is used.
            teacher_forcing: Forwarded to ``ValidationHelper`` only.
            memory_sequence_length: Forwarded to ``AttentionRNNV1``.
            target: Forwarded to the training and validation helpers.

        Returns:
            ``(mel_output, stop_token, final_decoder_state)``, where
            ``mel_output`` is the decoder output reshaped to
            ``[batch_size, -1, self.num_mels]``.
        """
        assert is_training is not None

        prenets = tuple(PreNet(units, is_training, self._drop_rate)
                        for units in self._prenet_out_units)

        batch_size = tf.shape(source)[0]

        # Cell stack: attention RNN -> decoder RNN -> output/stop-token wrapper.
        attention_cell = AttentionRNNV1(self.attention_out_units, prenets, source, memory_sequence_length)
        decoder_cell = DecoderRNNV1(self.decoder_out_units, attention_cell)
        wrapped_cell = OutputAndStopTokenWrapper(decoder_cell, self.num_mels * self.outputs_per_step)

        initial_state = wrapped_cell.zero_state(batch_size, dtype=tf.float32)

        # Exactly one helper is constructed, depending on the run mode.
        if is_training:
            helper = TrainingHelper(target,
                                    self.num_mels,
                                    self.outputs_per_step,
                                    n_feed_frame=self.n_feed_frame)
        elif is_validation:
            helper = ValidationHelper(target, batch_size,
                                      self.num_mels,
                                      self.outputs_per_step,
                                      n_feed_frame=self.n_feed_frame,
                                      teacher_forcing=teacher_forcing)
        else:
            helper = StopTokenBasedInferenceHelper(batch_size,
                                                   self.num_mels,
                                                   self.outputs_per_step,
                                                   n_feed_frame=self.n_feed_frame)

        decoder = BasicDecoder(wrapped_cell, helper, initial_state)
        decode_result = tf.contrib.seq2seq.dynamic_decode(
            decoder, maximum_iterations=self.max_iters)
        ((decoder_outputs, stop_token), _), final_decoder_state, _ = decode_result

        mel_output = tf.reshape(decoder_outputs, [batch_size, -1, self.num_mels])
        return mel_output, stop_token, final_decoder_state
예제 #3
0
    def update(self, hparams):
        """Rebuild the decoding outputs inside the 'inference' variable scope.

        Stores ``hparams`` on ``self._hparams``, runs ``dynamic_decode`` with
        the existing output cell, helper and initial state, and publishes the
        results as ``self.mel_outputs``, ``self.linear_outputs`` and
        ``self.alignments``.

        Args:
            hparams: Hyper-parameter object; ``max_iters``, ``num_mels`` and
                ``num_freq`` are read from it.
        """
        with tf.variable_scope('inference'):
            self._hparams = hparams
            params = self._hparams

            decoder = BasicDecoder(self.output_cell, self.helper,
                                   self.decoder_init_state)
            decode_result = tf.contrib.seq2seq.dynamic_decode(
                decoder, maximum_iterations=params.max_iters)
            # decoder_outputs: [N, T_out/r, M*r]
            (decoder_outputs, _), final_decoder_state, _ = decode_result

            # One output frame per entry: [N, T_out, M]
            mel_shape = [self.batch_size, -1, params.num_mels]
            mel_outputs = tf.reshape(decoder_outputs, mel_shape)

            # Post-processing CBHG followed by a dense projection that reuses
            # existing variables: [N, T_out, 256] -> [N, T_out, F]
            post_outputs = post_cbhg(mel_outputs,
                                     params.num_mels,
                                     is_training=False,
                                     is_updating=True)
            linear_outputs = tf.layers.dense(post_outputs,
                                             params.num_freq,
                                             reuse=True)

            # Alignments come from the first element of the final decoder state.
            alignment_history = final_decoder_state[0].alignment_history
            alignments = tf.transpose(alignment_history.stack(), [1, 2, 0])

            self.mel_outputs = mel_outputs
            self.linear_outputs = linear_outputs
            self.alignments = alignments
            log('Updated Tacotron model.')
예제 #4
0
    def decoder_train(self, decoder_cell, decoder_initial_state, output_layer):
        '''
        Build the training decoder.

        The decoder input is ``self.decoder_targets`` with its last column
        removed and a ``<GO>`` id prepended to every row.

        :param decoder_cell: cell handed to ``BasicDecoder``
        :param decoder_initial_state: initial state handed to ``BasicDecoder``
        :param output_layer: layer handed to ``BasicDecoder`` as ``output_layer``
        :return: decoder_logits_train: ``rnn_output`` of the decode result,
            passed through ``tf.identity``
        '''
        # Targets without their final column.
        targets_head = tf.strided_slice(
            self.decoder_targets,
            [0, 0],
            [self.batch_size, -1],
            [1, 1])
        # One <GO> id per row, placed in front of the truncated targets.
        go_column = tf.fill([self.batch_size, 1], self.word_to_idx['<GO>'])
        decoder_input = tf.concat([go_column, targets_head], 1)
        embedded_input = tf.nn.embedding_lookup(self.embedding, decoder_input)

        helper = TrainingHelper(inputs=embedded_input,
                                sequence_length=self.decoder_targets_length,
                                time_major=False,
                                name='training_helper')
        decoder = BasicDecoder(cell=decoder_cell,
                               helper=helper,
                               initial_state=decoder_initial_state,
                               output_layer=output_layer)
        final_outputs, _, _ = dynamic_decode(
            decoder=decoder,
            impute_finished=True,
            maximum_iterations=self.max_target_sequence_length)
        return tf.identity(final_outputs.rnn_output)
예제 #5
0
    def decoder_decode(self, decoder_cell, decoder_initial_state,
                       output_layer, maximum_iterations=50):
        """Build the inference decoder and return the predicted token ids.

        Depending on ``self.beam_search`` this uses either a
        ``BeamSearchDecoder`` (beam width ``self.beam_size``) or a
        ``BasicDecoder`` driven by ``GreedyEmbeddingHelper``. Decoding starts
        from the ``<GO>`` id for every batch entry and uses the ``<EOS>`` id
        as the end token.

        Args:
            decoder_cell: Cell handed to the decoder.
            decoder_initial_state: Initial state handed to the decoder.
                NOTE(review): for beam search this presumably has to be
                tiled to ``batch_size * beam_size`` by the caller - confirm.
            output_layer: Layer handed to the decoder as ``output_layer``.
            maximum_iterations: Upper bound on the number of decoding steps
                passed to ``dynamic_decode``. Defaults to 50, the value that
                was previously hard-coded.

        Returns:
            With beam search, ``predicted_ids`` of the decode result;
            otherwise ``sample_id`` with an extra trailing dimension added so
            that both modes return a tensor with a last "beam" axis.
        """
        go_id = self.word_to_idx['<GO>']
        start_tokens = tf.ones([
            self.batch_size,
        ], tf.int32) * go_id
        end_token = self.word_to_idx['<EOS>']

        if self.beam_search:
            inference_decoder = BeamSearchDecoder(
                cell=decoder_cell,
                embedding=self.embedding,
                start_tokens=start_tokens,
                end_token=end_token,
                initial_state=decoder_initial_state,
                beam_width=self.beam_size,
                output_layer=output_layer)
        else:
            decoding_helper = GreedyEmbeddingHelper(embedding=self.embedding,
                                                    start_tokens=start_tokens,
                                                    end_token=end_token)
            inference_decoder = BasicDecoder(
                cell=decoder_cell,
                helper=decoding_helper,
                initial_state=decoder_initial_state,
                output_layer=output_layer)

        decoder_outputs, _, _ = dynamic_decode(
            decoder=inference_decoder,
            maximum_iterations=maximum_iterations)

        if self.beam_search:
            return decoder_outputs.predicted_ids
        # Greedy decoding yields one sequence per entry; add a trailing axis
        # of size 1 to match the beam-search output rank.
        return tf.expand_dims(decoder_outputs.sample_id, -1)
예제 #6
0
 def build_decoder(self, encoder_outputs, encoder_final_state):
     """
     Build the complete decoder under the "decode" variable scope.

     In 'train' mode this sets ``self.masks`` and ``self.loss``; in any other
     mode it runs beam search and sets ``self.decoder_pred_decode``.

     :param encoder_outputs: forwarded to ``self.build_decoder_cell``
     :param encoder_final_state: forwarded to ``self.build_decoder_cell``
     :return: None
     """
     with tf.variable_scope("decode"):
         decoder_cell, decoder_initial_state = self.build_decoder_cell(
             encoder_outputs, encoder_final_state, self.hidden_size,
             self.cell_type, self.layer_size)

         # Output projection layer applied to the decoder outputs.
         decoder_output_projection = layers.Dense(
             self.decoder_vocab_size,
             dtype=tf.float32,
             use_bias=False,
             kernel_initializer=tf.truncated_normal_initializer(
                 mean=0.0, stddev=0.1),
             name='decoder_output_projection')

         if self.mode == 'train':
             # Training mode
             embedded_inputs = tf.nn.embedding_lookup(
                 self.decoder_embeddings, self.decoder_inputs_train)
             training_helper = TrainingHelper(
                 inputs=embedded_inputs,
                 sequence_length=self.decoder_inputs_length,
                 name='training_helper')
             training_decoder = BasicDecoder(decoder_cell, training_helper,
                                             decoder_initial_state,
                                             decoder_output_projection)

             # Decode up to the longest sequence in the batch.
             max_decoder_length = tf.reduce_max(self.decoder_inputs_length)
             training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                 training_decoder, maximum_iterations=max_decoder_length)

             # Weight 1.0 inside each sequence's length, 0.0 on padding.
             self.masks = tf.sequence_mask(self.decoder_inputs_length,
                                           maxlen=max_decoder_length,
                                           dtype=tf.float32,
                                           name='masks')
             self.loss = tf.contrib.seq2seq.sequence_loss(
                 logits=training_decoder_output.rnn_output,
                 targets=self.decoder_inputs,
                 weights=self.masks,
                 average_across_timesteps=True,
                 average_across_batch=True)
             return

         # Prediction mode
         def embed(ids):
             return tf.nn.embedding_lookup(self.decoder_embeddings, ids)

         start_token = [DataUnit.START_INDEX] * self.batch_size
         end_token = DataUnit.END_INDEX
         inference_decoder = BeamSearchDecoder(
             cell=decoder_cell,
             embedding=embed,
             start_tokens=start_token,
             end_token=end_token,
             initial_state=decoder_initial_state,
             beam_width=self.beam_width,
             output_layer=decoder_output_projection)
         inference_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
             inference_decoder, maximum_iterations=self.max_decode_step)
         # Swap the last two axes of the predicted ids.
         self.decoder_pred_decode = tf.transpose(
             inference_decoder_output.predicted_ids, perm=[0, 2, 1])
예제 #7
0
def decoder(x, decoder_inputs, keep_prob, sequence_length, memory,
            memory_length, first_attention):
    """Build a training decoder and a beam-search decoder in scope "Decoder".

    Both decoders share the label embeddings, the LSTM cell and the output
    layer created here. Several names are read from outside this function:
    ``n_classes``, ``embedding_size``, ``n_hidden``, ``train_batch_size``,
    ``test_batch_size``, ``test_len``, ``beam_width``, ``GO`` and ``EOS``.

    Args:
        x: Tensor used for both components of the initial ``LSTMStateTuple``.
        decoder_inputs: Ids looked up in the label embedding table to form
            the training inputs.
        keep_prob: Dropout keep probability for the layer-norm LSTM cell.
        sequence_length: Sequence lengths given to ``TrainingHelper``.
        memory: Attention memory for ``BahdanauAttention``.
        memory_length: Passed as ``memory_sequence_length`` for the memory.
        first_attention: Initial attention value placed in the wrapper state.

    Returns:
        ``(decoder_outputs_train, decoder_outputs_infer, decoder_state_infer)``.
    """
    with tf.variable_scope("Decoder") as scope:
        label_embeddings = tf.get_variable(name="embeddings",
                                           shape=[n_classes, embedding_size],
                                           dtype=tf.float32)
        train_inputs_embedded = tf.nn.embedding_lookup(label_embeddings,
                                                       decoder_inputs)
        lstm = rnn.LayerNormBasicLSTMCell(n_hidden,
                                          dropout_keep_prob=keep_prob)
        output_l = layers_core.Dense(n_classes, use_bias=True)
        # The same tensor seeds both halves of the LSTM state.
        encoder_state = rnn.LSTMStateTuple(x, x)

        # ---- Training decoder ----
        attention_mechanism = BahdanauAttention(
            embedding_size,
            memory=memory,
            memory_sequence_length=memory_length)
        cell = AttentionWrapper(lstm,
                                attention_mechanism,
                                output_attention=False)
        # Start from the wrapper's zero state, then inject the encoder state
        # and the supplied first attention value.
        cell_state = cell.zero_state(dtype=tf.float32,
                                     batch_size=train_batch_size)
        cell_state = cell_state.clone(cell_state=encoder_state,
                                      attention=first_attention)
        train_helper = TrainingHelper(train_inputs_embedded, sequence_length)
        train_decoder = BasicDecoder(cell,
                                     train_helper,
                                     cell_state,
                                     output_layer=output_l)
        decoder_outputs_train, decoder_state_train, decoder_seq_train = dynamic_decode(
            train_decoder, impute_finished=True)

        # ---- Beam-search decoder ----
        # Memory, lengths, first attention and x are each tiled by beam_width.
        tiled_inputs = tile_batch(memory, multiplier=beam_width)
        tiled_sequence_length = tile_batch(memory_length,
                                           multiplier=beam_width)
        tiled_first_attention = tile_batch(first_attention,
                                           multiplier=beam_width)
        # A second attention mechanism and wrapper are built over the tiled
        # memory; the underlying lstm cell object is the one used above.
        attention_mechanism = BahdanauAttention(
            embedding_size,
            memory=tiled_inputs,
            memory_sequence_length=tiled_sequence_length)
        x2 = tile_batch(x, beam_width)
        encoder_state2 = rnn.LSTMStateTuple(x2, x2)
        cell = AttentionWrapper(lstm,
                                attention_mechanism,
                                output_attention=False)
        cell_state = cell.zero_state(dtype=tf.float32,
                                     batch_size=test_batch_size * beam_width)
        cell_state = cell_state.clone(cell_state=encoder_state2,
                                      attention=tiled_first_attention)
        # NOTE(review): start_tokens has length test_len while the state is
        # sized with test_batch_size - confirm these two values agree.
        infer_decoder = BeamSearchDecoder(cell,
                                          embedding=label_embeddings,
                                          start_tokens=[GO] * test_len,
                                          end_token=EOS,
                                          initial_state=cell_state,
                                          beam_width=beam_width,
                                          output_layer=output_l)
        # Inference is capped at 4 decoding steps.
        decoder_outputs_infer, decoder_state_infer, decoder_seq_infer = dynamic_decode(
            infer_decoder, maximum_iterations=4)
        return decoder_outputs_train, decoder_outputs_infer, decoder_state_infer
예제 #8
0
    def build_train_decoder(self):
        """Build the training decoder, its loss, the summary op and the optimizer.

        Sets ``self.decoder_logits_train``, ``self.loss`` and
        ``self.summary_op``, then calls ``self.build_optimizer()``.
        When ``self.teacher_forcing`` is truthy a
        ``ScheduledEmbeddingTrainingHelper`` with sampling probability
        ``self.teacher_forcing_probability`` is used; otherwise a plain
        ``TrainingHelper``.
        """
        print('Building train decoder...')

        # Decoder input: targets without their last column, prefixed by <GO>.
        targets_head = tf.strided_slice(self.decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
        go_column = tf.fill([self.batch_size, 1], self.word_to_id['<GO>'])
        decoder_input = tf.concat([go_column, targets_head], 1)
        embedded_input = tf.nn.embedding_lookup(self.embedding, decoder_input)

        if not self.teacher_forcing:
            training_helper = TrainingHelper(
                inputs=embedded_input,
                sequence_length=self.decoder_targets_length,
                time_major=False,
                name='training_helper'
            )
        else:
            training_helper = ScheduledEmbeddingTrainingHelper(
                inputs=embedded_input,
                sequence_length=self.decoder_targets_length,
                embedding=self.embedding,
                sampling_probability=self.teacher_forcing_probability,
                time_major=False,
                name='teacher_forcing_training_helper'
            )

        training_decoder = BasicDecoder(
            cell=self.decoder_cell,
            helper=training_helper,
            initial_state=self.decoder_initial_state,
            output_layer=self.output_layer
        )
        final_outputs, _, _ = dynamic_decode(
            decoder=training_decoder,
            impute_finished=True,
            maximum_iterations=self.max_target_sequence_length
        )
        self.decoder_logits_train = tf.identity(final_outputs.rnn_output)

        # Weighted cross-entropy loss over a sequence of logits:
        #   logits:  [batch_size, sequence_length, num_decoder_symbols]
        #   targets: [batch_size, sequence_length], true class per time step
        #   weights: [batch_size, sequence_length], weight of each prediction
        self.loss = sequence_loss(
            logits=self.decoder_logits_train,
            targets=self.decoder_targets,
            weights=self.mask
        )

        # Record the loss as a scalar summary and merge every summary
        # collected in the default graph.
        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()

        self.build_optimizer()
예제 #9
0
    def decoder(self,
                initial_state,
                x_dec_onehot,
                len_dec,
                is_teacher_forcing=False,
                reuse=False):
        """Build a decoder in scope "decoder" and return ``dynamic_decode``'s result.

        Args:
            initial_state: Tensor projected by a dense layer ('initial_layer')
                to ``config.hidden_size`` units and used as the initial state.
            x_dec_onehot: Decoder input passed to ``_soft_embedding_lookup``.
            len_dec: Sequence lengths for the training helper.
            is_teacher_forcing: True builds a ``WordDropoutTrainingHelper``
                (training); False builds a greedy or sampling embedding
                helper, chosen by ``config.is_argmax_sampling``.
            reuse: Passed to the variable scope and to ``self.cell``.

        Returns:
            The tuple returned by ``dynamic_decode``.
        """
        with tf.variable_scope("decoder", reuse=reuse):
            cfg = self.config
            dropout_keep_prob = cfg.word_dropout_keep_prob
            is_argmax_sampling = cfg.is_argmax_sampling
            in_dec = self._soft_embedding_lookup(self.embed, x_dec_onehot)

            initial_state = dense(inputs=initial_state,
                                  units=cfg.hidden_size,
                                  activation=None,
                                  use_bias=True,
                                  trainable=True,
                                  name='initial_layer')

            if not is_teacher_forcing:
                # Sampling: every sequence starts from EOS_ID.
                if is_argmax_sampling:
                    helper_cls = GreedyEmbeddingHelper
                else:
                    helper_cls = SampleEmbeddingHelper
                start_tokens = tf.tile([EOS_ID], [cfg.batch_size])
                helper = helper_cls(embedding=self.embed,
                                    start_tokens=start_tokens,
                                    end_token=EOS_ID)
            else:
                # Training: inputs are fed with word dropout applied.
                assert (dropout_keep_prob is not None)
                helper = WordDropoutTrainingHelper(
                    inputs=in_dec,
                    sequence_length=len_dec,
                    embedding=self.embed,
                    dropout_keep_prob=dropout_keep_prob,
                    drop_token_id=UNK_ID)

            # Projection from cell outputs to vocabulary-sized logits.
            projection = Dense(units=cfg.vocab_num,
                               activation=None,
                               use_bias=True,
                               trainable=True,
                               name='output_layer')

            basic_decoder = BasicDecoder(cell=self.cell(reuse),
                                         helper=helper,
                                         initial_state=initial_state,
                                         output_layer=projection)

            return dynamic_decode(decoder=basic_decoder,
                                  output_time_major=False,
                                  impute_finished=True)
예제 #10
0
def training_decoding_layer(decoding_embed_input, en_len, decoding_cell,
                            initial_state, op_layer, v_size, max_en_len):
    """Unroll a training decoder and return its final outputs.

    Args:
        decoding_embed_input: Inputs for ``TrainingHelper`` (batch-major).
        en_len: Sequence lengths for ``TrainingHelper``.
        decoding_cell: Cell handed to ``BasicDecoder``.
        initial_state: Initial state handed to ``BasicDecoder``.
        op_layer: Output layer handed to ``BasicDecoder``.
        v_size: Not used by this function.
        max_en_len: Maximum number of decoding iterations.

    Returns:
        The first element of ``dynamic_decode``'s result.
    """
    training_helper = TrainingHelper(
        inputs=decoding_embed_input,
        sequence_length=en_len,
        time_major=False)
    training_decoder = BasicDecoder(
        decoding_cell, training_helper, initial_state, op_layer)
    final_outputs, _, _ = dynamic_decode(
        training_decoder,
        output_time_major=False,
        impute_finished=True,
        maximum_iterations=max_en_len)
    return final_outputs
예제 #11
0
    def decoder_train(self, decoder_cell, decoder_initial_state, output_layer):
        '''
        Build the training part of the decoder.

        :param decoder_cell: cell handed to ``BasicDecoder``
        :param decoder_initial_state: initial state handed to ``BasicDecoder``
        :param output_layer: layer handed to ``BasicDecoder`` as ``output_layer``
        :return: decoder_logits_train: ``rnn_output`` of the decode result,
            passed through ``tf.identity``
        '''
        batch_rows = self.batch_size

        # tf.strided_slice(data, begin, end, strides) slices every dimension
        # with its own begin/end/stride. The end index of -1 drops the last
        # column, making room for the <GO> id prepended below.
        targets_head = tf.strided_slice(self.decoder_targets,
                                        [0, 0],
                                        [batch_rows, -1],
                                        [1, 1])
        # tf.fill(dims, value): a column holding one <GO> id per row.
        go_column = tf.fill([batch_rows, 1], self.word_to_idx['<GO>'])
        decoder_input = tf.concat([go_column, targets_head], 1)
        # Embed every row of the decoder input.
        embedded_input = tf.nn.embedding_lookup(self.embedding, decoder_input)

        # TrainingHelper feeds the supplied inputs step by step.
        # time_major=False means the inputs are laid out as
        # (batch_size, max_input_length, embedding_size); True would mean
        # (max_input_length, batch_size, embedding_size).
        helper = TrainingHelper(inputs=embedded_input,
                                sequence_length=self.decoder_targets_length,
                                time_major=False,
                                name='training_helper')

        # BasicDecoder takes an RNNCell, a Helper, the cell's initial state
        # and an optional layer applied to the RNN output.
        decoder = BasicDecoder(cell=decoder_cell,
                               helper=helper,
                               initial_state=decoder_initial_state,
                               output_layer=output_layer)

        # impute_finished=True copies the state through and zeroes the
        # outputs of finished sequences, at some cost in speed.
        # maximum_iterations caps the number of decoding steps.
        final_outputs, _, _ = dynamic_decode(
            decoder=decoder,
            impute_finished=True,
            maximum_iterations=self.max_target_sequence_length)

        # TODO: clarify why tf.identity is needed here.
        return tf.identity(final_outputs.rnn_output)
예제 #12
0
    def build_train_decoder(self):
        """Build the training decoder, loss, summaries, writer and optimizer.

        Sets ``self.loss``, ``self.summary_op`` and ``self.writer`` (a
        ``FileWriter`` on 'log/train' using ``self.sess.graph``), then calls
        ``self.build_optimizer()``. ``self.teacher_forcing`` selects between
        ``ScheduledEmbeddingTrainingHelper`` and ``TrainingHelper``.
        """
        print('Building train decoder...')

        # Decoder input: targets without their last column, prefixed by <GO>.
        targets_head = tf.strided_slice(self.decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
        go_column = tf.fill([self.batch_size, 1], self.word_to_id['<GO>'])
        decoder_input = tf.concat([go_column, targets_head], 1)
        embedded_input = tf.nn.embedding_lookup(self.embedding, decoder_input)

        # Arguments common to both helper types.
        helper_kwargs = dict(
            inputs=embedded_input,
            sequence_length=self.decoder_targets_length,
            time_major=False,
        )
        if self.teacher_forcing:
            training_helper = ScheduledEmbeddingTrainingHelper(
                embedding=self.embedding,
                sampling_probability=self.teacher_forcing_probability,
                name='teacher_forcing_training_helper',
                **helper_kwargs
            )
        else:
            training_helper = TrainingHelper(name='training_helper', **helper_kwargs)

        training_decoder = BasicDecoder(
            cell=self.decoder_cell,
            helper=training_helper,
            initial_state=self.decoder_initial_state,
            output_layer=self.output_layer
        )
        final_outputs, _, _ = dynamic_decode(
            decoder=training_decoder,
            impute_finished=True,
            maximum_iterations=self.max_target_sequence_length
        )
        train_logits = tf.identity(final_outputs.rnn_output)

        # Sequence loss weighted by self.mask.
        self.loss = sequence_loss(
            logits=train_logits,
            targets=self.decoder_targets,
            weights=self.mask
        )

        # Summaries and the writer that records them.
        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()
        self.writer = tf.summary.FileWriter('log/train', self.sess.graph)

        self.build_optimizer()
예제 #13
0
    def build_train_decoder(self, decoder_targets, decoder_targets_length, max_target_sequence_length, mask, name):
        """Build a training decoder for the given targets and return its loss.

        Args:
            decoder_targets: 2-D target ids; the decoder input is these with
                the last column dropped and a ``<GO>`` id prepended per row.
            decoder_targets_length: Sequence lengths for the helper.
            max_target_sequence_length: Maximum number of decoding iterations.
            mask: Weights passed to ``sequence_loss``.
            name: Suffix appended to the helper's name.

        Returns:
            The ``sequence_loss`` between the decoder's ``rnn_output`` and
            ``decoder_targets``.
        """
        targets_head = tf.strided_slice(decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
        go_column = tf.fill([self.batch_size, 1], self.word_to_id['<GO>'])
        decoder_input = tf.concat([go_column, targets_head], 1)
        embedded_input = tf.nn.embedding_lookup(self.embedding, decoder_input)

        decoder_cell, decoder_initial_state = self.build_decoder_cell()
        projection = tf.layers.Dense(
            self.vocab_size,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1)
        )

        if not self.teacher_forcing:
            training_helper = TrainingHelper(
                inputs=embedded_input,
                sequence_length=decoder_targets_length,
                time_major=False,
                name='training_helper_' + name
            )
        else:
            training_helper = ScheduledEmbeddingTrainingHelper(
                inputs=embedded_input,
                sequence_length=decoder_targets_length,
                embedding=self.embedding,
                sampling_probability=self.teacher_forcing_probility,
                time_major=False,
                name='teacher_forcing_training_helper_' + name
            )

        training_decoder = BasicDecoder(
            cell=decoder_cell,
            helper=training_helper,
            initial_state=decoder_initial_state,
            output_layer=projection
        )
        final_outputs, _, _ = dynamic_decode(
            decoder=training_decoder,
            impute_finished=True,
            maximum_iterations=max_target_sequence_length
        )
        train_logits = tf.identity(final_outputs.rnn_output)

        return sequence_loss(
            logits=train_logits,
            targets=decoder_targets,
            weights=mask
        )
예제 #14
0
def inference_decoding_layer(embeddings, start_token, end_token, decoding_cell,
                             initial_state, op_layer, max_en_len, batch_size):
    """Unroll a greedy inference decoder and return its final outputs.

    Args:
        embeddings: Embedding handed to ``GreedyEmbeddingHelper``.
        start_token: Id that every sequence in the batch starts from.
        end_token: Id passed to the helper as the end token.
        decoding_cell: Cell handed to ``BasicDecoder``.
        initial_state: Initial state handed to ``BasicDecoder``.
        op_layer: Output layer handed to ``BasicDecoder``.
        max_en_len: Maximum number of decoding iterations.
        batch_size: Number of copies of ``start_token`` to create.

    Returns:
        The first element of ``dynamic_decode``'s result.
    """
    # One int32 start id, repeated batch_size times.
    single_start = tf.constant([start_token], dtype=tf.int32)
    start_tokens = tf.tile(single_start, [batch_size], name='start_tokens')

    greedy_helper = GreedyEmbeddingHelper(embeddings, start_tokens, end_token)
    greedy_decoder = BasicDecoder(
        decoding_cell, greedy_helper, initial_state, op_layer)
    final_outputs, _, _ = dynamic_decode(
        greedy_decoder,
        output_time_major=False,
        impute_finished=True,
        maximum_iterations=max_en_len)
    return final_outputs
예제 #15
0
    def _init(self, sequence, targets, authors):
        """Build the decoder graph and return ``(targets, loss, sampled ids)``.

        Args:
            sequence: 2-D id tensor; non-zero entries per row are counted to
                obtain the sequence lengths.
            targets: Targets for the sequence loss; returned unchanged.
            authors: Ids looked up in the context embedding table.

        Returns:
            ``(targets, loss, out)`` where ``out`` is the decoder's
            ``sample_id``.
        """
        batch_size = tf.shape(sequence)[0]
        sequence_lengths = tf.cast(tf.count_nonzero(sequence, axis=1), tf.int32)

        # Embedding tables for characters and for author context.
        embedding = tf.Variable(
            tf.random_normal((self._vocab_size, self._embed_size)),
            name='char_embedding'
        )
        context = tf.Variable(
            tf.random_normal((self._author_size, self._ctx_size)),
            name='ctx_embedding'
        )
        embedded_sequence = tf.nn.embedding_lookup(embedding, sequence)
        embedded_authors = tf.nn.embedding_lookup(context, authors)

        training = self._training
        if training:
            helper = TrainingHelper(embedded_sequence, sequence_lengths)
        else:
            # Sampling starts from the first column of `sequence`; 2 is the
            # end token handed to the helper.
            helper = SampleEmbeddingHelper(embedding, sequence[:, 0], 2)

        # One context-wrapped cell per layer, with dropout only in training,
        # assigned to GPUs in round-robin order.
        cells = []
        for index in range(self._cell_num):
            layer = ContextWrapper(self._cell(self._cell_size), embedded_authors)
            if training:
                layer = DropoutWrapper(
                    layer, 1.0-self._input_dropout, 1.0-self._output_dropout)
            device = '/gpu:{}'.format(index % self._num_gpu)
            cells.append(DeviceWrapper(layer, device))
        cell = MultiRNNCell(cells)

        init_state = cell.zero_state(batch_size, tf.float32)
        projection = tf.layers.Dense(
            self._vocab_size, self._activation, name='fully_connected'
        )
        decoder = BasicDecoder(cell, helper, init_state, projection)
        output, _, _ = dynamic_decode(decoder, swap_memory=True)

        # Loss is masked by the true sequence lengths.
        weights = tf.sequence_mask(sequence_lengths, dtype=tf.float32)
        loss = tf.contrib.seq2seq.sequence_loss(
            output.rnn_output,
            targets,
            weights
        )

        return targets, loss, output.sample_id
예제 #16
0
 def decoder(self, encoder_outputs, encoder_states):
     """Build the decoder for the current mode.

     In 'train' mode this sets ``self.d_masks``, ``self.prob`` and
     ``self.loss``; in any other mode it runs beam search and sets
     ``self.decoder_output``.

     :param encoder_outputs: forwarded to ``self.add_decoder_cell``
     :param encoder_states: forwarded to ``self.add_decoder_cell``
     :return: None
     """
     decoder_cell, decoder_init_state = self.add_decoder_cell(
         encoder_outputs, encoder_states, self.hidden_size, self.cell_type,
         self.num_layers)

     # Bias-free projection from decoder outputs to target-vocabulary logits.
     output_proj = tf.layers.Dense(
         self.tgt_vcb_size,
         dtype=tf.float32,
         use_bias=False,
         kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
         name='output_proj')

     if self.mode == 'train':
         embedded_targets = tf.nn.embedding_lookup(self.decoder_embeddings,
                                                   self.decoder_input_train)
         training_helper = TrainingHelper(embedded_targets,
                                          self.target_len,
                                          name='training_helper')
         training_decoder = BasicDecoder(decoder_cell, training_helper,
                                         decoder_init_state, output_proj)

         # Decode up to the longest target in the batch.
         max_dec_len = tf.reduce_max(self.target_len)
         train_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
             training_decoder, maximum_iterations=max_dec_len)

         # Weight 1.0 inside each target's length, 0.0 on padding.
         self.d_masks = tf.sequence_mask(self.target_len,
                                         max_dec_len,
                                         dtype=tf.float32,
                                         name='d_masks')
         self.prob = train_output.rnn_output
         self.loss = tf.contrib.seq2seq.sequence_loss(
             logits=self.prob,
             targets=self.target,
             weights=self.d_masks,
             average_across_timesteps=True,
             average_across_batch=True)
         return

     def embed(ids):
         return tf.nn.embedding_lookup(self.decoder_embeddings, ids)

     start_token = [DataUnit.START_INDEX] * self.batch_size
     end_token = DataUnit.END_INDEX
     inference_decoder = BeamSearchDecoder(
         cell=decoder_cell,
         embedding=embed,
         start_tokens=start_token,
         end_token=end_token,
         initial_state=decoder_init_state,
         beam_width=self.beam_size,
         output_layer=output_proj)
     beam_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
         inference_decoder, maximum_iterations=self.max_decode_step)
     # Swap the last two axes of the predicted ids.
     self.decoder_output = tf.transpose(beam_output.predicted_ids,
                                        perm=[0, 2, 1])
예제 #17
0
    def setup_decoder(self):
        """Create ``self.decoder``, a ``BasicDecoder`` with scheduled sampling.

        The decoder inputs are the embeddings of ``self.att_label`` without
        its last column. Every sequence is given the same length,
        ``self.max_dec_iteration[0] - 1``, and the helper samples with
        probability 0.1. The cell starts from its zero state.
        """
        # Embed all label columns except the last one.
        labels_head = self.att_label[:, :-1]
        output_embed = tf.nn.embedding_lookup([self.embedding], labels_head)

        # One identical length per batch entry.
        step_count = self.max_dec_iteration[0] - 1
        decoder_lengths = tf.tile([step_count], [self.batch_size])

        helper = ScheduledEmbeddingTrainingHelper(
            output_embed, decoder_lengths, self.embedding, 0.1)

        projection = Dense(units=self.vocab_size)
        zero_state = self.cell.zero_state(dtype=tf.float32,
                                          batch_size=self.batch_size)
        self.decoder = BasicDecoder(cell=self.cell,
                                    helper=helper,
                                    initial_state=zero_state,
                                    output_layer=projection)
예제 #18
0
def attention_alignment(inputs, input_lengths, memory, memory_lengths, n_layers, n_units,
                        dropout_prob, cell_type=GRUCell, attention_mechanism=BahdanauAttention, is_training=True):
    """Run an attention-wrapped RNN over ``inputs`` while attending over ``memory``.

    Args:
        inputs (tensor):              Input sequence; its static shape is unpacked as
                                      [batch_size, seq_length, dim]
        input_lengths (tensor):       Lengths of the input sequences, given to the training helper
        memory (tensor):              Sequence to attend over
        memory_lengths (tensor):      Lengths of the memory sequences, given to the attention mechanism
        n_layers (int):               Number of RNN layers; a MultiRNNCell is used only when > 1
        n_units  (int):               Number of units per RNN cell and for the attention mechanism
        dropout_prob (float):         Output dropout rate; treated as 0 when not training
        cell_type (method):           RNN cell constructor, GRUCell by default
        attention_mechanism (method): Attention constructor, BahdanauAttention by default
        is_training (bool):           Whether dropout is active

    Returns:
        (tensor, state, tensor): the decoder outputs padded along the time axis and
        reshaped to [batch_size, seq_length, dim]; the final decoder state; and the
        stacked alignment history transposed with permutation [1, 0, 2].
    """
    # Static dimensions of the input; all three are used as Python values below.
    batch_size, seq_length, dim = inputs.get_shape().as_list()

    # Attention over the memory is created before any RNN cell.
    memory_attention = attention_mechanism(n_units, memory, memory_sequence_length=memory_lengths, dtype=tf.float32)

    # Dropout is disabled (rate 0) outside of training.
    keep_prob = 1 - (dropout_prob if is_training else 0)

    def _dropout_cell():
        """Create one RNN cell with output dropout applied."""
        return DropoutWrapper(cell_type(n_units), output_keep_prob=keep_prob)

    if n_layers > 1:
        recurrent_cell = MultiRNNCell([_dropout_cell() for _ in range(n_layers)])
    else:
        recurrent_cell = _dropout_cell()

    # Wire the attention mechanism into the cell and keep the alignment history.
    attending_cell = AttentionWrapper(recurrent_cell, memory_attention, alignment_history=True)

    # TODO: Do we ever feed an init state?
    zero_state = attending_cell.zero_state(batch_size, dtype=tf.float32)

    # Feed the inputs step by step while attending over the memory.
    teacher_helper = TrainingHelper(inputs=inputs, sequence_length=input_lengths)
    decoder = BasicDecoder(attending_cell, teacher_helper, zero_state)
    decoded, final_state, _ = tf.contrib.seq2seq.dynamic_decode(
        decoder, maximum_iterations=seq_length, impute_finished=True)

    # Pad the time axis back up to seq_length, then pin the static shape.
    # NOTE(review): the reshape assumes the cell output depth equals `dim` -- confirm.
    time_padding = seq_length - tf.reduce_max(input_lengths)
    padded = tf.pad(decoded.rnn_output, [[0, 0], [0, time_padding], [0, 0]])
    outputs = tf.reshape(padded, [batch_size, seq_length, dim])

    # Attention matrix, e.g. for plotting a heatmap.
    alignment_stack = final_state.alignment_history.stack()
    aligned = tf.transpose(alignment_stack, [1, 0, 2])
    return outputs, final_state, aligned
def build_rnn(train_or_test, cell, rnn_train_inputs, start_code_embed,
              batch_size, target_lengths, embedding, encoder_out, name):
    """Decode with ``cell`` from its zero state, using a train or test helper.

    Args:
        train_or_test: Truthy selects ``DefaultZeroInputTrainingHelper``;
            falsy selects ``TestEmbeddingConcatHelper``.
        cell: RNN cell to decode with.
        rnn_train_inputs: Inputs given to the training helper.
        start_code_embed: Start-code embedding given to either helper.
        batch_size: Batch size for the zero state and the test helper.
        target_lengths: Sequence lengths given to the training helper.
        embedding: Embedding given to the test helper.
        encoder_out: Encoder output given to either helper.
        name: Scope passed to ``dynamic_decode``.

    Returns:
        The two fields of the decoder output, as
        ``(decoder_outputs, decoder_samples)``.
    """
    helper = (
        DefaultZeroInputTrainingHelper(rnn_train_inputs, target_lengths,
                                       encoder_out, start_code_embed)
        if train_or_test else
        TestEmbeddingConcatHelper(batch_size, embedding, encoder_out,
                                  start_code_embed)
    )

    zero_state = cell.zero_state(batch_size=batch_size, dtype=tf.float32)
    decoder = BasicDecoder(cell, helper, zero_state)

    # Only the decoder output is used; final state and lengths are discarded.
    decoded, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder, scope=name)
    decoder_outputs, decoder_samples = decoded
    return decoder_outputs, decoder_samples
예제 #20
0
    def decoder_decode(self, decoder_cell, decoder_initial_state,
                       output_layer):
        """Build the inference decoder and return the predicted token ids.

        Args:
            decoder_cell: RNN cell used for decoding.
            decoder_initial_state: Initial state handed to the decoder.
            output_layer: Projection layer applied to the cell outputs.

        Returns:
            ``predicted_ids`` of the beam-search output when
            ``self.beam_search`` is set; otherwise the greedy ``sample_id``
            with one extra trailing axis added.
        """
        # Every sequence starts with <GO> and is terminated by <EOS>.
        start_tokens = tf.ones([self.batch_size], tf.int32) * self.word_to_idx['<GO>']
        end_token = self.word_to_idx['<EOS>']

        if not self.beam_search:
            # Greedy decoding: GreedyEmbeddingHelper driving a BasicDecoder.
            greedy_helper = GreedyEmbeddingHelper(
                embedding=self.embedding,
                start_tokens=start_tokens,
                end_token=end_token)
            inference_decoder = BasicDecoder(
                cell=decoder_cell,
                helper=greedy_helper,
                initial_state=decoder_initial_state,
                output_layer=output_layer)
        else:
            # Beam search decoding.
            inference_decoder = BeamSearchDecoder(
                cell=decoder_cell,
                embedding=self.embedding,
                start_tokens=start_tokens,
                end_token=end_token,
                initial_state=decoder_initial_state,
                beam_width=self.beam_size,
                output_layer=output_layer)

        # dynamic_decode accepts a BasicDecoder, a BeamSearchDecoder or a
        # custom decoder.  Decoding stops when <EOS> is produced or when the
        # step limit is reached; the limit is hard-coded to 50 here.
        decoder_outputs, _, _ = dynamic_decode(decoder=inference_decoder,
                                               maximum_iterations=50)

        if not self.beam_search:
            # Append one trailing axis to the greedy ids.
            # TODO: what is this for?
            return tf.expand_dims(decoder_outputs.sample_id, -1)
        return decoder_outputs.predicted_ids
예제 #21
0
    def build_predict_decoder(self):
        """Build the prediction decoder and return the predicted token ids.

        The decoder cell and its initial state come from
        ``self.build_decoder_cell()``.  Decoding uses beam search when
        ``self.beam_search`` is set and greedy decoding otherwise, with a
        hard-coded limit of 50 steps.

        Returns:
            ``predicted_ids`` for beam search; otherwise the greedy
            ``sample_id`` with one extra trailing axis added.
        """
        start_tokens = tf.ones([self.batch_size], tf.int32) * self.word_to_id['<GO>']
        end_token = self.word_to_id['<EOS>']

        decoder_cell, initial_state = self.build_decoder_cell()
        projection = tf.layers.Dense(
            self.vocab_size,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.1))

        # Arguments common to both decoder types.
        shared_kwargs = dict(cell=decoder_cell,
                             initial_state=initial_state,
                             output_layer=projection)

        if self.beam_search:
            inference_decoder = BeamSearchDecoder(
                embedding=self.embedding,
                start_tokens=start_tokens,
                end_token=end_token,
                beam_width=self.beam_size,
                **shared_kwargs)
        else:
            greedy_helper = GreedyEmbeddingHelper(embedding=self.embedding,
                                                  start_tokens=start_tokens,
                                                  end_token=end_token)
            inference_decoder = BasicDecoder(helper=greedy_helper,
                                             **shared_kwargs)

        decoded, _, _ = dynamic_decode(decoder=inference_decoder,
                                       maximum_iterations=50)

        if self.beam_search:
            return decoded.predicted_ids
        return tf.expand_dims(decoded.sample_id, -1)
예제 #22
0
    def decode(self, cell_dec, enc_final_state, output_size, output_embed_matrix, training, grammar_helper=None):
        """Decode from the encoder's final state and return the decoder outputs.

        Args:
            cell_dec: Decoder RNN cell.
            enc_final_state: Initial state for the decoder.
            output_size: Number of units of the bias-free output projection.
            output_embed_matrix: Embedding matrix for the decoder inputs.
            training: Truthy feeds the gold output ids (prefixed with the
                grammar start id); falsy decodes greedily.
            grammar_helper: Forwarded to ``GrammarBasicDecoder`` when grammar
                constraints are enabled.

        Returns:
            The first element returned by ``dynamic_decode``.
        """
        projection = tf_core_layers.Dense(output_size, use_bias=False)

        # One start id per batch element.
        go_vector = tf.ones((self.batch_size,), dtype=tf.int32) * self.config.grammar.start

        if not training:
            helper = GreedyEmbeddingHelper(output_embed_matrix, go_vector, self.config.grammar.end)
        else:
            # Prepend the start id to the gold ids; lengths grow by one to match.
            go_column = tf.expand_dims(go_vector, axis=1)
            ids_with_go = tf.concat([go_column, self.output_placeholder], axis=1)
            embedded = tf.nn.embedding_lookup([output_embed_matrix], ids_with_go)
            helper = TrainingHelper(embedded, self.output_length_placeholder+1)

        if self.config.use_grammar_constraints:
            gold_output = self.output_placeholder if training else None
            decoder = GrammarBasicDecoder(self.config.grammar,
                                          cell_dec,
                                          helper,
                                          enc_final_state,
                                          output_layer=projection,
                                          training_output=gold_output,
                                          grammar_helper=grammar_helper)
        else:
            decoder = BasicDecoder(cell_dec, helper, enc_final_state, output_layer=projection)

        final_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder, impute_finished=True, maximum_iterations=self.max_length)
        return final_outputs
def training_decoding_layer(decoding_embed_input, en_len, decoding_cell,
                            encoding_op, encoding_st, op_layer, v_size, fr_len,
                            max_en_len):
    """Build the training decoder inside the ``"decoder"`` variable scope.

    Args:
        decoding_embed_input: Embedded decoder inputs (batch-major).
        en_len: Sequence lengths given to the training helper.
        decoding_cell: Decoder RNN cell; replaced by the result of
            ``create_attention`` when an attention architecture is configured.
        encoding_op: Encoder outputs, forwarded to ``create_attention``.
        encoding_st: Encoder state used as the decoder's initial state.
        op_layer: Output layer given to the decoder.
        v_size: Unused in this function.
        fr_len: Forwarded to ``create_attention``.
        max_en_len: Maximum number of decoding iterations.

    Returns:
        The first element returned by ``dynamic_decode``.
    """
    # Default initializer for variables created inside the scope.
    default_init = init_ops.constant_initializer(0.1)
    with variable_scope.variable_scope("decoder", initializer=default_init):
        print("args:", args)

        # `args` is a module-level object; attention is opt-in through it.
        use_attention = args.attention_architecture is not None
        if use_attention:
            decoding_cell, encoding_st = create_attention(
                decoding_cell, encoding_op, encoding_st, fr_len)

        teacher_helper = TrainingHelper(inputs=decoding_embed_input,
                                        sequence_length=en_len,
                                        time_major=False)
        decoder = BasicDecoder(decoding_cell, teacher_helper, encoding_st,
                               op_layer)
        decoded, _, _ = dynamic_decode(decoder,
                                       output_time_major=False,
                                       impute_finished=True,
                                       maximum_iterations=max_en_len)
    return decoded
예제 #24
0
    def decoder_ops(self, decoder_emb_inp, encoder_outputs, encoder_state,
                    hparams):
        """Build the training decoder and return its per-step logits.

        :param decoder_emb_inp: embedded decoder inputs; the helper is created
            with ``time_major=True``
        :param encoder_outputs: unused in this method
        :param encoder_state: initial state for the decoder
        :param hparams: unused in this method
        :return: ``rnn_output`` of the decoder outputs
        """
        decoder_cell = self._build_cell(self.cell_type, self.num_units,
                                        self.num_layers)
        helper = TrainingHelper(decoder_emb_inp,
                                self.target_seq_length,
                                time_major=True)
        # NOTE(review): `project_layer` is not defined in this method; it is
        # presumably a module-level projection layer -- confirm it exists.
        decoder = BasicDecoder(decoder_cell,
                               helper,
                               encoder_state,
                               output_layer=project_layer)

        # Dynamic decoding.  dynamic_decode returns three values (outputs,
        # final state, final sequence lengths); unpacking it into two names
        # raised ValueError.
        # NOTE(review): the helper is time-major but output_time_major is left
        # at its default here -- confirm which layout callers expect.
        outputs, _, _ = dynamic_decode(decoder)
        logits = outputs.rnn_output
        # The original ended with a bare `core.Dense()` call: its result was
        # discarded and it lacked the required `units` argument, so it is
        # removed and the logits are returned instead.
        return logits
예제 #25
0
    def build_predict_decoder(self):
        """Build the prediction decoder and store its ids on ``self``.

        Uses ``self.decoder_cell``, ``self.decoder_initial_state`` and
        ``self.output_layer``.  Beam search is used when ``self.beam_search``
        is set and greedy decoding otherwise, with a hard-coded limit of 50
        steps.  The result is written to ``self.decoder_predict_decode``.
        """
        print('Building predict decoder...')

        go_tokens = tf.ones([self.batch_size], tf.int32) * self.word_to_id['<GO>']
        eos_id = self.word_to_id['<EOS>']

        if not self.beam_search:
            # GreedyEmbeddingHelper takes the argmax of each step's output
            # and feeds its embedding back in as the next input.
            #   embedding:    callable on a vector of ids, or the params
            #                 argument for embedding_lookup
            #   start_tokens: int32 vector of shape [batch_size]
            #   end_token:    int32 scalar marking the end of decoding
            greedy_helper = GreedyEmbeddingHelper(embedding=self.embedding,
                                                  start_tokens=go_tokens,
                                                  end_token=eos_id)
            inference_decoder = BasicDecoder(
                cell=self.decoder_cell,
                helper=greedy_helper,
                initial_state=self.decoder_initial_state,
                output_layer=self.output_layer)
        else:
            inference_decoder = BeamSearchDecoder(
                cell=self.decoder_cell,
                embedding=self.embedding,
                start_tokens=go_tokens,
                end_token=eos_id,
                initial_state=self.decoder_initial_state,
                beam_width=self.beam_size,
                output_layer=self.output_layer)

        decoded, _, _ = dynamic_decode(decoder=inference_decoder,
                                       maximum_iterations=50)

        # predicted_ids holds the final beam-search output, shaped
        # [batch_size, num_steps, beam_width] when not time-major, with beams
        # ordered best to worst.  The greedy ids get a trailing axis instead.
        if self.beam_search:
            predictions = decoded.predicted_ids
        else:
            predictions = tf.expand_dims(decoded.sample_id, -1)
        self.decoder_predict_decode = predictions
예제 #26
0
def pointer_net(inputs, input_lengths, n_pointers, word_matrix, cell_type, n_layers, n_units,
                dropout_prob, is_training=True):
    """Pointer network: encode the inputs, then decode ``n_pointers`` attention steps.

    Args:
        inputs (tensor):        Inputs to pointer network (typically output of previous RNN)
        input_lengths (tensor): Actual non-padded lengths of each input sequence
        n_pointers (int):       Number of pointers to generate
        word_matrix (tensor):   Embedding matrix of word vectors
        cell_type (method):     Cell type to use
        n_layers (int):         Number of layers in RNN (same for encoder & decoder)
        n_units (int):          Number of units in RNN cell (same for encoder & decoder)
        dropout_prob (float):   Dropout probability
        is_training (bool):     Whether the model is training or testing

    Returns:
        tensor: the decoder's stacked alignment history, reshaped to
        [n_pointers, batch_size, seq_length].
    """
    batch_size, seq_length, _ = inputs.get_shape().as_list()
    vocab_size = word_matrix.get_shape().as_list()[0]

    def _make_cell():
        """Create one RNN cell; output dropout is active only during training."""
        keep_prob = 1 if not is_training else 1 - dropout_prob
        return DropoutWrapper(cell_type(n_units), output_keep_prob=keep_prob)

    def _make_stack():
        """Create a MultiRNNCell for n_layers > 1, otherwise a single cell."""
        if n_layers > 1:
            return MultiRNNCell([_make_cell() for _ in range(n_layers)])
        return _make_cell()

    # Encoder over the input sequence.
    encoder_cell = _make_stack()
    encoded, _ = tf.nn.dynamic_rnn(encoder_cell, inputs, input_lengths, dtype=tf.float32)

    # Attention over the encoded inputs.
    attention = BahdanauAttention(n_units, encoded, memory_sequence_length=input_lengths)

    # TODO: find permanent solution (InferenceHelper?)
    start_tokens = tf.constant(START_TOKEN, shape=[batch_size], dtype=tf.int32)
    helper = GreedyEmbeddingHelper(word_matrix, start_tokens, END_TOKEN)

    # Decoder: attention-wrapped cell projected onto the vocabulary.
    decoder_cell = _make_stack()
    attn_cell = AttentionWrapper(decoder_cell, attention, alignment_history=True)
    out_cell = tf.contrib.rnn.OutputProjectionWrapper(attn_cell, vocab_size)
    zero_state = attn_cell.zero_state(batch_size, tf.float32)
    decoder = BasicDecoder(out_cell, helper, zero_state)

    # Only the final state is needed; it carries the alignment history.
    _, final_state, _ = dynamic_decode(decoder, maximum_iterations=n_pointers, impute_finished=True)
    alignment_stack = final_state.alignment_history.stack()
    return tf.reshape(alignment_stack, [n_pointers, batch_size, seq_length])
예제 #27
0
 def build_train_decoder(self):
     """Build the training decoder and store its results on ``self``.

     Sets ``last_state`` and ``outputs_length`` from ``dynamic_decode``,
     ``logits`` (identity of the decoder's ``rnn_output``), ``log_probs``
     (log-softmax of the logits) and ``gs_hypotheses`` (argmax of the
     log-probabilities over the last axis).
     """
     with tf.name_scope('train_decoder'):
         teacher_helper = TrainingHelper(inputs=self.inputs_dense,
                                         sequence_length=self.inputs_length,
                                         time_major=False,
                                         name='training_helper')

         with tf.name_scope('basic_decoder'):
             decoder = BasicDecoder(cell=self.cell,
                                    helper=teacher_helper,
                                    initial_state=self.initial_state,
                                    output_layer=self.output_layer)

         with tf.name_scope('dynamic_decode'):
             decode_result = seq2seq.dynamic_decode(
                 decoder=decoder,
                 output_time_major=False,
                 impute_finished=True,
                 maximum_iterations=self.inputs_max_length)
             outputs, self.last_state, self.outputs_length = decode_result

             self.logits = tf.identity(outputs.rnn_output)
             self.log_probs = tf.nn.log_softmax(self.logits)
             self.gs_hypotheses = tf.argmax(self.log_probs, -1)
def training_decode(enc_outputs, seq_len, helper, out_dim):
    """Run the attention decoder over ``enc_outputs`` with the given helper.

    Args:
        enc_outputs: Encoder outputs used as the attention memory; the batch
            size is read from its first dimension.
        seq_len: Lengths of the memory sequences.
        helper: Decoding helper passed to ``BasicDecoder``.
        out_dim: Size of the final output projection.

    Returns:
        ``(outputs, alignments)``: the first field of the decoder output, and
        the first layer's stacked alignment history transposed with
        permutation [1, 2, 0].
    """
    # Attention RNN: a GRU behind a prenet, attending over the encoder outputs.
    prenet_gru = DecoderPrenetWrapper(GRUCell(hp.embed_size),
                                      is_training=True,
                                      prenet_sizes=hp.embed_size,
                                      dropout_prob=hp.dropout)
    attention = BahdanauAttention(hp.embed_size,
                                  enc_outputs,
                                  normalize=True,
                                  memory_sequence_length=seq_len,
                                  probability_fn=tf.nn.softmax)
    attending_cell = AttentionWrapper(prenet_gru,
                                      attention,
                                      alignment_history=True,
                                      output_attention=False)
    concat_cell = ConcatOutputAndAttentionWrapper(attending_cell)

    # Decoder stack: projected attention cell followed by two residual GRUs.
    layers = [OutputProjectionWrapper(concat_cell, hp.embed_size)]
    layers.extend(ResidualWrapper(GRUCell(hp.embed_size)) for _ in range(2))
    stacked_cell = MultiRNNCell(layers, state_is_tuple=True)

    projected_cell = OutputProjectionWrapper(stacked_cell, out_dim)
    dynamic_batch = tf.shape(enc_outputs)[0]
    zero_state = projected_cell.zero_state(batch_size=dynamic_batch,
                                           dtype=tf.float32)

    decoder = BasicDecoder(cell=projected_cell,
                           helper=helper,
                           initial_state=zero_state)
    decoded, last_state, _ = tf.contrib.seq2seq.dynamic_decode(
        decoder=decoder, maximum_iterations=hp.max_len)
    outputs, _ = decoded

    # Alignment history of the first layer, for the attention plot.
    history = last_state[0].alignment_history.stack()
    alignments = tf.transpose(history, [1, 2, 0])
    return outputs, alignments
예제 #29
0
        def decode(helper, scope, reuse=None):
            """Build the decoder under ``scope`` and return the decode outputs.

            Args:
                helper: Decoding helper passed to ``BasicDecoder``.
                scope: Variable scope in which the decoder is built.
                reuse: Reuse flag for the scope, the GRU cells and the
                    projection wrapper.

            Returns:
                The first element returned by ``dynamic_decode``; it carries
                ``rnn_output`` (predicted logits) and ``sample_id``
                (predictions).
            """
            with tf.variable_scope(scope, reuse=reuse):

                def _make_layer():
                    """Create one GRU layer with input dropout."""
                    gru = tf.nn.rnn_cell.GRUCell(hidden_size, reuse=reuse)
                    return tf.nn.rnn_cell.DropoutWrapper(
                        gru, input_keep_prob=self.dropout_ph)

                stacked_cell = MultiRNNCell(
                    [_make_layer() for _ in range(n_decoder_layers)])

                # Project the cell outputs onto the vocabulary.
                projected_cell = OutputProjectionWrapper(stacked_cell,
                                                         vocab_size,
                                                         reuse=reuse)

                # NOTE(review): the original comment says the initial state
                # should equal the encoder's final state, but a zero state is
                # used here -- confirm which is intended.
                zero_state = projected_cell.zero_state(batch_size=batch_size,
                                                       dtype=tf.float32)
                decoder = BasicDecoder(projected_cell,
                                       helper,
                                       initial_state=zero_state)

                # Decode for at most the longest ground-truth length.
                step_limit = tf.reduce_max(self.ground_truth_lengths)
                outputs, _, _ = dynamic_decode(decoder=decoder,
                                               maximum_iterations=step_limit,
                                               output_time_major=False,
                                               impute_finished=True)

            return outputs
예제 #30
0
    def initialize(self,
                   inputs,
                   input_lengths,
                   mel_targets=None,
                   linear_targets=None):
        '''Builds the model graph under the 'inference' variable scope.

        Sets the "inputs", "input_lengths", "mel_outputs", "linear_outputs",
        "alignments", "mel_targets" and "linear_targets" fields on self.

        NOTE(review): the decoder always uses TacoTestHelper, even when
        targets are supplied; in the visible code the targets are only stored
        on self and used to derive is_training -- confirm this is intended.

        Args:
          inputs: int32 Tensor with shape [N, T_in] where N is batch size, T_in is number of
            steps in the input time series, and values are character IDs
          input_lengths: int32 Tensor with shape [N] where N is batch size and values are the lengths
            of each sequence in inputs.
          mel_targets: float32 Tensor with shape [N, T_out, M] where N is batch size, T_out is number
            of steps in the output time series, M is num_mels, and values are entries in the mel
            spectrogram. Only needed for training.
          linear_targets: float32 Tensor with shape [N, T_out, F] where N is batch_size, T_out is number
            of steps in the output time series, F is num_freq, and values are entries in the linear
            spectrogram. Only needed for training.
        '''
        with tf.variable_scope('inference') as scope:
            # Training mode is inferred solely from whether linear targets were given.
            is_training = linear_targets is not None
            batch_size = tf.shape(inputs)[0]
            hp = self._hparams

            # Embeddings

            # Hard-coded number of rows in the embedding table.
            symbols_length = 149  # BASED ON PREVIOUS LENGTH OF LIST

            embedding_table = tf.get_variable(
                'embedding', [symbols_length, hp.embed_depth],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=0.5))
            embedded_inputs = tf.nn.embedding_lookup(
                embedding_table, inputs)  # [N, T_in, embed_depth=256]

            # Encoder
            prenet_outputs = prenet(
                embedded_inputs, is_training,
                hp.prenet_depths)  # [N, T_in, prenet_depths[-1]=128]
            encoder_outputs = encoder_cbhg(
                prenet_outputs,
                input_lengths,
                is_training,  # [N, T_in, encoder_depth=256]
                hp.encoder_depth)

            # Attention
            # alignment_history=True keeps per-step alignments for the plot below.
            attention_cell = AttentionWrapper(
                GRUCell(hp.attention_depth),
                BahdanauAttention(hp.attention_depth, encoder_outputs),
                alignment_history=True,
                output_attention=False)  # [N, T_in, attention_depth=256]

            # Apply prenet before concatenation in AttentionWrapper.
            attention_cell = DecoderPrenetWrapper(attention_cell, is_training,
                                                  hp.prenet_depths)

            # Concatenate attention context vector and RNN cell output into a 2*attention_depth=512D vector.
            concat_cell = ConcatOutputAndAttentionWrapper(
                attention_cell)  # [N, T_in, 2*attention_depth=512]

            # Decoder (layers specified bottom to top):
            decoder_cell = MultiRNNCell(
                [
                    OutputProjectionWrapper(concat_cell, hp.decoder_depth),
                    ResidualWrapper(GRUCell(hp.decoder_depth)),
                    ResidualWrapper(GRUCell(hp.decoder_depth))
                ],
                state_is_tuple=True)  # [N, T_in, decoder_depth=256]

            # Project onto r mel spectrograms (predict r outputs at each RNN step):
            output_cell = OutputProjectionWrapper(
                decoder_cell, hp.num_mels * hp.outputs_per_step)
            decoder_init_state = output_cell.zero_state(batch_size=batch_size,
                                                        dtype=tf.float32)

            # The test-time helper is used unconditionally (see the docstring note).
            helper = TacoTestHelper(batch_size, hp.num_mels,
                                    hp.outputs_per_step)

            # Decoding is capped at hp.max_iters steps; the second field of
            # the decoder output is discarded.
            (decoder_outputs,
             _), final_decoder_state, _ = tf.contrib.seq2seq.dynamic_decode(
                 BasicDecoder(output_cell, helper, decoder_init_state),
                 maximum_iterations=hp.max_iters)  # [N, T_out/r, M*r]

            # Reshape outputs to be one output per entry
            mel_outputs = tf.reshape(
                decoder_outputs,
                [batch_size, -1, hp.num_mels])  # [N, T_out, M]

            # Add post-processing CBHG:
            post_outputs = post_cbhg(
                mel_outputs,
                hp.num_mels,
                is_training,  # [N, T_out, postnet_depth=256]
                hp.postnet_depth)
            linear_outputs = tf.layers.dense(post_outputs,
                                             hp.num_freq)  # [N, T_out, F]

            # Grab alignments from the final decoder state:
            # Index 0 is the state of the first layer in decoder_cell, which
            # wraps the attention cell.
            alignments = tf.transpose(
                final_decoder_state[0].alignment_history.stack(), [1, 2, 0])

            # Expose the inputs, outputs and targets on the model instance.
            self.inputs = inputs
            self.input_lengths = input_lengths
            self.mel_outputs = mel_outputs
            self.linear_outputs = linear_outputs
            self.alignments = alignments
            self.mel_targets = mel_targets
            self.linear_targets = linear_targets