def _build_train(self, config):
    """Create the training logits for the model named by ``config.model_name``.

    ``"fasttext_flat"`` and ``"RCNN_flat"`` pass a feature tensor through a
    linear fully-connected layer with ``config.fn_classes`` outputs. Any other
    model name runs a teacher-forced ``BasicDecoder`` over ``self.yy``, wrapped
    with Bahdanau attention when ``config.use_att`` is truthy.

    Side effects: sets ``self.logits`` (and ``self.decoder_outputs_train`` in
    the decoder branch) and prints the logits' static shape. Returns nothing.
    """

    def _flat_logits(features):
        # Linear projection (no activation); the shape is printed before the
        # result is reshaped to [-1, fn_classes].
        projected = tf.contrib.layers.fully_connected(
            features, config.fn_classes, activation_fn=None)
        print("logits:", projected.get_shape())
        return tf.reshape(projected, [-1, config.fn_classes])

    model_name = config.model_name
    if model_name == "fasttext_flat":
        self.logits = _flat_logits(self.first_attention)
        return
    if model_name == "RCNN_flat":
        self.logits = _flat_logits(self.xx_final)
        return

    # Sequence decoder: the same tensor seeds both slots of the LSTM state.
    encoder_state = rnn.LSTMStateTuple(self.xx_final, self.xx_final)
    if config.use_att:
        attention_mechanism = BahdanauAttention(
            config.decode_size,
            memory=self.xx_context,
            memory_sequence_length=self.x_seq_length)
        cell = AttentionWrapper(self.lstm, attention_mechanism,
                                output_attention=False)
        zero_state = cell.zero_state(dtype=tf.float32,
                                     batch_size=config.batch_size)
        initial_state = zero_state.clone(cell_state=encoder_state,
                                         attention=self.first_attention)
    else:
        cell = self.lstm
        initial_state = encoder_state

    train_helper = TrainingHelper(self.yy, self.y_seq_length)
    train_decoder = BasicDecoder(cell, train_helper, initial_state,
                                 output_layer=self.output_l)
    self.decoder_outputs_train, _, _ = dynamic_decode(
        train_decoder, impute_finished=True)
    self.logits = self.decoder_outputs_train.rnn_output
    print("logits:", self.logits.get_shape())
def call(self, source, is_training=None, is_validation=None,
         teacher_forcing=False, memory_sequence_length=None, target=None):
    """Run the attention decoder over ``source`` and return mel frames.

    The feeding helper is chosen from the mode flags: ``TrainingHelper`` when
    ``is_training`` is truthy, otherwise ``ValidationHelper`` when
    ``is_validation`` is truthy, otherwise ``StopTokenBasedInferenceHelper``.

    Returns:
        ``(mel_output, stop_token, final_decoder_state)`` where ``mel_output``
        is the decoder output reshaped to ``[batch_size, -1, self.num_mels]``.

    Raises:
        AssertionError: if ``is_training`` is left as ``None``.
    """
    assert is_training is not None

    prenets = tuple(PreNet(units, is_training, self._drop_rate)
                    for units in self._prenet_out_units)
    batch_size = tf.shape(source)[0]

    attention_cell = AttentionRNNV1(self.attention_out_units, prenets,
                                    source, memory_sequence_length)
    decoder_cell = DecoderRNNV1(self.decoder_out_units, attention_cell)
    # Each decoder step emits `outputs_per_step` mel frames at once.
    output_and_done_cell = OutputAndStopTokenWrapper(
        decoder_cell, self.num_mels * self.outputs_per_step)
    decoder_initial_state = output_and_done_cell.zero_state(
        batch_size, dtype=tf.float32)

    if is_training:
        helper = TrainingHelper(target, self.num_mels, self.outputs_per_step,
                                n_feed_frame=self.n_feed_frame)
    elif is_validation:
        helper = ValidationHelper(target, batch_size, self.num_mels,
                                  self.outputs_per_step,
                                  n_feed_frame=self.n_feed_frame,
                                  teacher_forcing=teacher_forcing)
    else:
        helper = StopTokenBasedInferenceHelper(batch_size, self.num_mels,
                                               self.outputs_per_step,
                                               n_feed_frame=self.n_feed_frame)

    basic_decoder = BasicDecoder(output_and_done_cell, helper,
                                 decoder_initial_state)
    (cell_outputs, _), final_decoder_state, _ = \
        tf.contrib.seq2seq.dynamic_decode(basic_decoder,
                                          maximum_iterations=self.max_iters)
    decoder_outputs, stop_token = cell_outputs

    mel_output = tf.reshape(decoder_outputs, [batch_size, -1, self.num_mels])
    return mel_output, stop_token, final_decoder_state
def update(self, hparams):
    """Rebuild the decoding outputs under the ``'inference'`` variable scope.

    Stores ``hparams`` on ``self._hparams``, decodes with the existing
    ``self.output_cell`` / ``self.helper`` / ``self.decoder_init_state``, and
    publishes ``self.mel_outputs``, ``self.linear_outputs`` and
    ``self.alignments``.
    """
    with tf.variable_scope('inference'):
        self._hparams = hparams
        hp = self._hparams

        inference_decoder = BasicDecoder(self.output_cell, self.helper,
                                         self.decoder_init_state)
        # [N, T_out/r, M*r]
        (decoder_outputs, _), final_decoder_state, _ = \
            tf.contrib.seq2seq.dynamic_decode(
                inference_decoder, maximum_iterations=hp.max_iters)

        # Reshape outputs to be one output per entry: [N, T_out, M]
        mel_outputs = tf.reshape(decoder_outputs,
                                 [self.batch_size, -1, hp.num_mels])

        # Post-processing CBHG, then a dense projection that reuses existing
        # variables (reuse=True): [N, T_out, 256] -> [N, T_out, F]
        post_outputs = post_cbhg(mel_outputs, hp.num_mels,
                                 is_training=False, is_updating=True)
        linear_outputs = tf.layers.dense(post_outputs, hp.num_freq, reuse=True)

        # Alignments come from the first element of the final decoder state.
        alignment_stack = final_decoder_state[0].alignment_history.stack()
        alignments = tf.transpose(alignment_stack, [1, 2, 0])

        self.mel_outputs = mel_outputs
        self.linear_outputs = linear_outputs
        self.alignments = alignments
        log('Updated Tacotron model.')
def decoder_train(self, decoder_cell, decoder_initial_state, output_layer):
    """Build the training (teacher-forced) part of the decoder.

    :param decoder_cell: RNN cell used by the decoder
    :param decoder_initial_state: initial state handed to ``BasicDecoder``
    :param output_layer: projection layer applied to the cell outputs
    :return: decoder_logits_train, the ``rnn_output`` of the training decoder
    """
    # Drop the last column of the targets and prepend a <GO> column so the
    # decoder input is the target sequence shifted right by one step.
    targets_without_last = tf.strided_slice(
        self.decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
    go_column = tf.fill([self.batch_size, 1], self.word_to_idx['<GO>'])
    decoder_input = tf.concat([go_column, targets_without_last], 1)
    embedded_input = tf.nn.embedding_lookup(self.embedding, decoder_input)

    helper = TrainingHelper(inputs=embedded_input,
                            sequence_length=self.decoder_targets_length,
                            time_major=False,
                            name='training_helper')
    train_decoder = BasicDecoder(cell=decoder_cell,
                                 helper=helper,
                                 initial_state=decoder_initial_state,
                                 output_layer=output_layer)
    train_outputs, _, _ = dynamic_decode(
        decoder=train_decoder,
        impute_finished=True,
        maximum_iterations=self.max_target_sequence_length)
    return tf.identity(train_outputs.rnn_output)
def decoder_decode(self, decoder_cell, decoder_initial_state, output_layer,
                   maximum_iterations=50):
    """Build the inference part of the decoder (beam search or greedy).

    :param decoder_cell: RNN cell used by the decoder
    :param decoder_initial_state: initial state handed to the decoder
    :param output_layer: projection layer applied to the cell outputs
    :param maximum_iterations: upper bound on the number of decoding steps;
        defaults to 50, the value that was previously hard-coded
    :return: predicted ids. With beam search this is ``predicted_ids``;
        otherwise ``sample_id`` with one extra trailing axis, so both modes
        return a tensor of the same rank.
    """
    start_tokens = tf.ones([self.batch_size, ], tf.int32) * self.word_to_idx['<GO>']
    end_token = self.word_to_idx['<EOS>']

    if self.beam_search:
        inference_decoder = BeamSearchDecoder(
            cell=decoder_cell,
            embedding=self.embedding,
            start_tokens=start_tokens,
            end_token=end_token,
            initial_state=decoder_initial_state,
            beam_width=self.beam_size,
            output_layer=output_layer)
    else:
        decoding_helper = GreedyEmbeddingHelper(embedding=self.embedding,
                                                start_tokens=start_tokens,
                                                end_token=end_token)
        inference_decoder = BasicDecoder(
            cell=decoder_cell,
            helper=decoding_helper,
            initial_state=decoder_initial_state,
            output_layer=output_layer)

    decoder_outputs, _, _ = dynamic_decode(
        decoder=inference_decoder, maximum_iterations=maximum_iterations)

    if self.beam_search:
        return decoder_outputs.predicted_ids
    return tf.expand_dims(decoder_outputs.sample_id, -1)
def build_decoder(self, encoder_outputs, encoder_final_state):
    """Build the complete decoder under the ``"decode"`` variable scope.

    In ``'train'`` mode this sets ``self.masks`` and ``self.loss``; in any
    other mode it sets ``self.decoder_pred_decode`` from a beam-search decode.

    :return: None
    """
    with tf.variable_scope("decode"):
        decoder_cell, decoder_initial_state = self.build_decoder_cell(
            encoder_outputs, encoder_final_state,
            self.hidden_size, self.cell_type, self.layer_size)

        # Output projection layer
        decoder_output_projection = layers.Dense(
            self.decoder_vocab_size,
            dtype=tf.float32,
            use_bias=False,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.1),
            name='decoder_output_projection')

        if self.mode == 'train':
            # Training mode
            embedded_inputs = tf.nn.embedding_lookup(
                self.decoder_embeddings, self.decoder_inputs_train)
            training_helper = TrainingHelper(
                inputs=embedded_inputs,
                sequence_length=self.decoder_inputs_length,
                name='training_helper')
            training_decoder = BasicDecoder(decoder_cell, training_helper,
                                            decoder_initial_state,
                                            decoder_output_projection)

            # Decode only as far as the longest sequence in the batch.
            max_decoder_length = tf.reduce_max(self.decoder_inputs_length)
            training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                training_decoder, maximum_iterations=max_decoder_length)

            self.masks = tf.sequence_mask(self.decoder_inputs_length,
                                          maxlen=max_decoder_length,
                                          dtype=tf.float32,
                                          name='masks')
            self.loss = tf.contrib.seq2seq.sequence_loss(
                logits=training_decoder_output.rnn_output,
                targets=self.decoder_inputs,
                weights=self.masks,
                average_across_timesteps=True,
                average_across_batch=True)
        else:
            # Prediction mode
            start_token = [DataUnit.START_INDEX] * self.batch_size
            end_token = DataUnit.END_INDEX

            def _embed(ids):
                return tf.nn.embedding_lookup(self.decoder_embeddings, ids)

            inference_decoder = BeamSearchDecoder(
                cell=decoder_cell,
                embedding=_embed,
                start_tokens=start_token,
                end_token=end_token,
                initial_state=decoder_initial_state,
                beam_width=self.beam_width,
                output_layer=decoder_output_projection)
            inference_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                inference_decoder, maximum_iterations=self.max_decode_step)

            self.decoder_pred_decode = inference_decoder_output.predicted_ids
            # Swap the last two axes of the predicted ids.
            self.decoder_pred_decode = tf.transpose(
                self.decoder_pred_decode, perm=[0, 2, 1])
def decoder(x, decoder_inputs, keep_prob, sequence_length, memory,
            memory_length, first_attention):
    """Build a training decoder and a beam-search decoder over one LSTM cell.

    Both decoders are created under the ``"Decoder"`` variable scope and share
    the same label embedding, LSTM cell object and output layer. Sizes
    (``n_classes``, ``embedding_size``, ``n_hidden``, ``beam_width``, batch
    sizes, ``GO``/``EOS``) are read from module-level names.

    Returns:
        ``(decoder_outputs_train, decoder_outputs_infer, decoder_state_infer)``
    """
    with tf.variable_scope("Decoder"):
        label_embeddings = tf.get_variable(name="embeddings",
                                           shape=[n_classes, embedding_size],
                                           dtype=tf.float32)
        train_inputs_embedded = tf.nn.embedding_lookup(label_embeddings,
                                                       decoder_inputs)
        lstm = rnn.LayerNormBasicLSTMCell(n_hidden,
                                          dropout_keep_prob=keep_prob)
        output_l = layers_core.Dense(n_classes, use_bias=True)

        # ---- training decoder (teacher forcing) ----
        train_encoder_state = rnn.LSTMStateTuple(x, x)
        train_attention = BahdanauAttention(
            embedding_size, memory=memory,
            memory_sequence_length=memory_length)
        train_cell = AttentionWrapper(lstm, train_attention,
                                      output_attention=False)
        train_state = train_cell.zero_state(dtype=tf.float32,
                                            batch_size=train_batch_size)
        train_state = train_state.clone(cell_state=train_encoder_state,
                                        attention=first_attention)
        train_helper = TrainingHelper(train_inputs_embedded, sequence_length)
        train_decoder = BasicDecoder(train_cell, train_helper, train_state,
                                     output_layer=output_l)
        decoder_outputs_train, _, _ = dynamic_decode(train_decoder,
                                                     impute_finished=True)

        # ---- inference decoder (beam search) ----
        # Every per-example tensor is tiled beam_width times first.
        tiled_inputs = tile_batch(memory, multiplier=beam_width)
        tiled_sequence_length = tile_batch(memory_length,
                                           multiplier=beam_width)
        tiled_first_attention = tile_batch(first_attention,
                                           multiplier=beam_width)
        infer_attention = BahdanauAttention(
            embedding_size, memory=tiled_inputs,
            memory_sequence_length=tiled_sequence_length)
        tiled_x = tile_batch(x, beam_width)
        infer_encoder_state = rnn.LSTMStateTuple(tiled_x, tiled_x)
        infer_cell = AttentionWrapper(lstm, infer_attention,
                                      output_attention=False)
        infer_state = infer_cell.zero_state(
            dtype=tf.float32, batch_size=test_batch_size * beam_width)
        infer_state = infer_state.clone(cell_state=infer_encoder_state,
                                        attention=tiled_first_attention)
        infer_decoder = BeamSearchDecoder(infer_cell,
                                          embedding=label_embeddings,
                                          start_tokens=[GO] * test_len,
                                          end_token=EOS,
                                          initial_state=infer_state,
                                          beam_width=beam_width,
                                          output_layer=output_l)
        decoder_outputs_infer, decoder_state_infer, _ = dynamic_decode(
            infer_decoder, maximum_iterations=4)

        return decoder_outputs_train, decoder_outputs_infer, decoder_state_infer
def build_train_decoder(self):
    """Build the training decoder, its loss, the summary op and the optimizer.

    Sets ``self.decoder_logits_train``, ``self.loss`` and ``self.summary_op``,
    then calls ``self.build_optimizer()``.
    """
    print('Building train decoder...')

    # Decoder input = targets without their last column, prefixed with <GO>.
    trimmed_targets = tf.strided_slice(
        self.decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
    go_column = tf.fill([self.batch_size, 1], self.word_to_id['<GO>'])
    decoder_input = tf.concat([go_column, trimmed_targets], 1)
    embedded_input = tf.nn.embedding_lookup(self.embedding, decoder_input)

    if self.teacher_forcing:
        # Scheduled sampling with self.teacher_forcing_probability as the
        # sampling probability.
        helper = ScheduledEmbeddingTrainingHelper(
            inputs=embedded_input,
            sequence_length=self.decoder_targets_length,
            embedding=self.embedding,
            sampling_probability=self.teacher_forcing_probability,
            time_major=False,
            name='teacher_forcing_training_helper')
    else:
        helper = TrainingHelper(
            inputs=embedded_input,
            sequence_length=self.decoder_targets_length,
            time_major=False,
            name='training_helper')

    train_decoder = BasicDecoder(cell=self.decoder_cell,
                                 helper=helper,
                                 initial_state=self.decoder_initial_state,
                                 output_layer=self.output_layer)
    train_outputs, _, _ = dynamic_decode(
        decoder=train_decoder,
        impute_finished=True,
        maximum_iterations=self.max_target_sequence_length)
    self.decoder_logits_train = tf.identity(train_outputs.rnn_output)

    # Weighted cross-entropy loss for a sequence of logits:
    #   logits:  [batch_size, sequence_length, num_decoder_symbols]
    #   targets: [batch_size, sequence_length], true class at each time step
    #   weights: [batch_size, sequence_length], weight of each prediction
    self.loss = sequence_loss(logits=self.decoder_logits_train,
                              targets=self.decoder_targets,
                              weights=self.mask)

    # Scalar summary for the loss, merged with every summary collected in the
    # default graph.
    tf.summary.scalar('loss', self.loss)
    self.summary_op = tf.summary.merge_all()

    self.build_optimizer()
def decoder(self, initial_state, x_dec_onehot, len_dec,
            is_teacher_forcing=False, reuse=False):
    """Decode either with teacher forcing (training) or by sampling.

    Built under the ``"decoder"`` variable scope with the given ``reuse``
    flag. ``initial_state`` is first passed through a linear dense layer of
    ``self.config.hidden_size`` units.

    Returns:
        The tuple returned by ``dynamic_decode``.

    Raises:
        AssertionError: if teacher forcing is requested while
            ``self.config.word_dropout_keep_prob`` is ``None``.
    """
    with tf.variable_scope("decoder", reuse=reuse):
        keep_prob = self.config.word_dropout_keep_prob
        use_argmax = self.config.is_argmax_sampling

        embedded_inputs = self._soft_embedding_lookup(self.embed, x_dec_onehot)
        projected_state = dense(inputs=initial_state,
                                units=self.config.hidden_size,
                                activation=None,
                                use_bias=True,
                                trainable=True,
                                name='initial_layer')

        if is_teacher_forcing:
            # for training
            assert (keep_prob is not None)
            helper = WordDropoutTrainingHelper(
                inputs=embedded_inputs,
                sequence_length=len_dec,
                embedding=self.embed,
                dropout_keep_prob=keep_prob,
                drop_token_id=UNK_ID)
        else:
            # for sampling
            if use_argmax:
                helper_cls = GreedyEmbeddingHelper
            else:
                helper_cls = SampleEmbeddingHelper
            # EOS_ID is used both as the start token and as the end token.
            start_tokens = tf.tile([EOS_ID], [self.config.batch_size])
            helper = helper_cls(embedding=self.embed,
                                start_tokens=start_tokens,
                                end_token=EOS_ID)

        # projection layer
        projection = Dense(units=self.config.vocab_num,
                           activation=None,
                           use_bias=True,
                           trainable=True,
                           name='output_layer')

        basic_decoder = BasicDecoder(cell=self.cell(reuse),
                                     helper=helper,
                                     initial_state=projected_state,
                                     output_layer=projection)

        return dynamic_decode(decoder=basic_decoder,
                              output_time_major=False,  # speed
                              impute_finished=True)
def training_decoding_layer(decoding_embed_input, en_len, decoding_cell,
                            initial_state, op_layer, v_size, max_en_len):
    """Run a teacher-forced decode and return the decoder outputs.

    ``v_size`` is accepted but not used by this function.

    Returns:
        The first element of ``dynamic_decode``'s result (the final outputs).
    """
    train_helper = TrainingHelper(inputs=decoding_embed_input,
                                  sequence_length=en_len,
                                  time_major=False)
    train_decoder = BasicDecoder(decoding_cell, train_helper, initial_state,
                                 op_layer)
    final_outputs, _, _ = dynamic_decode(train_decoder,
                                         output_time_major=False,
                                         impute_finished=True,
                                         maximum_iterations=max_en_len)
    return final_outputs
def decoder_train(self, decoder_cell, decoder_initial_state, output_layer):
    """Build the training part of the decoder.

    :param decoder_cell: RNN cell used by the decoder
    :param decoder_initial_state: initial state of the decoder cell
    :param output_layer: layer applied to the RNN outputs
    :return: decoder_logits_train: the decoder's predictions (``rnn_output``)
    """
    # tf.strided_slice(data, begin, end, stride) slices every dimension with
    # the given start, stop and step. Here it keeps batch_size rows and drops
    # the last column; a <GO> column is then put in front of every row.
    sliced_targets = tf.strided_slice(
        self.decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
    # tf.fill(dims, value) builds the [batch_size, 1] column of <GO> ids.
    go_ids = tf.fill([self.batch_size, 1], self.word_to_idx['<GO>'])
    decoder_input = tf.concat([go_ids, sliced_targets], 1)

    # Embed every row of the shifted sequence.
    decoder_inputs_embedded = tf.nn.embedding_lookup(self.embedding,
                                                     decoder_input)

    # TrainingHelper is the most common helper for training: the input at the
    # next step is the ground-truth value of the previous step.
    # time_major selects whether the time axis comes first:
    #   False: (batch_size, max_input_length, embedding_size)
    #   True:  (max_input_length, batch_size, embedding_size)
    helper = TrainingHelper(inputs=decoder_inputs_embedded,
                            sequence_length=self.decoder_targets_length,
                            time_major=False,
                            name='training_helper')

    # BasicDecoder arguments:
    #   cell:          an `RNNCell` instance
    #   helper:        a `Helper` instance
    #   initial_state: (possibly nested tuple of) tensors and TensorArrays,
    #                  the initial state of the RNNCell
    #   output_layer:  (optional) a `tf.layers.Layer` instance such as
    #                  `tf.layers.Dense`, applied to the RNN output
    # Raises TypeError if `cell`, `helper` or `output_layer` has a wrong type.
    basic_decoder = BasicDecoder(cell=decoder_cell,
                                 helper=helper,
                                 initial_state=decoder_initial_state,
                                 output_layer=output_layer)

    # dynamic_decode arguments:
    #   decoder:            a BasicDecoder, BeamSearchDecoder or custom decoder
    #   impute_finished:    when True, copies the last state through and zeroes
    #                       the outputs of finished sequences; more stable but
    #                       slower
    #   maximum_iterations: maximum number of decoding steps
    decode_outputs, _, _ = dynamic_decode(
        decoder=basic_decoder,
        impute_finished=True,
        maximum_iterations=self.max_target_sequence_length)

    # TODO: what is the purpose of tf.identity here?
    return tf.identity(decode_outputs.rnn_output)
def build_train_decoder(self):
    """Build the training decoder, loss, summaries, log writer and optimizer.

    Sets ``self.loss``, ``self.summary_op`` and ``self.writer`` (a
    ``FileWriter`` on ``'log/train'``), then calls ``self.build_optimizer()``.
    """
    print('Building train decoder...')

    # Shift the targets right by one step: drop the last column, prepend <GO>.
    shifted_body = tf.strided_slice(
        self.decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
    go_prefix = tf.fill([self.batch_size, 1], self.word_to_id['<GO>'])
    decoder_input = tf.concat([go_prefix, shifted_body], 1)
    decoder_inputs_embedded = tf.nn.embedding_lookup(self.embedding,
                                                     decoder_input)

    helper_kwargs = dict(inputs=decoder_inputs_embedded,
                         sequence_length=self.decoder_targets_length,
                         time_major=False)
    if self.teacher_forcing:
        training_helper = ScheduledEmbeddingTrainingHelper(
            embedding=self.embedding,
            sampling_probability=self.teacher_forcing_probability,
            name='teacher_forcing_training_helper',
            **helper_kwargs)
    else:
        training_helper = TrainingHelper(name='training_helper',
                                         **helper_kwargs)

    training_decoder = BasicDecoder(cell=self.decoder_cell,
                                    helper=training_helper,
                                    initial_state=self.decoder_initial_state,
                                    output_layer=self.output_layer)
    outputs, _, _ = dynamic_decode(
        decoder=training_decoder,
        impute_finished=True,
        maximum_iterations=self.max_target_sequence_length)
    logits = tf.identity(outputs.rnn_output)

    # loss
    self.loss = sequence_loss(logits=logits,
                              targets=self.decoder_targets,
                              weights=self.mask)

    # summary
    tf.summary.scalar('loss', self.loss)
    self.summary_op = tf.summary.merge_all()
    self.writer = tf.summary.FileWriter('log/train', self.sess.graph)

    self.build_optimizer()
def build_train_decoder(self, decoder_targets, decoder_targets_length,
                        max_target_sequence_length, mask, name):
    """Build a training decoder for the given targets and return its loss.

    A fresh decoder cell (via ``self.build_decoder_cell()``) and a fresh Dense
    output layer are created on every call. ``name`` is appended to the
    helper's name.

    Returns:
        The ``sequence_loss`` of the decoder logits against
        ``decoder_targets``, weighted by ``mask``.
    """
    # Decoder input = targets without their last column, prefixed with <GO>.
    target_body = tf.strided_slice(
        decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
    go_column = tf.fill([self.batch_size, 1], self.word_to_id['<GO>'])
    decoder_input = tf.concat([go_column, target_body], 1)
    embedded_input = tf.nn.embedding_lookup(self.embedding, decoder_input)

    decoder_cell, decoder_initial_state = self.build_decoder_cell()
    projection = tf.layers.Dense(
        self.vocab_size,
        kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                           stddev=0.1))

    if self.teacher_forcing:
        helper = ScheduledEmbeddingTrainingHelper(
            inputs=embedded_input,
            sequence_length=decoder_targets_length,
            embedding=self.embedding,
            sampling_probability=self.teacher_forcing_probility,
            time_major=False,
            name='teacher_forcing_training_helper_' + name)
    else:
        helper = TrainingHelper(inputs=embedded_input,
                                sequence_length=decoder_targets_length,
                                time_major=False,
                                name='training_helper_' + name)

    train_decoder = BasicDecoder(cell=decoder_cell,
                                 helper=helper,
                                 initial_state=decoder_initial_state,
                                 output_layer=projection)
    train_outputs, _, _ = dynamic_decode(
        decoder=train_decoder,
        impute_finished=True,
        maximum_iterations=max_target_sequence_length)
    logits = tf.identity(train_outputs.rnn_output)

    # loss
    return sequence_loss(logits=logits, targets=decoder_targets, weights=mask)
def inference_decoding_layer(embeddings, start_token, end_token,
                             decoding_cell, initial_state, op_layer,
                             max_en_len, batch_size):
    """Run a greedy decode and return the decoder outputs.

    Every sequence starts from ``start_token`` and decoding runs for at most
    ``max_en_len`` steps.

    Returns:
        The first element of ``dynamic_decode``'s result (the final outputs).
    """
    # One start token per batch entry.
    single_start = tf.constant([start_token], dtype=tf.int32)
    start_tokens = tf.tile(single_start, [batch_size], name='start_tokens')

    greedy_helper = GreedyEmbeddingHelper(embeddings, start_tokens, end_token)
    greedy_decoder = BasicDecoder(decoding_cell, greedy_helper, initial_state,
                                  op_layer)
    final_outputs, _, _ = dynamic_decode(greedy_decoder,
                                         output_time_major=False,
                                         impute_finished=True,
                                         maximum_iterations=max_en_len)
    return final_outputs
def _init(self, sequence, targets, authors):
    """Build the author-conditioned sequence model.

    Sequence lengths are taken as the number of non-zero entries per row of
    ``sequence``. When ``self._training`` is truthy the decoder is teacher
    forced and cells are wrapped in dropout; otherwise it samples, starting
    from the first column of ``sequence`` with end token ``2``.

    Returns:
        ``(targets, loss, out)`` where ``out`` is the decoder's ``sample_id``.
    """
    batch_size = tf.shape(sequence)[0]
    sequence_lengths = tf.cast(tf.count_nonzero(sequence, axis=1), tf.int32)

    embedding = tf.Variable(
        tf.random_normal((self._vocab_size, self._embed_size)),
        name='char_embedding')
    context = tf.Variable(
        tf.random_normal((self._author_size, self._ctx_size)),
        name='ctx_embedding')

    embedded_sequence = tf.nn.embedding_lookup(embedding, sequence)
    embedded_authors = tf.nn.embedding_lookup(context, authors)

    def _device_for(index):
        # Layers are spread over the GPUs round-robin.
        return '/gpu:{}'.format(index % self._num_gpu)

    if self._training:
        def _apply_dropout(cell):
            return DropoutWrapper(cell,
                                  1.0-self._input_dropout,
                                  1.0-self._output_dropout)

        helper = TrainingHelper(embedded_sequence, sequence_lengths)
    else:
        def _apply_dropout(cell):
            return cell

        helper = SampleEmbeddingHelper(embedding, sequence[:, 0], 2)

    def _make_base_cell(size):
        return ContextWrapper(self._cell(size), embedded_authors)

    def _wrap_layer(index, cell):
        return DeviceWrapper(_apply_dropout(cell), _device_for(index))

    cells = []
    for layer_index in range(self._cell_num):
        cells.append(_wrap_layer(layer_index,
                                 _make_base_cell(self._cell_size)))
    cell = MultiRNNCell(cells)
    init_state = cell.zero_state(batch_size, tf.float32)

    projection = tf.layers.Dense(self._vocab_size, self._activation,
                                 name='fully_connected')

    basic_decoder = BasicDecoder(cell, helper, init_state, projection)
    output, _, _ = dynamic_decode(basic_decoder, swap_memory=True)

    logits = output.rnn_output
    weights = tf.sequence_mask(sequence_lengths, dtype=tf.float32)
    loss = tf.contrib.seq2seq.sequence_loss(logits, targets, weights)

    out = output.sample_id
    return targets, loss, out
def decoder(self, encoder_outputs, encoder_states):
    """Build the decoder for the current ``self.mode``.

    ``'train'``: teacher-forced decode; sets ``self.d_masks``, ``self.prob``
    (the decoder's ``rnn_output``) and ``self.loss``.
    Any other mode: beam-search decode; sets ``self.decoder_output`` to the
    predicted ids with their last two axes swapped.
    """
    decoder_cell, decoder_init_state = self.add_decoder_cell(
        encoder_outputs, encoder_states,
        self.hidden_size, self.cell_type, self.num_layers)

    output_proj = tf.layers.Dense(
        self.tgt_vcb_size,
        dtype=tf.float32,
        use_bias=False,
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
        name='output_proj')

    if self.mode == 'train':
        target_embedding = tf.nn.embedding_lookup(self.decoder_embeddings,
                                                  self.decoder_input_train)
        helper = TrainingHelper(target_embedding, self.target_len,
                                name='training_helper')
        train_decoder = BasicDecoder(decoder_cell, helper,
                                     decoder_init_state, output_proj)

        # Decode only as far as the longest target in the batch.
        max_dec_len = tf.reduce_max(self.target_len)
        train_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
            train_decoder, maximum_iterations=max_dec_len)

        self.d_masks = tf.sequence_mask(self.target_len, max_dec_len,
                                        dtype=tf.float32, name='d_masks')
        self.prob = train_output.rnn_output
        self.loss = tf.contrib.seq2seq.sequence_loss(
            logits=self.prob,
            targets=self.target,
            weights=self.d_masks,
            average_across_timesteps=True,
            average_across_batch=True)
        return

    start_token = [DataUnit.START_INDEX] * self.batch_size
    end_token = DataUnit.END_INDEX

    def _embed(ids):
        return tf.nn.embedding_lookup(self.decoder_embeddings, ids)

    beam_decoder = BeamSearchDecoder(cell=decoder_cell,
                                     embedding=_embed,
                                     start_tokens=start_token,
                                     end_token=end_token,
                                     initial_state=decoder_init_state,
                                     beam_width=self.beam_size,
                                     output_layer=output_proj)
    beam_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
        beam_decoder, maximum_iterations=self.max_decode_step)
    predicted_ids = beam_output.predicted_ids
    self.decoder_output = tf.transpose(predicted_ids, perm=[0, 2, 1])
def setup_decoder(self):
    """Create ``self.decoder``, a scheduled-sampling ``BasicDecoder``.

    The decoder input is the embedding of ``self.att_label`` without its last
    column. Every sequence gets the same length,
    ``self.max_dec_iteration[0] - 1``, and the sampling probability is 0.1.
    The initial state is the cell's zero state.
    """
    labels_without_last = self.att_label[:, :-1]
    output_embed = tf.nn.embedding_lookup([self.embedding],
                                          labels_without_last)

    step_count = self.max_dec_iteration[0] - 1
    decoder_lengths = tf.tile([step_count], [self.batch_size])

    helper = ScheduledEmbeddingTrainingHelper(output_embed, decoder_lengths,
                                              self.embedding, 0.1)
    output_layer = Dense(units=self.vocab_size)
    zero_state = self.cell.zero_state(dtype=tf.float32,
                                      batch_size=self.batch_size)

    self.decoder = BasicDecoder(cell=self.cell,
                                helper=helper,
                                initial_state=zero_state,
                                output_layer=output_layer)
def attention_alignment(inputs, input_lengths, memory, memory_lengths,
                        n_layers, n_units, dropout_prob, cell_type=GRUCell,
                        attention_mechanism=BahdanauAttention,
                        is_training=True):
    """Performs alignment over inputs, attending over memory

    Args:
        inputs (tensor): Input sequence, with the shape of
            [Batch x seq_length x dimension]; all three dimensions must be
            statically known because they are read from the static shape
        input_lengths (tensor): The length of input sequences
        memory (tensor): Sequence to attend
        memory_lengths (tensor): The length of memory
        n_layers (int): Number of layers in RNN
        n_units (int): Number of units in RNN
        dropout_prob (float): Drop out rate for RNN cell; forced to 0 when
            ``is_training`` is falsy
        cell_type (method): Type of RNN cell, GRU by default
        attention_mechanism (method): Type of attention mechanism, Bahdanau
            by default
        is_training (bool): Whether the model is training or testing

    returns:
        (tensor, tensor, tensor): the decoder outputs padded along the time
        axis up to seq_length and reshaped to [batch_size, seq_length, dim],
        the final decoder state, and the stacked alignment history
        transposed with perm [1, 0, 2]
    """
    # get tensor dimensions
    batch_size, seq_length, dim = inputs.get_shape().as_list()

    # create a attention over the memory
    attention = attention_mechanism(n_units, memory,
                                    memory_sequence_length=memory_lengths,
                                    dtype=tf.float32)

    # dropout is disabled outside training
    if not is_training:
        dropout_prob = 0

    def _dropout_cell():
        return DropoutWrapper(cell_type(n_units),
                              output_keep_prob=1-dropout_prob)

    # build an encoder RNN over the input sequence
    if n_layers > 1:
        attention_cell = MultiRNNCell([_dropout_cell()
                                       for _ in range(n_layers)])
    else:
        attention_cell = _dropout_cell()

    # for each input to the next RNN cell, wire the attention mechanism
    a_cell = AttentionWrapper(attention_cell, attention,
                              alignment_history=True)

    # define the initial state
    # TODO: Do we ever feed an init state?
    attention_state = a_cell.zero_state(batch_size, dtype=tf.float32)

    # read input while attending over memory
    helper = TrainingHelper(inputs=inputs, sequence_length=input_lengths)
    basic_decoder = BasicDecoder(a_cell, helper, attention_state)

    # output of the decoder is a new representation of input sentence with
    # attention over the question
    outputs, states, _ = tf.contrib.seq2seq.dynamic_decode(
        basic_decoder, maximum_iterations=seq_length, impute_finished=True)

    # pad the time axis from the longest input in the batch up to seq_length
    time_padding = seq_length - tf.reduce_max(input_lengths)
    outputs = tf.pad(outputs.rnn_output,
                     [[0, 0], [0, time_padding], [0, 0]])
    outputs = tf.reshape(outputs, [batch_size, seq_length, dim])

    # attention matrix for visualizing heatmap
    aligned = tf.transpose(states.alignment_history.stack(), [1, 0, 2])
    return outputs, states, aligned
def build_rnn(train_or_test, cell, rnn_train_inputs, start_code_embed,
              batch_size, target_lengths, embedding, encoder_out, name):
    """Decode with ``cell`` using a training or a test helper.

    ``train_or_test`` truthy selects ``DefaultZeroInputTrainingHelper``;
    otherwise ``TestEmbeddingConcatHelper`` is used. Decoding starts from the
    cell's zero state and runs under the scope ``name``.

    Returns:
        ``(decoder_outputs, decoder_samples)``, the two members of the final
        decoder output.
    """
    if train_or_test:
        helper = DefaultZeroInputTrainingHelper(rnn_train_inputs,
                                                target_lengths,
                                                encoder_out,
                                                start_code_embed)
    else:
        helper = TestEmbeddingConcatHelper(batch_size, embedding,
                                           encoder_out, start_code_embed)

    initial_state = cell.zero_state(batch_size=batch_size, dtype=tf.float32)
    basic_decoder = BasicDecoder(cell, helper, initial_state)
    final_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(basic_decoder,
                                                            scope=name)
    decoder_outputs, decoder_samples = final_outputs
    return decoder_outputs, decoder_samples
def decoder_decode(self, decoder_cell, decoder_initial_state, output_layer,
                   maximum_iterations=50):
    """Build the inference part of the decoder (beam search or greedy).

    :param decoder_cell: RNN cell used by the decoder
    :param decoder_initial_state: initial state handed to the decoder
    :param output_layer: layer applied to the RNN outputs
    :param maximum_iterations: maximum number of decoding steps; defaults to
        50, the value that was previously hard-coded
    :return: decoder_predict_decode: ``predicted_ids`` when beam search is
        enabled, otherwise ``sample_id`` with one extra trailing axis
    """
    # Every sentence starts with the <GO> marker.
    start_tokens = tf.ones([self.batch_size, ], tf.int32) * self.word_to_idx['<GO>']
    # Every sentence ends with the <EOS> marker.
    end_token = self.word_to_idx['<EOS>']

    if self.beam_search:
        # With beam search enabled, decode with BeamSearchDecoder.
        inference_decoder = BeamSearchDecoder(
            cell=decoder_cell,
            embedding=self.embedding,
            start_tokens=start_tokens,
            end_token=end_token,
            initial_state=decoder_initial_state,
            beam_width=self.beam_size,
            output_layer=output_layer)
    else:
        # Without beam search, use GreedyEmbeddingHelper with BasicDecoder.
        decoding_helper = GreedyEmbeddingHelper(embedding=self.embedding,
                                                start_tokens=start_tokens,
                                                end_token=end_token)
        inference_decoder = BasicDecoder(
            cell=decoder_cell,
            helper=decoding_helper,
            initial_state=decoder_initial_state,
            output_layer=output_layer)

    # dynamic_decode arguments:
    #   decoder:            a BasicDecoder, BeamSearchDecoder or custom decoder
    #   maximum_iterations: maximum number of decoding steps; decoding stops
    #                       at <EOS> or when this limit is reached
    decoder_outputs, _, _ = dynamic_decode(
        decoder=inference_decoder, maximum_iterations=maximum_iterations)

    if self.beam_search:
        # Beam search output: the predicted ids.
        decoder_predict_decode = decoder_outputs.predicted_ids
    else:
        # Append a trailing axis so the greedy result has the same rank as
        # the beam-search result.
        decoder_predict_decode = tf.expand_dims(decoder_outputs.sample_id, -1)
    return decoder_predict_decode
def build_predict_decoder(self, maximum_iterations=50):
    """Build the inference decoder and return the predicted token ids.

    A fresh decoder cell (via ``self.build_decoder_cell()``) and a fresh Dense
    output layer are created on every call.

    Args:
        maximum_iterations: maximum number of decoding steps; defaults to 50,
            the value that was previously hard-coded.

    Returns:
        ``predicted_ids`` when ``self.beam_search`` is truthy, otherwise
        ``sample_id`` with one extra trailing axis.
    """
    start_tokens = tf.ones([self.batch_size, ], tf.int32) * self.word_to_id['<GO>']
    end_token = self.word_to_id['<EOS>']

    decoder_cell, decoder_initial_state = self.build_decoder_cell()
    output_layer = tf.layers.Dense(
        self.vocab_size,
        kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                           stddev=0.1))

    if self.beam_search:
        inference_decoder = BeamSearchDecoder(
            cell=decoder_cell,
            embedding=self.embedding,
            start_tokens=start_tokens,
            end_token=end_token,
            initial_state=decoder_initial_state,
            beam_width=self.beam_size,
            output_layer=output_layer)
    else:
        decoding_helper = GreedyEmbeddingHelper(embedding=self.embedding,
                                                start_tokens=start_tokens,
                                                end_token=end_token)
        inference_decoder = BasicDecoder(
            cell=decoder_cell,
            helper=decoding_helper,
            initial_state=decoder_initial_state,
            output_layer=output_layer)

    decoder_outputs, _, _ = dynamic_decode(
        decoder=inference_decoder, maximum_iterations=maximum_iterations)

    if self.beam_search:
        return decoder_outputs.predicted_ids
    # Greedy ids get a trailing axis so both modes return the same rank.
    return tf.expand_dims(decoder_outputs.sample_id, -1)
def decode(self, cell_dec, enc_final_state, output_size, output_embed_matrix,
           training, grammar_helper=None):
    """Decode from ``enc_final_state``, teacher-forced or greedy.

    With ``training`` truthy the decoder is fed ``self.output_placeholder``
    prefixed with the grammar's start id (lengths increased by one to cover
    the added step). Otherwise it decodes greedily until the grammar's end
    id. ``GrammarBasicDecoder`` replaces ``BasicDecoder`` when
    ``self.config.use_grammar_constraints`` is truthy.

    Returns:
        The final outputs of ``dynamic_decode``.
    """
    linear_layer = tf_core_layers.Dense(output_size, use_bias=False)
    go_vector = tf.ones((self.batch_size,), dtype=tf.int32) * self.config.grammar.start

    if training:
        go_column = tf.expand_dims(go_vector, axis=1)
        output_ids_with_go = tf.concat([go_column, self.output_placeholder],
                                       axis=1)
        embedded_outputs = tf.nn.embedding_lookup([output_embed_matrix],
                                                  output_ids_with_go)
        helper = TrainingHelper(embedded_outputs,
                                self.output_length_placeholder+1)
    else:
        helper = GreedyEmbeddingHelper(output_embed_matrix, go_vector,
                                       self.config.grammar.end)

    if self.config.use_grammar_constraints:
        # The ground-truth output is only passed along while training.
        if training:
            training_output = self.output_placeholder
        else:
            training_output = None
        decoder = GrammarBasicDecoder(self.config.grammar, cell_dec, helper,
                                      enc_final_state,
                                      output_layer=linear_layer,
                                      training_output=training_output,
                                      grammar_helper=grammar_helper)
    else:
        decoder = BasicDecoder(cell_dec, helper, enc_final_state,
                               output_layer=linear_layer)

    final_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
        decoder, impute_finished=True, maximum_iterations=self.max_length)
    return final_outputs
def training_decoding_layer(decoding_embed_input, en_len, decoding_cell,
                            encoding_op, encoding_st, op_layer, v_size,
                            fr_len, max_en_len):
    """Run a teacher-forced decode, optionally with attention.

    Everything is built under the ``"decoder"`` variable scope with a
    constant (0.1) default initializer. When the module-level
    ``args.attention_architecture`` is not ``None``, the cell and the initial
    state are replaced by the result of ``create_attention``.
    ``v_size`` is accepted but not used.

    Returns:
        The first element of ``dynamic_decode``'s result (the final outputs).
    """
    const_init = init_ops.constant_initializer(0.1)
    with variable_scope.variable_scope("decoder", initializer=const_init):
        print("args:", args)

        use_attention = args.attention_architecture is not None
        if use_attention:
            decoding_cell, encoding_st = create_attention(
                decoding_cell, encoding_op, encoding_st, fr_len)

        train_helper = TrainingHelper(inputs=decoding_embed_input,
                                      sequence_length=en_len,
                                      time_major=False)
        train_decoder = BasicDecoder(decoding_cell, train_helper, encoding_st,
                                     op_layer)
        final_outputs, _, _ = dynamic_decode(train_decoder,
                                             output_time_major=False,
                                             impute_finished=True,
                                             maximum_iterations=max_en_len)
        return final_outputs
def decoder_ops(self, decoder_emb_inp, encoder_outputs, encoder_state, hparams):
    """Build the teacher-forced decoder and return its logits.

    :param decoder_emb_inp: embedded decoder inputs; the helper is created
        with ``time_major=True``, so these are presumably laid out
        time-major (verify against the caller)
    :param encoder_outputs: unused by this function
    :param encoder_state: initial state handed to ``BasicDecoder``
    :param hparams: unused by this function
    :return: ``rnn_output`` of the decoder's final outputs
    """
    decoder_cell = self._build_cell(self.cell_type, self.num_units,
                                    self.num_layers)
    helper = TrainingHelper(decoder_emb_inp, self.target_seq_length,
                            time_major=True)
    # NOTE(review): `project_layer` is not defined inside this function; it
    # must be provided at module level. Confirm where it is created.
    decoder = BasicDecoder(decoder_cell, helper, encoder_state,
                           output_layer=project_layer)

    # Dynamic decoding. The result is indexed instead of being unpacked into
    # two names: dynamic_decode returns (outputs, state, sequence_lengths),
    # as every other caller in this file assumes, so a two-name unpack raises
    # ValueError.
    decode_result = dynamic_decode(decoder)
    outputs = decode_result[0]

    # NOTE(review): output_time_major is left at its default here although
    # the helper is time-major; confirm the layout downstream code expects.
    logits = outputs.rnn_output
    return logits
def build_predict_decoder(self, maximum_iterations=50):
    """Build the inference decoder and store the predicted token ids.

    Uses the existing ``self.decoder_cell``, ``self.decoder_initial_state``
    and ``self.output_layer``. The result is stored on
    ``self.decoder_predict_decode``; nothing is returned.

    Args:
        maximum_iterations: maximum number of decoding steps; defaults to 50,
            the value that was previously hard-coded.
    """
    print('Building predict decoder...')

    start_tokens = tf.ones([self.batch_size, ], tf.int32) * self.word_to_id['<GO>']
    end_token = self.word_to_id['<EOS>']

    if self.beam_search:
        inference_decoder = BeamSearchDecoder(
            cell=self.decoder_cell,
            embedding=self.embedding,
            start_tokens=start_tokens,
            end_token=end_token,
            initial_state=self.decoder_initial_state,
            beam_width=self.beam_size,
            output_layer=self.output_layer)
    else:
        # GreedyEmbeddingHelper uses the argmax of the output (treated as
        # logits) and passes the result through an embedding layer to get the
        # next input.
        #   embedding:    a callable that takes a vector tensor of ids, or
        #                 the params argument for embedding_lookup
        #   start_tokens: int32 vector shaped [batch_size], the start tokens
        #   end_token:    int32 scalar, the token that marks end of decoding
        decoding_helper = GreedyEmbeddingHelper(embedding=self.embedding,
                                                start_tokens=start_tokens,
                                                end_token=end_token)
        inference_decoder = BasicDecoder(
            cell=self.decoder_cell,
            helper=decoding_helper,
            initial_state=self.decoder_initial_state,
            output_layer=self.output_layer)

    decoder_outputs, _, _ = dynamic_decode(
        decoder=inference_decoder, maximum_iterations=maximum_iterations)

    if self.beam_search:
        # predicted_ids: final outputs returned by the beam search after all
        # decoding is finished, shaped [batch_size, num_steps, beam_width]
        # (or [num_steps, batch_size, beam_width] if output_time_major is
        # True). Beams are ordered from best to worst.
        self.decoder_predict_decode = decoder_outputs.predicted_ids
    else:
        # Add a trailing axis so both modes produce the same rank.
        self.decoder_predict_decode = tf.expand_dims(
            decoder_outputs.sample_id, -1)
def pointer_net(inputs, input_lengths, n_pointers, word_matrix, cell_type, n_layers,
                n_units, dropout_prob, is_training=True):
    """Pointer network.

    Encodes ``inputs`` with an RNN, then runs a greedy attention decoder for
    up to ``n_pointers`` steps and returns the recorded attention alignments.

    Args:
        inputs (tensor): Inputs to pointer network (typically output of previous RNN)
        input_lengths (tensor): Actual non-padded lengths of each input sequence
        n_pointers (int): Number of pointers to generate
        word_matrix (tensor): Embedding matrix of word vectors
        cell_type (method): Cell type to use
        n_layers (int): Number of layers in RNN (same for encoder & decoder)
        n_units (int): Number of units in RNN cell (same for encoder & decoder)
        dropout_prob (float): Dropout probability
        is_training (bool): Whether the model is training or testing

    Returns:
        The stacked alignment history reshaped to
        ``[n_pointers, batch_size, seq_length]``.
    """
    # Static shapes are required: they feed tf.constant / tf.reshape below.
    static_shape = inputs.get_shape().as_list()
    batch_size, seq_length, _ = static_shape
    vocab_size = word_matrix.get_shape().as_list()[0]

    def _single_cell():
        # Dropout is only active during training.
        if is_training:
            keep = 1 - dropout_prob
        else:
            keep = 1
        return DropoutWrapper(cell_type(n_units), output_keep_prob=keep)

    def _cell_stack():
        # A bare cell for one layer, otherwise a MultiRNNCell of n_layers cells.
        if n_layers > 1:
            return MultiRNNCell([_single_cell() for _ in range(n_layers)])
        return _single_cell()

    # Encoder
    encoder_cell = _cell_stack()
    encoded, _ = tf.nn.dynamic_rnn(encoder_cell, inputs, input_lengths, dtype=tf.float32)
    attention = BahdanauAttention(n_units, encoded, memory_sequence_length=input_lengths)

    # TODO: find permanent solution (InferenceHelper?)
    start_tokens = tf.constant(START_TOKEN, shape=[batch_size], dtype=tf.int32)
    helper = GreedyEmbeddingHelper(word_matrix, start_tokens, END_TOKEN)

    # Decoder: attention over the encoder outputs, projected to the vocabulary.
    decoder_cell = _cell_stack()
    attn_cell = AttentionWrapper(decoder_cell, attention, alignment_history=True)
    projected_cell = tf.contrib.rnn.OutputProjectionWrapper(attn_cell, vocab_size)
    initial_state = attn_cell.zero_state(batch_size, tf.float32)
    decoder = BasicDecoder(projected_cell, helper, initial_state)

    _, final_state, _ = dynamic_decode(decoder, maximum_iterations=n_pointers,
                                       impute_finished=True)
    alignment_stack = final_state.alignment_history.stack()
    return tf.reshape(alignment_stack, [n_pointers, batch_size, seq_length])
def build_train_decoder(self):
    """Build the teacher-forced training decoder.

    Sets ``self.last_state`` and ``self.outputs_length`` from
    ``dynamic_decode``, and derives ``self.logits``, ``self.log_probs``
    (log-softmax of the logits) and ``self.gs_hypotheses`` (argmax over the
    last axis of the log-probabilities).
    """
    with tf.name_scope('train_decoder'):
        helper = TrainingHelper(
            inputs=self.inputs_dense,
            sequence_length=self.inputs_length,
            time_major=False,
            name='training_helper')

        with tf.name_scope('basic_decoder'):
            decoder = BasicDecoder(
                cell=self.cell,
                helper=helper,
                initial_state=self.initial_state,
                output_layer=self.output_layer)

        with tf.name_scope('dynamic_decode'):
            decoded = seq2seq.dynamic_decode(
                decoder=decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=self.inputs_max_length)
            outputs, self.last_state, self.outputs_length = decoded

            logits = tf.identity(outputs.rnn_output)
            self.logits = logits
            self.log_probs = tf.nn.log_softmax(self.logits)
            self.gs_hypotheses = tf.argmax(self.log_probs, -1)
def training_decode(enc_outputs, seq_len, helper, out_dim):
    """Run the attention decoder with the given helper.

    Args:
        enc_outputs: attention memory; its leading dimension is used as the
            batch size for the initial state.
        seq_len: passed to the attention mechanism as ``memory_sequence_length``.
        helper: decoding helper handed to ``BasicDecoder``.
        out_dim: size of the final output projection.

    Returns:
        A pair ``(outputs, alignments)``: the decoder outputs, and the
        alignment history of the first layer's state transposed with
        permutation ``[1, 2, 0]`` (used for attention plots).
    """
    embed_size = hp.embed_size

    # Attention cell: GRU behind a prenet, attending over the encoder outputs.
    prenet_cell = DecoderPrenetWrapper(
        GRUCell(embed_size),
        is_training=True,
        prenet_sizes=hp.embed_size,
        dropout_prob=hp.dropout)
    attention = BahdanauAttention(
        hp.embed_size,
        enc_outputs,
        normalize=True,
        memory_sequence_length=seq_len,
        probability_fn=tf.nn.softmax)
    attention_cell = AttentionWrapper(
        prenet_cell,
        attention,
        alignment_history=True,
        output_attention=False)
    concat_cell = ConcatOutputAndAttentionWrapper(attention_cell)

    # Decoder stack (bottom to top), then projection to out_dim.
    stack_layers = [
        OutputProjectionWrapper(concat_cell, hp.embed_size),
        ResidualWrapper(GRUCell(hp.embed_size)),
        ResidualWrapper(GRUCell(hp.embed_size)),
    ]
    stacked_cell = MultiRNNCell(stack_layers, state_is_tuple=True)
    projected_cell = OutputProjectionWrapper(stacked_cell, out_dim)

    batch_size = tf.shape(enc_outputs)[0]
    zero_state = projected_cell.zero_state(batch_size=batch_size, dtype=tf.float32)
    decoder = BasicDecoder(cell=projected_cell, helper=helper, initial_state=zero_state)

    decode_result = tf.contrib.seq2seq.dynamic_decode(
        decoder=decoder, maximum_iterations=hp.max_len)
    (outputs, _), final_state, _ = decode_result

    # for attention plot
    history = final_state[0].alignment_history.stack()
    alignments = tf.transpose(history, [1, 2, 0])
    return outputs, alignments
def decode(helper, scope, reuse=None):
    """Decode with the given helper inside variable scope ``scope``.

    Builds ``n_decoder_layers`` dropout-wrapped GRU cells, stacks them, adds
    an output projection to ``vocab_size`` and runs ``dynamic_decode`` for at
    most ``max(self.ground_truth_lengths)`` steps.

    Args:
        helper: decoding helper handed to ``BasicDecoder``.
        scope: variable scope to build under.
        reuse: forwarded to the variable scope, every GRU cell and the
            projection wrapper.

    Returns:
        The first result of ``dynamic_decode``, which contains two fields:
        ``rnn_output`` (predicted logits) and ``sample_id`` (predictions).
    """
    with tf.variable_scope(scope, reuse=reuse):
        # One GRU cell with input dropout per layer; the reuse flag must be
        # set on each cell.
        layer_cells = [
            tf.nn.rnn_cell.DropoutWrapper(
                tf.nn.rnn_cell.GRUCell(hidden_size, reuse=reuse),
                input_keep_prob=self.dropout_ph)
            for _ in range(n_decoder_layers)
        ]

        # Stack the layers and project the outputs onto the vocabulary.
        projected_cell = OutputProjectionWrapper(
            MultiRNNCell(layer_cells), vocab_size, reuse=reuse)

        # NOTE(review): the original comment states the initial state should
        # equal the encoder's final state, but the code starts from the zero
        # state -- confirm which is intended.
        zero_state = projected_cell.zero_state(batch_size=batch_size, dtype=tf.float32)
        decoder = BasicDecoder(projected_cell, helper, initial_state=zero_state)

        longest_target = tf.reduce_max(self.ground_truth_lengths)
        decoded, _, _ = dynamic_decode(
            decoder=decoder,
            maximum_iterations=longest_target,
            output_time_major=False,
            impute_finished=True)
        return decoded
def initialize(self, inputs, input_lengths, mel_targets=None, linear_targets=None):
    '''Initializes the model for inference.

    Sets "mel_outputs", "linear_outputs", and "alignments" fields, and stores
    the inputs and targets on the instance. The training flag passed to the
    sub-networks is derived from whether linear_targets is given; decoding
    always uses TacoTestHelper.

    Args:
      inputs: int32 Tensor with shape [N, T_in] where N is batch size, T_in is number of
        steps in the input time series, and values are character IDs
      input_lengths: int32 Tensor with shape [N] where N is batch size and values are the
        lengths of each sequence in inputs.
      mel_targets: float32 Tensor with shape [N, T_out, M] where N is batch size, T_out is
        number of steps in the output time series, M is num_mels, and values are entries
        in the mel spectrogram. Only needed for training.
      linear_targets: float32 Tensor with shape [N, T_out, F] where N is batch_size, T_out
        is number of steps in the output time series, F is num_freq, and values are
        entries in the linear spectrogram. Only needed for training.
    '''
    with tf.variable_scope('inference'):
        is_training = linear_targets is not None
        batch_size = tf.shape(inputs)[0]
        hp = self._hparams

        # Embeddings
        n_symbols = 149  # BASED ON PREVIOUS LENGTH OF LIST
        embedding_table = tf.get_variable(
            'embedding',
            [n_symbols, hp.embed_depth],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.5))
        # [N, T_in, embed_depth=256]
        embedded_inputs = tf.nn.embedding_lookup(embedding_table, inputs)

        # Encoder
        # [N, T_in, prenet_depths[-1]=128]
        prenet_outputs = prenet(embedded_inputs, is_training, hp.prenet_depths)
        # [N, T_in, encoder_depth=256]
        encoder_outputs = encoder_cbhg(
            prenet_outputs, input_lengths, is_training, hp.encoder_depth)

        # Attention
        # [N, T_in, attention_depth=256]
        attention_rnn = AttentionWrapper(
            GRUCell(hp.attention_depth),
            BahdanauAttention(hp.attention_depth, encoder_outputs),
            alignment_history=True,
            output_attention=False)

        # Apply prenet before concatenation in AttentionWrapper.
        prenet_attention = DecoderPrenetWrapper(
            attention_rnn, is_training, hp.prenet_depths)

        # Concatenate attention context vector and RNN cell output into a
        # 2*attention_depth=512D vector.
        # [N, T_in, 2*attention_depth=512]
        concat_cell = ConcatOutputAndAttentionWrapper(prenet_attention)

        # Decoder (layers specified bottom to top):
        # [N, T_in, decoder_depth=256]
        decoder_layers = [
            OutputProjectionWrapper(concat_cell, hp.decoder_depth),
            ResidualWrapper(GRUCell(hp.decoder_depth)),
            ResidualWrapper(GRUCell(hp.decoder_depth)),
        ]
        decoder_cell = MultiRNNCell(decoder_layers, state_is_tuple=True)

        # Project onto r mel spectrograms (predict r outputs at each RNN step):
        output_cell = OutputProjectionWrapper(
            decoder_cell, hp.num_mels * hp.outputs_per_step)
        zero_state = output_cell.zero_state(batch_size=batch_size, dtype=tf.float32)

        test_helper = TacoTestHelper(batch_size, hp.num_mels, hp.outputs_per_step)

        # [N, T_out/r, M*r]
        decode_result = tf.contrib.seq2seq.dynamic_decode(
            BasicDecoder(output_cell, test_helper, zero_state),
            maximum_iterations=hp.max_iters)
        (decoder_outputs, _), final_decoder_state, _ = decode_result

        # Reshape outputs to be one output per entry
        # [N, T_out, M]
        mel_outputs = tf.reshape(decoder_outputs, [batch_size, -1, hp.num_mels])

        # Add post-processing CBHG:
        # [N, T_out, postnet_depth=256]
        post_outputs = post_cbhg(
            mel_outputs, hp.num_mels, is_training, hp.postnet_depth)
        # [N, T_out, F]
        linear_outputs = tf.layers.dense(post_outputs, hp.num_freq)

        # Grab alignments from the final decoder state:
        alignment_stack = final_decoder_state[0].alignment_history.stack()
        alignments = tf.transpose(alignment_stack, [1, 2, 0])

        self.inputs = inputs
        self.input_lengths = input_lengths
        self.mel_outputs = mel_outputs
        self.linear_outputs = linear_outputs
        self.alignments = alignments
        self.mel_targets = mel_targets
        self.linear_targets = linear_targets