def get(attention_type, num_units, memory, memory_sequence_length,
        scope=None, reuse=None):
    """Returns attention mechanism according to the specified type."""
    with tf.variable_scope(scope, reuse=reuse):
        if attention_type == U.ATT_LUONG:
            attention_mechanism = contrib_seq2seq.LuongAttention(
                num_units=num_units,
                memory=memory,
                memory_sequence_length=memory_sequence_length)
        elif attention_type == U.ATT_LUONG_SCALED:
            attention_mechanism = contrib_seq2seq.LuongAttention(
                num_units=num_units,
                memory=memory,
                memory_sequence_length=memory_sequence_length,
                scale=True)
        elif attention_type == U.ATT_BAHDANAU:
            attention_mechanism = contrib_seq2seq.BahdanauAttention(
                num_units=num_units,
                memory=memory,
                memory_sequence_length=memory_sequence_length)
        elif attention_type == U.ATT_BAHDANAU_NORM:
            attention_mechanism = contrib_seq2seq.BahdanauAttention(
                num_units=num_units,
                memory=memory,
                memory_sequence_length=memory_sequence_length,
                normalize=True)
        else:
            raise ValueError("Unknown attention type: %s" % attention_type)
    return attention_mechanism
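# A minimal, hypothetical usage sketch of the factory above. `encoder_outputs`,
# `source_lengths`, and the concrete sizes are assumptions for illustration,
# not names taken from this code base; only `get` and the `U.*` constants
# come from the snippet itself.
attention_mechanism = get(
    attention_type=U.ATT_BAHDANAU,
    num_units=256,
    memory=encoder_outputs,                  # [batch, max_time, depth]
    memory_sequence_length=source_lengths,   # masks padded encoder steps
    scope="attention")
decoder_cell = contrib_seq2seq.AttentionWrapper(
    tf.nn.rnn_cell.LSTMCell(256),
    attention_mechanism,
    attention_layer_size=256)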
def create_decoder_cell(agenda, base_sent_embeds, mev_st, mev_ts,
                        base_length, iw_length, dw_length,
                        attn_dim, hidden_dim, num_layer,
                        enable_alignment_history=False, enable_dropout=False,
                        dropout_keep=0.1, no_insert_delete_attn=False):
    base_attn = seq2seq.BahdanauAttention(
        attn_dim, base_sent_embeds, base_length, name='src_attn')

    cnx_src, micro_evs_st = mev_st
    mev_st_attn = seq2seq.BahdanauAttention(
        attn_dim, cnx_src, iw_length, name='mev_st_attn')
    mev_st_attn._values = micro_evs_st

    attns = [base_attn, mev_st_attn]

    if not no_insert_delete_attn:
        cnx_tgt, micro_evs_ts = mev_ts
        mev_ts_attn = seq2seq.BahdanauAttention(
            attn_dim, cnx_tgt, dw_length, name='mev_ts_attn')
        mev_ts_attn._values = micro_evs_ts
        attns += [mev_ts_attn]

    bottom_cell = tf_rnn.LSTMCell(hidden_dim, name='bottom_cell')
    bottom_attn_cell = seq2seq.AttentionWrapper(
        bottom_cell,
        tuple(attns),
        output_attention=False,
        alignment_history=enable_alignment_history,
        name='att_bottom_cell')

    all_cells = [bottom_attn_cell]
    num_layer -= 1
    for i in range(num_layer):
        cell = tf_rnn.LSTMCell(hidden_dim, name='layer_%s' % (i + 1))
        if enable_dropout and dropout_keep < 1.:
            cell = tf_rnn.DropoutWrapper(cell, output_keep_prob=dropout_keep)
        all_cells.append(cell)

    decoder_cell = AttentionAugmentRNNCell(all_cells)
    decoder_cell.set_agenda(agenda)

    return decoder_cell
def _create_attention_mechanism(self, attention_type, num_units, memory,
                                memory_sequence_length):
    if attention_type == 'bahdanau':
        attention_mechanism = seq2seq.BahdanauAttention(
            num_units=num_units,
            memory=memory,
            memory_sequence_length=memory_sequence_length,
            normalize=False)
        self._output_attention = False
    elif attention_type == 'normed_bahdanau':
        attention_mechanism = seq2seq.BahdanauAttention(
            num_units=num_units,
            memory=memory,
            memory_sequence_length=memory_sequence_length,
            normalize=True)
        self._output_attention = False
    elif attention_type == 'normed_monotonic_bahdanau':
        attention_mechanism = seq2seq.BahdanauMonotonicAttention(
            num_units=num_units,
            memory=memory,
            memory_sequence_length=memory_sequence_length,
            normalize=True,
            score_bias_init=-2.0,
            sigmoid_noise=1.0 if self._mode == 'train' else 0.0,
            mode='hard' if self._mode != 'train' else 'parallel')
        self._output_attention = False
    elif attention_type == 'luong':
        attention_mechanism = seq2seq.LuongAttention(
            num_units=num_units,
            memory=memory,
            memory_sequence_length=memory_sequence_length)
        self._output_attention = True
    elif attention_type == 'scaled_luong':
        attention_mechanism = seq2seq.LuongAttention(
            num_units=num_units,
            memory=memory,
            memory_sequence_length=memory_sequence_length,
            scale=True)
        self._output_attention = True
    elif attention_type == 'scaled_monotonic_luong':
        attention_mechanism = seq2seq.LuongMonotonicAttention(
            num_units=num_units,
            memory=memory,
            memory_sequence_length=memory_sequence_length,
            scale=True,
            score_bias_init=-2.0,
            sigmoid_noise=1.0 if self._mode == 'train' else 0.0,
            mode='hard' if self._mode != 'train' else 'parallel')
        self._output_attention = True
    else:
        raise ValueError('Unknown attention mechanism: %s' % attention_type)
    return attention_mechanism
def getBeamSearchDecoderCell(self, encoder_outputs, encoder_final_states):
    basic_cells = [self.get_basicLSTMCell() for i in range(layer_num)]
    basic_cell = tf.nn.rnn_cell.MultiRNNCell(basic_cells)

    # Tile encoder outputs, states, and lengths so every beam entry
    # sees its own copy of the encoder results.
    tiled_encoder_outputs = seq2seq.tile_batch(encoder_outputs,
                                               multiplier=beam_size)
    tiled_encoder_final_states = [
        seq2seq.tile_batch(state, multiplier=beam_size)
        for state in encoder_final_states
    ]
    tiled_sequence_length = seq2seq.tile_batch(self.enc_len,
                                               multiplier=beam_size)
    initial_state = tuple(tiled_encoder_final_states)

    # attention
    attention_mechanism = seq2seq.BahdanauAttention(
        num_units=num_units,
        memory=tiled_encoder_outputs,
        memory_sequence_length=tiled_sequence_length)
    att_cell = seq2seq.AttentionWrapper(
        basic_cell,
        attention_mechanism=attention_mechanism,
        attention_layer_size=num_units,
        alignment_history=False,
        cell_input_fn=None,
        initial_cell_state=initial_state)
    initial_state = att_cell.zero_state(
        batch_size=tf.shape(self.enc_in)[0] * beam_size, dtype=tf.float32)
    # att_state.clone(cell_state=encoder_final_state)
    return att_cell, initial_state
def _build_attention(self, enc_outputs, enc_seq_len):
    with tf.variable_scope("AttentionMechanism"):
        if self.attn_Type == 'bahdanau':
            attention_mechanism = seq2seq.BahdanauAttention(
                num_units=2 * self.cell_dim,
                memory=enc_outputs,
                memory_sequence_length=enc_seq_len,
                probability_fn=tf.nn.softmax,
                normalize=True,
                dtype=tf.get_variable_scope().dtype)
        elif self.params['attention_type'] == 'luong':
            attention_mechanism = seq2seq.LuongAttention(
                num_units=2 * self.cell_dim,
                memory=enc_outputs,
                memory_sequence_length=enc_seq_len,
                probability_fn=tf.nn.softmax,
                dtype=tf.get_variable_scope().dtype)
        else:
            raise ValueError('Unknown Attention Type')
        return attention_mechanism
def _build_encoder(self, input_sequence, keep_prob):
    """Define encoder architecture."""
    # connect each layer sequentially, building a graph that resembles a
    # feed-forward network made of recurrent units
    encoder_cell = self._multi_cell(num_units=self.num_units,
                                    num_layers=self.num_layers,
                                    keep_prob=keep_prob)

    # the model uses fixed-length input sequences, so tile the defined
    # length across the batch dimension
    sequence_lengths = tf.tile([self.input_length],
                               [tf.shape(input_sequence)[0]])

    # build the unrolled graph of the recurrent neural network
    encoder_outputs, encoder_states = tf.nn.dynamic_rnn(
        cell=encoder_cell,
        inputs=input_sequence,
        sequence_length=sequence_lengths,
        dtype=tf.float32)

    # attention provides a direct connection between the encoder and decoder
    # so that long-range dependencies are not limited by the fixed size of
    # the thought vector
    attention_layer_size = self.num_units
    attention_mechanism = seq2seq.BahdanauAttention(
        num_units=attention_layer_size,
        memory=encoder_outputs,
        memory_sequence_length=sequence_lengths,
        normalize=True)

    return (encoder_outputs, encoder_states, attention_mechanism)
def train_decode_layer(self, dec_embedding_input, dec_cell, output_layer):
    atten_mech = seq2seq.BahdanauAttention(
        num_units=self.hidden_dim * 2,
        memory=self.enc_output,
        # mask the encoder memory by the source lengths (the inference
        # decoder below does the same with the tiled source lengths)
        memory_sequence_length=self.source_len,
        normalize=True,
        name='BahdanauAttention')
    dec_cell = seq2seq.AttentionWrapper(dec_cell, atten_mech,
                                        self.hidden_dim * 2,
                                        name='dec_attention_cell')
    initial_state = dec_cell.zero_state(
        batch_size=self.batch_size,
        dtype=tf.float32).clone(cell_state=self.enc_state)

    train_helper = seq2seq.TrainingHelper(dec_embedding_input, self.target_len)
    training_decoder = seq2seq.BasicDecoder(dec_cell, train_helper,
                                            initial_state=initial_state,
                                            output_layer=output_layer)
    train_logits, _, _ = seq2seq.dynamic_decode(
        training_decoder,
        output_time_major=False,
        impute_finished=False,
        maximum_iterations=self.max_target_len)
    return train_logits
def decoding_layer(self, dec_embed_input, embeddings, enc_output, enc_state,
                   vocab_size, text_len, summary_len, max_sum_len):
    lstm = rnn.LSTMCell(self.hidden_dim * 2,
                        initializer=tf.random_normal_initializer(-0.1, 0.1, seed=2))
    dec_cell = rnn.DropoutWrapper(lstm, input_keep_prob=self.keep_prob)
    output_layer = tf.layers.Dense(
        vocab_size,
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))

    attn_mech = seq2seq.BahdanauAttention(self.hidden_dim * 2, enc_output,
                                          text_len, normalize=False,
                                          name='BahdanauAttention')
    dec_cell = seq2seq.AttentionWrapper(dec_cell, attn_mech,
                                        attention_layer_size=self.hidden_dim * 2)

    # initial_state = seq2seq.AttentionWrapperState(
    #     enc_state[0], _zero_state_tensors(self.hidden_dim, batch_size, tf.float32))
    initial_state = dec_cell.zero_state(self.batch_size, tf.float32).clone(
        cell_state=LSTMStateTuple(*enc_state))

    with tf.variable_scope('decode'):
        training_logits = self.training_decoding_layer(
            dec_embed_input, summary_len, dec_cell, initial_state,
            output_layer, max_sum_len)
    with tf.variable_scope('decode', reuse=True):
        inference_logits = self.inference_decoding_layer(
            embeddings, self.vocab_to_int['<GO>'], self.vocab_to_int['<EOS>'],
            dec_cell, initial_state, output_layer, max_sum_len)

    return training_logits, inference_logits
def inference_decode_layer(self, start_token, dec_cell, end_token, output_layer):
    start_tokens = tf.tile(tf.constant([start_token], dtype=tf.int32),
                           [self.batch_size], name='start_token')

    tiled_enc_output = seq2seq.tile_batch(self.enc_output,
                                          multiplier=self.Beam_width)
    tiled_enc_state = seq2seq.tile_batch(self.enc_state,
                                         multiplier=self.Beam_width)
    tiled_source_len = seq2seq.tile_batch(self.source_len,
                                          multiplier=self.Beam_width)

    atten_mech = seq2seq.BahdanauAttention(self.hidden_dim * 2,
                                           tiled_enc_output,
                                           tiled_source_len,
                                           normalize=True)
    decoder_att = seq2seq.AttentionWrapper(dec_cell, atten_mech,
                                           self.hidden_dim * 2)
    initial_state = decoder_att.zero_state(
        self.batch_size * self.Beam_width,
        tf.float32).clone(cell_state=tiled_enc_state)

    decoder = seq2seq.BeamSearchDecoder(decoder_att, self.embeddings,
                                        start_tokens, end_token,
                                        initial_state,
                                        beam_width=self.Beam_width,
                                        output_layer=output_layer)
    infer_logits, _, _ = seq2seq.dynamic_decode(
        decoder,
        output_time_major=False,
        impute_finished=False,
        maximum_iterations=self.max_target_len)
    return infer_logits
def _attn_cell(self, rnn_enc_tensor):
    cell = new_multi_rnn_cell(self.hsz, self.rnntype, self.nlayers)
    if self.attn:
        attn_mech = tfcontrib_seq2seq.BahdanauAttention(self.hsz, rnn_enc_tensor, self.src_len)
        # attn_mech = tfcontrib_seq2seq.LuongAttention(self.hsz, rnn_enc_tensor, self.src_len)
        cell = tf.contrib.seq2seq.AttentionWrapper(cell, attn_mech, self.hsz, name='dyn_attn_cell')
    return cell
def _build_single_attention_mechanism(memory):
    if not self._is_training:
        memory = seq2seq.tile_batch(memory, multiplier=self._beam_width)
    return seq2seq.BahdanauAttention(self._num_attention_units, memory,
                                     memory_sequence_length=None)
def _build_decoder(self):
    """Decode keyword and context into a sequence of vectors."""
    self.sequence_decoder = tf.placeholder(
        dtype=tf.float32,
        shape=[_BATCH_SIZE, None, CHAR_VEC_DIM],
        name='context')
    self.length_decoder = tf.placeholder(
        dtype=tf.int32, shape=[_BATCH_SIZE], name='length_keywords')

    attention = seq2seq.BahdanauAttention(
        _NUM_UNITS,
        memory=self.encoder_outputs,
        memory_sequence_length=self.context_length,
        name="BahdanauAttention")
    cell_attention = tf.contrib.rnn.GRUCell(_NUM_UNITS)
    attention_wrapper = seq2seq.AttentionWrapper(cell_attention, attention)

    self.initial_decode_state = attention_wrapper.zero_state(
        _BATCH_SIZE, dtype=tf.float32).clone(cell_state=self.states_keywords)

    self.decoder_outputs, self.decoder_final_state = tf.nn.dynamic_rnn(
        attention_wrapper,
        self.sequence_decoder,
        sequence_length=self.length_decoder,
        initial_state=self.initial_decode_state,
        dtype=tf.float32,
        time_major=False)
def decode(self, dec_cell, enc_outputs, ctx_outputs):
    with tf.variable_scope("decode"):
        batch_size = self._batch_size
        attn_mech = seq2seq.BahdanauAttention(self._memory_size, enc_outputs,
                                              self.input_lengths)
        dec_cell = CondWrapper(dec_cell, ctx_outputs)
        dec_cell = seq2seq.AttentionWrapper(dec_cell, attn_mech,
                                            self._memory_size)
        dec_initial_state = dec_cell.zero_state(batch_size=batch_size,
                                                dtype=tf.float32)
        helper_build_fn = self._infer_helper if self._infer else self._train_helper

        output_layer = layers_core.Dense(self._vocab_size, use_bias=True,
                                         activation=None)
        decoder = seq2seq.BasicDecoder(cell=dec_cell,
                                       helper=helper_build_fn(),
                                       initial_state=dec_initial_state,
                                       output_layer=output_layer)
        dec_output, dec_state = seq2seq.dynamic_decode(
            decoder,
            impute_finished=True,
            maximum_iterations=self._max_seq_length)

        rnn_output = dec_output.rnn_output
        sample_id = dec_output.sample_id
    return rnn_output, sample_id, dec_state
def _build_model(self, batch_size, helper_build_fn, decoder_maxiters=None,
                 alignment_history=False):
    # embed input_data into a one-hot representation
    inputs = tf.one_hot(self.input_data, self._input_size, dtype=self._dtype)
    inputs_len = self.input_lengths

    with tf.name_scope('bidir-encoder'):
        fw_cell = rnn.MultiRNNCell(
            [rnn.BasicRNNCell(self._enc_rnn_size) for i in range(3)],
            state_is_tuple=True)
        bw_cell = rnn.MultiRNNCell(
            [rnn.BasicRNNCell(self._enc_rnn_size) for i in range(3)],
            state_is_tuple=True)
        fw_cell_zero = fw_cell.zero_state(batch_size, self._dtype)
        bw_cell_zero = bw_cell.zero_state(batch_size, self._dtype)

        enc_out, _ = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, inputs,
            sequence_length=inputs_len,
            initial_state_fw=fw_cell_zero,
            initial_state_bw=bw_cell_zero)

    with tf.name_scope('attn-decoder'):
        dec_cell_in = rnn.GRUCell(self._dec_rnn_size)
        attn_values = tf.concat(enc_out, 2)
        attn_mech = seq2seq.BahdanauAttention(self._enc_rnn_size * 2,
                                              attn_values, inputs_len)
        dec_cell_attn = rnn.GRUCell(self._enc_rnn_size * 2)
        dec_cell_attn = seq2seq.AttentionWrapper(
            dec_cell_attn, attn_mech, self._enc_rnn_size * 2,
            alignment_history=alignment_history)
        dec_cell_out = rnn.GRUCell(self._output_size)
        dec_cell = rnn.MultiRNNCell(
            [dec_cell_in, dec_cell_attn, dec_cell_out], state_is_tuple=True)

        dec = seq2seq.BasicDecoder(
            dec_cell, helper_build_fn(),
            dec_cell.zero_state(batch_size, self._dtype))
        dec_out, dec_state = seq2seq.dynamic_decode(
            dec, output_time_major=False,
            maximum_iterations=decoder_maxiters, impute_finished=True)

    self.outputs = dec_out.rnn_output
    self.output_ids = dec_out.sample_id
    self.final_state = dec_state
def build_decoder_cell(self):
    if self.use_beamsearch_decode:
        encoder_outputs = tf.contrib.seq2seq.tile_batch(
            self.encoder_outputs, multiplier=self.beam_width)
        encoder_last_state = tf.contrib.seq2seq.tile_batch(
            self.encoder_last_state, multiplier=self.beam_width)
        encoder_inputs_length = tf.contrib.seq2seq.tile_batch(
            self.encoder_inputs_length, multiplier=self.beam_width)
    else:
        encoder_outputs = self.encoder_outputs
        encoder_last_state = self.encoder_last_state
        encoder_inputs_length = self.encoder_inputs_length

    self.attention_mechanism = seq2seq.BahdanauAttention(
        num_units=self.decoder_hidden_units,
        memory=encoder_outputs,
        memory_sequence_length=encoder_inputs_length)

    self.decoder_cell_list = [
        self.build_single_cell(self.decoder_hidden_units)
        for _ in range(self.depth)
    ]

    # NOTE(sdsuo): Not sure what this does yet
    def attn_decoder_input_fn(inputs, attention):
        if not self.attn_input_feeding:
            return inputs

        # Essential when use_residual=True
        _input_layer = Dense(self.decoder_hidden_units, dtype=self.dtype,
                             name='attn_input_feeding')
        return _input_layer(rnn.array_ops.concat([inputs, attention], -1))

    # The attention wrapper here wraps the full multi-layer decoder cell
    self.decoder_cell_list = seq2seq.AttentionWrapper(
        cell=rnn.MultiRNNCell(self.decoder_cell_list),
        attention_mechanism=self.attention_mechanism,
        attention_layer_size=self.decoder_hidden_units,
        cell_input_fn=attn_decoder_input_fn,
        initial_cell_state=encoder_last_state,
        alignment_history=False,
        name='attention_wrapper')

    if self.use_beamsearch_decode:
        batch_size = self.batch_size * self.beam_width
    else:
        batch_size = self.batch_size

    # add by Meng
    decoder_initial_state = self.decoder_cell_list.zero_state(
        batch_size=batch_size,
        dtype=self.dtype).clone(cell_state=encoder_last_state)

    return self.decoder_cell_list, decoder_initial_state
def build_decoder_cell(self):
    # TODO(sdsuo): Read up and decide whether to use beam search
    self.attention_mechanism = seq2seq.BahdanauAttention(
        num_units=self.decoder_hidden_units,
        memory=self.encoder_outputs,
        memory_sequence_length=self.encoder_inputs_length)

    self.decoder_cell_list = [
        self.build_single_cell(self.decoder_hidden_units)
        for _ in range(self.depth)
    ]

    # NOTE(sdsuo): Not sure what this does yet
    def attn_decoder_input_fn(inputs, attention):
        if not self.attn_input_feeding:
            return inputs

        # Essential when use_residual=True
        _input_layer = Dense(self.decoder_hidden_units, dtype=self.dtype,
                             name='attn_input_feeding')
        return _input_layer(rnn.array_ops.concat([inputs, attention], -1))

    # NOTE(sdsuo): Attention mechanism is implemented only on the top decoder layer
    self.decoder_cell_list[-1] = seq2seq.AttentionWrapper(
        cell=self.decoder_cell_list[-1],
        attention_mechanism=self.attention_mechanism,
        attention_layer_size=self.decoder_hidden_units,
        cell_input_fn=attn_decoder_input_fn,
        initial_cell_state=self.encoder_last_state[-1],
        alignment_history=False,
        name='attention_wrapper')

    # NOTE(sdsuo): Not sure why this is necessary
    # To be compatible with AttentionWrapper, the encoder last state
    # of the top layer should be converted into the AttentionWrapperState form.
    # We can easily do this by calling AttentionWrapper.zero_state.
    # Also, if beam search decoding is used, the batch_size argument in
    # .zero_state should be ${decoder_beam_width} times the original batch_size.
    if self.use_beamsearch_decode:
        batch_size = self.batch_size * self.beam_width
    else:
        batch_size = self.batch_size

    # NOTE(vera): important dimension here
    # embed()
    initial_state = [state for state in self.encoder_last_state]
    initial_state[-1] = self.decoder_cell_list[-1].zero_state(
        batch_size=batch_size, dtype=self.dtype)
    decoder_initial_state = tuple(initial_state)

    return rnn.MultiRNNCell(self.decoder_cell_list), decoder_initial_state
def _attn_cell_w_dropout(self, rnn_enc_tensor, beam):
    cell = multi_rnn_cell_w_dropout(self.hsz, self.pkeep, self.rnntype, self.nlayers)
    if self.attn:
        src_len = self.src_len
        if beam > 1:
            # Expand the encoded tensor for all beam entries
            rnn_enc_tensor = tf.contrib.seq2seq.tile_batch(rnn_enc_tensor, multiplier=beam)
            src_len = tf.contrib.seq2seq.tile_batch(src_len, multiplier=beam)
        attn_mech = tfcontrib_seq2seq.BahdanauAttention(self.hsz, rnn_enc_tensor, src_len)
        # attn_mech = tfcontrib_seq2seq.LuongAttention(self.hsz, rnn_enc_tensor, src_len)
        cell = tf.contrib.seq2seq.AttentionWrapper(cell, attn_mech, self.hsz, name='dyn_attn_cell')
    return cell
def create_decoder_cell(agenda, base_sent_embeds, insert_word_embeds,
                        delete_word_embeds, base_length, iw_length, dw_length,
                        attn_dim, hidden_dim, num_layer,
                        enable_alignment_history=False, enable_dropout=False,
                        dropout_keep=0.1, no_insert_delete_attn=False):
    base_attn = seq2seq.BahdanauAttention(
        attn_dim, base_sent_embeds, base_length, name='src_attn')
    attns = [base_attn]

    if not no_insert_delete_attn:
        insert_attn = seq2seq.BahdanauAttention(
            attn_dim, insert_word_embeds, iw_length, name='insert_attn')
        delete_attn = seq2seq.BahdanauAttention(
            attn_dim, delete_word_embeds, dw_length, name='delete_attn')
        attns += [insert_attn, delete_attn]

    if no_insert_delete_attn:
        assert len(attns) == 1
    else:
        assert len(attns) == 3

    bottom_cell = tf_rnn.LSTMCell(hidden_dim, name='bottom_cell')
    bottom_attn_cell = seq2seq.AttentionWrapper(
        bottom_cell,
        tuple(attns),
        output_attention=False,
        alignment_history=enable_alignment_history,
        name='att_bottom_cell')

    all_cells = [bottom_attn_cell]
    num_layer -= 1
    for i in range(num_layer):
        cell = tf_rnn.LSTMCell(hidden_dim, name='layer_%s' % (i + 1))
        if enable_dropout and dropout_keep < 1.:
            cell = tf_rnn.DropoutWrapper(cell, output_keep_prob=dropout_keep)
        all_cells.append(cell)

    decoder_cell = AttentionAugmentRNNCell(all_cells)
    decoder_cell.set_agenda(agenda)

    return decoder_cell
def build_decode_cell(self):
    encoder_outputs = self.encoder_outputs
    encoder_last_state = self.encoder_last_state
    encoder_inputs_length = self.encoder_inputs_length

    # Building attention mechanism: Default Bahdanau
    # 'Bahdanau' style attention: https://arxiv.org/abs/1409.0473
    self.attention_mechanism = seq2seq.BahdanauAttention(
        num_units=self.hidden_units,
        memory=encoder_outputs,
        memory_sequence_length=encoder_inputs_length)
    if self.attention_type.lower() == 'luong':
        self.attention_mechanism = seq2seq.LuongAttention(
            num_units=self.hidden_units,
            memory=encoder_outputs,
            memory_sequence_length=encoder_inputs_length)

    # decoder_cell
    self.decoder_cell_list = [
        self.build_single_cell(layer=2) for _ in range(self.depth)
    ]

    def attn_decoder_input_fn(inputs, attention):
        if not self.attn_input_feeding:
            return inputs

        # Essential when use_residual=True
        _input_layer = Dense(self.hidden_units * 2, dtype=self.dtype,
                             name='attn_input_feeding')
        return _input_layer(tf.concat([inputs, attention], -1))

    # AttentionWrapper wraps RNNCell with the attention_mechanism
    # Note: We implement Attention mechanism only on the top decoder layer
    self.decoder_cell_list[-1] = seq2seq.AttentionWrapper(
        cell=self.decoder_cell_list[-1],
        attention_mechanism=self.attention_mechanism,
        attention_layer_size=self.hidden_units,
        cell_input_fn=attn_decoder_input_fn,
        initial_cell_state=encoder_last_state[-1],
        alignment_history=False,
        name='Attention_wrapper')

    # To be compatible with AttentionWrapper, the encoder last state
    # of the top layer should be converted into the AttentionWrapperState form
    # We can easily do this by calling AttentionWrapper.zero_state
    batch_size = self.batch_size
    initial_state = [state for state in encoder_last_state]
    initial_state[-1] = self.decoder_cell_list[-1].zero_state(
        batch_size=batch_size, dtype=self.dtype)
    decoder_initial_state = tuple(initial_state)

    return rnn.MultiRNNCell(self.decoder_cell_list), decoder_initial_state
def create_attention_mechanism(attention_option, num_units, memory,
                               source_sequence_length):
    """Create attention mechanism based on the attention_option.

    :param attention_option: one of "luong", "scaled_luong", "bahdanau",
        "normed_bahdanau"
    :param num_units: depth of the attention (query) mechanism
    :param memory: the memory to query; usually the output of an RNN encoder.
        This tensor should be shaped `[batch_size, max_time, ...]`.
    :param source_sequence_length: (optional) sequence lengths for the batch
        entries in memory. If provided, the memory tensor rows are masked with
        zeros for values past the respective sequence lengths.
    :return: the constructed attention mechanism
    """
    # Mechanism
    if attention_option == "luong":
        attention_mechanism = seq2seq.LuongAttention(
            num_units, memory, memory_sequence_length=source_sequence_length)
    elif attention_option == "scaled_luong":
        attention_mechanism = seq2seq.LuongAttention(
            num_units, memory,
            memory_sequence_length=source_sequence_length,
            scale=True)
    elif attention_option == "bahdanau":
        attention_mechanism = seq2seq.BahdanauAttention(
            num_units, memory, memory_sequence_length=source_sequence_length)
    elif attention_option == "normed_bahdanau":
        attention_mechanism = seq2seq.BahdanauAttention(
            num_units, memory,
            memory_sequence_length=source_sequence_length,
            normalize=True)
    else:
        raise ValueError("Unknown attention option %s" % attention_option)

    return attention_mechanism
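# A hypothetical sketch of how the mechanism returned above is typically
# consumed, following the same pattern as the other snippets here: wrap a
# decoder cell with it and seed the wrapper state from the encoder state.
# `decoder_cell`, `encoder_outputs`, `encoder_state`, `source_lengths`, and
# `batch_size` are assumed placeholders, not names from this code base.
attention_mechanism = create_attention_mechanism(
    "normed_bahdanau", num_units=512,
    memory=encoder_outputs, source_sequence_length=source_lengths)
decoder_cell = seq2seq.AttentionWrapper(
    decoder_cell, attention_mechanism, attention_layer_size=512)
initial_state = decoder_cell.zero_state(batch_size, tf.float32).clone(
    cell_state=encoder_state)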
def build_attention_mechanism(self):
    if self.hparams.attention_type == 'luong':
        attention_mechanism = seq2seq.LuongAttention(
            self.hparams.hidden_units,
            self.feedforward_inputs,
            self.feedforward_inputs_length)
    elif self.hparams.attention_type == 'bahdanau':
        attention_mechanism = seq2seq.BahdanauAttention(
            self.hparams.hidden_units,
            self.feedforward_inputs,
            self.feedforward_inputs_length)
    else:
        raise ValueError(
            "Currently, the only supported attention types are 'luong' and 'bahdanau'.")
    return attention_mechanism
def _decoder_cell(self):
    batch_size, _ = tf.unstack(tf.shape(self._targets))
    attention = seq2seq.BahdanauAttention(
        num_units=2 * self.CELL_SIZE,
        memory=self._targets_encoder_outputs,
        memory_sequence_length=self._targets_length)
    attentive_cell = seq2seq.AttentionWrapper(
        cell=rnn.GRUCell(2 * self.CELL_SIZE, activation=tf.nn.tanh),
        attention_mechanism=attention,
        attention_layer_size=2 * self.CELL_SIZE,
        initial_cell_state=self._targets_encoder_state)
    return (
        attentive_cell,
        attentive_cell.zero_state(batch_size, tf.float32),
    )
def _decoder(self, keep_prob, encoder_output, encoder_state, batch_size,
             scope, helper, reuse=None):
    with tf.variable_scope(scope, reuse=reuse):
        attention_states = encoder_output
        cell = rnn.MultiRNNCell([self._cell(keep_prob) for _ in range(self.lstm_dims)])
        attention_mechanism = seq2seq.BahdanauAttention(self.hidden_size,
                                                        attention_states)
        # attention
        decoder_cell = seq2seq.AttentionWrapper(
            cell, attention_mechanism,
            attention_layer_size=self.hidden_size // 2)
        decoder_cell = rnn.OutputProjectionWrapper(decoder_cell,
                                                   self.hidden_size,
                                                   reuse=reuse,
                                                   activation=tf.nn.leaky_relu)
        decoder_initial_state = decoder_cell.zero_state(
            batch_size, tf.float32).clone(cell_state=encoder_state)
        output_layer = tf.layers.Dense(
            self.num_words,
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            activation=tf.nn.leaky_relu)
        decoder = seq2seq.BasicDecoder(decoder_cell, helper,
                                       decoder_initial_state,
                                       output_layer=output_layer)
        output, _, _ = seq2seq.dynamic_decode(
            decoder,
            maximum_iterations=self.max_sentence_length,
            impute_finished=True)
        # tf.summary.histogram('decoder', output)
        return output
def decoding_layer(dec_embed_input, embeddings, enc_output, enc_state,
                   vocab_size, text_length, summary_length,
                   max_summary_length, rnn_size, vocab_to_int, keep_prob,
                   batch_size, num_layers):
    for layer in range(num_layers):
        with tf.variable_scope('decoder_{}'.format(layer)):
            lstm = tf.nn.rnn_cell.LSTMCell(
                rnn_size,
                initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
            dec_cell = tf.nn.rnn_cell.DropoutWrapper(lstm,
                                                     input_keep_prob=keep_prob)

    # fully connected output layer
    output_layer = Dense(vocab_size,
                         kernel_initializer=tf.truncated_normal_initializer(
                             mean=0.0, stddev=0.1))

    attn_mech = seq.BahdanauAttention(rnn_size, enc_output, text_length,
                                      normalize=False,
                                      name='BahdanauAttention')
    # wrap the decoder cell with the attention mechanism
    dec_cell = seq.AttentionWrapper(cell=dec_cell,
                                    attention_mechanism=attn_mech,
                                    attention_layer_size=rnn_size)

    initial_state = seq.AttentionWrapperState(
        enc_state[0], _zero_state_tensors(rnn_size, batch_size, tf.float32))

    with tf.variable_scope("decode"):
        training_logits = training_decoding_layer(dec_embed_input,
                                                  summary_length, dec_cell,
                                                  initial_state, output_layer,
                                                  vocab_size,
                                                  max_summary_length)
    with tf.variable_scope("decode", reuse=True):
        inference_logits = inference_decoding_layer(
            embeddings, vocab_to_int['<GO>'], vocab_to_int['<EOS>'], dec_cell,
            initial_state, output_layer, max_summary_length, batch_size)

    return training_logits, inference_logits
def decoding_layer_train(self, num_units, max_time, batch_size, char2numY,
                         data_output_embed, encoder_output, last_state,
                         bidirectional):
    if not bidirectional:
        decoder_cell = rnn.LSTMCell(num_units)
    else:
        decoder_cell = rnn.LSTMCell(2 * num_units)

    training_helper = seq2seq.TrainingHelper(
        inputs=data_output_embed,
        sequence_length=[max_time] * batch_size,
        time_major=False)

    attention_mechanism = seq2seq.BahdanauAttention(
        num_units=num_units,
        memory=encoder_output,
        memory_sequence_length=[max_time] * batch_size)
    attention_cell = seq2seq.AttentionWrapper(
        cell=decoder_cell,
        attention_mechanism=attention_mechanism,
        attention_layer_size=num_units)

    decoder_initial_state = attention_cell.zero_state(
        batch_size=batch_size, dtype=tf.float32).clone(cell_state=last_state)

    output_layer = tf.layers.Dense(
        len(char2numY) - 2,
        kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

    training_decoder = seq2seq.BasicDecoder(
        cell=attention_cell,
        helper=training_helper,
        initial_state=decoder_initial_state,
        output_layer=output_layer)

    train_outputs, _, _ = seq2seq.dynamic_decode(
        decoder=training_decoder,
        impute_finished=True,
        maximum_iterations=max_time)

    return train_outputs
def decoding_layer(self, input, encoder_output, encoder_state):
    for i in range(self.num_layers):
        with tf.variable_scope('decoder_{}'.format(i)):
            decoder_cell = rnn.LSTMCell(
                self.cell_size,
                initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
            decoder_cell = rnn.DropoutWrapper(decoder_cell,
                                              input_keep_prob=self.keep_prob)

    output_layer = Dense(
        self.vocab_length,
        kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

    attention_mech = seq2seq.BahdanauAttention(self.cell_size, encoder_output,
                                               self.in_length, normalize=False)
    decoder_cell = seq2seq.DynamicAttentionWrapper(decoder_cell,
                                                   attention_mech,
                                                   self.cell_size)

    zero_state = _zero_state_tensors(self.cell_size, self.batch_size, tf.float32)
    initial_state = seq2seq.DynamicAttentionWrapperState(encoder_state[0],
                                                         zero_state)

    with tf.variable_scope("decode"):
        train_logits = self.train_decoding_layer(input, decoder_cell,
                                                 initial_state, output_layer)
    with tf.variable_scope("decode", reuse=True):
        inference_logits = self.inference_decoding_layer(
            self.embeddings, decoder_cell, initial_state, output_layer)

    return train_logits, inference_logits
def build_model(self):
    encoder = self.encoder
    inputs = self.inputs
    with tf.variable_scope('encoder'):
        t_sequence = tf.unstack(inputs, axis=1, name='TimeMajorInputs')
        outputs, _, _ = tf.nn.static_bidirectional_rnn(
            cell_fw=encoder, cell_bw=encoder, inputs=t_sequence,
            dtype=inputs.dtype)
    with tf.variable_scope('decoder'):
        with tf.name_scope('attention'):
            memory = tf.stack(outputs, axis=1, name='BatchMajorAnnotations')
            self.bahdanau = seq2seq.BahdanauAttention(self.attention_size,
                                                      memory=memory)
        raw_decoder = self.decoder
        decoder_cell = seq2seq.AttentionWrapper(raw_decoder, self.bahdanau,
                                                output_attention=False)
        self.decoder_cell = decoder_cell
def getDecoderCell(self, encoder_outputs, encoder_final_states):
    basic_cells = [self.get_basicLSTMCell() for i in range(layer_num)]
    basic_cell = tf.nn.rnn_cell.MultiRNNCell(basic_cells)
    initial_state = encoder_final_states

    # attention
    attention_mechanism = seq2seq.BahdanauAttention(
        num_units=num_units,
        memory=encoder_outputs,
        memory_sequence_length=self.enc_len)
    att_cell = seq2seq.AttentionWrapper(
        basic_cell,
        attention_mechanism=attention_mechanism,
        attention_layer_size=num_units,
        alignment_history=False,
        cell_input_fn=None,
        initial_cell_state=initial_state)
    initial_state = att_cell.zero_state(batch_size=tf.shape(self.enc_in)[0],
                                        dtype=tf.float32)
    # att_state.clone(cell_state=encoder_final_state)
    return att_cell, initial_state
def decoding_layer(self, rnn_inputs, encoder_output, encoder_state):
    decoder_cell = build_multicell(self.uni_layers, self.cell_size,
                                   self.keep_prob)

    attention_mech = seq2seq.BahdanauAttention(self.cell_size, encoder_output,
                                               self.in_length)
    attention_cell = seq2seq.AttentionWrapper(decoder_cell, attention_mech,
                                              self.cell_size / 2)
    decoder_cell = rnn.OutputProjectionWrapper(attention_cell,
                                               self.vocab_length)

    # clone() returns a new state rather than mutating in place, so keep
    # the result instead of discarding it
    initial_state = decoder_cell.zero_state(self.batch_size, tf.float32)
    initial_state = initial_state.clone(cell_state=encoder_state)

    with tf.variable_scope("decode"):
        train_logits = self.train_decoding_layer(rnn_inputs, decoder_cell,
                                                 initial_state)
    with tf.variable_scope("decode", reuse=True):
        inference_logits = self.inference_decoding_layer(
            self.embeddings, decoder_cell, initial_state)

    return train_logits, inference_logits
def _decoder_cell(self):
    batch_size, _ = tf.unstack(tf.shape(self._context))
    attention = seq2seq.BahdanauAttention(
        num_units=2 * self._hidden_size,
        memory=self._inputs_encoder_outputs,
        memory_sequence_length=self.inputs_length)
    attentive_cell = seq2seq.AttentionWrapper(
        cell=self._rnn_cell(self.context_state_size, activation=tf.nn.tanh),
        attention_mechanism=attention,
        attention_layer_size=2 * self._hidden_size,
        initial_cell_state=self._context)
    cell = rnn.MultiRNNCell([
        attentive_cell,
        self._rnn_cell(self.context_state_size, activation=tf.nn.tanh),
    ])
    initial_state = tuple(
        [attentive_cell.zero_state(batch_size, tf.float32), self._context])
    return cell, initial_state