def get(attention_type,
        num_units,
        memory,
        memory_sequence_length,
        scope=None,
        reuse=None):
    """Returns attention mechanism according to the specified type."""
    with tf.variable_scope(scope, reuse=reuse):
        if attention_type == U.ATT_LUONG:
            attention_mechanism = contrib_seq2seq.LuongAttention(
                num_units=num_units,
                memory=memory,
                memory_sequence_length=memory_sequence_length)
        elif attention_type == U.ATT_LUONG_SCALED:
            attention_mechanism = contrib_seq2seq.LuongAttention(
                num_units=num_units,
                memory=memory,
                memory_sequence_length=memory_sequence_length,
                scale=True)
        elif attention_type == U.ATT_BAHDANAU:
            attention_mechanism = contrib_seq2seq.BahdanauAttention(
                num_units=num_units,
                memory=memory,
                memory_sequence_length=memory_sequence_length)
        elif attention_type == U.ATT_BAHDANAU_NORM:
            attention_mechanism = contrib_seq2seq.BahdanauAttention(
                num_units=num_units,
                memory=memory,
                memory_sequence_length=memory_sequence_length,
                normalize=True)
        else:
            raise ValueError("Unknown attention type: %s" % attention_type)
    return attention_mechanism
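
A minimal usage sketch (not part of the original example; the 256-unit sizes and the placeholder tensors are illustrative, and the decoder cell is assumed) of how the mechanism returned by get() is typically fed into an AttentionWrapper from the same contrib_seq2seq module used above:

# Illustrative encoder outputs and lengths for the sketch.
encoder_outputs = tf.placeholder(tf.float32, [None, None, 256])
source_lengths = tf.placeholder(tf.int32, [None])

# Build a mechanism via the factory above and wrap a decoder cell with it.
attention_mechanism = get(U.ATT_LUONG,
                          num_units=256,
                          memory=encoder_outputs,
                          memory_sequence_length=source_lengths)
decoder_cell = contrib_seq2seq.AttentionWrapper(
    tf.nn.rnn_cell.LSTMCell(256),
    attention_mechanism,
    attention_layer_size=256)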
Example #2
def create_decoder_cell(agenda,
                        base_sent_embeds,
                        mev_st,
                        mev_ts,
                        base_length,
                        iw_length,
                        dw_length,
                        attn_dim,
                        hidden_dim,
                        num_layer,
                        enable_alignment_history=False,
                        enable_dropout=False,
                        dropout_keep=0.1,
                        no_insert_delete_attn=False):
    base_attn = seq2seq.BahdanauAttention(attn_dim,
                                          base_sent_embeds,
                                          base_length,
                                          name='src_attn')

    cnx_src, micro_evs_st = mev_st
    mev_st_attn = seq2seq.BahdanauAttention(attn_dim,
                                            cnx_src,
                                            iw_length,
                                            name='mev_st_attn')
    mev_st_attn._values = micro_evs_st

    attns = [base_attn, mev_st_attn]

    if not no_insert_delete_attn:
        cnx_tgt, micro_evs_ts = mev_ts
        mev_ts_attn = seq2seq.BahdanauAttention(attn_dim,
                                                cnx_tgt,
                                                dw_length,
                                                name='mev_ts_attn')
        mev_ts_attn._values = micro_evs_ts

        attns += [mev_ts_attn]

    bottom_cell = tf_rnn.LSTMCell(hidden_dim, name='bottom_cell')
    bottom_attn_cell = seq2seq.AttentionWrapper(
        bottom_cell,
        tuple(attns),
        output_attention=False,
        alignment_history=enable_alignment_history,
        name='att_bottom_cell')

    all_cells = [bottom_attn_cell]

    num_layer -= 1
    for i in range(num_layer):
        cell = tf_rnn.LSTMCell(hidden_dim, name='layer_%s' % (i + 1))
        if enable_dropout and dropout_keep < 1.:
            cell = tf_rnn.DropoutWrapper(cell, output_keep_prob=dropout_keep)

        all_cells.append(cell)

    decoder_cell = AttentionAugmentRNNCell(all_cells)
    decoder_cell.set_agenda(agenda)

    return decoder_cell
Example #3
    def _create_attention_mechanism(self, attention_type, num_units, memory,
                                    memory_sequence_length):

        if attention_type == 'bahdanau':
            attention_mechanism = seq2seq.BahdanauAttention(
                num_units=num_units,
                memory=memory,
                memory_sequence_length=memory_sequence_length,
                normalize=False)
            self._output_attention = False
        elif attention_type == 'normed_bahdanau':
            attention_mechanism = seq2seq.BahdanauAttention(
                num_units=num_units,
                memory=memory,
                memory_sequence_length=memory_sequence_length,
                normalize=True)
            self._output_attention = False
        elif attention_type == 'normed_monotonic_bahdanau':
            attention_mechanism = seq2seq.BahdanauMonotonicAttention(
                num_units=num_units,
                memory=memory,
                memory_sequence_length=memory_sequence_length,
                normalize=True,
                score_bias_init=-2.0,
                sigmoid_noise=1.0 if self._mode == 'train' else 0.0,
                mode='hard' if self._mode != 'train' else 'parallel')
            self._output_attention = False
        elif attention_type == 'luong':
            attention_mechanism = seq2seq.LuongAttention(
                num_units=num_units,
                memory=memory,
                memory_sequence_length=memory_sequence_length)
            self._output_attention = True
        elif attention_type == 'scaled_luong':
            attention_mechanism = seq2seq.LuongAttention(
                num_units=num_units,
                memory=memory,
                memory_sequence_length=memory_sequence_length,
                scale=True,
            )
            self._output_attention = True
        elif attention_type == 'scaled_monotonic_luong':
            attention_mechanism = seq2seq.LuongMonotonicAttention(
                num_units=num_units,
                memory=memory,
                memory_sequence_length=memory_sequence_length,
                scale=True,
                score_bias_init=-2.0,
                sigmoid_noise=1.0 if self._mode == 'train' else 0.0,
                mode='hard' if self._mode != 'train' else 'parallel')
            self._output_attention = True
        else:
            raise ValueError('Unknown attention mechanism: %s' % attention_type)

        return attention_mechanism
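
A possible wiring of the flag set above (a hypothetical helper, not shown in the original class): the _output_attention value recorded per attention type is what AttentionWrapper's output_attention argument expects, True for the Luong variants and False for the Bahdanau variants. The 256-unit size is an assumption.

    def _wrap_decoder_cell(self, decoder_cell, memory, memory_sequence_length):
        # Hypothetical helper: build the mechanism with the method above and
        # pass the recorded _output_attention flag through to the wrapper.
        attention_mechanism = self._create_attention_mechanism(
            'normed_bahdanau', 256, memory, memory_sequence_length)
        return seq2seq.AttentionWrapper(
            decoder_cell,
            attention_mechanism,
            attention_layer_size=256,
            output_attention=self._output_attention)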
Example #4
    def getBeamSearchDecoderCell(self, encoder_outputs, encoder_final_states):
        basic_cells = [self.get_basicLSTMCell() for i in range(layer_num)]
        basic_cell = tf.nn.rnn_cell.MultiRNNCell(basic_cells)
        tiled_encoder_outputs = seq2seq.tile_batch(encoder_outputs,
                                                   multiplier=beam_size)
        tiled_encoder_final_states = [
            seq2seq.tile_batch(state, multiplier=beam_size)
            for state in encoder_final_states
        ]
        tiled_sequence_length = seq2seq.tile_batch(self.enc_len,
                                                   multiplier=beam_size)
        initial_state = tuple(tiled_encoder_final_states)
        #attention
        attention_mechanism = seq2seq.BahdanauAttention(
            num_units=num_units,
            memory=tiled_encoder_outputs,
            memory_sequence_length=tiled_sequence_length)
        att_cell = seq2seq.AttentionWrapper(
            basic_cell,
            attention_mechanism=attention_mechanism,
            attention_layer_size=num_units,
            alignment_history=False,
            cell_input_fn=None,
            initial_cell_state=initial_state)

        initial_state = att_cell.zero_state(
            batch_size=tf.shape(self.enc_in)[0] * beam_size, dtype=tf.float32)
        #            att_state.clone(cell_state=encoder_final_state)

        return att_cell, initial_state
Example #5
 def _build_attention(self,
                      enc_outputs,
                      enc_seq_len
                      ):
   with tf.variable_scope("AttentionMechanism"):
     if self.attn_Type == 'bahdanau':
       attention_mechanism = seq2seq.BahdanauAttention(
           num_units=2*self.cell_dim,
           memory=enc_outputs,
           memory_sequence_length=enc_seq_len,
           probability_fn=tf.nn.softmax,
           normalize=True,
           dtype=tf.get_variable_scope().dtype
       )
     elif self.params['attention_type'] == 'luong':
       attention_mechanism = seq2seq.LuongAttention(
           num_units=2*self.cell_dim,
           memory=enc_outputs,
           memory_sequence_length=enc_seq_len,
           probability_fn=tf.nn.softmax,
           dtype=tf.get_variable_scope().dtype
       )
     else:
       raise ValueError('Unknown Attention Type')
     return attention_mechanism
Example #6
    def _build_encoder(self, input_sequence, keep_prob):
        """Define encoder architecture.
        """
        # connect each layer sequentially, building a graph that resembles a
        # feed-forward network made of recurrent units
        encoder_cell = self._multi_cell(num_units=self.num_units,
                                        num_layers=self.num_layers,
                                        keep_prob=keep_prob)

        # the model is using fixed lengths of input sequences so tile the defined
        # length in the batch dimension
        sequence_lengths = tf.tile([self.input_length],
                                   [tf.shape(input_sequence)[0]])

        # build the unrolled graph of the recurrent neural network
        encoder_outputs, encoder_states = tf.nn.dynamic_rnn(
            cell=encoder_cell,
            inputs=input_sequence,
            sequence_length=sequence_lengths,
            dtype=tf.float32)

        # attention provides a direct connection between the encoder and decoder
        # so that long-range connections are not limited by the fixed size of the
        # thought vector
        attention_layer_size = self.num_units
        attention_mechanism = seq2seq.BahdanauAttention(
            num_units=attention_layer_size,
            memory=encoder_outputs,
            memory_sequence_length=sequence_lengths,
            normalize=True)

        return (encoder_outputs, encoder_states, attention_mechanism)
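
A sketch of how the tuple returned here might be consumed on the decoder side (a hypothetical continuation, not part of the original class; the helper name is an assumption):

    def _build_decoder_cell(self, attention_mechanism, keep_prob):
        # Hypothetical: wrap a fresh multi-layer cell with the normalized
        # Bahdanau mechanism built by _build_encoder above.
        decoder_cell = self._multi_cell(num_units=self.num_units,
                                        num_layers=self.num_layers,
                                        keep_prob=keep_prob)
        return seq2seq.AttentionWrapper(decoder_cell,
                                        attention_mechanism,
                                        attention_layer_size=self.num_units)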
Example #7
    def train_decode_layer(self, dec_embedding_input, dec_cell, output_layer):
        atten_mech = seq2seq.BahdanauAttention(
            num_units=self.hidden_dim * 2,
            memory=self.enc_output,
            memory_sequence_length=self.source_len,
            normalize=True,
            name='BahdanauAttention')
        dec_cell = seq2seq.AttentionWrapper(dec_cell,
                                            atten_mech,
                                            self.hidden_dim * 2,
                                            name='dec_attention_cell')

        initial_state = dec_cell.zero_state(
            batch_size=self.batch_size,
            dtype=tf.float32).clone(cell_state=self.enc_state)

        train_helper = seq2seq.TrainingHelper(dec_embedding_input,
                                              self.target_len)
        training_decoder = seq2seq.BasicDecoder(dec_cell,
                                                train_helper,
                                                initial_state=initial_state,
                                                output_layer=output_layer)
        train_logits, _, _ = seq2seq.dynamic_decode(
            training_decoder,
            output_time_major=False,
            impute_finished=False,
            maximum_iterations=self.max_target_len)
        return train_logits
Example #8
    def decoding_layer(self, dec_embed_input, embeddings, enc_output, enc_state,
                       vocab_size, text_len, summary_len, max_sum_len):

        lstm = rnn.LSTMCell(self.hidden_dim * 2,
                            initializer=tf.random_normal_initializer(-0.1, 0.1, seed=2))

        dec_cell = rnn.DropoutWrapper(lstm, input_keep_prob=self.keep_prob)

        output_layer = tf.layers.Dense(vocab_size,
                                       kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))

        attn_mech = seq2seq.BahdanauAttention(self.hidden_dim * 2,
                                              enc_output,
                                              text_len,
                                              normalize=False,
                                              name='BahdanauAttention')

        dec_cell = seq2seq.AttentionWrapper(dec_cell, attn_mech,
                                            attention_layer_size=self.hidden_dim * 2)

        # initial_state = seq2seq.AttentionWrapperState(
        #     enc_state[0], _zero_state_tensors(self.hidden_dim, batch_size, tf.float32))
        initial_state = dec_cell.zero_state(self.batch_size, tf.float32).clone(
            cell_state=LSTMStateTuple(*enc_state))

        with tf.variable_scope('decode'):
            training_logits = self.training_decoding_layer(dec_embed_input, summary_len,
                                                           dec_cell, initial_state,
                                                           output_layer, max_sum_len)

        with tf.variable_scope('decode', reuse=True):
            inference_logits = self.inference_decoding_layer(embeddings, self.vocab_to_int['<GO>'],
                                                             self.vocab_to_int['<EOS>'], dec_cell,
                                                             initial_state, output_layer, max_sum_len)

        return training_logits, inference_logits
Example #9
 def inference_decode_layer(self, start_token, dec_cell, end_token,
                            output_layer):
     start_tokens = tf.tile(tf.constant([start_token], dtype=tf.int32),
                            [self.batch_size],
                            name='start_token')
     tiled_enc_output = seq2seq.tile_batch(self.enc_output,
                                           multiplier=self.Beam_width)
     tiled_enc_state = seq2seq.tile_batch(self.enc_state,
                                          multiplier=self.Beam_width)
     tiled_source_len = seq2seq.tile_batch(self.source_len,
                                           multiplier=self.Beam_width)
     atten_mech = seq2seq.BahdanauAttention(self.hidden_dim * 2,
                                            tiled_enc_output,
                                            tiled_source_len,
                                            normalize=True)
     decoder_att = seq2seq.AttentionWrapper(dec_cell, atten_mech,
                                            self.hidden_dim * 2)
     initial_state = decoder_att.zero_state(
         self.batch_size * self.Beam_width,
         tf.float32).clone(cell_state=tiled_enc_state)
     decoder = seq2seq.BeamSearchDecoder(decoder_att,
                                         self.embeddings,
                                         start_tokens,
                                         end_token,
                                         initial_state,
                                         beam_width=self.Beam_width,
                                         output_layer=output_layer)
     infer_logits, _, _ = seq2seq.dynamic_decode(decoder, False, False,
                                                 self.max_target_len)
     return infer_logits
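
A side note on consuming this result (an assumption about downstream use, not shown above): dynamic_decode with a BeamSearchDecoder returns a FinalBeamSearchDecoderOutput whose predicted_ids tensor has shape [batch, time, beam_width], so the top hypothesis can be read off roughly as follows.

     # Beam 0 holds the highest-scoring hypothesis for each batch entry.
     best_ids = infer_logits.predicted_ids[:, :, 0]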
Example #10
 def _attn_cell(self, rnn_enc_tensor):
     cell = new_multi_rnn_cell(self.hsz, self.rnntype, self.nlayers)
     if self.attn:
         attn_mech = tfcontrib_seq2seq.BahdanauAttention(self.hsz, rnn_enc_tensor, self.src_len)
         #attn_mech = tfcontrib_seq2seq.LuongAttention(self.hsz, rnn_enc_tensor, self.src_len)
         cell = tf.contrib.seq2seq.AttentionWrapper(cell, attn_mech, self.hsz, name='dyn_attn_cell')
     return cell
Example #11
 def _build_single_attention_mechanism(memory):
     if not self._is_training:
         memory = seq2seq.tile_batch(memory,
                                     multiplier=self._beam_width)
     return seq2seq.BahdanauAttention(self._num_attention_units,
                                      memory,
                                      memory_sequence_length=None)
Example #12
    def _build_decoder(self):
        """ Decode keyword and context into a sequence of vectors. """
        self.sequence_decoder = tf.placeholder(
            dtype=tf.float32,
            shape=[_BATCH_SIZE, None, CHAR_VEC_DIM],
            name='context')
        self.length_decoder = tf.placeholder(dtype=tf.int32,
                                             shape=[_BATCH_SIZE],
                                             name='length_keywords')
        attention = seq2seq.BahdanauAttention(
            _NUM_UNITS,
            memory=self.encoder_outputs,
            memory_sequence_length=self.context_length,
            name="BahdanauAttention")
        cell_attention = tf.contrib.rnn.GRUCell(_NUM_UNITS)
        attention_wrapper = seq2seq.AttentionWrapper(cell_attention, attention)

        self.initial_decode_state = attention_wrapper.zero_state(
            _BATCH_SIZE,
            dtype=tf.float32).clone(cell_state=self.states_keywords)

        self.decoder_outputs, self.decoder_final_state = tf.nn.dynamic_rnn(
            attention_wrapper,
            self.sequence_decoder,
            sequence_length=self.length_decoder,
            initial_state=self.initial_decode_state,
            dtype=tf.float32,
            time_major=False)
Example #13
    def decode(self, dec_cell, enc_outputs, ctx_outputs):
        with tf.variable_scope("decode"):
            batch_size = self._batch_size

            attn_mech = seq2seq.BahdanauAttention(self._memory_size,
                                                  enc_outputs,
                                                  self.input_lengths)
            dec_cell = CondWrapper(dec_cell, ctx_outputs)
            dec_cell = seq2seq.AttentionWrapper(dec_cell, attn_mech,
                                                self._memory_size)
            dec_initial_state = dec_cell.zero_state(batch_size=batch_size,
                                                    dtype=tf.float32)
            helper_build_fn = self._infer_helper if self._infer else self._train_helper

            output_layer = layers_core.Dense(self._vocab_size,
                                             use_bias=True,
                                             activation=None)
            decoder = seq2seq.BasicDecoder(cell=dec_cell,
                                           helper=helper_build_fn(),
                                           initial_state=dec_initial_state,
                                           output_layer=output_layer)
            dec_output, dec_state, _ = seq2seq.dynamic_decode(
                decoder,
                impute_finished=True,
                maximum_iterations=self._max_seq_length)
            rnn_output = dec_output.rnn_output
            sample_id = dec_output.sample_id
        return rnn_output, sample_id, dec_state
Example #14
    def _build_model(self,
                     batch_size,
                     helper_build_fn,
                     decoder_maxiters=None,
                     alignment_history=False):
        # embed input_data into a one-hot representation
        inputs = tf.one_hot(self.input_data,
                            self._input_size,
                            dtype=self._dtype)
        inputs_len = self.input_lengths

        with tf.name_scope('bidir-encoder'):
            fw_cell = rnn.MultiRNNCell(
                [rnn.BasicRNNCell(self._enc_rnn_size) for i in range(3)],
                state_is_tuple=True)
            bw_cell = rnn.MultiRNNCell(
                [rnn.BasicRNNCell(self._enc_rnn_size) for i in range(3)],
                state_is_tuple=True)
            fw_cell_zero = fw_cell.zero_state(batch_size, self._dtype)
            bw_cell_zero = bw_cell.zero_state(batch_size, self._dtype)

            enc_out, _ = tf.nn.bidirectional_dynamic_rnn(
                fw_cell,
                bw_cell,
                inputs,
                sequence_length=inputs_len,
                initial_state_fw=fw_cell_zero,
                initial_state_bw=bw_cell_zero)

        with tf.name_scope('attn-decoder'):
            dec_cell_in = rnn.GRUCell(self._dec_rnn_size)
            attn_values = tf.concat(enc_out, 2)
            attn_mech = seq2seq.BahdanauAttention(self._enc_rnn_size * 2,
                                                  attn_values, inputs_len)
            dec_cell_attn = rnn.GRUCell(self._enc_rnn_size * 2)
            dec_cell_attn = seq2seq.AttentionWrapper(
                dec_cell_attn,
                attn_mech,
                self._enc_rnn_size * 2,
                alignment_history=alignment_history)
            dec_cell_out = rnn.GRUCell(self._output_size)
            dec_cell = rnn.MultiRNNCell(
                [dec_cell_in, dec_cell_attn, dec_cell_out],
                state_is_tuple=True)

            dec = seq2seq.BasicDecoder(
                dec_cell, helper_build_fn(),
                dec_cell.zero_state(batch_size, self._dtype))

            dec_out, dec_state = seq2seq.dynamic_decode(
                dec,
                output_time_major=False,
                maximum_iterations=decoder_maxiters,
                impute_finished=True)

        self.outputs = dec_out.rnn_output
        self.output_ids = dec_out.sample_id
        self.final_state = dec_state
Example #15
    def build_decoder_cell(self):

        if self.use_beamsearch_decode:
            encoder_outputs = tf.contrib.seq2seq.tile_batch(
                self.encoder_outputs, multiplier=self.beam_width)
            encoder_last_state = tf.contrib.seq2seq.tile_batch(
                self.encoder_last_state, multiplier=self.beam_width)
            encoder_inputs_length = tf.contrib.seq2seq.tile_batch(
                self.encoder_inputs_length, multiplier=self.beam_width)
        else:
            encoder_outputs = self.encoder_outputs
            encoder_last_state = self.encoder_last_state
            encoder_inputs_length = self.encoder_inputs_length

        self.attention_mechanism = seq2seq.BahdanauAttention(
            num_units=self.decoder_hidden_units,
            memory=encoder_outputs,
            memory_sequence_length=encoder_inputs_length)

        self.decoder_cell_list = [
            self.build_single_cell(self.decoder_hidden_units)
            for _ in range(self.depth)
        ]

        # Input feeding: concatenate the previous attention vector with the
        # decoder inputs and project it back to decoder_hidden_units.
        def attn_decoder_input_fn(inputs, attention):
            if not self.attn_input_feeding:
                return inputs

            # Essential when use_residual=True
            _input_layer = Dense(self.decoder_hidden_units,
                                 dtype=self.dtype,
                                 name='attn_input_feeding')
            return _input_layer(rnn.array_ops.concat([inputs, attention], -1))

        # Here the attention mechanism wraps the full decoder stack: the
        # AttentionWrapper encloses the whole MultiRNNCell, not just the top layer.
        self.decoder_cell_list = seq2seq.AttentionWrapper(
            cell=rnn.MultiRNNCell(self.decoder_cell_list),
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=self.decoder_hidden_units,
            cell_input_fn=attn_decoder_input_fn,
            initial_cell_state=encoder_last_state,
            alignment_history=False,
            name='attention_wrapper')

        if self.use_beamsearch_decode:
            batch_size = self.batch_size * self.beam_width
        else:
            batch_size = self.batch_size

        # added by Meng
        decoder_initial_state = self.decoder_cell_list.zero_state(
            batch_size=batch_size,
            dtype=self.dtype).clone(cell_state=encoder_last_state)

        return self.decoder_cell_list, decoder_initial_state
Example #16
    def build_decoder_cell(self):
        # TODO(sdsuo): Read up and decide whether to use beam search
        self.attention_mechanism = seq2seq.BahdanauAttention(
            num_units=self.decoder_hidden_units,
            memory=self.encoder_outputs,
            memory_sequence_length=self.encoder_inputs_length
        )

        self.decoder_cell_list = [
            self.build_single_cell(self.decoder_hidden_units) for _ in range(self.depth)
        ]

        # Input feeding: concatenate the previous attention vector with the
        # decoder inputs and project it back to decoder_hidden_units.
        def attn_decoder_input_fn(inputs, attention):
            if not self.attn_input_feeding:
                return inputs

            # Essential when use_residual=True
            _input_layer = Dense(self.decoder_hidden_units, dtype=self.dtype,
                                 name='attn_input_feeding')
            return _input_layer(rnn.array_ops.concat([inputs, attention], -1))

        # NOTE(sdsuo): Attention mechanism is implemented only on the top decoder layer
        self.decoder_cell_list[-1] = seq2seq.AttentionWrapper(
            cell=self.decoder_cell_list[-1],
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=self.decoder_hidden_units,
            cell_input_fn=attn_decoder_input_fn,
            initial_cell_state=self.encoder_last_state[-1],
            alignment_history=False,
            name='attention_wrapper'
        )

        # To be compatible with AttentionWrapper, the encoder last state
        # of the top layer should be converted into the AttentionWrapperState
        # form. We can easily do this by calling AttentionWrapper.zero_state.

        # Also, if beam search decoding is used, the batch_size argument in
        # .zero_state should be ${decoder_beam_width} times the original batch_size.
        if self.use_beamsearch_decode:
            batch_size = self.batch_size * self.beam_width
        else:
            batch_size = self.batch_size
        # NOTE(vera): important dimension here
        # embed()
        initial_state = [state for state in self.encoder_last_state]
        initial_state[-1] = self.decoder_cell_list[-1].zero_state(
            batch_size=batch_size,
            dtype=self.dtype
        )
        decoder_initial_state = tuple(initial_state)


        return rnn.MultiRNNCell(self.decoder_cell_list), decoder_initial_state
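
A short sketch of how the pair returned here is typically used (an assumed wiring; model, helper, and output_layer are placeholders for this illustration, not names from the original class):

    # Hypothetical use of the returned cell and initial state with a decoder.
    decoder_cell, decoder_initial_state = model.build_decoder_cell()
    decoder = seq2seq.BasicDecoder(cell=decoder_cell,
                                   helper=helper,
                                   initial_state=decoder_initial_state,
                                   output_layer=output_layer)
    outputs, final_state, _ = seq2seq.dynamic_decode(decoder)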
Example #17
 def _attn_cell_w_dropout(self, rnn_enc_tensor, beam):
     cell = multi_rnn_cell_w_dropout(self.hsz, self.pkeep, self.rnntype, self.nlayers)
     if self.attn:
         src_len = self.src_len
         if beam > 1:
             # Expand the encoded tensor for all beam entries
             rnn_enc_tensor = tf.contrib.seq2seq.tile_batch(rnn_enc_tensor, multiplier=beam)
             src_len = tf.contrib.seq2seq.tile_batch(src_len, multiplier=beam)
         attn_mech = tfcontrib_seq2seq.BahdanauAttention(self.hsz, rnn_enc_tensor, src_len)
         #attn_mech = tfcontrib_seq2seq.LuongAttention(self.hsz, rnn_enc_tensor, src_len)
         cell = tf.contrib.seq2seq.AttentionWrapper(cell, attn_mech, self.hsz, name='dyn_attn_cell')
     return cell
Example #18
def create_decoder_cell(agenda, base_sent_embeds, insert_word_embeds, delete_word_embeds,
                        base_length, iw_length, dw_length,
                        attn_dim, hidden_dim, num_layer,
                        enable_alignment_history=False, enable_dropout=False, dropout_keep=0.1,
                        no_insert_delete_attn=False):
    base_attn = seq2seq.BahdanauAttention(attn_dim, base_sent_embeds, base_length, name='src_attn')
    attns = [base_attn]
    if not no_insert_delete_attn:
        insert_attn = seq2seq.BahdanauAttention(attn_dim, insert_word_embeds, iw_length, name='insert_attn')
        delete_attn = seq2seq.BahdanauAttention(attn_dim, delete_word_embeds, dw_length, name='delete_attn')
        attns += [insert_attn, delete_attn]

    if no_insert_delete_attn:
        assert len(attns) == 1
    else:
        assert len(attns) == 3

    bottom_cell = tf_rnn.LSTMCell(hidden_dim, name='bottom_cell')
    bottom_attn_cell = seq2seq.AttentionWrapper(
        bottom_cell,
        tuple(attns),
        output_attention=False,
        alignment_history=enable_alignment_history,
        name='att_bottom_cell'
    )

    all_cells = [bottom_attn_cell]

    num_layer -= 1
    for i in range(num_layer):
        cell = tf_rnn.LSTMCell(hidden_dim, name='layer_%s' % (i + 1))
        if enable_dropout and dropout_keep < 1.:
            cell = tf_rnn.DropoutWrapper(cell, output_keep_prob=dropout_keep)

        all_cells.append(cell)

    decoder_cell = AttentionAugmentRNNCell(all_cells)
    decoder_cell.set_agenda(agenda)

    return decoder_cell
Example #19
    def build_decode_cell(self):
        encoder_outputs = self.encoder_outputs
        encoder_last_state = self.encoder_last_state
        encoder_inputs_length = self.encoder_inputs_length
        # Building attention mechanism: Default Bahdanau
        # 'Bahdanau' style attention: https://arxiv.org/abs/1409.0473
        self.attention_mechanism = seq2seq.BahdanauAttention(
            num_units=self.hidden_units,
            memory=encoder_outputs,
            memory_sequence_length=encoder_inputs_length
        )

        if self.attention_type.lower() == 'luong':
            self.attention_mechanism = seq2seq.LuongAttention(
                num_units=self.hidden_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_inputs_length
            )

        # decoder_cell
        self.decoder_cell_list = [self.build_single_cell(layer=2) for _ in range(self.depth)]

        def attn_decoder_input_fn(inputs, attention):
            if not self.attn_input_feeding:
                return inputs
            # Essential when use_residual=True
            _input_layer = Dense(self.hidden_units * 2, dtype=self.dtype, name='attn_input_feeding')
            return _input_layer(tf.concat([inputs, attention], -1))

        # AttentionWrapper wraps RNNCell with the attention_mechanism
        # Note: We implement Attention mechanism only on the top decoder layer
        self.decoder_cell_list[-1] = seq2seq.AttentionWrapper(
            cell=self.decoder_cell_list[-1],
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=self.hidden_units,
            cell_input_fn=attn_decoder_input_fn,
            initial_cell_state=encoder_last_state[-1],
            alignment_history=False,
            name='Attention_wrapper'
        )

        # To be compatible with AttentionWrapper, the encoder last state
        # of the top layer should be converted into the AttentionWrapperState form
        # We can easily do this by calling AttentionWrapper.zero_state

        batch_size = self.batch_size
        initial_state = [state for state in encoder_last_state]
        initial_state[-1] = self.decoder_cell_list[-1].zero_state(
            batch_size=batch_size, dtype=self.dtype
        )
        decoder_initial_state = tuple(initial_state)
        return rnn.MultiRNNCell(self.decoder_cell_list), decoder_initial_state
Example #20
def create_attention_mechanism(attention_option, num_units, memory,
                               source_sequence_length):
    """
    Create attention mechanism based on the attention_option.
    :param attention_option: "luong","scaled_luong","bahdanau","normed_bahdanau"
    :param num_units:
    :param memory: The memory to query; usually the output of an RNN encoder.  This
        tensor should be shaped `[batch_size, max_time, ...]`.
    :param source_sequence_length: (optional) Sequence lengths for the batch entries
        in memory.  If provided, the memory tensor rows are masked with zeros
        for values past the respective sequence lengths.
    :return:
    """
    # Mechanism
    if attention_option == "luong":
        attention_mechanism = seq2seq.LuongAttention(
            num_units, memory, memory_sequence_length=source_sequence_length)
    elif attention_option == "scaled_luong":
        attention_mechanism = seq2seq.LuongAttention(
            num_units,
            memory,
            memory_sequence_length=source_sequence_length,
            scale=True)
    elif attention_option == "bahdanau":
        attention_mechanism = seq2seq.BahdanauAttention(
            num_units, memory, memory_sequence_length=source_sequence_length)
    elif attention_option == "normed_bahdanau":
        attention_mechanism = seq2seq.BahdanauAttention(
            num_units,
            memory,
            memory_sequence_length=source_sequence_length,
            normalize=True)
    else:
        raise ValueError("Unknown attention option %s" % attention_option)

    return attention_mechanism
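
A minimal usage sketch for this factory (an illustration, assuming the same tf / rnn / seq2seq aliases used above; the 128-unit size and the placeholder tensors are not from the original):

# Illustrative encoder outputs and lengths for the sketch.
encoder_outputs = tf.placeholder(tf.float32, [None, None, 128])
source_lengths = tf.placeholder(tf.int32, [None])

attention_mechanism = create_attention_mechanism(
    "scaled_luong",
    num_units=128,
    memory=encoder_outputs,
    source_sequence_length=source_lengths)
decoder_cell = seq2seq.AttentionWrapper(rnn.LSTMCell(128),
                                        attention_mechanism,
                                        attention_layer_size=128)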
Example #21
 def build_attention_mechanism(self):
     if self.hparams.attention_type == 'luong':
         attention_mechanism = seq2seq.LuongAttention(
             self.hparams.hidden_units, self.feedforward_inputs,
             self.feedforward_inputs_length)
     elif self.hparams.attention_type == 'bahdanau':
         attention_mechanism = seq2seq.BahdanauAttention(
             self.hparams.hidden_units,
             self.feedforward_inputs,
             self.feedforward_inputs_length,
         )
     else:
         raise ValueError(
             "Currently, the only supported attention types are 'luong' and 'bahdanau'."
         )
Example #22
    def _decoder_cell(self):
        batch_size, _ = tf.unstack(tf.shape(self._targets))

        attention = seq2seq.BahdanauAttention(
            num_units=2 * self.CELL_SIZE,
            memory=self._targets_encoder_outputs,
            memory_sequence_length=self._targets_length)

        attentive_cell = seq2seq.AttentionWrapper(
            cell=rnn.GRUCell(2 * self.CELL_SIZE, activation=tf.nn.tanh),
            attention_mechanism=attention,
            attention_layer_size=2 * self.CELL_SIZE,
            initial_cell_state=self._targets_encoder_state)

        return (
            attentive_cell,
            attentive_cell.zero_state(batch_size, tf.float32),
        )
Example #23
    def _decoder(self, keep_prob, encoder_output, encoder_state, batch_size, scope, helper, reuse=None):
        with tf.variable_scope(scope, reuse=reuse):
            attention_states = encoder_output
            cell = rnn.MultiRNNCell([self._cell(keep_prob) for _ in range(self.lstm_dims)])
            attention_mechanism = seq2seq.BahdanauAttention(self.hidden_size, attention_states)  # attention
            decoder_cell = seq2seq.AttentionWrapper(cell, attention_mechanism,
                                                    attention_layer_size=self.hidden_size // 2)
            decoder_cell = rnn.OutputProjectionWrapper(decoder_cell, self.hidden_size, reuse=reuse,
                                                       activation=tf.nn.leaky_relu)
            decoder_initial_state = decoder_cell.zero_state(batch_size, tf.float32).clone(cell_state=encoder_state)

            output_layer = tf.layers.Dense(self.num_words,
                                           kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                           activation=tf.nn.leaky_relu)
            decoder = seq2seq.BasicDecoder(decoder_cell, helper, decoder_initial_state, output_layer=output_layer)
            output, _, _ = seq2seq.dynamic_decode(decoder, maximum_iterations=self.max_sentence_length,
                                                  impute_finished=True)

            # tf.summary.histogram('decoder', output)
        return output
Example #24
def decoding_layer(dec_embed_input, embeddings, enc_output, enc_state,
                   vocab_size, text_length, summary_length, max_summary_length,
                   rnn_size, vocab_to_int, keep_prob, batch_size, num_layers):

    for layer in range(num_layers):
        with tf.variable_scope('decoder_{}'.format(layer)):
            lstm = tf.nn.rnn_cell.LSTMCell(
                rnn_size,
                initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
            dec_cell = tf.nn.rnn_cell.DropoutWrapper(lstm,
                                                     input_keep_prob=keep_prob)
    # fully connected layer
    output_layer = Dense(vocab_size,
                         kernel_initializer=tf.truncated_normal_initializer(
                             mean=0.0, stddev=0.1))

    attn_mech = seq.BahdanauAttention(rnn_size,
                                      enc_output,
                                      text_length,
                                      normalize=False,
                                      name='BahdanauAttention')

    dec_cell = seq.AttentionWrapper(cell=dec_cell,
                                    attention_mechanism=attn_mech,
                                    attention_layer_size=rnn_size)

    # Apply the attention mechanism: build the wrapper's initial state from
    # its zero state and the final encoder state.
    initial_state = dec_cell.zero_state(batch_size, tf.float32).clone(
        cell_state=enc_state[0])

    with tf.variable_scope("decode"):
        training_logits = training_decoding_layer(dec_embed_input,
                                                  summary_length, dec_cell,
                                                  initial_state, output_layer,
                                                  vocab_size,
                                                  max_summary_length)
    with tf.variable_scope("decode", reuse=True):
        inference_logits = inference_decoding_layer(
            embeddings, vocab_to_int['<GO>'], vocab_to_int['<EOS>'], dec_cell,
            initial_state, output_layer, max_summary_length, batch_size)
    return training_logits, inference_logits
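
The training_decoding_layer helper called above is not shown in this example; the following is a plausible sketch of it, modeled on the TrainingHelper / BasicDecoder / dynamic_decode pattern used elsewhere on this page. The body is an assumption; only the argument order is taken from the call above.

def training_decoding_layer(dec_embed_input, summary_length, dec_cell,
                            initial_state, output_layer, vocab_size,
                            max_summary_length):
    # Teacher forcing: feed the ground-truth embeddings at every step.
    # (vocab_size is unused here; it is kept only to match the call above.)
    helper = seq.TrainingHelper(inputs=dec_embed_input,
                                sequence_length=summary_length,
                                time_major=False)
    decoder = seq.BasicDecoder(dec_cell, helper, initial_state,
                               output_layer=output_layer)
    logits, _, _ = seq.dynamic_decode(decoder,
                                      output_time_major=False,
                                      impute_finished=True,
                                      maximum_iterations=max_summary_length)
    return logits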
Example #25
    def decoding_layer_train(self, num_units, max_time, batch_size, char2numY,
                             data_output_embed, encoder_output, last_state,
                             bidirectional):
        if not bidirectional:
            decoder_cell = rnn.LSTMCell(num_units)
        else:
            decoder_cell = rnn.LSTMCell(2 * num_units)
        training_helper = seq2seq.TrainingHelper(inputs=data_output_embed,
                                                 sequence_length=[max_time] *
                                                 batch_size,
                                                 time_major=False)

        attention_mechanism = seq2seq.BahdanauAttention(
            num_units=num_units,
            memory=encoder_output,
            memory_sequence_length=[max_time] * batch_size)

        attention_cell = seq2seq.AttentionWrapper(
            cell=decoder_cell,
            attention_mechanism=attention_mechanism,
            attention_layer_size=num_units)

        decoder_initial_state = attention_cell.zero_state(
            batch_size=batch_size,
            dtype=tf.float32).clone(cell_state=last_state)
        output_layer = tf.layers.Dense(
            len(char2numY) - 2,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.1))
        training_decoder = seq2seq.BasicDecoder(
            cell=attention_cell,
            helper=training_helper,
            initial_state=decoder_initial_state,
            output_layer=output_layer)

        train_outputs, _, _ = seq2seq.dynamic_decode(
            decoder=training_decoder,
            impute_finished=True,
            maximum_iterations=max_time)

        return train_outputs
Example #26
    def decoding_layer(self, input, encoder_output, encoder_state):
        for i in range(self.num_layers):
            with tf.variable_scope('decoder_{}'.format(i)):
                decoder_cell = rnn.LSTMCell(
                    self.cell_size,
                    initializer=tf.random_uniform_initializer(-0.1,
                                                              0.1,
                                                              seed=2))
                decoder_cell = rnn.DropoutWrapper(
                    decoder_cell, input_keep_prob=self.keep_prob)

        output_layer = Dense(
            self.vocab_length,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.1))

        attention_mech = seq2seq.BahdanauAttention(self.cell_size,
                                                   encoder_output,
                                                   self.in_length,
                                                   normalize=False)
        decoder_cell = seq2seq.AttentionWrapper(decoder_cell,
                                                attention_mech,
                                                attention_layer_size=self.cell_size)

        initial_state = decoder_cell.zero_state(self.batch_size,
                                                tf.float32).clone(
                                                    cell_state=encoder_state[0])

        with tf.variable_scope("decode"):
            train_logits = self.train_decoding_layer(input, decoder_cell,
                                                     initial_state,
                                                     output_layer)

        with tf.variable_scope("decode", reuse=True):
            inference_logits = self.inference_decoding_layer(
                self.embeddings, decoder_cell, initial_state, output_layer)

        return train_logits, inference_logits
Example #27
    def build_model(self):
        encoder = self.encoder
        inputs = self.inputs
        with tf.variable_scope('encoder'):
            t_sequence = tf.unstack(inputs, axis=1, name='TimeMajorInputs')
            outputs, _, _ = tf.nn.static_bidirectional_rnn(cell_fw=encoder,
                                                           cell_bw=encoder,
                                                           inputs=t_sequence,
                                                           dtype=inputs.dtype)
        with tf.variable_scope('decoder'):
            with tf.name_scope('attention'):
                memory = tf.stack(outputs,
                                  axis=1,
                                  name='BatchMajorAnnotations')
                self.bahdanau = seq2seq.BahdanauAttention(self.attention_size,
                                                          memory=memory)

            raw_decoder = self.decoder
            decoder_cell = seq2seq.AttentionWrapper(raw_decoder,
                                                    self.bahdanau,
                                                    output_attention=False)
            self.decoder_cell = decoder_cell
Example #28
    def getDecoderCell(self, encoder_outputs, encoder_final_states):
        basic_cells = [self.get_basicLSTMCell() for i in range(layer_num)]
        basic_cell = tf.nn.rnn_cell.MultiRNNCell(basic_cells)
        initial_state = encoder_final_states
        #attention
        attention_mechanism = seq2seq.BahdanauAttention(
            num_units=num_units,
            memory=encoder_outputs,
            memory_sequence_length=self.enc_len)
        att_cell = seq2seq.AttentionWrapper(
            basic_cell,
            attention_mechanism=attention_mechanism,
            attention_layer_size=num_units,
            alignment_history=False,
            cell_input_fn=None,
            initial_cell_state=initial_state)

        initial_state = att_cell.zero_state(batch_size=tf.shape(
            self.enc_in)[0],
                                            dtype=tf.float32)
        #            att_state.clone(cell_state=encoder_final_state)

        return att_cell, initial_state
Example #29
    def decoding_layer(self, rnn_inputs, encoder_output, encoder_state):
        decoder_cell = build_multicell(self.uni_layers, self.cell_size,
                                       self.keep_prob)

        attention_mech = seq2seq.BahdanauAttention(self.cell_size,
                                                   encoder_output,
                                                   self.in_length)
        attention_cell = seq2seq.AttentionWrapper(decoder_cell, attention_mech,
                                                  self.cell_size // 2)
        decoder_cell = rnn.OutputProjectionWrapper(attention_cell,
                                                   self.vocab_length)

        initial_state = decoder_cell.zero_state(self.batch_size, tf.float32)
        initial_state = initial_state.clone(cell_state=encoder_state)

        with tf.variable_scope("decode"):
            train_logits = self.train_decoding_layer(rnn_inputs, decoder_cell,
                                                     initial_state)

        with tf.variable_scope("decode", reuse=True):
            inference_logits = self.inference_decoding_layer(
                self.embeddings, decoder_cell, initial_state)

        return train_logits, inference_logits
Example #30
    def _decoder_cell(self):
        batch_size, _ = tf.unstack(tf.shape(self._context))

        attention = seq2seq.BahdanauAttention(
            num_units=2 * self._hidden_size,
            memory=self._inputs_encoder_outputs,
            memory_sequence_length=self.inputs_length)

        attentive_cell = seq2seq.AttentionWrapper(
            cell=self._rnn_cell(self.context_state_size,
                                activation=tf.nn.tanh),
            attention_mechanism=attention,
            attention_layer_size=2 * self._hidden_size,
            initial_cell_state=self._context)

        cell = rnn.MultiRNNCell([
            attentive_cell,
            self._rnn_cell(self.context_state_size, activation=tf.nn.tanh),
        ])

        initial_state = tuple(
            [attentive_cell.zero_state(batch_size, tf.float32), self._context])

        return cell, initial_state