    def build_decoder_cell(self, encoder_outputs, encoder_state):
        """Build all layers of the decoder cell (RNN cell + attention wrapper)."""
        sequence_length = self.encoder_inputs_length
        if self.mode == 'decode':
            # When decoding with beam search, tile the encoder outputs, state and lengths
            # so that each batch entry is replicated beam_width times
            encoder_outputs = tf.contrib.seq2seq.tile_batch(encoder_outputs, multiplier=pm.beam_width)
            encoder_state = tf.contrib.seq2seq.tile_batch(encoder_state, multiplier=pm.beam_width)
            sequence_length = tf.contrib.seq2seq.tile_batch(sequence_length, multiplier=pm.beam_width)

        cell = tf.contrib.rnn.LSTMCell(pm.hidden_size * 2)

        if self.mode == 'train':
            cell = DropoutWrapper(cell, dtype=tf.float32, output_keep_prob=pm.keep_prob)
        else:
            cell = DropoutWrapper(cell, dtype=tf.float32, output_keep_prob=1.0)

        # 使用attention机制
        self.attention_mechanism = BahdanauAttention(num_units=pm.hidden_size, memory=encoder_outputs,
                                                     memory_sequence_length=sequence_length)

        def cell_input_fn(inputs, attention):
            # Project the concatenation of the decoder input and the attention vector
            # back to the cell's expected input size (hidden_size * 2)
            attn_projection = layers.Dense(pm.hidden_size * 2, dtype=tf.float32, use_bias=False,
                                           name='attention_cell_input_fn')
            return attn_projection(array_ops.concat([inputs, attention], -1))

        cell = AttentionWrapper(
            cell=cell,  # RNN cell instance; either a single cell or a multi-layer RNN built by stacking cells
            attention_mechanism=self.attention_mechanism,  # attention mechanism instance, here BahdanauAttention
            attention_layer_size=pm.hidden_size,
            # Controls how the final attention vector is produced: if not None, the weighted context
            # computed in _compute_attention is concatenated with the cell output and passed through
            # a linear layer, yielding a vector of size attention_layer_size
            cell_input_fn=cell_input_fn,  # how inputs are fed to the decoder cell; by default the input is concatenated with the attention from the previous step
            name='Attention_Wrapper')

        if self.mode == 'decode':
            # Start decoding from the (tiled) encoder final state: zero_state builds the
            # AttentionWrapperState and clone() swaps in the encoder state as cell_state
            decoder_initial_state = cell.zero_state(batch_size=self.batch_size * pm.beam_width,
                                                    dtype=tf.float32).clone(cell_state=encoder_state)
        else:
            decoder_initial_state = cell.zero_state(batch_size=self.batch_size,
                                                    dtype=tf.float32).clone(cell_state=encoder_state)

        return cell, decoder_initial_state
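
For context, a hedged sketch of how the returned cell and decoder_initial_state are typically consumed with tf.contrib.seq2seq; names such as pm.vocab_size, pm.max_decode_len, self.embedding, self.start_tokens, self.end_token, self.decoder_inputs_embedded and self.decoder_inputs_length are assumptions, not part of this example:

    def build_decoder(self, encoder_outputs, encoder_state):
        # Hypothetical sketch: wire the attention-wrapped cell into a training or
        # beam-search decoder.
        cell, decoder_initial_state = self.build_decoder_cell(encoder_outputs, encoder_state)
        output_layer = tf.layers.Dense(pm.vocab_size, name='decoder_output_projection')

        if self.mode == 'decode':
            decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                cell=cell,
                embedding=self.embedding,
                start_tokens=self.start_tokens,
                end_token=self.end_token,
                initial_state=decoder_initial_state,
                beam_width=pm.beam_width,
                output_layer=output_layer)
        else:
            helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=self.decoder_inputs_embedded,
                sequence_length=self.decoder_inputs_length)
            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=cell,
                helper=helper,
                initial_state=decoder_initial_state,
                output_layer=output_layer)

        outputs, final_state, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder, maximum_iterations=pm.max_decode_len)
        return outputs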
Example #2
class Encoder(object):
    """
    Object representing an RNN encoder.
    """
    def __init__(self,
                 cell_factory,
                 input_size,
                 hidden_size,
                 input_dropout=None,
                 output_dropout=None):
        """
        :param cell_factory:
        :param input_size:
        :param hidden_size:
        :return:
        """
        self.cell_factory = cell_factory
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.cell = self.cell_factory(self.hidden_size)
        if input_dropout is not None or output_dropout is not None:
            # input_dropout / output_dropout are dropout rates, so keep_prob = 1 - rate
            self.cell = DropoutWrapper(self.cell,
                                       input_keep_prob=1 - (input_dropout or 0.0),
                                       output_keep_prob=1 - (output_dropout or 0.0))
        self.state_size = self.cell.state_size

    def __call__(self, inputs, start_state, scope=None):
        """Run this RNN cell on inputs, starting from the given state.
        Args:
          inputs: list of 2D Tensors with shape [batch_size x self.input_size].
          start_state: 2D Tensor with shape [batch_size x self.state_size].
          scope: VariableScope for the created subgraph; defaults to class name.
        Returns:
          A pair containing:
          - Outputs: list of 2D Tensors with shape [batch_size x self.output_size]
          - States: list of 2D Tensors with shape [batch_size x self.state_size].
        """
        with vs.variable_scope(scope or "Encoder"):
            return rnn_encoder_factory(self.cell, inputs, start_state)

    def zero_state(self, batch_size):
        return self.cell.zero_state(batch_size, tf.float32)
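
A minimal, hypothetical usage sketch of the Encoder class above (it assumes rnn_encoder_factory is defined elsewhere in the same module and treats input_dropout / output_dropout as dropout rates):

import tensorflow as tf

encoder = Encoder(cell_factory=tf.nn.rnn_cell.GRUCell,
                  input_size=256, hidden_size=128,
                  input_dropout=0.2, output_dropout=0.2)

batch_size = 32
# ten time steps of [batch_size x input_size] inputs, as expected by __call__
inputs = [tf.placeholder(tf.float32, [batch_size, 256]) for _ in range(10)]
start_state = encoder.zero_state(batch_size)
outputs, states = encoder(inputs, start_state)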
##### RNN Units
"""

# Create a single GRU cell
encoder_fw_cell = tf.nn.rnn_cell.GRUCell(128)
# Add dropout : Dropout is applied to the hidden state output at every time step
encoder_fw_cell = DropoutWrapper(encoder_fw_cell, output_keep_prob=keep_prob)

encoder_bw_cell = tf.nn.rnn_cell.GRUCell(128)
encoder_bw_cell = DropoutWrapper(encoder_bw_cell, output_keep_prob=keep_prob)

# Unrolling of the time sequence:
# apply the encoder cells to the input sequence and unroll the computation up to
# the max sequence length
enc_outputs, enc_state = tf.nn.bidirectional_dynamic_rnn(
    encoder_fw_cell, encoder_bw_cell, input_emb,
    sequence_length=input_lens,
    initial_state_fw=encoder_fw_cell.zero_state(BATCH_SIZE, dtype=tf.float32),
    initial_state_bw=encoder_bw_cell.zero_state(BATCH_SIZE, dtype=tf.float32),
    scope='blstm5')

output = tf.concat(enc_outputs, 2)

# Create a single GRU cell
encoder_fw_cell1 = tf.nn.rnn_cell.GRUCell(128)
# Add dropout : Dropout is applied to the hidden state output at every time step
encoder_fw_cell1 = DropoutWrapper(encoder_fw_cell1, output_keep_prob=keep_prob)
encoder_bw_cell1 = tf.nn.rnn_cell.GRUCell(128)
encoder_bw_cell1 = DropoutWrapper(encoder_bw_cell1, output_keep_prob=keep_prob)

enc_outputs, enc_state = tf.nn.bidirectional_dynamic_rnn(
    encoder_fw_cell1, encoder_bw_cell1, output,
    sequence_length=input_lens, dtype=tf.float32, scope='blstm6')

enc_state = tf.concat(enc_state, 1)
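
Sketch (an assumption, not part of the original snippet): since enc_state concatenates the 128-unit forward and backward states, a 256-unit GRU decoder could consume it directly as its initial state; target_emb and target_lens are hypothetical placeholders.

decoder_cell = tf.nn.rnn_cell.GRUCell(256)
decoder_cell = DropoutWrapper(decoder_cell, output_keep_prob=keep_prob)
dec_outputs, dec_state = tf.nn.dynamic_rnn(
    decoder_cell, target_emb,
    sequence_length=target_lens,
    initial_state=enc_state,
    scope='decoder')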
Example #4
    def add_encoder(self, inputs, type_layer):
        '''Construct the RNN encoder (cell type given by config.type_cell).
        Arguments:
            - inputs: input sequence tensor fed to the RNN
            - type_layer: should be 'Context' or 'Questions'
        '''

        with tf.variable_scope(
                'Encoding-Layer',
                initializer=tf.contrib.layers.xavier_initializer()) as scope:
            reuse = type_layer == "Questions"
            initializer = tf.random_uniform_initializer(-1, 1)

            cell_fw, cell_bw = self.create_cells(self.config.nb_hidden_layers,
                                                 reuse)

            if type_layer == "Context":
                batch_size = self.config.len_context
                sequence_length = self.context_len_placeholder
            elif type_layer == "Questions":
                batch_size = self.config.len_questions
                sequence_length = self.questions_len_placeholder

            cell_fw = DropoutWrapper(cell_fw,
                                     output_keep_prob=self.dropout_placeholder)
            initial_state_fw = cell_fw.zero_state(batch_size, tf.float32)

            if self.config.hidden_bidirectional:
                cell_bw = DropoutWrapper(
                    cell_bw, output_keep_prob=self.dropout_placeholder)
                initial_state_bw = cell_bw.zero_state(batch_size, tf.float32)
                outputs, hidden_states = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw,
                    cell_bw,
                    inputs,
                    initial_state_fw=initial_state_fw,
                    initial_state_bw=initial_state_bw,
                    sequence_length=sequence_length)
            else:
                outputs, hidden_states = tf.nn.dynamic_rnn(
                    cell_fw,
                    inputs,
                    initial_state=initial_state_fw,
                    sequence_length=sequence_length)

            if self.config.output_type == "output":
                encoding_outputs = tf.transpose(outputs, [1, 0, 2])
                encoding_outputs = tf.gather(encoding_outputs,
                                             self.config.len_questions - 1)

            elif self.config.output_type == "hs":
                if self.config.hidden_bidirectional:
                    encoding_outputs = (hidden_states[0], hidden_states[1])
                    if self.config.nb_hidden_layers > 1:
                        encoding_outputs = (encoding_outputs[0][-1],
                                            encoding_outputs[1][-1])
                    if self.config.type_cell == "LSTM":
                        encoding_outputs = (encoding_outputs[0].h,
                                            encoding_outputs[1].h)
                else:
                    encoding_outputs = hidden_states
                    if self.config.nb_hidden_layers > 1:
                        encoding_outputs = encoding_outputs[-1]
                    if self.config.type_cell == "LSTM":
                        encoding_outputs = encoding_outputs.h

        return encoding_outputs
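
The create_cells helper is not shown in this example. The following is a hypothetical sketch of a compatible implementation, assuming a config field named hidden_size (not present above) and tf.contrib.rnn cells:

    def create_cells(self, nb_layers, reuse):
        # Hypothetical helper, not from the original code: build forward and backward
        # cells of the configured type, stacking them when more than one layer is requested.
        def single_cell():
            if self.config.type_cell == "LSTM":
                return tf.contrib.rnn.LSTMCell(self.config.hidden_size, reuse=reuse)
            return tf.contrib.rnn.GRUCell(self.config.hidden_size, reuse=reuse)

        def stacked():
            if nb_layers > 1:
                return tf.contrib.rnn.MultiRNNCell([single_cell() for _ in range(nb_layers)])
            return single_cell()

        return stacked(), stacked()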