Example #1
    def __build_key_memory(self):
        #print ("memory")
        key_states = []
        with variable_scope.variable_scope("EncoderRNN"):
            for i in xrange(0, self.hps.key_slots):
                if i > 0:
                    variable_scope.get_variable_scope().reuse_variables()
                (outputs, state_fw,
                 state_bw) = rnn.static_bidirectional_rnn(self.enc_cell_fw,
                                                          self.enc_cell_bw,
                                                          self.emb_key_inps[i],
                                                          dtype=tf.float32)
                key_state = array_ops.concat([state_fw, state_bw], 1)
                key_states.append(key_state)

        with variable_scope.variable_scope("key_memory"):
            key_states = [
                array_ops.reshape(e, [-1, 1, self.enc_cell_fw.output_size * 2])
                for e in key_states
            ]
            key_states = array_ops.concat(key_states, 1)
            key_states = tf.multiply(self.key_mask, key_states)

            final_state = math_ops.reduce_mean(key_states, axis=1)
            final_state = linear(final_state,
                                 self.hps.hidden_size,
                                 True,
                                 scope="key_initial")
            final_state = tf.tanh(final_state)

        return final_state, key_states
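
For orientation, here is a minimal, self-contained sketch (assuming TensorFlow 1.x and its public tf.nn.static_bidirectional_rnn API, with hypothetical sizes) of the call pattern used above: the inputs are a Python list of per-timestep [batch, dim] tensors, each output is the depth-concatenation of the forward and backward outputs, and the two final states can be concatenated along the feature axis just as the key-memory builder does.

import tensorflow as tf

batch, time_steps, input_dim, num_units = 4, 6, 8, 10

# static_bidirectional_rnn expects a length-`time_steps` list of [batch, input_dim] tensors.
inputs = tf.placeholder(tf.float32, [batch, time_steps, input_dim])
inputs_list = tf.unstack(inputs, axis=1)

# GRU cells keep a single state tensor, so the fw/bw states concatenate directly.
cell_fw = tf.nn.rnn_cell.GRUCell(num_units)
cell_bw = tf.nn.rnn_cell.GRUCell(num_units)

outputs, state_fw, state_bw = tf.nn.static_bidirectional_rnn(
    cell_fw, cell_bw, inputs_list, dtype=tf.float32)

key_state = tf.concat([state_fw, state_bw], 1)  # [batch, 2 * num_units]
print(outputs[0].shape)  # (4, 20): per-step fw/bw outputs, depth-concatenated
print(key_state.shape)   # (4, 20)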
Example #2
    def __build_encoder(self, step):

        with variable_scope.variable_scope("EncoderRNN", reuse=True):
            (outputs, enc_state_fw,
             enc_state_bw) = rnn.static_bidirectional_rnn(
                 self.enc_cell_fw,
                 self.enc_cell_bw,
                 self.emb_enc_inps[step][:self.enc_len],
                 dtype=tf.float32)

            enc_outs = outputs

        with variable_scope.variable_scope("seq2seq_Encoder"):
            enc_state = enc_state_bw
            final_state = linear(enc_state,
                                 self.hps.hidden_size,
                                 True,
                                 scope="enc_initial")
            final_state = tf.tanh(final_state)

            top_states = [
                array_ops.reshape(e, [-1, 1, self.enc_cell_fw.output_size * 2])
                for e in enc_outs
            ]
            attention_states = array_ops.concat(top_states, 1)

            final_attn_states = tf.multiply(self.enc_mask[step],
                                            attention_states)

        return final_state, final_attn_states, enc_outs
Example #3
  def testTimeReversedFusedRNN(self):
    with self.cached_session() as sess:
      initializer = init_ops.random_uniform_initializer(
          -0.01, 0.01, seed=19890213)
      fw_cell = rnn_cell.BasicRNNCell(10)
      bw_cell = rnn_cell.BasicRNNCell(10)
      batch_size = 5
      input_size = 20
      timelen = 15
      inputs = constant_op.constant(
          np.random.randn(timelen, batch_size, input_size))

      # test bi-directional rnn
      with variable_scope.variable_scope("basic", initializer=initializer):
        unpacked_inputs = array_ops.unstack(inputs)
        outputs, fw_state, bw_state = rnn.static_bidirectional_rnn(
            fw_cell, bw_cell, unpacked_inputs, dtype=dtypes.float64)
        packed_outputs = array_ops.stack(outputs)
        basic_vars = [
            v for v in variables.trainable_variables()
            if v.name.startswith("basic/")
        ]
        sess.run([variables.global_variables_initializer()])
        basic_outputs, basic_fw_state, basic_bw_state = sess.run(
            [packed_outputs, fw_state, bw_state])
        basic_grads = sess.run(gradients_impl.gradients(packed_outputs, inputs))
        basic_wgrads = sess.run(
            gradients_impl.gradients(packed_outputs, basic_vars))

      with variable_scope.variable_scope("fused", initializer=initializer):
        fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(
            rnn_cell.BasicRNNCell(10))
        fused_bw_cell = fused_rnn_cell.TimeReversedFusedRNN(
            fused_rnn_cell.FusedRNNCellAdaptor(rnn_cell.BasicRNNCell(10)))
        fw_outputs, fw_state = fused_cell(
            inputs, dtype=dtypes.float64, scope="fw")
        bw_outputs, bw_state = fused_bw_cell(
            inputs, dtype=dtypes.float64, scope="bw")
        outputs = array_ops.concat([fw_outputs, bw_outputs], 2)
        fused_vars = [
            v for v in variables.trainable_variables()
            if v.name.startswith("fused/")
        ]
        sess.run([variables.global_variables_initializer()])
        fused_outputs, fused_fw_state, fused_bw_state = sess.run(
            [outputs, fw_state, bw_state])
        fused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
        fused_wgrads = sess.run(gradients_impl.gradients(outputs, fused_vars))

      self.assertAllClose(basic_outputs, fused_outputs)
      self.assertAllClose(basic_fw_state, fused_fw_state)
      self.assertAllClose(basic_bw_state, fused_bw_state)
      self.assertAllClose(basic_grads, fused_grads)
      for basic, fused in zip(basic_wgrads, fused_wgrads):
        self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)
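
The fused path exercised by this test can be sketched on its own roughly as follows (a sketch assuming TensorFlow 1.x with tf.contrib.rnn, and hypothetical sizes): wrap a plain cell in FusedRNNCellAdaptor for the forward direction, wrap another adaptor in TimeReversedFusedRNN for the backward direction, run both over the same time-major input, and concatenate along the feature axis.

import tensorflow as tf
from tensorflow.contrib.rnn import FusedRNNCellAdaptor, TimeReversedFusedRNN

time_steps, batch, dim, units = 10, 4, 8, 16
# Fused cells consume a time-major [time, batch, dim] tensor rather than a list.
inputs = tf.placeholder(tf.float32, [time_steps, batch, dim])

fw = FusedRNNCellAdaptor(tf.nn.rnn_cell.BasicRNNCell(units))
bw = TimeReversedFusedRNN(FusedRNNCellAdaptor(tf.nn.rnn_cell.BasicRNNCell(units)))

fw_out, fw_state = fw(inputs, dtype=tf.float32, scope="fw")
bw_out, bw_state = bw(inputs, dtype=tf.float32, scope="bw")

outputs = tf.concat([fw_out, bw_out], axis=2)  # [time, batch, 2 * units]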
Example #4
    def __build_encoder_state_computer(self, emb_encoder_inputs, encoder_mask):
        with variable_scope.variable_scope(variable_scope.get_variable_scope(),
                                           reuse=None):
            with variable_scope.variable_scope("seq2seq_Encoder"):
                encoder_cell_fw = tf.nn.rnn_cell.LSTMCell(self.hidden_size)
                encoder_cell_bw = tf.nn.rnn_cell.LSTMCell(self.hidden_size)

                encoder_cell_fw = tf.nn.rnn_cell.DropoutWrapper(
                    encoder_cell_fw, output_keep_prob=self.keep_prob)
                encoder_cell_bw = tf.nn.rnn_cell.DropoutWrapper(
                    encoder_cell_bw, output_keep_prob=self.keep_prob)

                (outputs, encoder_state_fw,
                 encoder_state_bw) = rnn.static_bidirectional_rnn(
                     encoder_cell_fw,
                     encoder_cell_bw,
                     emb_encoder_inputs,
                     dtype=tf.float32)

                encoder_outputs = outputs

                encoder_state_c = encoder_state_bw[0]
                encoder_state_m = encoder_state_bw[1]

                with variable_scope.variable_scope("initial_transfor_c"):
                    final_state_c = core_rnn_cell._linear(
                        encoder_state_c, self.hidden_size, True)
                    final_state_c = tf.tanh(final_state_c)

                with variable_scope.variable_scope("initial_transfor_m"):
                    final_state_m = core_rnn_cell._linear(
                        encoder_state_m, self.hidden_size, True)
                    final_state_m = tf.tanh(final_state_m)

                final_state = tf.nn.rnn_cell.LSTMStateTuple(
                    final_state_c, final_state_m)

                # First calculate a concatenation of encoder outputs to put attention on.
                # cell.output_size is embedding_size
                top_states = [
                    array_ops.reshape(e,
                                      [-1, 1, encoder_cell_fw.output_size * 2])
                    for e in encoder_outputs
                ]

                attention_states = array_ops.concat(top_states, 1)

                final_attention_states = tf.multiply(encoder_mask,
                                                     attention_states)
                return final_state, final_attention_states
Example #5
    def __build_encoder(self, step):

        with variable_scope.variable_scope("EncoderRNN", reuse=True): #为什么reuse???
            (outputs , enc_state_fw, enc_state_bw)  = rnn.static_bidirectional_rnn(
                    self.enc_cell_fw, self.enc_cell_bw, self.emb_enc_inps[step][:self.enc_len], dtype=tf.float32) #input是长度为bucket[0]的list,每个元素都是[batch_size,dim]的tensor

            enc_outs = outputs #长度为time的list,每个元素为[batch,cell_fw.output_size + cell_bw.output_size]

        with variable_scope.variable_scope("seq2seq_Encoder"):
            enc_state = enc_state_bw # backward-direction final state
            final_state = linear(enc_state, self.hps.hidden_size, True,  scope="enc_initial")
            final_state = tf.tanh(final_state)

            top_states = [array_ops.reshape(e, [-1, 1, self.enc_cell_fw.output_size*2]) for e in enc_outs]
            attention_states = array_ops.concat(top_states, 1) #[batch_size,enc_len,self.enc_cell_fw.output_size*2]

            final_attn_states = tf.multiply(self.enc_mask[step], attention_states) # enc_mask has shape [batch_size, self.enc_len, 1]

        return final_state, final_attn_states, enc_outs
Example #6
def BiRNN(x, n_input, n_steps, n_hidden):
    # Prepare data shape to match `bidirectional_rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    # x = tf.split(0, n_steps, x)    # old code
    x = tf.split(x, n_steps, 0)      # new code

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Get lstm cell output
    outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32)
    return outputs
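
A hypothetical usage sketch for the BiRNN helper above, assuming TensorFlow 1.x and that the helper's rnn/rnn_cell aliases are already bound (for example `from tensorflow.contrib import rnn` and `rnn_cell = tf.nn.rnn_cell`); the sizes and the final dense layer are illustrative only.

import numpy as np
import tensorflow as tf

n_input, n_steps, n_hidden, batch = 28, 28, 64, 32
x = tf.placeholder(tf.float32, [None, n_steps, n_input])

outputs = BiRNN(x, n_input, n_steps, n_hidden)
# `outputs` is a length-n_steps list; each element has shape [batch, 2 * n_hidden].
logits = tf.layers.dense(outputs[-1], 10)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {x: np.random.rand(batch, n_steps, n_input).astype(np.float32)}
    print(sess.run(logits, feed_dict=feed).shape)  # (32, 10)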
Example #7
    def __build_key_memory(self):
        #print ("memory")
        key_states = []
        with variable_scope.variable_scope("EncoderRNN"):
            for i in xrange(0, self.hps.key_slots):
                if i > 0:
                    variable_scope.get_variable_scope().reuse_variables() # reuse existing variables instead of creating new ones
                (outputs, state_fw, state_bw) = rnn.static_bidirectional_rnn(
                    self.enc_cell_fw, self.enc_cell_bw, self.emb_key_inps[i], dtype=tf.float32) # emb_key_inps[i] is a list of per-step [batch_size, dim] tensors
                key_state = array_ops.concat([state_fw, state_bw], 1) # state_fw and state_bw each have shape [batch_size, hidden_size]
                key_states.append(key_state) # key_state has shape [batch_size, 2*hidden_size]
        
        with variable_scope.variable_scope("key_memory"):
            key_states = [array_ops.reshape(e, [-1, 1, self.enc_cell_fw.output_size*2]) for e in key_states]
            key_states = array_ops.concat(key_states, 1) # yields a tensor of shape [-1, key_slots, self.enc_cell_fw.output_size*2]
            key_states = tf.multiply(self.key_mask, key_states) # element-wise multiply with broadcasting

            final_state = math_ops.reduce_mean(key_states, axis=1) #tensor [-1,self.enc_cell_fw.output_size*2]
            final_state = linear(final_state, self.hps.hidden_size, True,  scope="key_initial") #[batch_size,hidden_size]
            final_state = tf.tanh(final_state)

        return final_state, key_states
Example #8
    def build_model(self):
        with tf.device('/gpu:0'):
            with tf.variable_scope('deepcas') as scope:
                with tf.variable_scope('embedding'):
                    x_vector = tf.nn.dropout(
                        tf.nn.embedding_lookup(self.embedding, self.x),
                        self.dropout_prob)
                    # (batch_size, n_sequences, n_steps, n_input)

                with tf.variable_scope('BiGRU'):
                    x_vector = tf.transpose(x_vector, [1, 0, 2, 3])
                    # (n_sequences, batch_size, n_steps, n_input)
                    x_vector = tf.reshape(x_vector,
                                          [-1, self.n_steps, self.n_input])
                    # (n_sequences*batch_size, n_steps, n_input)

                    x_vector = tf.transpose(x_vector, [1, 0, 2])
                    # (n_steps, n_sequences*batch_size, n_input)
                    x_vector = tf.reshape(x_vector, [-1, self.n_input])
                    # (n_steps*n_sequences*batch_size, n_input)

                    # Split to get a list of 'n_steps' tensors of shape (n_sequences*batch_size, n_input)
                    x_vector = tf.split(x_vector, self.n_steps, 0)
                    outputs, _, _ = rnn.static_bidirectional_rnn(
                        self.gru_fw_cell,
                        self.gru_bw_cell,
                        x_vector,
                        dtype=tf.float32)
                    hidden_states = tf.transpose(tf.stack(outputs), [1, 0, 2])
                    # (n_sequences*batch_size, n_steps, 2*n_hidden_gru)
                    hidden_states = tf.transpose(
                        tf.reshape(hidden_states, [
                            self.n_sequences, -1, self.n_steps,
                            2 * self.n_hidden_gru
                        ]), [1, 0, 2, 3])
                    # (batch_size, n_sequences, n_steps, 2*n_hidden_gru)

                with tf.variable_scope('attention'):
                    # attention over sequence steps
                    attention_step = tf.nn.softmax(self.p_step)
                    attention_step = tf.transpose(attention_step, [1, 0])
                    attention_result = batched_scalar_mul(
                        attention_step, hidden_states)
                    # (batch_size, n_sequences, n_steps, 2*n_hidden_gru)

                    # attention over sequence batches
                    p_geo = tf.sigmoid(self.a_geo)
                    attention_batch = tf.pow(
                        tf.multiply(p_geo, tf.ones_like(self.sz)),
                        tf.div(1.0 + tf.log(self.sz), tf.log(2.0)))

                    attention_batch_seq = tf.tile(
                        attention_batch, [1, self.sequence_batch_size])
                    for i in range(
                            1,
                            int(self.n_sequences / self.sequence_batch_size)):
                        attention_batch_seq = tf.concat([
                            attention_batch_seq,
                            tf.tile(
                                tf.pow(1 - attention_batch, i) *
                                attention_batch, [1, self.sequence_batch_size])
                        ], 1)
                    attention_batch_lin = tf.reshape(attention_batch_seq,
                                                     [-1, 1])

                    shape = attention_result.get_shape()
                    shape = [-1, int(shape[1]), int(shape[2]), int(shape[3])]
                    attention_result_t = tf.multiply(
                        attention_batch_lin,
                        tf.reshape(attention_result,
                                   [-1, shape[2] * shape[3]]))
                    attention_result = tf.reshape(
                        attention_result_t, [-1, shape[1], shape[2], shape[3]])
                    hidden_graph = tf.reduce_sum(attention_result,
                                                 reduction_indices=[1, 2])

                with tf.variable_scope('dense'):
                    dense1 = self.activation(
                        tf.add(tf.matmul(hidden_graph, self.weights['dense1']),
                               self.biases['dense1']))
                    dense2 = self.activation(
                        tf.add(tf.matmul(dense1, self.weights['dense2']),
                               self.biases['dense2']))
                    pred = self.activation(
                        tf.add(tf.matmul(dense2, self.weights['out']),
                               self.biases['out']))

                return pred
Example #9
    def _build(self, incoming, *args, **kwargs):
        """
        Args:
            incoming: `Tensor`. 3-D Tensor Layer [samples, timesteps, input dim].
        """
        assert (self.rnncell_fw.output_size == self.rnncell_bw.output_size
                ), "RNN Cells number of units must match!"
        input_shape = get_shape(incoming)

        # TODO: DropoutWrapper

        inference = incoming
        # If a tensor given, convert it to a per timestep list
        if type(inference) not in [list, np.array] and not self.dynamic:
            ndim = len(input_shape)
            assert ndim >= 3, 'Input dim should be at least 3.'
            axes = [1, 0] + list(xrange(2, ndim))
            inference = tf.transpose(inference, axes)
            inference = tf.unstack(inference)

        sequence_length = None
        if self.dynamic:
            sequence_length = retrieve_seq_length_op(incoming if isinstance(
                incoming, tf.Tensor) else tf.stack(incoming))
            outputs, states_fw, states_bw = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=self.rnncell_fw,
                cell_bw=self.rnncell_bw,
                inputs=inference,
                initial_state_fw=self.initial_state_fw,
                initial_state_bw=self.initial_state_bw,
                sequence_length=sequence_length,
                dtype=tf.float32)
        else:
            outputs, states_fw, states_bw = rnn.static_bidirectional_rnn(
                cell_fw=self.rnncell_fw,
                cell_bw=self.rnncell_bw,
                inputs=inference,
                initial_state_fw=self.initial_state_fw,
                initial_state_bw=self.initial_state_bw,
                dtype=tf.float32)

        for v in [
                self.rnncell_fw.w, self.rnncell_fw.b, self.rnncell_bw.w,
                self.rnncell_bw.b
        ]:
            if hasattr(v, '__len__'):
                for var in v:
                    track(var, tf.GraphKeys.LAYER_VARIABLES, self.module_name)
            else:
                track(v, tf.GraphKeys.LAYER_VARIABLES, self.module_name)

        tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, outputs[-1])

        if self.dynamic:
            if self.return_seq:
                o = outputs
            else:
                o = get_sequence_relevant_output(outputs, sequence_length)
        else:
            o = outputs if self.return_seq else outputs[-1]

        track(o, tf.GraphKeys.LAYER_TENSOR, self.module_name)

        return (o, states_fw, states_bw) if self.return_states else o
Example #10
def generate_embedding_RNN_output(encoder_inputs,
                                  cell,
                                  num_encoder_symbols,
                                  word_embedding_size,
                                  num_heads=1,
                                  dtype=dtypes.float32,
                                  scope=None,
                                  initial_state_attention=False,
                                  sequence_length=None,
                                  bidirectional_rnn=False):
    """
  Generate RNN state outputs with word embeddings as inputs
      - Note that this example code does not include output label dependency modeling.
      One may add a loop function as in the rnn_decoder function in tf seq2seq.py 
      example to feed emitted label embedding back to RNN state.
  """
    with variable_scope.variable_scope(scope
                                       or "generate_embedding_RNN_output"):
        if bidirectional_rnn:
            encoder_cell_fw = cell
            encoder_cell_bw = cell
            embedding = variable_scope.get_variable(
                "embedding", [num_encoder_symbols, word_embedding_size])
            encoder_embedded_inputs = list()
            encoder_embedded_inputs = [
                embedding_ops.embedding_lookup(embedding, encoder_input)
                for encoder_input in encoder_inputs
            ]
            #encoder_outputs, encoder_state_fw, encoder_state_bw = rnn.bidirectional_rnn(
            encoder_outputs, encoder_state_fw, encoder_state_bw = rnn.static_bidirectional_rnn(
                encoder_cell_fw,
                encoder_cell_bw,
                encoder_embedded_inputs,
                sequence_length=sequence_length,
                dtype=dtype)
            encoder_state = array_ops.concat([
                array_ops.concat(encoder_state_fw, 1),
                array_ops.concat(encoder_state_bw, 1)
            ], 1)
            top_states = [
                array_ops.reshape(e, [-1, 1, cell.output_size * 2])
                for e in encoder_outputs
            ]
            attention_states = array_ops.concat(top_states, 1)
        else:
            encoder_cell = cell
            embedding = variable_scope.get_variable(
                "embedding", [num_encoder_symbols, word_embedding_size])
            encoder_embedded_inputs = list()
            encoder_embedded_inputs = [
                embedding_ops.embedding_lookup(embedding, encoder_input)
                for encoder_input in encoder_inputs
            ]
            encoder_outputs, encoder_state = rnn.static_rnn(
                encoder_cell,
                encoder_embedded_inputs,
                sequence_length=sequence_length,
                dtype=dtype)
            encoder_state = array_ops.concat(encoder_state, 1)
            top_states = [
                array_ops.reshape(e, [-1, 1, cell.output_size])
                for e in encoder_outputs
            ]
            attention_states = array_ops.concat(top_states, 1)

        return encoder_outputs, encoder_state, attention_states
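
The embedding-plus-encoder pattern above can be reduced to the following self-contained sketch (TensorFlow 1.x assumed, with hypothetical sizes): look up embeddings for integer token ids, unstack them into a per-timestep list, run the bidirectional encoder, and stack the depth-concatenated outputs into the [batch, time, 2*units] attention states.

import tensorflow as tf

vocab_size, emb_size, units, batch, time_steps = 1000, 64, 128, 8, 12
token_ids = tf.placeholder(tf.int32, [batch, time_steps])

embedding = tf.get_variable("embedding", [vocab_size, emb_size])
emb_inputs = tf.unstack(tf.nn.embedding_lookup(embedding, token_ids), axis=1)

cell_fw = tf.nn.rnn_cell.GRUCell(units)
cell_bw = tf.nn.rnn_cell.GRUCell(units)
outputs, state_fw, state_bw = tf.nn.static_bidirectional_rnn(
    cell_fw, cell_bw, emb_inputs, dtype=tf.float32)

# [batch, time_steps, 2 * units], ready for an attention mechanism to read from.
attention_states = tf.stack(outputs, axis=1)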
Example #11
    def _build(self, incoming, *args, **kwargs):
        """
        Args:
            incoming: `Tensor`. 3-D Tensor Layer [samples, timesteps, input dim].
        """
        assert (self.rnncell_fw.output_size ==
                self.rnncell_bw.output_size), "RNN Cells number of units must match!"

        sequence_length = kwargs.get('sequence_length')
        if self.dynamic and sequence_length is None:
            sequence_length = retrieve_seq_length_op(
                incoming if isinstance(incoming, tf.Tensor) else tf.stack(incoming))

        input_shape = get_shape(incoming)

        # TODO: DropoutWrapper

        inference = incoming
        # If a static rnn and tensor given, convert it to a per timestep list
        if type(inference) not in [list, np.array] and not self.dynamic:
            ndim = len(input_shape)
            assert ndim >= 3, 'Input dim should be at least 3.'
            axes = [1, 0] + list(xrange(2, ndim))
            inference = tf.transpose(inference, axes)
            inference = tf.unstack(value=inference)

        if self.dynamic:
            # outputs are a tuple of (fw, bw) outputs
            outputs, (states_fw, states_bw) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=self.rnncell_fw, cell_bw=self.rnncell_bw, inputs=inference,
                initial_state_fw=self.initial_state_fw,
                initial_state_bw=self.initial_state_bw,
                sequence_length=sequence_length,
                dtype=tf.float32)
        else:
            # outputs are a concatenation of both bw and fw outputs
            outputs, states_fw, states_bw = rnn.static_bidirectional_rnn(
                cell_fw=self.rnncell_fw, cell_bw=self.rnncell_bw, inputs=inference,
                initial_state_fw=self.initial_state_fw,
                initial_state_bw=self.initial_state_bw,
                sequence_length=sequence_length,
                dtype=tf.float32)

        for v in [self.rnncell_fw.w, self.rnncell_fw.b, self.rnncell_bw.w, self.rnncell_bw.b]:
            if hasattr(v, '__len__'):
                for var in v:
                    track(var, tf.GraphKeys.LAYER_VARIABLES, self.module_name)
            else:
                track(v, tf.GraphKeys.LAYER_VARIABLES, self.module_name)

        if self.dynamic:
            tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, outputs[0][-1])
        else:
            tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, outputs[-1])

        if self.dynamic:
            if self.return_seq:
                o = outputs
            else:
                # we are only interested in the fw pass here
                o = get_sequence_relevant_output(outputs[0], sequence_length)
        else:
            o = outputs if self.return_seq else outputs[-1]

        track(o, tf.GraphKeys.LAYER_TENSOR, self.module_name)

        return (o, states_fw, states_bw) if self.return_states else o
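
The two branches above differ in their return conventions, which the following sketch isolates (TensorFlow 1.x assumed, hypothetical sizes): tf.nn.bidirectional_dynamic_rnn returns a (forward, backward) tuple of [batch, time, units] tensors that you concatenate yourself, while static_bidirectional_rnn returns a per-timestep list whose elements are already depth-concatenated.

import tensorflow as tf

batch, time_steps, dim, units = 2, 4, 3, 5
x = tf.placeholder(tf.float32, [batch, time_steps, dim])

with tf.variable_scope("dynamic"):
    (out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
        tf.nn.rnn_cell.GRUCell(units), tf.nn.rnn_cell.GRUCell(units),
        x, dtype=tf.float32)
    dynamic_out = tf.concat([out_fw, out_bw], axis=2)   # [batch, time, 2*units]

with tf.variable_scope("static"):
    outputs, _, _ = tf.nn.static_bidirectional_rnn(
        tf.nn.rnn_cell.GRUCell(units), tf.nn.rnn_cell.GRUCell(units),
        tf.unstack(x, axis=1), dtype=tf.float32)
    static_out = tf.stack(outputs, axis=1)              # [batch, time, 2*units]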
Example #12
    def __init__(self,
                 sequence_length,
                 num_classes,
                 text_vocab_size,
                 text_embedding_size,
                 pos_vocab_size,
                 pos_embedding_size,
                 filter_sizes,
                 num_filters,
                 l2_reg_lambda=0.0):

        # Placeholders for input, output and dropout
        self.input_text = tf.placeholder(tf.int32,
                                         shape=[None, sequence_length],
                                         name='input_text')
        self.input_p1 = tf.placeholder(tf.int32,
                                       shape=[None, sequence_length],
                                       name='input_p1')
        self.input_p2 = tf.placeholder(tf.int32,
                                       shape=[None, sequence_length],
                                       name='input_p2')
        self.input_y = tf.placeholder(tf.float32,
                                      shape=[None, num_classes],
                                      name='input_y')
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name='dropout_keep_prob')

        initializer = tf.keras.initializers.glorot_normal

        # Embedding layer
        with tf.device('/cpu:0'), tf.variable_scope("text-embedding"):
            self.W_text = tf.Variable(tf.random_uniform(
                [text_vocab_size, text_embedding_size], -0.25, 0.25),
                                      name="W_text")
            self.text_embedded_chars = tf.nn.embedding_lookup(
                self.W_text, self.input_text)
            self.text_embedded_chars_expanded = tf.expand_dims(
                self.text_embedded_chars, -1)

        with tf.device('/cpu:0'), tf.variable_scope("position-embedding"):
            self.W_pos = tf.get_variable("W_pos",
                                         [pos_vocab_size, pos_embedding_size],
                                         initializer=initializer())
            self.p1_embedded_chars = tf.nn.embedding_lookup(
                self.W_pos, self.input_p1)
            self.p2_embedded_chars = tf.nn.embedding_lookup(
                self.W_pos, self.input_p2)
            self.p1_embedded_chars_expanded = tf.expand_dims(
                self.p1_embedded_chars, -1)
            self.p2_embedded_chars_expanded = tf.expand_dims(
                self.p2_embedded_chars, -1)

        self.embedded_chars_expanded = tf.concat([
            self.text_embedded_chars_expanded, self.p1_embedded_chars_expanded,
            self.p2_embedded_chars_expanded
        ], 2)
        _embedding_size = text_embedding_size + 2 * pos_embedding_size

        hidden_size = 128
        num_layer = 1
        with tf.variable_scope('input_encode'):

            def create_cell():
                if self.dropout_keep_prob < 1.0:
                    single_cell = lambda: BasicLSTMCell(hidden_size)
                    hidden = MultiRNNCell(
                        [single_cell() for _ in range(num_layer)])
                    hidden = DropoutWrapper(
                        hidden,
                        input_keep_prob=self.dropout_keep_prob,
                        output_keep_prob=self.dropout_keep_prob)
                else:
                    single_cell = lambda: BasicLSTMCell(hidden_size)
                    hidden = MultiRNNCell(
                        [single_cell() for _ in range(num_layer)])
                return hidden

            self.init_hidden_fw = create_cell()
            self.init_hidden_bw = create_cell()

            outputs, hidden_fw, hidden_bw = static_bidirectional_rnn(
                self.init_hidden_fw,
                self.init_hidden_bw,
                self.embedded_chars_expanded,
                sequence_length=self.seq_length,
                dtype=tf.float32)  # outputs [(,256),..,(,256)]

            # get last layer state
            last_hidden_fw = hidden_fw[-1]  # (c, h) ((,128), (,128))
            last_hidden_bw = hidden_bw[-1]  # (c, h)
            self.last_hidden_state = tf.concat(
                [tf.concat(last_hidden_fw, 1),
                 tf.concat(last_hidden_bw, 1)], 1)  # (, 4*128)

            self.all_hidden_state = [
                tf.reshape(o, [
                    -1, 1, self.init_hidden_fw.output_size +
                    self.init_hidden_bw.output_size
                ]) for o in outputs
            ]  # [(,1,256),...(,1,256)]
            self.all_hidden_state = tf.concat(self.all_hidden_state,
                                              1)  # (,30,256)

        with tf.variable_scope("decode_output"):
            batch_size = self.all_hidden_state.get_shape()[0]
            seq_length = self.all_hidden_state.get_shape()[1]
            att_size = self.all_hidden_state.get_shape()[2]

            source_hidden = tf.reshape(
                self.all_hidden_state,
                [-1, seq_length, 1, att_size])  # (B,30,1,256)
            attn_weight_list = []
            context_vec_list = []  # Results of attention reads will be stored here.

            for i in range(self.num_head):
                k = tf.get_variable("AttnK_%d" % i, [1, 1, att_size, att_size])
                v = tf.get_variable("AttnV_%d" % i, [att_size])
                conv_source_hidden = tf.nn.conv2d(source_hidden, k,
                                                  [1, 1, 1, 1],
                                                  "SAME")  # (B,30,1,256)

                with tf.variable_scope("Attention_%d" % i):
                    query = tf.layers.dense(self.last_hidden_state,
                                            att_size)  # (B, 256)
                    query = tf.reshape(query,
                                       [-1, 1, 1, att_size])  # (B,1,1,256)

                    # Attention mask is a softmax of v^T * tanh(...).
                    score = v * tf.tanh(conv_source_hidden + query)
                    s = tf.reduce_sum(score, [2, 3])  # (B, 30)

                    att_weight = tf.nn.softmax(s)
                    attn_weight_list.append(att_weight)

                    # Now calculate the attention-weighted context vector.
                    context_vec = tf.reduce_sum(
                        tf.reshape(att_weight, [-1, seq_length, 1, 1]) *
                        source_hidden, [1, 2])  # (B,256)
                    context_vec_list.append(
                        tf.reshape(context_vec, [-1, att_size]))

            matrix = tf.get_variable("Out_Matrix",
                                     [att_size, self.num_class])  # (256,31)
            res = tf.matmul(
                context_vec_list[0],
                matrix)  # NOTE: here we temporarily assume num_head = 1

            bias_start = 0.0
            bias_term = tf.get_variable(
                "Out_Bias", [self.num_class],
                initializer=tf.constant_initializer(bias_start))
            self.decode_output = [res + bias_term]  # (B,32)
            self.att_weight = attn_weight_list[0]

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.variable_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                conv = tf.layers.conv2d(self.embedded_chars_expanded,
                                        num_filters,
                                        [filter_size, _embedding_size],
                                        kernel_initializer=initializer(),
                                        activation=tf.nn.relu,
                                        name="conv")
                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    conv,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(pooled_outputs, 3)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Add dropout
        with tf.variable_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat,
                                        self.dropout_keep_prob)

        # Final scores and predictions
        with tf.variable_scope("output"):
            self.logits = tf.layers.dense(self.h_drop,
                                          num_classes,
                                          kernel_initializer=initializer())
            self.predictions = tf.argmax(self.logits, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.variable_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=self.logits, labels=self.input_y)
            self.l2 = tf.add_n(
                [tf.nn.l2_loss(v) for v in tf.trainable_variables()])
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * self.l2

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   tf.float32),
                                           name="accuracy")
Example #13
# Define lstm cells with tensorflow
# Forward direction cell
lstm_fw_cell = rnn_cell.BasicLSTMCell(rnn_size,
                                      forget_bias=1.0,
                                      state_is_tuple=True)
# Backward direction cell
lstm_bw_cell = rnn_cell.BasicLSTMCell(rnn_size,
                                      forget_bias=1.0,
                                      state_is_tuple=True)

#lstm_cell = rnn_cell.BasicLSTMCell(rnn_size,state_is_tuple=True)
try:
    outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell,
                                                 lstm_bw_cell,
                                                 x_in,
                                                 dtype=tf.float32)
except Exception:  # Old TensorFlow version only returns outputs not states
    outputs = rnn.static_bidirectional_rnn(lstm_fw_cell,
                                           lstm_bw_cell,
                                           x_in,
                                           dtype=tf.float32)

output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']

prediction = output
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer().minimize(cost)
Example #14
    def _testSingleLayerBidirectionalLSTMHelper(self, input_size, num_units,
                                                seq_length, batch_size):
        # Only tests single layer bi-Cudnn LSTM.
        num_layers = 1
        np.random.seed(1234)

        # canonical bidirectional lstm
        param_size = _MinLSTMParamSize(
            num_layers,
            num_units,
            input_size,
            direction=cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION)
        # np data
        input_data = np.random.randn(seq_length, batch_size,
                                     input_size).astype(np.float32)
        input_h = np.zeros(
            (num_layers * 2, batch_size, num_units)).astype(np.float32)
        input_c = np.zeros(
            (num_layers * 2, batch_size, num_units)).astype(np.float32)
        cudnn_params = np.random.randn(param_size).astype(np.float32)

        with ops.Graph().as_default():
            # cudnn bidirectional lstm graph
            cudnn_params_t = variables.Variable(cudnn_params)
            input_data_t = constant_op.constant(input_data,
                                                dtype=dtypes.float32)
            input_h_t = constant_op.constant(input_h, dtype=dtypes.float32)
            input_c_t = constant_op.constant(input_c, dtype=dtypes.float32)

            cudnn_lstm = _CreateModel(
                "lstm",
                num_layers,
                num_units,
                input_size,
                direction=cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION)
            cudnn_output, cudnn_output_h, cudnn_output_c = cudnn_lstm(
                input_data=input_data_t,
                input_h=input_h_t,
                input_c=input_c_t,
                params=cudnn_params_t)

            # canonical bidirectional lstm
            cell_fw = rnn_cell_impl.LSTMCell(num_units, forget_bias=0.)
            cell_bw = rnn_cell_impl.LSTMCell(num_units, forget_bias=0.)
            outputs, output_state_fw, output_state_bw = static_bidirectional_rnn(
                cell_fw,
                cell_bw,
                array_ops.unstack(input_data),
                dtype=dtypes.float32)

            weights_list, biases_list = _TransformBidirectionalCudnnLSTMParams(
                cudnn_lstm, cudnn_params_t)
            assert len(weights_list) == 2
            assert len(biases_list) == 2

            with vs.variable_scope("", reuse=True):
                cell_fw_kernel = vs.get_variable(
                    "bidirectional_rnn/fw/lstm_cell/kernel")
                cell_fw_bias = vs.get_variable(
                    "bidirectional_rnn/fw/lstm_cell/bias")
                cell_bw_kernel = vs.get_variable(
                    "bidirectional_rnn/bw/lstm_cell/kernel")
                cell_bw_bias = vs.get_variable(
                    "bidirectional_rnn/bw/lstm_cell/bias")

            assign_fw_kernel = state_ops.assign(cell_fw_kernel,
                                                weights_list[0])
            assign_fw_bias = state_ops.assign(cell_fw_bias, biases_list[0])

            assign_bw_kernel = state_ops.assign(cell_bw_kernel,
                                                weights_list[1])
            assign_bw_bias = state_ops.assign(cell_bw_bias, biases_list[1])
            assign_ops = control_flow_ops.group(assign_fw_kernel,
                                                assign_fw_bias,
                                                assign_bw_kernel,
                                                assign_bw_bias)

            with self.test_session(use_gpu=True,
                                   graph=ops.get_default_graph()) as sess:
                sess.run(variables.global_variables_initializer())
                cu_out, cu_h, cu_c = sess.run(
                    [cudnn_output, cudnn_output_h, cudnn_output_c])

                sess.run(assign_ops)
                out, fwd_s, bak_s = sess.run(
                    [outputs, output_state_fw, output_state_bw])

                out = np.stack(out)
                fwd_h, fwd_c = fwd_s.h, fwd_s.c
                bak_h, bak_c = bak_s.h, bak_s.c
                h = np.concatenate((fwd_h, bak_h), axis=1)
                c = np.concatenate((fwd_c, bak_c), axis=1)

                cu_h = [np.array(x) for x in cu_h]
                cu_c = [np.array(x) for x in cu_c]

                cu_h = np.concatenate(cu_h, axis=1)
                cu_c = np.concatenate(cu_c, axis=1)

                self.assertAllClose(out, cu_out)
                self.assertAllClose(h, cu_h)
                self.assertAllClose(c, cu_c)
Example #15
def stack_bidirectional_rnn(cells_fw,
                            cells_bw,
                            inputs,
                            initial_states_fw=None,
                            initial_states_bw=None,
                            dtype=None,
                            sequence_length=None,
                            scope=None):
  """Creates a bidirectional recurrent neural network.

  Stacks several bidirectional rnn layers. The combined forward and backward
  layer outputs are used as the input of the next layer. tf.bidirectional_rnn
  does not allow sharing forward and backward information between layers.
  The input_size of the first forward and backward cells must match.
  The initial state for both directions is zero and no intermediate states
  are returned.

  As described in https://arxiv.org/abs/1303.5778

  Args:
    cells_fw: List of instances of RNNCell, one per layer,
      to be used for forward direction.
    cells_bw: List of instances of RNNCell, one per layer,
      to be used for backward direction.
    inputs: A length T list of inputs, each a tensor of shape
      [batch_size, input_size], or a nested tuple of such elements.
    initial_states_fw: (optional) A list of the initial states (one per layer)
      for the forward RNN.
      Each tensor must have an appropriate type and shape
      `[batch_size, cell_fw.state_size]`.
    initial_states_bw: (optional) Same as for `initial_states_fw`, but using
      the corresponding properties of `cells_bw`.
    dtype: (optional) The data type for the initial state.  Required if
      either of the initial states are not provided.
    sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
      containing the actual lengths for each of the sequences.
    scope: VariableScope for the created subgraph; defaults to None.

  Returns:
    A tuple (outputs, output_state_fw, output_state_bw) where:
      outputs is a length `T` list of outputs (one for each input), which
        are depth-concatenated forward and backward outputs.
      output_states_fw is the final states, one tensor per layer,
        of the forward rnn.
      output_states_bw is the final states, one tensor per layer,
        of the backward rnn.

  Raises:
    TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
    ValueError: If inputs is None, not a list or an empty list.
  """
  if not cells_fw:
    raise ValueError("Must specify at least one fw cell for BidirectionalRNN.")
  if not cells_bw:
    raise ValueError("Must specify at least one bw cell for BidirectionalRNN.")
  if not isinstance(cells_fw, list):
    raise ValueError("cells_fw must be a list of RNNCells (one per layer).")
  if not isinstance(cells_bw, list):
    raise ValueError("cells_bw must be a list of RNNCells (one per layer).")
  if len(cells_fw) != len(cells_bw):
    raise ValueError("Forward and Backward cells must have the same depth.")
  if (initial_states_fw is not None and
      (not isinstance(initial_states_fw, list) or
       len(initial_states_fw) != len(cells_fw))):
    raise ValueError(
        "initial_states_fw must be a list of state tensors (one per layer).")
  if (initial_states_bw is not None and
      (not isinstance(initial_states_bw, list) or
       len(initial_states_bw) != len(cells_bw))):
    raise ValueError(
        "initial_states_bw must be a list of state tensors (one per layer).")
  states_fw = []
  states_bw = []
  prev_layer = inputs

  with vs.variable_scope(scope or "stack_bidirectional_rnn"):
    for i, (cell_fw, cell_bw) in enumerate(zip(cells_fw, cells_bw)):
      initial_state_fw = None
      initial_state_bw = None
      if initial_states_fw:
        initial_state_fw = initial_states_fw[i]
      if initial_states_bw:
        initial_state_bw = initial_states_bw[i]

      with vs.variable_scope("cell_%d" % i) as cell_scope:
        prev_layer, state_fw, state_bw = rnn.static_bidirectional_rnn(
            cell_fw,
            cell_bw,
            prev_layer,
            initial_state_fw=initial_state_fw,
            initial_state_bw=initial_state_bw,
            sequence_length=sequence_length,
            dtype=dtype,
            scope=cell_scope)
      states_fw.append(state_fw)
      states_bw.append(state_bw)

  return prev_layer, tuple(states_fw), tuple(states_bw)
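
A small usage sketch for the stacked bidirectional RNN above (TensorFlow 1.x assumed, where the same function ships as tf.contrib.rnn.stack_bidirectional_rnn; the layer sizes are hypothetical): each layer consumes the depth-concatenated outputs of the previous one, and the returned outputs carry the concatenated forward/backward features of the last layer.

import tensorflow as tf

time_steps, batch, input_dim = 5, 3, 7
inputs = [tf.placeholder(tf.float32, [batch, input_dim]) for _ in range(time_steps)]

cells_fw = [tf.nn.rnn_cell.LSTMCell(16), tf.nn.rnn_cell.LSTMCell(32)]
cells_bw = [tf.nn.rnn_cell.LSTMCell(16), tf.nn.rnn_cell.LSTMCell(32)]

outputs, states_fw, states_bw = tf.contrib.rnn.stack_bidirectional_rnn(
    cells_fw, cells_bw, inputs, dtype=tf.float32)

# Last layer's fw/bw outputs, depth-concatenated per timestep.
print(outputs[0].shape)  # (3, 64)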
Example #16
def stack_bidirectional_rnn(cells_fw,
                            cells_bw,
                            inputs,
                            initial_states_fw=None,
                            initial_states_bw=None,
                            dtype=None,
                            sequence_length=None,
                            scope=None):
    """Creates a bidirectional recurrent neural network.

  Stacks several bidirectional rnn layers. The combined forward and backward
  layer outputs are used as the input of the next layer. tf.bidirectional_rnn
  does not allow sharing forward and backward information between layers.
  The input_size of the first forward and backward cells must match.
  The initial state for both directions is zero and no intermediate states
  are returned.

  As described in https://arxiv.org/abs/1303.5778

  Args:
    cells_fw: List of instances of RNNCell, one per layer,
      to be used for forward direction.
    cells_bw: List of instances of RNNCell, one per layer,
      to be used for backward direction.
    inputs: A length T list of inputs, each a tensor of shape
      [batch_size, input_size], or a nested tuple of such elements.
    initial_states_fw: (optional) A list of the initial states (one per layer)
      for the forward RNN.
      Each tensor must have an appropriate type and shape
      `[batch_size, cell_fw.state_size]`.
    initial_states_bw: (optional) Same as for `initial_states_fw`, but using
      the corresponding properties of `cells_bw`.
    dtype: (optional) The data type for the initial state.  Required if
      either of the initial states are not provided.
    sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
      containing the actual lengths for each of the sequences.
    scope: VariableScope for the created subgraph; defaults to None.

  Returns:
    A tuple (outputs, output_state_fw, output_state_bw) where:
      outputs is a length `T` list of outputs (one for each input), which
        are depth-concatenated forward and backward outputs.
      output_states_fw is the final states, one tensor per layer,
        of the forward rnn.
      output_states_bw is the final states, one tensor per layer,
        of the backward rnn.

  Raises:
    TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
    ValueError: If inputs is None, not a list or an empty list.
  """
    if not cells_fw:
        raise ValueError(
            "Must specify at least one fw cell for BidirectionalRNN.")
    if not cells_bw:
        raise ValueError(
            "Must specify at least one bw cell for BidirectionalRNN.")
    if not isinstance(cells_fw, list):
        raise ValueError(
            "cells_fw must be a list of RNNCells (one per layer).")
    if not isinstance(cells_bw, list):
        raise ValueError(
            "cells_bw must be a list of RNNCells (one per layer).")
    if len(cells_fw) != len(cells_bw):
        raise ValueError(
            "Forward and Backward cells must have the same depth.")
    if (initial_states_fw is not None
            and (not isinstance(initial_states_fw, list)
                 or len(initial_states_fw) != len(cells_fw))):
        raise ValueError(
            "initial_states_fw must be a list of state tensors (one per layer)."
        )
    if (initial_states_bw is not None
            and (not isinstance(initial_states_bw, list)
                 or len(initial_states_bw) != len(cells_bw))):
        raise ValueError(
            "initial_states_bw must be a list of state tensors (one per layer)."
        )
    states_fw = []
    states_bw = []
    prev_layer = inputs

    with vs.variable_scope(scope or "stack_bidirectional_rnn"):
        for i, (cell_fw, cell_bw) in enumerate(zip(cells_fw, cells_bw)):
            initial_state_fw = None
            initial_state_bw = None
            if initial_states_fw:
                initial_state_fw = initial_states_fw[i]
            if initial_states_bw:
                initial_state_bw = initial_states_bw[i]

            with vs.variable_scope("cell_%d" % i) as cell_scope:
                prev_layer, state_fw, state_bw = rnn.static_bidirectional_rnn(
                    cell_fw,
                    cell_bw,
                    prev_layer,
                    initial_state_fw=initial_state_fw,
                    initial_state_bw=initial_state_bw,
                    sequence_length=sequence_length,
                    dtype=dtype,
                    scope=cell_scope)
            states_fw.append(state_fw)
            states_bw.append(state_bw)

    return prev_layer, tuple(states_fw), tuple(states_bw)
Example #17
  def _testSingleLayerBidirectionalLSTMHelper(self, input_size, num_units,
                                              seq_length, batch_size):
    # Only tests single layer bi-Cudnn LSTM.
    num_layers = 1
    np.random.seed(1234)

    # canonical bidirectional lstm
    param_size = _MinLSTMParamSize(
        num_layers,
        num_units,
        input_size,
        direction=cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION)
    # np data
    input_data = np.random.randn(seq_length, batch_size,
                                 input_size).astype(np.float32)
    input_h = np.zeros((num_layers * 2, batch_size,
                        num_units)).astype(np.float32)
    input_c = np.zeros((num_layers * 2, batch_size,
                        num_units)).astype(np.float32)
    cudnn_params = np.random.randn(param_size).astype(np.float32)

    with ops.Graph().as_default():
      # cudnn bidirectional lstm graph
      cudnn_params_t = variables.Variable(cudnn_params)
      input_data_t = constant_op.constant(input_data, dtype=dtypes.float32)
      input_h_t = constant_op.constant(input_h, dtype=dtypes.float32)
      input_c_t = constant_op.constant(input_c, dtype=dtypes.float32)

      cudnn_lstm = _CreateModel(
          "lstm",
          num_layers,
          num_units,
          input_size,
          direction=cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION)
      cudnn_output, cudnn_output_h, cudnn_output_c = cudnn_lstm(
          input_data=input_data_t,
          input_h=input_h_t,
          input_c=input_c_t,
          params=cudnn_params_t)

      # canonical bidirectional lstm
      cell_fw = rnn_cell_impl.LSTMCell(num_units, forget_bias=0.)
      cell_bw = rnn_cell_impl.LSTMCell(num_units, forget_bias=0.)
      outputs, output_state_fw, output_state_bw = static_bidirectional_rnn(
          cell_fw, cell_bw, array_ops.unstack(input_data), dtype=dtypes.float32)

      weights_list, biases_list = _TransformBidirectionalCudnnLSTMParams(
          cudnn_lstm, cudnn_params_t)
      assert len(weights_list) == 2
      assert len(biases_list) == 2

      with vs.variable_scope("", reuse=True):
        cell_fw_kernel = vs.get_variable(
            "bidirectional_rnn/fw/lstm_cell/kernel")
        cell_fw_bias = vs.get_variable("bidirectional_rnn/fw/lstm_cell/bias")
        cell_bw_kernel = vs.get_variable(
            "bidirectional_rnn/bw/lstm_cell/kernel")
        cell_bw_bias = vs.get_variable("bidirectional_rnn/bw/lstm_cell/bias")

      assign_fw_kernel = state_ops.assign(cell_fw_kernel, weights_list[0])
      assign_fw_bias = state_ops.assign(cell_fw_bias, biases_list[0])

      assign_bw_kernel = state_ops.assign(cell_bw_kernel, weights_list[1])
      assign_bw_bias = state_ops.assign(cell_bw_bias, biases_list[1])
      assign_ops = control_flow_ops.group(assign_fw_kernel, assign_fw_bias,
                                          assign_bw_kernel, assign_bw_bias)

      with self.test_session(
          use_gpu=True, graph=ops.get_default_graph()) as sess:
        sess.run(variables.global_variables_initializer())
        cu_out, cu_h, cu_c = sess.run(
            [cudnn_output, cudnn_output_h, cudnn_output_c])

        sess.run(assign_ops)
        out, fwd_s, bak_s = sess.run(
            [outputs, output_state_fw, output_state_bw])

        out = np.stack(out)
        fwd_h, fwd_c = fwd_s.h, fwd_s.c
        bak_h, bak_c = bak_s.h, bak_s.c
        h = np.concatenate((fwd_h, bak_h), axis=1)
        c = np.concatenate((fwd_c, bak_c), axis=1)

        cu_h = [np.array(x) for x in cu_h]
        cu_c = [np.array(x) for x in cu_c]

        cu_h = np.concatenate(cu_h, axis=1)
        cu_c = np.concatenate(cu_c, axis=1)

        self.assertAllClose(out, cu_out)
        self.assertAllClose(h, cu_h)
        self.assertAllClose(c, cu_c)