Code example #1
File: model_defs.py Project: yjernite/DeepCRF
def bi_lstm_layer(in_layer, config, reuse=False, name='Bi_LSTM'):
    num_units = config.rnn_hidden_units
    output_size = config.rnn_output_size
    batch_size = int(in_layer.get_shape()[0])
    num_steps = int(in_layer.get_shape()[1])
    input_size = int(in_layer.get_shape()[2])
    initializer = tf.random_uniform_initializer(-0.1, 0.1)
    lstm_cell_f = rnn_cell.LSTMCell(num_units,
                                    input_size,
                                    use_peepholes=True,
                                    num_proj=output_size,
                                    cell_clip=1.0,
                                    initializer=initializer)
    lstm_cell_b = rnn_cell.LSTMCell(num_units,
                                    input_size,
                                    use_peepholes=True,
                                    num_proj=output_size,
                                    cell_clip=1.0,
                                    initializer=initializer)
    initial_state_f = lstm_cell_f.zero_state(batch_size, tf.float32)
    inputs_list = [
        tf.reshape(x, [batch_size, input_size])
        for x in tf.split(1, num_steps, in_layer)
    ]
    rnn_out, rnn_states = bi_rnn(lstm_cell_f,
                                 lstm_cell_b,
                                 inputs_list,
                                 initial_state=initial_state_f,
                                 scope=name,
                                 reuse=reuse)
    out_layer = tf.transpose(tf.pack(rnn_out), perm=[1, 0, 2])
    return out_layer
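This helper targets the pre-1.0 TensorFlow API (tf.split(1, ...), tf.pack, and the project's rnn_cell/bi_rnn imports). Below is a minimal, illustrative sketch of how it might be driven; the config fields, shape values, and placeholder setup are assumptions for demonstration, not taken from DeepCRF.

# Illustrative usage sketch (legacy TF 0.x API). Config values are assumed.
import tensorflow as tf
from collections import namedtuple

Config = namedtuple('Config', ['rnn_hidden_units', 'rnn_output_size'])
config = Config(rnn_hidden_units=128, rnn_output_size=64)

batch_size, num_steps, input_size = 32, 20, 50
in_layer = tf.placeholder(tf.float32, shape=(batch_size, num_steps, input_size))

# Expected result: a [batch_size, num_steps, 2 * rnn_output_size] tensor with the
# forward and backward projections concatenated at every time step.
out_layer = bi_lstm_layer(in_layer, config, reuse=False, name='Bi_LSTM')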
Code example #2
    def __init__(self, rnn_size, rnn_layer, batch_size, input_embedding_size, dim_image, dim_hidden, max_words_q, vocabulary_size, drop_out_rate):
        self.rnn_size = rnn_size
        self.rnn_layer = rnn_layer
        self.batch_size = batch_size
        self.input_embedding_size = input_embedding_size
        self.dim_image = dim_image
        self.dim_hidden = dim_hidden
        self.max_words_q = max_words_q
        self.vocabulary_size = vocabulary_size    
        self.drop_out_rate = drop_out_rate

        # Question embedding
        self.embed_ques_W = tf.Variable(tf.random_uniform([self.vocabulary_size, self.input_embedding_size], -0.08, 0.08), name='embed_ques_W')

        # RNN encoder
        self.lstm_1 = rnn_cell.LSTMCell(rnn_size, input_embedding_size, use_peepholes=True)
        self.lstm_dropout_1 = rnn_cell.DropoutWrapper(self.lstm_1, output_keep_prob = 1 - self.drop_out_rate)
        self.lstm_2 = rnn_cell.LSTMCell(rnn_size, rnn_size, use_peepholes=True)
        self.lstm_dropout_2 = rnn_cell.DropoutWrapper(self.lstm_2, output_keep_prob = 1 - self.drop_out_rate)
        self.stacked_lstm = rnn_cell.MultiRNNCell([self.lstm_dropout_1, self.lstm_dropout_2])

        # State embedding
        self.embed_state_W = tf.Variable(tf.random_uniform([2*rnn_size*rnn_layer, self.dim_hidden], -0.08,0.08),name='embed_state_W')
        self.embed_state_b = tf.Variable(tf.random_uniform([self.dim_hidden], -0.08, 0.08), name='embed_state_b')
        # Image embedding
        self.embed_image_W = tf.Variable(tf.random_uniform([dim_image, self.dim_hidden], -0.08, 0.08), name='embed_image_W')
        self.embed_image_b = tf.Variable(tf.random_uniform([dim_hidden], -0.08, 0.08), name='embed_image_b')
        # Score embedding (num_output is presumably a module-level constant,
        # e.g. the number of output/answer classes, defined elsewhere in the project)
        self.embed_scor_W = tf.Variable(tf.random_uniform([dim_hidden, num_output], -0.08, 0.08), name='embed_scor_W')
        self.embed_scor_b = tf.Variable(tf.random_uniform([num_output], -0.08, 0.08), name='embed_scor_b')
Code example #3
    def __load_model(self, num_layers):
        # Initial memory value for recurrence.
        self.prev_mem = tf.zeros((self.train_batch_size, self.memory_dim))

        # choose RNN/GRU/LSTM cell
        with tf.variable_scope("forward"):
            fw_single_cell = rnn_cell.LSTMCell(self.memory_dim)
            # Stacks layers of RNN cells to form a stacked decoder
            self.forward_cell = rnn_cell.MultiRNNCell([fw_single_cell] *
                                                      num_layers)

        with tf.variable_scope("backward"):
            bw_single_cell = rnn_cell.LSTMCell(self.memory_dim)
            # Stacks layers of RNN cells to form a stacked decoder
            self.backward_cell = rnn_cell.MultiRNNCell([bw_single_cell] *
                                                       num_layers)

        # embedding model
        if not self.attention:
            with tf.variable_scope("forward"):
                self.dec_outputs_fwd, _ = seq2seq.embedding_rnn_seq2seq(\
                                self.enc_inp_fwd, self.dec_inp, self.forward_cell, \
                                self.vocab_size, self.vocab_size, self.seq_length)
            with tf.variable_scope("forward", reuse=True):
                self.dec_outputs_fwd_tst, _ = seq2seq.embedding_rnn_seq2seq(\
                                self.enc_inp_fwd, self.dec_inp, self.forward_cell, \
                                self.vocab_size, self.vocab_size, self.seq_length, feed_previous=True)

            with tf.variable_scope("backward"):
                self.dec_outputs_bwd, _ = seq2seq.embedding_rnn_seq2seq(\
                                self.enc_inp_bwd, self.dec_inp, self.backward_cell, \
                                self.vocab_size, self.vocab_size, self.seq_length)

            with tf.variable_scope("backward", reuse=True):
                self.dec_outputs_bwd_tst, _ = seq2seq.embedding_rnn_seq2seq(\
                                self.enc_inp_bwd, self.dec_inp, self.backward_cell, \
                                self.vocab_size, self.vocab_size, self.seq_length, feed_previous=True)

        else:
            with tf.variable_scope("forward"):
                self.dec_outputs_fwd, _ = seq2seq.embedding_attention_seq2seq(\
                                self.enc_inp_fwd, self.dec_inp, self.forward_cell, \
                                self.vocab_size, self.vocab_size, self.seq_length)
            with tf.variable_scope("forward", reuse=True):
                self.dec_outputs_fwd_tst, _ = seq2seq.embedding_attention_seq2seq(\
                                self.enc_inp_fwd, self.dec_inp, self.forward_cell, \
                                self.vocab_size, self.vocab_size, self.seq_length, feed_previous=True)

            with tf.variable_scope("backward"):
                self.dec_outputs_bwd, _ = seq2seq.embedding_attention_seq2seq(\
                                self.enc_inp_bwd, self.dec_inp, self.backward_cell, \
                                self.vocab_size, self.vocab_size, self.seq_length)

            with tf.variable_scope("backward", reuse=True):
                self.dec_outputs_bwd_tst, _ = seq2seq.embedding_attention_seq2seq(\
                                self.enc_inp_bwd, self.dec_inp, self.backward_cell, \
                                self.vocab_size, self.vocab_size, self.seq_length, feed_previous=True)
Code example #4
    def _testShardNoShardEquivalentOutput(self, use_gpu):
        num_units = 3
        input_size = 5
        batch_size = 2
        num_proj = 4
        num_proj_shards = 4
        num_unit_shards = 2
        with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
            inputs = 10 * [tf.placeholder(tf.float32)]
            initializer = tf.constant_initializer(0.001)

            cell_noshard = rnn_cell.LSTMCell(num_units,
                                             input_size,
                                             num_proj=num_proj,
                                             use_peepholes=True,
                                             initializer=initializer,
                                             num_unit_shards=num_unit_shards,
                                             num_proj_shards=num_proj_shards)

            cell_shard = rnn_cell.LSTMCell(num_units,
                                           input_size,
                                           use_peepholes=True,
                                           initializer=initializer,
                                           num_proj=num_proj)

            with tf.variable_scope("noshard_scope"):
                outputs_noshard, states_noshard = rnn.rnn(cell_noshard,
                                                          inputs,
                                                          dtype=tf.float32)
            with tf.variable_scope("shard_scope"):
                outputs_shard, states_shard = rnn.rnn(cell_shard,
                                                      inputs,
                                                      dtype=tf.float32)

            self.assertEqual(len(outputs_noshard), len(inputs))
            self.assertEqual(len(outputs_noshard), len(outputs_shard))

            tf.initialize_all_variables().run()
            input_value = np.random.randn(batch_size, input_size)
            feeds = dict((x, input_value) for x in inputs)
            values_noshard = sess.run(outputs_noshard, feed_dict=feeds)
            values_shard = sess.run(outputs_shard, feed_dict=feeds)
            state_values_noshard = sess.run(states_noshard, feed_dict=feeds)
            state_values_shard = sess.run(states_shard, feed_dict=feeds)
            self.assertEqual(len(values_noshard), len(values_shard))
            self.assertEqual(len(state_values_noshard),
                             len(state_values_shard))
            for (v_noshard, v_shard) in zip(values_noshard, values_shard):
                self.assertAllClose(v_noshard, v_shard, atol=1e-3)
            for (s_noshard, s_shard) in zip(state_values_noshard,
                                            state_values_shard):
                self.assertAllClose(s_noshard, s_shard, atol=1e-3)
Code example #5
    def __load_model(self):
        # Initial memory value for recurrence.
        self.prev_mem = tf.zeros((self.train_batch_size, self.memory_dim))

        # choose RNN/GRU/LSTM cell
        with tf.variable_scope("train_test", reuse=True):
            self.cell = rnn_cell.LSTMCell(self.memory_dim)

        # embedding model
        if not self.attention:
            with tf.variable_scope("train_test"):
                self.dec_outputs, self.dec_memory = seq2seq.embedding_rnn_seq2seq(\
                                self.enc_inp, self.dec_inp, self.cell, \
                                self.vocab_size, self.vocab_size, self.seq_length)
            with tf.variable_scope("train_test", reuse=True):
                self.dec_outputs_tst, _ = seq2seq.embedding_rnn_seq2seq(\
                                self.enc_inp, self.dec_inp, self.cell, \
                                self.vocab_size, self.vocab_size, self.seq_length, feed_previous=True)

        else:
            with tf.variable_scope("train_test"):
                self.dec_outputs, self.dec_memory = seq2seq.embedding_attention_seq2seq(\
                                self.enc_inp, self.dec_inp, self.cell, \
                                self.vocab_size, self.vocab_size, self.seq_length)
            with tf.variable_scope("train_test", reuse=True):
                self.dec_outputs_tst, _ = seq2seq.embedding_attention_seq2seq(\
                                self.enc_inp, self.dec_inp, self.cell, \
                                self.vocab_size, self.vocab_size, self.seq_length, feed_previous=True)
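The encoder and decoder inputs (self.enc_inp, self.dec_inp) are built elsewhere in this class. For the embedding_*_seq2seq helpers they are lists of int32 token-id tensors, one per time step; below is a hedged sketch of how they might be declared (names and shapes are illustrative, not from the original project).

        # Hypothetical input setup: one int32 batch of token ids per time step.
        self.enc_inp = [tf.placeholder(tf.int32, shape=(None,), name="enc_inp_%d" % t)
                        for t in range(self.seq_length)]
        self.dec_inp = [tf.placeholder(tf.int32, shape=(None,), name="dec_inp_%d" % t)
                        for t in range(self.seq_length)]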
Code example #6
    def _testDoubleInput(self, use_gpu):
        num_units = 3
        input_size = 5
        batch_size = 2
        num_proj = 4
        num_proj_shards = 4
        num_unit_shards = 2
        with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
            initializer = tf.random_uniform_initializer(-1, 1, seed=self._seed)
            inputs = 10 * [tf.placeholder(tf.float64)]

            cell = rnn_cell.LSTMCell(num_units,
                                     input_size=input_size,
                                     use_peepholes=True,
                                     num_proj=num_proj,
                                     num_unit_shards=num_unit_shards,
                                     num_proj_shards=num_proj_shards,
                                     initializer=initializer)

            outputs, _ = rnn.rnn(cell,
                                 inputs,
                                 initial_state=cell.zero_state(
                                     batch_size, tf.float64))

            self.assertEqual(len(outputs), len(inputs))

            tf.initialize_all_variables().run()
            input_value = np.asarray(np.random.randn(batch_size, input_size),
                                     dtype=np.float64)
            values = sess.run(outputs, feed_dict={inputs[0]: input_value})
            self.assertEqual(values[0].dtype, input_value.dtype)
Code example #7
    def _testNoProjNoShardingSimpleStateSaver(self, use_gpu):
        num_units = 3
        input_size = 5
        batch_size = 2
        with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
            initializer = tf.random_uniform_initializer(-0.01,
                                                        0.01,
                                                        seed=self._seed)
            state_saver = TestStateSaver(batch_size, 2 * num_units)
            cell = rnn_cell.LSTMCell(num_units,
                                     input_size,
                                     use_peepholes=False,
                                     initializer=initializer)
            inputs = 10 * [
                tf.placeholder(tf.float32, shape=(batch_size, input_size))
            ]
            with tf.variable_scope("share_scope"):
                outputs, states = rnn.state_saving_rnn(cell,
                                                       inputs,
                                                       state_saver=state_saver,
                                                       state_name="save_lstm")
            self.assertEqual(len(outputs), len(inputs))
            for out in outputs:
                self.assertEqual(out.get_shape().as_list(),
                                 [batch_size, num_units])

            tf.initialize_all_variables().run()
            input_value = np.random.randn(batch_size, input_size)
            (last_state_value, saved_state_value) = sess.run(
                [states[-1], state_saver.saved_state],
                feed_dict={inputs[0]: input_value})
            self.assertAllEqual(last_state_value, saved_state_value)
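TestStateSaver is defined elsewhere in the test file. As context, here is a minimal hypothetical stand-in, assuming rnn.state_saving_rnn only needs a state(name) method returning the initial state and a save_state(name, tensor) method recording the final one (method names and zero initialization are assumptions, not copied from TensorFlow).

class TestStateSaver(object):
    """Hypothetical minimal state saver for the test above."""

    def __init__(self, batch_size, state_size):
        self._batch_size = batch_size
        self._state_size = state_size

    def state(self, _name):
        # state_saving_rnn asks for the initial state under a given name.
        return tf.zeros([self._batch_size, self._state_size], dtype=tf.float32)

    def save_state(self, _name, state):
        # Keep a handle on the final state so the test can fetch saved_state.
        self.saved_state = state
        return tf.identity(state)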
Code example #8
    def _testProjSharding(self, use_gpu):
        num_units = 3
        input_size = 5
        batch_size = 2
        num_proj = 4
        num_proj_shards = 4
        num_unit_shards = 2
        with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
            initializer = tf.random_uniform_initializer(-0.01,
                                                        0.01,
                                                        seed=self._seed)

            inputs = 10 * [
                tf.placeholder(tf.float32, shape=(None, input_size))
            ]

            cell = rnn_cell.LSTMCell(num_units,
                                     input_size=input_size,
                                     use_peepholes=True,
                                     num_proj=num_proj,
                                     num_unit_shards=num_unit_shards,
                                     num_proj_shards=num_proj_shards,
                                     initializer=initializer)

            outputs, _ = rnn.rnn(cell, inputs, dtype=tf.float32)

            self.assertEqual(len(outputs), len(inputs))

            tf.initialize_all_variables().run()
            input_value = np.random.randn(batch_size, input_size)
            sess.run(outputs, feed_dict={inputs[0]: input_value})
Code example #9
    def __init__(self,
                 dim_image,
                 n_words,
                 dim_hidden,
                 batch_size,
                 n_lstm_steps,
                 drop_out_rate,
                 bias_init_vector=None):
        self.dim_image = dim_image
        self.n_words = n_words
        self.dim_hidden = dim_hidden
        self.batch_size = batch_size
        self.n_lstm_steps = n_lstm_steps
        self.drop_out_rate = drop_out_rate

        with tf.device("/cpu:0"):
            self.Wemb = tf.Variable(tf.random_uniform([n_words, dim_hidden],
                                                      -0.1, 0.1),
                                    name='Wemb')

        self.lstm3 = rnn_cell.LSTMCell(self.dim_hidden,
                                       2 * self.dim_hidden,
                                       use_peepholes=True)
        self.lstm3_dropout = rnn_cell.DropoutWrapper(self.lstm3,
                                                     output_keep_prob=1 -
                                                     self.drop_out_rate)

        self.encode_image_W = tf.Variable(tf.random_uniform(
            [dim_image, dim_hidden], -0.1, 0.1),
                                          name='encode_image_W')
        self.encode_image_b = tf.Variable(tf.zeros([dim_hidden]),
                                          name='encode_image_b')
        self.embed_att_w = tf.Variable(tf.random_uniform([dim_hidden, 1], -0.1,
                                                         0.1),
                                       name='embed_att_w')
        self.embed_att_Wa = tf.Variable(tf.random_uniform(
            [dim_hidden, dim_hidden], -0.1, 0.1),
                                        name='embed_att_Wa')
        self.embed_att_Ua = tf.Variable(tf.random_uniform(
            [dim_hidden, dim_hidden], -0.1, 0.1),
                                        name='embed_att_Ua')
        self.embed_att_ba = tf.Variable(tf.zeros([dim_hidden]),
                                        name='embed_att_ba')

        self.embed_word_W = tf.Variable(tf.random_uniform(
            [dim_hidden, n_words], -0.1, 0.1),
                                        name='embed_word_W')
        if bias_init_vector is not None:
            self.embed_word_b = tf.Variable(bias_init_vector.astype(
                np.float32),
                                            name='embed_word_b')
        else:
            self.embed_word_b = tf.Variable(tf.zeros([n_words]),
                                            name='embed_word_b')

        self.embed_nn_Wp = tf.Variable(tf.random_uniform(
            [3 * dim_hidden, dim_hidden], -0.1, 0.1),
                                       name='embed_nn_Wp')
        self.embed_nn_bp = tf.Variable(tf.zeros([dim_hidden]),
                                       name='embed_nn_bp')
Code example #10
    def testSharingWeightsWithDifferentNamescope(self):
        num_units = 3
        input_size = 5
        batch_size = 2
        num_proj = 4
        with self.test_session(graph=tf.Graph()) as sess:
            initializer = tf.random_uniform_initializer(-1, 1, seed=self._seed)
            inputs = 10 * [
                tf.placeholder(tf.float32, shape=(None, input_size))
            ]
            cell = rnn_cell.LSTMCell(num_units,
                                     input_size,
                                     use_peepholes=True,
                                     num_proj=num_proj,
                                     initializer=initializer)

            with tf.name_scope("scope0"):
                with tf.variable_scope("share_scope"):
                    outputs0, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
            with tf.name_scope("scope1"):
                with tf.variable_scope("share_scope", reuse=True):
                    outputs1, _ = rnn.rnn(cell, inputs, dtype=tf.float32)

            tf.initialize_all_variables().run()
            input_value = np.random.randn(batch_size, input_size)
            output_values = sess.run(outputs0 + outputs1,
                                     feed_dict={inputs[0]: input_value})
            outputs0_values = output_values[:10]
            outputs1_values = output_values[10:]
            self.assertEqual(len(outputs0_values), len(outputs1_values))
            for out0, out1 in zip(outputs0_values, outputs1_values):
                self.assertAllEqual(out0, out1)
Code example #11
    def _testCellClipping(self, use_gpu):
        num_units = 3
        input_size = 5
        batch_size = 2
        with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
            initializer = tf.random_uniform_initializer(-0.01,
                                                        0.01,
                                                        seed=self._seed)
            cell = rnn_cell.LSTMCell(num_units,
                                     input_size,
                                     use_peepholes=True,
                                     cell_clip=0.0,
                                     initializer=initializer)
            inputs = 10 * [
                tf.placeholder(tf.float32, shape=(batch_size, input_size))
            ]
            outputs, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
            self.assertEqual(len(outputs), len(inputs))
            for out in outputs:
                self.assertEqual(out.get_shape().as_list(),
                                 [batch_size, num_units])

            tf.initialize_all_variables().run()
            input_value = np.random.randn(batch_size, input_size)
            values = sess.run(outputs, feed_dict={inputs[0]: input_value})

        for value in values:
            # if cell c is clipped to 0, tanh(c) = 0 => m==0
            self.assertAllEqual(value, np.zeros((batch_size, num_units)))
Code example #12
File: test.py Project: vinayakathavale/ner-lstm
    def prediction(self):
        fw_cell = rnn_cell.LSTMCell(self._num_hidden)
        fw_cell = rnn_cell.DropoutWrapper(fw_cell, output_keep_prob=self.dropout)
        bw_cell = rnn_cell.LSTMCell(self._num_hidden)
        bw_cell = rnn_cell.DropoutWrapper(bw_cell, output_keep_prob=self.dropout)

        if self._num_layers > 1:
            fw_cell = rnn_cell.MultiRNNCell([fw_cell] * self._num_layers)
            bw_cell = rnn_cell.MultiRNNCell([bw_cell] * self._num_layers)

        output, _, _ = rnn.bidirectional_rnn(fw_cell, bw_cell, tf.unpack(tf.transpose(self.data, perm=[1, 0, 2])), dtype=tf.float32, sequence_length=self.length)
        max_length = int(self.target.get_shape()[1])
        num_classes = int(self.target.get_shape()[2])
        weight, bias = self._weight_and_bias(2*self._num_hidden, num_classes)
        output = tf.reshape(tf.transpose(tf.pack(output), perm=[1, 0, 2]), [-1, 2*self._num_hidden])
        prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
        prediction = tf.reshape(prediction, [-1, max_length, num_classes])
        return prediction
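Both this snippet and code example #15 below call a _weight_and_bias helper that is not shown. A plausible sketch follows, assuming it simply builds a truncated-normal weight matrix and a small constant bias (the initializer constants are assumptions, not the project's code).

    @staticmethod
    def _weight_and_bias(in_size, out_size):
        # Hypothetical helper: truncated-normal weights and a small constant bias.
        weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
        bias = tf.constant(0.1, shape=[out_size])
        return tf.Variable(weight), tf.Variable(bias)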
Code example #13
File: m05.py Project: jayantk/dqa-net
    def __init__(self, params, emb_mat):
        self.params = params
        V, d, L, e = params.vocab_size, params.hidden_size, params.rnn_num_layers, params.word_size
        prev_size = e
        hidden_sizes = [d for _ in range(params.emb_num_layers)]
        for layer_idx in range(params.emb_num_layers):
            with tf.variable_scope("emb_%d" % layer_idx):
                cur_hidden_size = hidden_sizes[layer_idx]
                emb_mat = tf.tanh(
                    my.nn.linear([V, prev_size], cur_hidden_size, emb_mat))
                prev_size = cur_hidden_size
        self.emb_mat = emb_mat

        self.emb_hidden_sizes = [d for _ in range(params.emb_num_layers)]
        self.input_size = self.emb_hidden_sizes[
            -1] if self.emb_hidden_sizes else e

        if params.lstm == 'basic':
            self.first_cell = my.rnn_cell.BasicLSTMCell(
                d, input_size=self.input_size, forget_bias=params.forget_bias)
            self.second_cell = my.rnn_cell.BasicLSTMCell(
                d, forget_bias=params.forget_bias)
        elif params.lstm == 'regular':
            self.first_cell = rnn_cell.LSTMCell(d,
                                                self.input_size,
                                                cell_clip=params.cell_clip)
            self.second_cell = rnn_cell.LSTMCell(d,
                                                 d,
                                                 cell_clip=params.cell_clip)
        elif params.lstm == 'gru':
            self.first_cell = rnn_cell.GRUCell(d, input_size=self.input_size)
            self.second_cell = rnn_cell.GRUCell(d)
        else:
            raise Exception()

        if params.train and params.keep_prob < 1.0:
            self.first_cell = tf.nn.rnn_cell.DropoutWrapper(
                self.first_cell,
                input_keep_prob=params.keep_prob,
                output_keep_prob=params.keep_prob)
        self.cell = rnn_cell.MultiRNNCell([self.first_cell] +
                                          [self.second_cell] * (L - 1))
        self.scope = tf.get_variable_scope()
        self.used = False
Code example #14
File: rnn_test.py Project: zizifu/tensorflow
    def _testDoubleInputWithDropoutAndDynamicCalculation(self, use_gpu):
        """Smoke test for using LSTM with doubles, dropout, dynamic calculation."""

        num_units = 3
        input_size = 5
        batch_size = 2
        num_proj = 4
        num_proj_shards = 4
        num_unit_shards = 2
        with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
            sequence_length = tf.placeholder(tf.int64)
            initializer = tf.random_uniform_initializer(-0.01,
                                                        0.01,
                                                        seed=self._seed)
            inputs = 10 * [tf.placeholder(tf.float64)]

            cell = rnn_cell.LSTMCell(num_units,
                                     input_size=input_size,
                                     use_peepholes=True,
                                     num_proj=num_proj,
                                     num_unit_shards=num_unit_shards,
                                     num_proj_shards=num_proj_shards,
                                     initializer=initializer)
            dropout_cell = rnn_cell.DropoutWrapper(cell, 0.5, seed=0)

            outputs, states = rnn.rnn(dropout_cell,
                                      inputs,
                                      sequence_length=sequence_length,
                                      initial_state=cell.zero_state(
                                          batch_size, tf.float64))

            self.assertEqual(len(outputs), len(inputs))
            self.assertEqual(len(outputs), len(states))

            tf.initialize_all_variables().run(
                feed_dict={sequence_length: [2, 3]})
            input_value = np.asarray(np.random.randn(batch_size, input_size),
                                     dtype=np.float64)
            values = sess.run(outputs,
                              feed_dict={
                                  inputs[0]: input_value,
                                  sequence_length: [2, 3]
                              })
            state_values = sess.run(states,
                                    feed_dict={
                                        inputs[0]: input_value,
                                        sequence_length: [2, 3]
                                    })
            self.assertEqual(values[0].dtype, input_value.dtype)
            self.assertEqual(state_values[0].dtype, input_value.dtype)
Code example #15
 def prediction(self):
     # Recurrent network.
     network = rnn_cell.LSTMCell(self._num_hidden)
     network = rnn_cell.DropoutWrapper(network,
                                       output_keep_prob=self.dropout)
     network = rnn_cell.MultiRNNCell([network] * self._num_layers)
     output, _ = rnn.dynamic_rnn(network, self.data, dtype=tf.float32)
     # Softmax layer.
     max_length = int(self.target.get_shape()[1])
     num_classes = int(self.target.get_shape()[2])
     weight, bias = self._weight_and_bias(self._num_hidden, num_classes)
     # Flatten to apply same weights to all time steps.
     output = tf.reshape(output, [-1, self._num_hidden])
     prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
     prediction = tf.reshape(prediction, [-1, max_length, num_classes])
     return prediction
Code example #16
    def testSharingWeightsWithReuse(self):
        num_units = 3
        input_size = 5
        batch_size = 2
        num_proj = 4
        with self.test_session(graph=tf.Graph()) as sess:
            initializer = tf.random_uniform_initializer(-1, 1, seed=self._seed)
            inputs = 10 * [
                tf.placeholder(tf.float32, shape=(None, input_size))
            ]
            cell = rnn_cell.LSTMCell(num_units,
                                     input_size,
                                     use_peepholes=True,
                                     num_proj=num_proj,
                                     initializer=initializer)

            with tf.variable_scope("share_scope"):
                outputs0, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
            with tf.variable_scope("share_scope", reuse=True):
                outputs1, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
            with tf.variable_scope("diff_scope"):
                outputs2, _ = rnn.rnn(cell, inputs, dtype=tf.float32)

            tf.initialize_all_variables().run()
            input_value = np.random.randn(batch_size, input_size)
            output_values = sess.run(outputs0 + outputs1 + outputs2,
                                     feed_dict={inputs[0]: input_value})
            outputs0_values = output_values[:10]
            outputs1_values = output_values[10:20]
            outputs2_values = output_values[20:]
            self.assertEqual(len(outputs0_values), len(outputs1_values))
            self.assertEqual(len(outputs0_values), len(outputs2_values))
            for o1, o2, o3 in zip(outputs0_values, outputs1_values,
                                  outputs2_values):
                # Same weights used by both RNNs so outputs should be the same.
                self.assertAllEqual(o1, o2)
                # Different weights used so outputs should be different.
                self.assertTrue(np.linalg.norm(o1 - o3) > 1e-6)
Code example #17
    def _testNoProjNoSharding(self, use_gpu):
        num_units = 3
        input_size = 5
        batch_size = 2
        with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
            initializer = tf.random_uniform_initializer(-0.01,
                                                        0.01,
                                                        seed=self._seed)
            cell = rnn_cell.LSTMCell(num_units,
                                     input_size,
                                     initializer=initializer)
            inputs = 10 * [
                tf.placeholder(tf.float32, shape=(batch_size, input_size))
            ]
            outputs, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
            self.assertEqual(len(outputs), len(inputs))
            for out in outputs:
                self.assertEqual(out.get_shape().as_list(),
                                 [batch_size, num_units])

            tf.initialize_all_variables().run()
            input_value = np.random.randn(batch_size, input_size)
            sess.run(outputs, feed_dict={inputs[0]: input_value})
Code example #18
File: cnnlstm.py Project: vunb/qclass_dl
    def __init__(self,
                 embedding_mat,
                 non_static,
                 lstm_type,
                 hidden_unit,
                 sequence_length,
                 max_pool_size,
                 num_classes,
                 embedding_size,
                 filter_sizes,
                 num_filters,
                 l2_reg_lambda=0.0):

        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")
        self.batch_size = tf.placeholder(tf.int32)
        self.pad = tf.placeholder(tf.float32, [None, 1, embedding_size, 1],
                                  name="pad")
        self.real_len = tf.placeholder(tf.int32, [None], name="real_len")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Extend input to a 4D Tensor, because tf.nn.conv2d requires so.
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            if not non_static:
                W = tf.constant(embedding_mat, name="W")
            else:
                W = tf.Variable(embedding_mat, name="W")
            self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
            emb = tf.expand_dims(self.embedded_chars, -1)

        # CNN
        pooled_concat = []
        reduced = np.int32(np.ceil((sequence_length) * 1.0 / max_pool_size))
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):

                # Zero padding so that the convolution output has dimension batch x sequence_length x emb_size x channel
                num_prio = (filter_size - 1) // 2
                num_post = (filter_size - 1) - num_prio
                pad_prio = tf.concat(1, [self.pad] * num_prio)
                pad_post = tf.concat(1, [self.pad] * num_post)
                emb_pad = tf.concat(1, [pad_prio, emb, pad_post])

                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                                name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]),
                                name="b")
                conv = tf.nn.conv2d(emb_pad,
                                    W,
                                    strides=[1, 1, 1, 1],
                                    padding="VALID",
                                    name="conv")

                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(h,
                                        ksize=[1, max_pool_size, 1, 1],
                                        strides=[1, max_pool_size, 1, 1],
                                        padding='SAME',
                                        name="pool")
                pooled = tf.reshape(pooled, [-1, reduced, num_filters])
                pooled_concat.append(pooled)

        pooled_concat = tf.concat(2, pooled_concat)
        pooled_concat = tf.nn.dropout(pooled_concat, self.dropout_keep_prob)

        # LSTM
        if lstm_type == "gru":
            lstm_cell = rnn_cell.GRUCell(num_units=hidden_unit,
                                         input_size=embedding_size)
        else:
            if lstm_type == "basic":
                lstm_cell = rnn_cell.BasicLSTMCell(num_units=hidden_unit,
                                                   input_size=embedding_size)
            else:
                lstm_cell = rnn_cell.LSTMCell(num_units=hidden_unit,
                                              input_size=embedding_size,
                                              use_peepholes=True)
        lstm_cell = rnn_cell.DropoutWrapper(
            lstm_cell, output_keep_prob=self.dropout_keep_prob)

        self._initial_state = lstm_cell.zero_state(self.batch_size, tf.float32)
        inputs = [
            tf.squeeze(input_, [1])
            for input_ in tf.split(1, reduced, pooled_concat)
        ]
        outputs, state = rnn.rnn(lstm_cell,
                                 inputs,
                                 initial_state=self._initial_state,
                                 sequence_length=self.real_len)

        # Collect the appropriate last output for each sequence into `output` (dimension = batch x hidden_unit)
        output = outputs[0]
        with tf.variable_scope("Output"):
            tf.get_variable_scope().reuse_variables()
            one = tf.ones([1, hidden_unit], tf.float32)
            for i in range(1, len(outputs)):
                ind = self.real_len < (i + 1)
                ind = tf.to_float(ind)
                ind = tf.expand_dims(ind, -1)
                mat = tf.matmul(ind, one)
                output = tf.add(tf.mul(output, mat),
                                tf.mul(outputs[i], 1.0 - mat))

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            self.W = tf.Variable(tf.truncated_normal(
                [hidden_unit, num_classes], stddev=0.1),
                                 name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(self.W)  # regularize the output weights; a bare W here would refer to the last conv filter
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(output, self.W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # CalculateMean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                self.scores, self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
Code example #19
    def __init__(self, vocab_size, sequence_length, num_units,
                 max_gradient_norm, batch_size, learning_rate,
                 learning_rate_decay_factor):
        self.vocab_size = vocab_size
        self.sequence_length = sequence_length
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        w = training.utils.gaussian_weights_variable(
            [num_units, self.vocab_size])
        b = tf.Variable(tf.zeros([self.vocab_size]))

        lstm_cell = rnn_cell.LSTMCell(num_units, vocab_size)

        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        for _ in range(sequence_length):
            self.encoder_inputs.append(
                tf.placeholder(tf.float32,
                               shape=(batch_size, self.vocab_size)))
            self.decoder_inputs.append(
                tf.placeholder(tf.float32,
                               shape=(batch_size, self.vocab_size)))
            self.target_weights.append(
                tf.placeholder(tf.float32, shape=(batch_size, )))

        # Decoder has one extra cell because it starts with the GO symbol,
        # and the targets are shifted by one.
        # Not sure this is actually useful, as it is always set to 0.
        # As this is inspired by TensorFlow seq2seq models, there might be
        # something dodgy in there.
        self.decoder_inputs.append(
            tf.placeholder(tf.float32, shape=(batch_size, self.vocab_size)))
        self.target_weights.append(np.ones((batch_size, )))

        # Targets used by the sequence loss must be integer indices.
        targets = [
            tf.cast(tf.argmax(i, 1), dtype=tf.int32)
            for i in self.decoder_inputs[1:]
        ]

        outputs, self.state = seq2seq.basic_rnn_seq2seq(
            self.encoder_inputs, self.decoder_inputs, lstm_cell)

        self.logits = [tf.nn.xw_plus_b(o, w, b) for o in outputs]
        self.loss = seq2seq.sequence_loss(
            self.logits[:self.sequence_length], targets,
            self.target_weights[:self.sequence_length], self.vocab_size)

        params = tf.trainable_variables()
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(self.loss, params)
        clipped_gradients, self.gradient_norms = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.updates = opt.apply_gradients(zip(clipped_gradients, params),
                                           global_step=self.global_step)

        self.saver = tf.train.Saver(tf.all_variables())
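training.utils.gaussian_weights_variable is project-specific and not shown here. A minimal sketch of what it might do, assuming it just wraps a truncated-normal initialization (the default standard deviation is an assumption):

def gaussian_weights_variable(shape, stddev=0.1):
    # Hypothetical equivalent: a Gaussian-initialized weight variable,
    # e.g. for the output projection w above.
    return tf.Variable(tf.truncated_normal(shape, stddev=stddev))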
Code example #20
    def __init__(self,
                 is_training=False,
                 hidden_units=128,
                 num_layers=1,
                 input_sequence_len=20,
                 output_sequence_len=10,
                 num_input_symbols=20,
                 num_output_symbols=20,
                 weight_amplitude=0.08,
                 batch_size=32,
                 peep=False):

        self.encoder_inputs = []
        self.decoder_inputs = []

        for i in range(input_sequence_len):
            self.encoder_inputs.append(
                tf.placeholder(tf.float32,
                               shape=(None, num_input_symbols),
                               name="encoder_{0}".format(i)))

        for i in range(output_sequence_len + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.float32,
                               shape=(None, num_output_symbols),
                               name="decoder_{0}".format(i)))

        def random_uniform():
            return tf.random_uniform_initializer(-weight_amplitude,
                                                 weight_amplitude)

        if num_layers > 1:
            cells = [
                rnn_cell.LSTMCell(hidden_units,
                                  use_peepholes=peep,
                                  input_size=num_input_symbols,
                                  initializer=random_uniform())
            ]
            cells += [
                rnn_cell.LSTMCell(hidden_units,
                                  use_peepholes=peep,
                                  input_size=hidden_units,
                                  initializer=random_uniform())
                for _ in range(num_layers - 1)
            ]
            self.cell = rnn_cell.MultiRNNCell(cells)
        else:
            self.cell = rnn_cell.LSTMCell(hidden_units,
                                          use_peepholes=peep,
                                          initializer=random_uniform())

        self.w_softmax = tf.get_variable('w_softmax',
                                         shape=(hidden_units,
                                                num_output_symbols),
                                         initializer=random_uniform())
        self.b_softmax = tf.get_variable('b_softmax',
                                         shape=(num_output_symbols, ),
                                         initializer=random_uniform())

        # decoder_outputs is a list of tensors with output_sequence_len: [(batch_size x hidden_units)]
        decoder_outputs, _ = self._init_seq2seq(self.encoder_inputs,
                                                self.decoder_inputs,
                                                self.cell,
                                                feed_previous=not is_training)

        output_logits = [
            tf.matmul(decoder_output, self.w_softmax) + self.b_softmax
            for decoder_output in decoder_outputs
        ]
        self.output_probs = [tf.nn.softmax(logit) for logit in output_logits]

        # If this is a training model create the training operation and loss function
        if is_training:
            self.targets = self.decoder_inputs[1:]
            losses = [
                tf.nn.softmax_cross_entropy_with_logits(logit, target)
                for logit, target in zip(output_logits, self.targets)
            ]

            loss = tf.reduce_sum(tf.add_n(losses))
            self.cost = loss / output_sequence_len / batch_size
            self.learning_rate = tf.Variable(DEFAULT_LEARNING_RATE,
                                             trainable=False)

            train_vars = tf.trainable_variables()
            grads = tf.gradients(self.cost, train_vars)
            optimizer = tf.train.AdamOptimizer(self.learning_rate)

            self.train_op = optimizer.apply_gradients(zip(grads, train_vars))
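_init_seq2seq and DEFAULT_LEARNING_RATE are defined elsewhere in the file. Purely as a hedged sketch of what such a helper could look like with the legacy rnn/seq2seq modules: encode the inputs, then decode from the final encoder state, feeding back a one-hot of the previous prediction when feed_previous is True. The loop function, scope name, and overall structure are assumptions, not the author's code.

    def _init_seq2seq(self, encoder_inputs, decoder_inputs, cell, feed_previous):

        def inference_loop_function(prev, _):
            # Project the previous decoder output and feed back a one-hot of its argmax.
            prev = tf.nn.xw_plus_b(prev, self.w_softmax, self.b_softmax)
            return tf.to_float(tf.equal(prev, tf.reduce_max(prev, reduction_indices=[1],
                                                            keep_dims=True)))

        loop_function = inference_loop_function if feed_previous else None

        with tf.variable_scope('seq2seq'):
            # Encode, then decode starting from the final encoder state.
            _, final_encoder_state = rnn.rnn(cell, encoder_inputs, dtype=tf.float32)
            return seq2seq.rnn_decoder(decoder_inputs, final_encoder_state, cell,
                                       loop_function=loop_function)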
Code example #21
File: model.py Project: lucaswiser/USF
    def __init__(self, config):
        sent_len = self.sent_len = config.sent_len
        word_len = config.word_len
        batch_size = config.batch_size
        vocab_size = config.vocab_size
        embed_size = config.embed_size
        keep_prob1 = config.keep_prob1
        keep_prob2 = config.keep_prob2
        num_layers1 = config.num_layers1
        num_layers2 = config.num_layers2
        state_size1 = config.state_size1
        state_size2 = config.state_size2

        self.input_data = tf.placeholder(tf.int32,
                                         [batch_size * sent_len, word_len])
        self.lengths = tf.placeholder(tf.int64, [batch_size])
        self.wordlengths = tf.placeholder(tf.int64, [batch_size * sent_len])
        self.targets = tf.placeholder(tf.float32, [batch_size, 1])

        # Get embedding layer which requires CPU
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, embed_size])
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        #LSTM 1 -> Encode the characters of every tok into a fixed dense representation
        with tf.variable_scope("rnn1", reuse=None):
            lstm_cell_1 = rnn_cell.LSTMCell(state_size1, input_size=embed_size)
            lstm_back_cell_1 = rnn_cell.LSTMCell(state_size1,
                                                 input_size=embed_size)
            if keep_prob1 < 1:
                #Only on the inputs for rnn1. That way we don't dropout twice
                lstm_cell_1 = rnn_cell.DropoutWrapper(
                    lstm_cell_1, input_keep_prob=keep_prob1)
                lstm_back_cell_1 = rnn_cell.DropoutWrapper(
                    lstm_back_cell_1, input_keep_prob=keep_prob1)

            cell_1 = rnn_cell.MultiRNNCell([lstm_cell_1] * num_layers1)
            backcell_1 = rnn_cell.MultiRNNCell([lstm_back_cell_1] *
                                               num_layers1)

            rnn_splits = [
                tf.squeeze(input_, [1])
                for input_ in tf.split(1, word_len, inputs)
            ]

            # Run the bidirectional rnn
            outputs1, last_fw_state1, last_bw_state1 = rnn.bidirectional_rnn(
                cell_1,
                backcell_1,
                rnn_splits,
                sequence_length=self.wordlengths,
                dtype=tf.float32)

        #tok_embeds = outputs1[-1]
        tok_embeds = tf.concat(1, [last_fw_state1, last_bw_state1])

        with tf.variable_scope("rnn2", reuse=None):
            lstm_cell_2 = rnn_cell.LSTMCell(state_size2,
                                            input_size=state_size1 * 4)
            lstm_back_cell_2 = rnn_cell.LSTMCell(state_size2,
                                                 input_size=state_size1 * 4)
            # Add dropout. NOTE: this adds to the input and output layers. Remember that the input layer
            # is the output from the conv net, so this also adds dropout to the output of the conv net
            if keep_prob2 < 1:
                lstm_cell_2 = rnn_cell.DropoutWrapper(
                    lstm_cell_2,
                    input_keep_prob=keep_prob2,
                    output_keep_prob=keep_prob2)
                lstm_back_cell_2 = rnn_cell.DropoutWrapper(
                    lstm_back_cell_2,
                    input_keep_prob=keep_prob2,
                    output_keep_prob=keep_prob2)

            cell_2 = rnn_cell.MultiRNNCell([lstm_cell_2] * num_layers2)
            backcell_2 = rnn_cell.MultiRNNCell([lstm_back_cell_2] *
                                               num_layers2)

            # The rnn synthesis of the tokens is size [batch_size*sent_len, state_size*2]
            # we want it to be a list of sent_len of [batch_size, state_size*2]
            # We partition as [0,1,2,...n,0,1,2,...n...]
            rnn_inputs2 = tf.dynamic_partition(
                tok_embeds,
                list(range(sent_len)) * batch_size, sent_len)

            #Sent level rnn
            outputs2, last_fw_state2, last_bw_state2 = rnn.bidirectional_rnn(
                cell_2,
                backcell_2,
                rnn_inputs2,
                sequence_length=self.lengths,
                dtype=tf.float32)
            #sent_embed = tf.reshape(tf.concat(1, [last_fw_state2, last_bw_state2]), [batch_size, state_size2*4])
            sent_embed = tf.concat(1, [last_fw_state2, last_bw_state2])

        with tf.variable_scope("linear", reuse=None):
            w = tf.get_variable("w", [state_size2 * 4, 1])
            b = tf.get_variable("b", [1])
            raw_logits = tf.matmul(sent_embed, w) + b
        self.probabilities = tf.sigmoid(raw_logits)
        self.cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(raw_logits, self.targets))

        #Calculate gradients and propagate
        #Aggregation method 2 is really important for rnn per the tensorflow issues list
        tvars = tf.trainable_variables()
        self.lr = tf.Variable(0.0, trainable=False)  #Assign to overwrite
        optimizer = tf.train.AdamOptimizer()
        grads, _vars = zip(*optimizer.compute_gradients(
            self.cost, tvars, aggregation_method=2))
        grads, self.grad_norm = tf.clip_by_global_norm(grads,
                                                       config.max_grad_norm)
        self.train_op = optimizer.apply_gradients(zip(grads, _vars))