Example #1
0
    def __init__(self, is_training, glove_word_vectors, vocabulary, config):
        """Store the configuration and build the left and right stacked LSTM cells.

        Args:
            is_training: when truthy and ``config.keep_prob < 1``, every base
                LSTM cell is wrapped with output dropout before stacking.
            glove_word_vectors: stored unchanged on the instance.
            vocabulary: stored unchanged on the instance.
            config: object providing ``hidden_size``, ``word_vec_size``,
                ``vocab_size``, ``keep_prob`` and ``num_layers``.
        """
        self.size = config.hidden_size
        self.config = config
        self.is_training = is_training
        self.word_vec_size = config.word_vec_size
        # Read but not used further in this method; retained from the original.
        vocab_size = config.vocab_size
        self.glove_word_vectors = glove_word_vectors
        self.vocabulary = vocabulary

        # The base cells are created with forget_bias=1.0.
        # TODO: these might be improved by using LSTMCell, which has other
        # performance features, but that would require the sentence_length.
        scope_names = ("LeftLSTM", "RightLSTM")
        cells = {}

        # Stage 1: one basic LSTM cell per side, each under its own scope.
        for scope_name in scope_names:
            with tf.variable_scope(scope_name):
                cells[scope_name] = rnn_cell.BasicLSTMCell(self.size,
                                                           forget_bias=1.0)

        # Stage 2: optional output dropout (training only).
        if is_training and config.keep_prob < 1:
            for scope_name in scope_names:
                with tf.variable_scope(scope_name):
                    cells[scope_name] = rnn_cell.DropoutWrapper(
                        cells[scope_name], output_keep_prob=config.keep_prob)

        # Stage 3: stack the same (possibly wrapped) cell object
        # config.num_layers times.
        for scope_name in scope_names:
            with tf.variable_scope(scope_name):
                cells[scope_name] = rnn_cell.MultiRNNCell(
                    [cells[scope_name]] * config.num_layers)

        self.left_lstm_cell = cells["LeftLSTM"]
        self.right_lstm_cell = cells["RightLSTM"]
Example #2
0
    def __init__(self, rnn_size, rnn_layer, batch_size, input_embedding_size, dim_image, dim_hidden, max_words_q, vocabulary_size, drop_out_rate):
        """Store the hyper-parameters and create the embeddings and LSTM stack.

        All arguments are stored on the instance under the same name.
        ``batch_size`` and ``max_words_q`` are only stored here; the other
        values also size the variables created below.
        """
        self.rnn_size = rnn_size
        self.rnn_layer = rnn_layer
        self.batch_size = batch_size
        self.input_embedding_size = input_embedding_size
        self.dim_image = dim_image
        self.dim_hidden = dim_hidden
        self.max_words_q = max_words_q
        self.vocabulary_size = vocabulary_size
        self.drop_out_rate = drop_out_rate

        def uniform(shape):
            # Every weight and bias below is drawn uniformly from [-0.08, 0.08).
            return tf.random_uniform(shape, -0.08, 0.08)

        # Question (word) embedding.
        self.embed_ques_W = tf.Variable(
            uniform([self.vocabulary_size, self.input_embedding_size]),
            name='embed_ques_W')

        # RNN encoder: two peephole LSTM cells, each with output dropout.
        # NOTE(review): exactly two cells are stacked regardless of rnn_layer,
        # while embed_state_W below is sized with rnn_layer — confirm callers
        # always pass rnn_layer == 2.
        self.lstm_1 = rnn_cell.LSTMCell(rnn_size, input_embedding_size, use_peepholes=True)
        self.lstm_dropout_1 = rnn_cell.DropoutWrapper(
            self.lstm_1, output_keep_prob=1 - self.drop_out_rate)
        self.lstm_2 = rnn_cell.LSTMCell(rnn_size, rnn_size, use_peepholes=True)
        self.lstm_dropout_2 = rnn_cell.DropoutWrapper(
            self.lstm_2, output_keep_prob=1 - self.drop_out_rate)
        self.stacked_lstm = rnn_cell.MultiRNNCell(
            [self.lstm_dropout_1, self.lstm_dropout_2])

        # State embedding.
        self.embed_state_W = tf.Variable(
            uniform([2*rnn_size*rnn_layer, self.dim_hidden]),
            name='embed_state_W')
        self.embed_state_b = tf.Variable(
            uniform([self.dim_hidden]), name='embed_state_b')

        # Image embedding.
        self.embed_image_W = tf.Variable(
            uniform([dim_image, self.dim_hidden]), name='embed_image_W')
        self.embed_image_b = tf.Variable(
            uniform([dim_hidden]), name='embed_image_b')

        # Score embedding. `num_output` is not a parameter of this method; it
        # is resolved from an enclosing/module scope at call time.
        self.embed_scor_W = tf.Variable(
            uniform([dim_hidden, num_output]), name='embed_scor_W')
        self.embed_scor_b = tf.Variable(
            uniform([num_output]), name='embed_scor_b')
Example #3
0
def build_lm_multicell_rnn(num_layers,
                           hidden_size,
                           word_proj_size,
                           use_lstm=True,
                           hidden_projection=None,
                           input_feeding=False,
                           dropout=0.0):
    """Build the stacked recurrent cell for the language model.

    Args:
        num_layers: number of layers; values <= 1 yield a single-layer stack.
        hidden_size: ``num_units`` of every cell.
        word_proj_size: added to ``hidden_size`` for the first layer's input
            size when ``input_feeding`` is set.
        use_lstm: choose ``rnn_cell.LSTMCell`` (truthy) or ``GRUCell``.
        hidden_projection: passed as ``num_proj``; forced to ``None`` for GRUs.
        input_feeding: selects the first layer's input size (see above).
        dropout: every layer is wrapped with
            ``output_keep_prob = 1.0 - dropout``.

    Returns:
        An ``rnn_cell.MultiRNNCell`` made of the first layer followed by
        ``num_layers - 1`` references to one shared upper-layer cell.
    """
    if not use_lstm:
        print("I'm building the model with GRU cells")
        if hidden_projection is not None:
            print("I'm ignoring the projection size for GRUs.")
            hidden_projection = None
        cell_class = GRUCell
    else:
        print("I'm building the model with LSTM cells")
        cell_class = rnn_cell.LSTMCell

    initializer = tf.random_uniform_initializer(minval=-0.1,
                                                maxval=0.1,
                                                seed=1234)

    def make_layer(input_size):
        # One recurrent cell with output dropout applied on top.
        bare_cell = cell_class(num_units=hidden_size,
                               input_size=input_size,
                               initializer=initializer,
                               num_proj=hidden_projection)
        return rnn_cell.DropoutWrapper(bare_cell,
                                       output_keep_prob=1.0 - dropout)

    first_input_size = (word_proj_size + hidden_size
                        if input_feeding else hidden_size)
    layers = [make_layer(first_input_size)]

    if num_layers > 1:
        # Upper layers consume the projected output when a projection is used.
        upper_input_size = (hidden_size if hidden_projection is None
                            else hidden_projection)
        layers = layers + [make_layer(upper_input_size)] * (num_layers - 1)

    return rnn_cell.MultiRNNCell(layers)
Example #4
0
def build_nmt_multicell_rnn(num_layers_encoder,
                            num_layers_decoder,
                            encoder_size,
                            decoder_size,
                            source_proj_size,
                            use_lstm=True,
                            input_feeding=True,
                            dropout=0.0):
    """Build the stacked encoder and decoder cells for the NMT model.

    Args:
        num_layers_encoder: how many times the single encoder cell is repeated.
        num_layers_decoder: decoder depth; values <= 1 yield one layer.
        encoder_size: ``num_units`` of the encoder cell.
        decoder_size: ``num_units`` of every decoder cell.
        source_proj_size: input size of the encoder cell.
        use_lstm: choose ``rnn_cell.LSTMCell`` (truthy) or ``GRUCell``.
        input_feeding: when truthy the first decoder layer's input size is
            ``decoder_size * 2``, otherwise ``decoder_size``.
        dropout: every cell is wrapped with
            ``output_keep_prob = 1.0 - dropout``.

    Returns:
        Tuple ``(encoder_rnncell, decoder_rnncell)`` of ``MultiRNNCell``s.
    """
    if use_lstm:
        print("I'm building the model with LSTM cells")
        cell_class = rnn_cell.LSTMCell
    else:
        print("I'm building the model with GRU cells")
        cell_class = GRUCell

    initializer = tf.random_uniform_initializer(minval=-0.1,
                                                maxval=0.1,
                                                seed=1234)

    def new_cell(num_units, input_size):
        return cell_class(num_units=num_units,
                          input_size=input_size,
                          initializer=initializer)

    def with_dropout(cell):
        # Applied unconditionally; dropout == 0.0 gives a keep probability
        # of 1.0.
        return rnn_cell.DropoutWrapper(cell, output_keep_prob=1.0 - dropout)

    bare_encoder = new_cell(encoder_size, source_proj_size)
    first_decoder_input = decoder_size * 2 if input_feeding else decoder_size
    bare_decoder0 = new_cell(decoder_size, first_decoder_input)

    # The same wrapped encoder cell object is repeated for every layer.
    encoder_rnncell = rnn_cell.MultiRNNCell(
        [with_dropout(bare_encoder)] * num_layers_encoder)

    decoder_layers = [with_dropout(bare_decoder0)]
    if num_layers_decoder > 1:
        upper_decoder = with_dropout(new_cell(decoder_size, decoder_size))
        decoder_layers = decoder_layers + [upper_decoder] * (
            num_layers_decoder - 1)
    decoder_rnncell = rnn_cell.MultiRNNCell(decoder_layers)

    return encoder_rnncell, decoder_rnncell
Example #5
0
    def __init__(self,
                 dim_image,
                 n_words,
                 dim_hidden,
                 batch_size,
                 n_lstm_steps,
                 drop_out_rate,
                 bias_init_vector=None):
        """Store the hyper-parameters and create the model variables.

        Args:
            dim_image: first dimension of the image-encoding weight matrix.
            n_words: vocabulary size (rows of ``Wemb``, columns of
                ``embed_word_W``).
            dim_hidden: hidden size shared by the LSTM and the projections.
            batch_size: stored on the instance only.
            n_lstm_steps: stored on the instance only.
            drop_out_rate: the LSTM output keep probability is
                ``1 - drop_out_rate``.
            bias_init_vector: optional array used (cast to float32) as the
                initial value of ``embed_word_b``; zeros when ``None``.
        """
        self.dim_image = dim_image
        self.n_words = n_words
        self.dim_hidden = dim_hidden
        self.batch_size = batch_size
        self.n_lstm_steps = n_lstm_steps
        self.drop_out_rate = drop_out_rate

        def uniform(shape):
            # Weight matrices are drawn uniformly from [-0.1, 0.1).
            return tf.random_uniform(shape, -0.1, 0.1)

        # The word-embedding table is pinned to the CPU.
        with tf.device("/cpu:0"):
            self.Wemb = tf.Variable(uniform([n_words, dim_hidden]),
                                    name='Wemb')

        # Peephole LSTM; the second positional argument (2 * dim_hidden) is
        # presumably the input size in the TF version targeted — TODO confirm.
        self.lstm3 = rnn_cell.LSTMCell(self.dim_hidden,
                                       2 * self.dim_hidden,
                                       use_peepholes=True)
        keep_probability = 1 - self.drop_out_rate
        self.lstm3_dropout = rnn_cell.DropoutWrapper(
            self.lstm3, output_keep_prob=keep_probability)

        # Image encoder.
        self.encode_image_W = tf.Variable(uniform([dim_image, dim_hidden]),
                                          name='encode_image_W')
        self.encode_image_b = tf.Variable(tf.zeros([dim_hidden]),
                                          name='encode_image_b')

        # Attention parameters.
        self.embed_att_w = tf.Variable(uniform([dim_hidden, 1]),
                                       name='embed_att_w')
        self.embed_att_Wa = tf.Variable(uniform([dim_hidden, dim_hidden]),
                                        name='embed_att_Wa')
        self.embed_att_Ua = tf.Variable(uniform([dim_hidden, dim_hidden]),
                                        name='embed_att_Ua')
        self.embed_att_ba = tf.Variable(tf.zeros([dim_hidden]),
                                        name='embed_att_ba')

        # Output word projection.
        self.embed_word_W = tf.Variable(uniform([dim_hidden, n_words]),
                                        name='embed_word_W')
        if bias_init_vector is None:
            word_bias_init = tf.zeros([n_words])
        else:
            word_bias_init = bias_init_vector.astype(np.float32)
        self.embed_word_b = tf.Variable(word_bias_init, name='embed_word_b')

        self.embed_nn_Wp = tf.Variable(uniform([3 * dim_hidden, dim_hidden]),
                                       name='embed_nn_Wp')
        self.embed_nn_bp = tf.Variable(tf.zeros([dim_hidden]),
                                       name='embed_nn_bp')
Example #6
0
    def testDropout(self):
        """Compare a Plus1RNNCell with and without (near-total) input dropout."""
        batch_size = 2
        plain_cell = Plus1RNNCell()
        # input_keep_prob of 1e-12 drops essentially every input element.
        full_dropout_cell = rnn_cell.DropoutWrapper(plain_cell,
                                                    input_keep_prob=1e-12,
                                                    seed=0)
        # The same placeholder object is used for all ten time steps, so
        # feeding inputs[0] feeds every step.
        shared_placeholder = tf.placeholder(tf.float32, shape=(batch_size, 5))
        inputs = [shared_placeholder] * 10

        with tf.variable_scope("share_scope"):
            outputs, states = rnn.rnn(plain_cell, inputs, dtype=tf.float32)
        with tf.variable_scope("drop_scope"):
            dropped_outputs, _ = rnn.rnn(full_dropout_cell,
                                         inputs,
                                         dtype=tf.float32)

        # Static checks: one output per input with matching shape and dtype.
        self.assertEqual(len(outputs), len(inputs))
        for output_t, input_t in zip(outputs, inputs):
            self.assertEqual(output_t.get_shape().as_list(),
                             input_t.get_shape().as_list())
            self.assertEqual(output_t.dtype, input_t.dtype)

        with self.test_session(use_gpu=False) as sess:
            input_value = np.random.randn(batch_size, 5)
            feed = {inputs[0]: input_value}
            values = sess.run(outputs + [states[-1]], feed_dict=feed)
            full_dropout_values = sess.run(dropped_outputs, feed_dict=feed)

            # The last fetched entry is the final state; all others are
            # outputs.
            for plain_value in values[:-1]:
                self.assertAllClose(plain_value, input_value + 1.0)
            # Add 1.0 to dropped_out (all zeros).
            expected_dropped = np.ones_like(input_value)
            for dropped_value in full_dropout_values[:-1]:
                self.assertAllClose(dropped_value, expected_dropped)
Example #7
0
    def __init__(self,
                 vocab_size,
                 size=256,
                 depth=2,
                 learning_rate=1e-4,
                 batch_size=32,
                 keep_prob=0.1,
                 num_steps=100,
                 checkpoint_dir="checkpoint",
                 forward_only=False):
        """Initialize the parameters and graph for the Deep LSTM model.

        Args:
          vocab_size: int, the dimensionality of the input vocab.
          size: int, the dimensionality of the inputs into the Deep LSTM cell
            [32, 64, 256].
          depth: int, how many times the LSTM cell is stacked.
          learning_rate: float, [1e-3, 5e-4, 1e-4, 5e-5].
          batch_size: int, the size of a batch [16, 32].
          keep_prob: float between 0 and 1 [0.0, 0.1, 0.2]; passed to
            DropoutWrapper as ``output_keep_prob``.
          num_steps: int, the max time unit [100].
          checkpoint_dir: not used in this method.
          forward_only: when truthy, no dropout wrapper is added.
        """
        # NOTE(review): the class name says "Bi" but only a unidirectional
        # rnn.rnn is built here — confirm whether that is intended.
        # NOTE(review): the suggested keep_prob values (0.0-0.2) look like
        # drop rates rather than keep probabilities — confirm.
        super(DeepBiLSTM, self).__init__()

        self.vocab_size = int(vocab_size)
        self.size = int(size)
        self.depth = int(depth)
        self.learning_rate = float(learning_rate)
        self.batch_size = int(batch_size)
        self.keep_prob = float(keep_prob)
        # Fixed: the original read an undefined name `seq_length` here.
        self.num_steps = int(num_steps)

        self.inputs = tf.placeholder(tf.int32,
                                     [self.batch_size, self.num_steps])
        self.input_lengths = tf.placeholder(tf.int64, [self.batch_size])

        with tf.device("/cpu:0"):
            # NOTE(review): truncated_normal takes (shape, mean, stddev), so
            # this is mean=-0.1, stddev=0.1 — confirm that a uniform
            # [-0.1, 0.1] initializer was not intended.
            self.emb = tf.Variable(tf.truncated_normal(
                [self.vocab_size, self.size], -0.1, 0.1),
                                   name='emb')
            # Fixed: a leftover `ipdb.set_trace()` breakpoint was removed.
            # Inputs are transposed so the lookup result is time-major and can
            # be unpacked into one tensor per time step below.
            self.embed_inputs = tf.nn.embedding_lookup(
                self.emb, tf.transpose(self.inputs))

        self.cell = rnn_cell.BasicLSTMCell(self.size, forget_bias=0.0)

        # Fixed: the original wrapped an undefined `lstm_cell` *after* the
        # stack had been built, so it raised NameError and could never affect
        # the stack. Dropout is now applied to the layer cell before stacking;
        # self.cell stays the bare LSTM cell.
        layer_cell = self.cell
        if not forward_only and self.keep_prob < 1:
            layer_cell = rnn_cell.DropoutWrapper(
                layer_cell, output_keep_prob=self.keep_prob)
        self.stacked_cell = rnn_cell.MultiRNNCell([layer_cell] * self.depth)

        self.initial_state = self.stacked_cell.zero_state(
            self.batch_size, tf.float32)

        self.outputs, self.states = rnn.rnn(self.stacked_cell,
                                            tf.unpack(self.embed_inputs),
                                            dtype=tf.float32,
                                            sequence_length=self.input_lengths,
                                            initial_state=self.initial_state)

        # Fixed: `self.output` did not exist; the attribute is `self.outputs`.
        # Sum of the per-step outputs over time; not consumed further here.
        output = tf.reduce_sum(tf.pack(self.outputs), 0)
Example #8
0
        def _shared_layer(input_data, config):
            """Run the shared stacked-LSTM encoder over the input sequence.

            Args:
                input_data: tensor described by the original author as
                    batch_size X num_steps X embedding size; it is split into
                    ``config.num_steps`` slices along axis 1.
                config: provides ``encoder_size``, ``num_steps``,
                    ``keep_prob``, ``num_shared_layers`` and ``batch_size``.

            Returns:
                Tuple ``(encoder_outputs, initial_state)``: the per-step
                outputs of ``rnn.rnn`` and the zero state the encoder was
                started from.
            """
            # NOTE(review): the zero initial state is returned, not the final
            # encoder state — confirm this is what callers expect.
            base_cell = rnn_cell.BasicLSTMCell(config.encoder_size)

            # One 2-D tensor per time step (axis 1 squeezed away).
            step_slices = tf.split(1, config.num_steps, input_data)
            inputs = [tf.squeeze(step_slice, [1]) for step_slice in step_slices]

            # `is_training` comes from the enclosing scope.
            if is_training and config.keep_prob < 1:
                base_cell = rnn_cell.DropoutWrapper(
                    base_cell, output_keep_prob=config.keep_prob)

            stacked_cell = rnn_cell.MultiRNNCell(
                [base_cell] * config.num_shared_layers)
            zero_state = stacked_cell.zero_state(config.batch_size, tf.float32)

            encoder_outputs, _ = rnn.rnn(stacked_cell,
                                         inputs,
                                         initial_state=zero_state,
                                         scope="encoder_rnn")

            return encoder_outputs, zero_state
Example #9
0
    def prediction(self):
        """Build the bidirectional LSTM tagger and return per-step softmax output.

        Reads ``self.data``, ``self.target``, ``self.length``,
        ``self.dropout``, ``self._num_hidden`` and ``self._num_layers``.

        Returns:
            Softmax probabilities reshaped to
            ``[-1, max_length, num_classes]``, where both trailing sizes are
            taken from the static shape of ``self.target``.
        """
        def dropout_lstm():
            # One LSTM cell with output dropout on top.
            lstm = rnn_cell.LSTMCell(self._num_hidden)
            return rnn_cell.DropoutWrapper(lstm, output_keep_prob=self.dropout)

        fw_cell = dropout_lstm()
        bw_cell = dropout_lstm()

        if self._num_layers > 1:
            fw_cell = rnn_cell.MultiRNNCell([fw_cell] * self._num_layers)
            bw_cell = rnn_cell.MultiRNNCell([bw_cell] * self._num_layers)

        # Swap the first two axes and unpack into one tensor per time step.
        time_major = tf.transpose(self.data, perm=[1, 0, 2])
        output, _, _ = rnn.bidirectional_rnn(fw_cell,
                                             bw_cell,
                                             tf.unpack(time_major),
                                             dtype=tf.float32,
                                             sequence_length=self.length)

        max_length = int(self.target.get_shape()[1])
        num_classes = int(self.target.get_shape()[2])
        weight, bias = self._weight_and_bias(2*self._num_hidden, num_classes)

        # Swap the axes back and flatten so one weight matrix serves all steps.
        batch_major = tf.transpose(tf.pack(output), perm=[1, 0, 2])
        flat_output = tf.reshape(batch_major, [-1, 2*self._num_hidden])
        probabilities = tf.nn.softmax(tf.matmul(flat_output, weight) + bias)
        return tf.reshape(probabilities, [-1, max_length, num_classes])
Example #10
0
    def BiRNN(self, _X, _istate_fw, _istate_bw, _weights, _biases):
        """Build a stacked bidirectional LSTM with input and output projections.

        Args:
            _X: input tensor; per the original comment its shape is
                (batch_size, n_steps, n_input).
            _istate_fw: initial state for the forward stack.
            _istate_bw: initial state for the backward stack.
            _weights: mapping with ``'hidden'`` and ``'out'`` weight matrices.
            _biases: mapping with ``'hidden'`` and ``'out'`` bias vectors.

        Returns:
            Tuple ``(projected_outputs, final_fw, final_bw)`` where
            ``projected_outputs`` is a list with one tensor per time step.
        """
        cfg = self.config

        # Put the step axis first, then flatten to
        # (n_steps*batch_size, n_input) for the input projection.
        steps_first = tf.transpose(_X, [1, 0, 2])
        flattened = tf.reshape(steps_first, [-1, cfg.num_input])
        hidden_input = tf.matmul(flattened, _weights['hidden']) + _biases['hidden']

        # Forward direction cell.
        # NOTE(review): the 4th positional argument (0.8) is presumably the
        # `seed` parameter of DropoutWrapper in this TF version, and the
        # backward cell passes no such value — confirm this is intended.
        fw_layer = rnn_cell.DropoutWrapper(
            rnn_cell.BasicLSTMCell(cfg.num_hidden),
            cfg.input_keep_prob,
            cfg.output_keep_prob,
            0.8)
        rnn_fw_cell = rnn_cell.MultiRNNCell([fw_layer] * cfg.model_depth)

        # Backward direction cell.
        bw_layer = rnn_cell.DropoutWrapper(
            rnn_cell.BasicLSTMCell(cfg.num_hidden),
            cfg.input_keep_prob,
            cfg.output_keep_prob)
        rnn_bw_cell = rnn_cell.MultiRNNCell([bw_layer] * cfg.model_depth)

        # The RNN loop needs a list of inputs:
        # n_steps * (batch_size, n_hidden).
        step_inputs = tf.split(0, cfg.num_steps, hidden_input)

        outputs, final_fw, final_bw = rnn.bidirectional_rnn(
            rnn_fw_cell,
            rnn_bw_cell,
            step_inputs,
            initial_state_fw=_istate_fw,
            initial_state_bw=_istate_bw)

        # Output projection applied independently to every time step.
        projected_outputs = [
            tf.matmul(step_output, _weights['out']) + _biases['out']
            for step_output in outputs
        ]
        return projected_outputs, final_fw, final_bw
    def __init__(self, config, is_training):
        """Build the LSTM graph with a sigmoid cross-entropy cost.

        Args:
            config: provides ``batch_size``, ``num_steps``, ``hidden_size``,
                ``num_layers``, ``keep_prob`` and ``max_grad_norm``.
            is_training: when falsy the method returns before the optimizer is
                created; when truthy (and ``keep_prob < 1``) output dropout is
                added around the stacked cell.
        """
        batch_size = config.batch_size
        num_steps = config.num_steps
        size = config.hidden_size
        self.batch_size = batch_size
        self.num_steps = num_steps

        base_lstm = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
        cell = rnn_cell.MultiRNNCell([base_lstm] * config.num_layers)

        # Dropout wraps the whole stack, i.e. only the top layer's output.
        if is_training and config.keep_prob < 1:
            cell = rnn_cell.DropoutWrapper(cell,
                                           output_keep_prob=config.keep_prob)

        self.cell = cell

        sequence_shape = [None, num_steps, 1]
        self.input_data = tf.placeholder(dtype=tf.float32,
                                         shape=sequence_shape)
        self.target_data = tf.placeholder(dtype=tf.float32,
                                          shape=sequence_shape)
        self.initial_state = cell.zero_state(batch_size=config.batch_size,
                                             dtype=tf.float32)

        # One [batch, 1] tensor per time step.
        inputs = [
            tf.squeeze(step_slice, [1])
            for step_slice in tf.split(1, num_steps, self.input_data)
        ]

        with tf.variable_scope('rnnvm'):
            output_w = tf.get_variable("output_w", [size, 1])
            output_b = tf.get_variable("output_b", [1])

        outputs, states = seq2seq.rnn_decoder(inputs,
                                              self.initial_state,
                                              cell,
                                              scope='rnnvm')

        # Flatten the per-step outputs to rows of `size` features and project
        # each row to a single logit.
        flat_outputs = tf.reshape(tf.concat(1, outputs), [-1, size])
        logits = tf.nn.xw_plus_b(flat_outputs, output_w, output_b)
        flat_targets = tf.reshape(self.target_data,
                                  shape=[num_steps * batch_size, 1])

        entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits, flat_targets)

        cost = tf.reduce_mean(entropy)
        self.cost = cost
        self.final_state = states[-1]

        if not is_training:
            return

        # Training only: clipped gradients applied with Adam.
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #12
0
        def _chunk_private(encoder_units, pos_prediction, config):
            """Decode chunk-tag logits from encoder units and POS predictions.

            Args:
                encoder_units: encoder outputs; concatenated with the reshaped
                    ``pos_prediction`` along axis 2.
                pos_prediction: reshaped to
                    ``[batch_size, num_steps, pos_embedding_size]`` using
                    values from the enclosing scope.
                config: provides ``chunk_decoder_size``, ``keep_prob``,
                    ``batch_size``, ``num_steps`` and ``num_chunk_tags``.

            Returns:
                Tuple ``(logits, decoder_states)``.
            """
            # Concatenate the POS prediction and the encoder units per step.
            pos_by_step = tf.reshape(
                pos_prediction, [batch_size, num_steps, pos_embedding_size])
            chunk_inputs = tf.concat(2, [pos_by_step, encoder_units])

            with tf.variable_scope("chunk_decoder"):
                decoder_cell = rnn_cell.BasicLSTMCell(config.chunk_decoder_size,
                                                      forget_bias=1.0)

                # `is_training` comes from the enclosing scope.
                if is_training and config.keep_prob < 1:
                    decoder_cell = rnn_cell.DropoutWrapper(
                        decoder_cell, output_keep_prob=config.keep_prob)

                zero_state = decoder_cell.zero_state(config.batch_size,
                                                     tf.float32)

                # Turn the 3-D tensor into a list of 2-D tensors
                # (batch_size x input size), one per time step.
                step_slices = tf.split(1, config.num_steps, chunk_inputs)
                step_inputs = [
                    tf.squeeze(step_slice, [1]) for step_slice in step_slices
                ]

                decoder_outputs, decoder_states = rnn.rnn(
                    decoder_cell,
                    step_inputs,
                    initial_state=zero_state,
                    scope="chunk_rnn")

                flat_outputs = tf.reshape(tf.concat(1, decoder_outputs),
                                          [-1, config.chunk_decoder_size])

                softmax_w = tf.get_variable(
                    "softmax_w",
                    [config.chunk_decoder_size, config.num_chunk_tags])
                softmax_b = tf.get_variable("softmax_b",
                                            [config.num_chunk_tags])
                logits = tf.matmul(flat_outputs, softmax_w) + softmax_b

            return logits, decoder_states
Example #13
0
    def __init__(self, is_training, config):
        """Build an LSTM regression graph with a mean-squared-error cost.

        Args:
            is_training: when falsy the method returns before the optimizer is
                created; when truthy (and ``config.keep_prob < 1``) dropout is
                applied to the projected inputs and to each LSTM cell output.
            config: provides ``batch_size``, ``num_steps``, ``hidden_size``,
                ``keep_prob``, ``num_layers`` and ``max_grad_norm``.
        """
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size

        self._input_data = tf.placeholder(tf.float32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.float32, [batch_size, num_steps])

        lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
        if is_training and config.keep_prob < 1:
            lstm_cell = rnn_cell.DropoutWrapper(
                lstm_cell, output_keep_prob=config.keep_prob)
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        # Project every scalar input ([batch_size, 1] per step) to `size`
        # features with a shared linear layer.
        iw = tf.get_variable("input_w", [1, size])
        ib = tf.get_variable("input_b", [size])
        inputs = [
            tf.nn.xw_plus_b(i_, iw, ib)
            for i_ in tf.split(1, num_steps, self._input_data)
        ]
        if is_training and config.keep_prob < 1:
            inputs = [
                tf.nn.dropout(input_, config.keep_prob) for input_ in inputs
            ]

        outputs, states = rnn.rnn(cell,
                                  inputs,
                                  initial_state=self._initial_state)
        rnn_output = tf.reshape(tf.concat(1, outputs), [-1, size])

        # One scalar prediction per row: shape [batch_size * num_steps, 1].
        self._output = output = tf.nn.xw_plus_b(
            rnn_output, tf.get_variable("out_w", [size, 1]),
            tf.get_variable("out_b", [1]))

        # Fixed: the targets were flattened to shape [N] while `output` is
        # [N, 1], so the subtraction broadcast to [N, N] and the cost averaged
        # every prediction against every target. Reshaping the targets to a
        # column keeps the difference element-wise.
        targets = tf.reshape(self._targets, [-1, 1])
        self._cost = cost = tf.reduce_mean(tf.square(output - targets))
        self._final_state = states[-1]

        if not is_training:
            return

        # Training only: clipped gradients applied with Adam. The variable is
        # read through self._lr, which was assigned on the previous line, so
        # this no longer relies on an `lr` accessor defined elsewhere.
        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self._lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #14
0
    def _testDoubleInputWithDropoutAndDynamicCalculation(self, use_gpu):
        """Smoke test for using LSTM with doubles, dropout, dynamic calculation.

        Only lengths and dtypes are asserted; no numeric values are checked.
        """
        num_units = 3
        input_size = 5
        batch_size = 2
        num_proj = 4
        num_proj_shards = 4
        num_unit_shards = 2
        lengths = [2, 3]

        with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
            sequence_length = tf.placeholder(tf.int64)
            initializer = tf.random_uniform_initializer(-0.01,
                                                        0.01,
                                                        seed=self._seed)
            # The same float64 placeholder is reused for all ten steps.
            inputs = 10 * [tf.placeholder(tf.float64)]

            lstm = rnn_cell.LSTMCell(num_units,
                                     input_size=input_size,
                                     use_peepholes=True,
                                     num_proj=num_proj,
                                     num_unit_shards=num_unit_shards,
                                     num_proj_shards=num_proj_shards,
                                     initializer=initializer)
            dropout_cell = rnn_cell.DropoutWrapper(lstm, 0.5, seed=0)
            zero_state = lstm.zero_state(batch_size, tf.float64)

            outputs, states = rnn.rnn(dropout_cell,
                                      inputs,
                                      sequence_length=sequence_length,
                                      initial_state=zero_state)

            self.assertEqual(len(outputs), len(inputs))
            self.assertEqual(len(outputs), len(states))

            tf.initialize_all_variables().run(
                feed_dict={sequence_length: lengths})

            input_value = np.asarray(np.random.randn(batch_size, input_size),
                                     dtype=np.float64)
            feed = {inputs[0]: input_value, sequence_length: lengths}
            # Outputs and states are fetched in two separate runs.
            values = sess.run(outputs, feed_dict=feed)
            state_values = sess.run(states, feed_dict=feed)

            self.assertEqual(values[0].dtype, input_value.dtype)
            self.assertEqual(state_values[0].dtype, input_value.dtype)
Example #15
0
        def create_cell(input_size):
            """Create one recurrent cell of the configured type.

            ``cell_type``, ``hidden_size``, ``training`` and ``dropout_prob``
            come from the enclosing scope.

            Args:
                input_size: forwarded to the cell constructor.

            Returns:
                The cell, wrapped with output dropout when ``training`` is
                truthy.

            Raises:
                Exception: if ``cell_type`` is not "vanilla", "gru" or "lstm".
            """
            # The attribute is looked up on rnn_cell only for the matching
            # type.
            # NOTE(review): "BasicGRUCell" is not a name I recognise from
            # TensorFlow's rnn_cell module — confirm it exists in the module
            # imported here.
            known_types = (
                ("vanilla", "BasicRNNCell"),
                ("gru", "BasicGRUCell"),
                ("lstm", "BasicLSTMCell"),
            )
            for type_name, class_name in known_types:
                if cell_type == type_name:
                    cell_class = getattr(rnn_cell, class_name)
                    break
            else:
                raise Exception("Invalid cell type: {}".format(cell_type))

            new_cell = cell_class(hidden_size, input_size = input_size)
            if not training:
                return new_cell
            return rnn_cell.DropoutWrapper(new_cell, output_keep_prob = dropout_prob)
Example #16
0
 def prediction(self):
     """Build the stacked GRU tagger and return per-step softmax output.

     Reads ``self.data``, ``self.target``, ``self.dropout``,
     ``self._num_hidden`` and ``self._num_layers``.

     Returns:
         Softmax probabilities reshaped to ``[-1, max_length, num_classes]``,
         where both trailing sizes are taken from the static shape of
         ``self.target``.
     """
     # Recurrent network: one GRU cell with output dropout, repeated
     # self._num_layers times.
     network = rnn_cell.GRUCell(self._num_hidden)
     network = rnn_cell.DropoutWrapper(
         network, output_keep_prob=self.dropout)
     network = rnn_cell.MultiRNNCell([network] * self._num_layers)
     # Fixed: the original passed a bare `data`, which is not defined in
     # this method; the input is read from the instance like the other
     # attributes used here.
     output, _ = rnn.dynamic_rnn(network, self.data, dtype=tf.float32)
     # Softmax layer.
     max_length = int(self.target.get_shape()[1])
     num_classes = int(self.target.get_shape()[2])
     weight, bias = self._weight_and_bias(self._num_hidden, num_classes)
     # Flatten to apply same weights to all time steps.
     output = tf.reshape(output, [-1, self._num_hidden])
     prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
     prediction = tf.reshape(prediction, [-1, max_length, num_classes])
     return prediction
Example #17
0
    def __init__(self, is_training, config):
        """Build an LSTM regression graph with RMSE and MAPE costs.

        Args:
            is_training: when falsy (validation, test or prediction) the method
                returns before the optimizer is created, so no weights are
                updated; when truthy (and ``config.keep_prob < 1``) dropout is
                applied to the projected inputs and to each LSTM cell output.
            config: provides ``batch_size``, ``num_steps``, ``hidden_size``,
                ``keep_prob``, ``num_layers`` and ``max_grad_norm``.
        """
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size

        # Placeholders are fed with real data at run time; inputs and targets
        # both hold batch_size x num_steps values.
        self._input_data = tf.placeholder(tf.float32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.float32, [batch_size, num_steps])

        # Ready-made LSTM unit (a plain rnn_cell.BasicRNNCell(size) would be
        # the simple-RNN alternative).
        lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
        if is_training and config.keep_prob < 1:
            lstm_cell = rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=config.keep_prob)
        # Stack of several recurrent units.
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        # Input projection: the linear map y = wx + b applied to every step.
        iw = tf.get_variable("input_w", [1, size])
        ib = tf.get_variable("input_b", [size])
        # split cuts the input evenly along the column axis into num_steps
        # tensors.
        inputs = [tf.nn.xw_plus_b(i_, iw, ib) for i_ in tf.split(1, num_steps, self._input_data)]
        if is_training and config.keep_prob < 1:
            inputs = [tf.nn.dropout(input_, config.keep_prob) for input_ in inputs]

        outputs, states = rnn.rnn(cell, inputs, initial_state=self._initial_state)
        # concat(1, ...) joins p tensors of shape m x n into m x (n*p); the
        # reshape to [-1, size] keeps the element count and yields one row of
        # `size` features per input value.
        rnn_output = tf.reshape(tf.concat(1, outputs), [-1, size])

        # Final network output: one prediction per input value, shape
        # [batch_size * num_steps, 1].
        self._output = output = tf.nn.xw_plus_b(rnn_output,
                                                tf.get_variable("out_w", [size, 1]),
                                                tf.get_variable("out_b", [1]))

        # Fixed: the targets were flattened to shape [N] while `output` is
        # [N, 1], so every element-wise op below broadcast to [N, N] and
        # compared each prediction with every target. Reshaping the targets to
        # a column keeps both costs element-wise.
        targets = tf.reshape(self._targets, [-1, 1])
        # Root-mean-square error between predictions and targets.
        self._cost = cost = tf.sqrt(tf.reduce_mean((output - targets)**2))
        # Mean absolute error relative to the target value (targets of 0
        # divide by zero).
        self._cost_MAPE = tf.reduce_mean(tf.abs(output - targets) / targets)
        self._final_state = states

        if not is_training:
            return

        # Training only: back-propagate with clipped gradients and update the
        # weights by gradient descent. The variable is read through self._lr,
        # which was assigned on the previous line, so this no longer relies on
        # an `lr` accessor defined elsewhere.
        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self._lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #18
0
 def testDropoutWrapper(self):
     """Smoke-test a GRUCell wrapped in DropoutWrapper with keep values of 1."""
     with self.test_session() as sess:
         half_init = tf.constant_initializer(0.5)
         with tf.variable_scope("root", initializer=half_init):
             inputs = tf.zeros([1, 3])
             prev_state = tf.zeros([1, 3])
             # Scalar tensor equal to 1, passed for both keep arguments.
             keep_all = tf.zeros([]) + 1
             wrapped_cell = rnn_cell.DropoutWrapper(
                 rnn_cell.GRUCell(3), keep_all, keep_all)
             out, next_state = wrapped_cell(inputs, prev_state)
             sess.run([tf.variables.initialize_all_variables()])
             # The zero tensors are overridden by name with concrete values.
             feed = {
                 inputs.name: np.array([[1., 1., 1.]]),
                 prev_state.name: np.array([[0.1, 0.1, 0.1]]),
             }
             out_val, state_val = sess.run([out, next_state], feed)
             self.assertEqual(state_val.shape, (1, 3))
             # The expected numbers were not derived by hand; this is only a
             # smoke test.
             self.assertAllClose(out_val, [[0.154605, 0.154605, 0.154605]])
Example #19
0
    def __init__(self, is_training):
        """Create the placeholders and the (optionally dropout-wrapped) LSTM cell.

        Args:
            is_training: when true and ``keep_prob < 1`` the LSTM cell is
                wrapped in a DropoutWrapper.
        """

        # Need to define self._train_op
        self.batch_size = batch_size = 50
        self.num_steps = num_steps = 1000
        self.hidden_size = 5000
        self.keep_prob = 0.5
        #self.num_layers = 2
        # Inputs and targets share the same int8 shape [256, batch, steps].
        self._input_data = tf.placeholder(tf.int8,
                                          [256, batch_size, num_steps])
        self._targets = tf.placeholder(tf.int8, [256, batch_size, num_steps])

        logging = tf.logging

        lstm_cell = rnn_cell.BasicLSTMCell(self.hidden_size, forget_bias=0.0)
        if is_training and self.keep_prob < 1:
            lstm_cell = rnn_cell.DropoutWrapper(
                lstm_cell, output_keep_prob=self.keep_prob)
        # The cell must be published regardless of the dropout branch;
        # previously this assignment lived inside the `if`, leaving
        # self._cell undefined for evaluation models.
        #self._cell = cell = rnn_cell.MultiRNNCell([lstm_cell] * self.num_layers)
        self._cell = cell = lstm_cell
Example #20
0
        def _pos_private(encoder_units, config):
            """Build the POS-tag decoder on top of the encoder outputs.

            Args:
                encoder_units: encoder output tensor; it is split along
                    axis 1 into ``config.num_steps`` slices, each squeezed
                    on that axis before being fed to the decoder.
                config: object supplying ``pos_decoder_size``, ``keep_prob``,
                    ``batch_size``, ``num_steps`` and ``num_pos_tags``.

            Returns:
                A ``(logits, decoder_states)`` tuple.
            """
            decoder_size = config.pos_decoder_size
            with tf.variable_scope("pos_decoder"):
                decoder_cell = rnn_cell.BasicLSTMCell(decoder_size,
                                                      forget_bias=1.0)

                # `is_training` comes from the enclosing scope.
                if is_training and config.keep_prob < 1:
                    decoder_cell = rnn_cell.DropoutWrapper(
                        decoder_cell, output_keep_prob=config.keep_prob)

                zero_state = decoder_cell.zero_state(config.batch_size,
                                                     tf.float32)

                # One tensor per time step, with the step axis removed.
                step_slices = tf.split(1, config.num_steps, encoder_units)
                step_inputs = [tf.squeeze(piece, [1]) for piece in step_slices]

                decoder_outputs, decoder_states = rnn.rnn(
                    decoder_cell,
                    step_inputs,
                    initial_state=zero_state,
                    scope="pos_rnn")

                # Flatten all step outputs into rows of width decoder_size.
                flat_outputs = tf.reshape(tf.concat(1, decoder_outputs),
                                          [-1, decoder_size])

                proj_w = tf.get_variable(
                    "softmax_w", [decoder_size, config.num_pos_tags])
                proj_b = tf.get_variable("softmax_b", [config.num_pos_tags])
                logits = tf.matmul(flat_outputs, proj_w) + proj_b

            return logits, decoder_states
Example #21
0
    def __init__(self,
                 size=256,
                 depth=3,
                 batch_size=32,
                 keep_prob=0.1,
                 max_nsteps=1000,
                 checkpoint_dir="checkpoint",
                 forward_only=False):
        """Initialize the parameters for a Deep LSTM model.

        Args:
          size: int, the dimensionality of the inputs into the Deep LSTM
            cell [32, 64, 256]
          depth: int, number of cells stacked with skip connections
          batch_size: int, the size of a batch [16, 32]
          keep_prob: float, passed to DropoutWrapper as ``output_keep_prob``
            [0.0, 0.1, 0.2]
          max_nsteps: int, the max time unit [1000]
          checkpoint_dir: stored on the instance as ``checkpoint_dir``
          forward_only: bool, when true no dropout wrapper is added
        """
        super(DeepLSTM, self).__init__()

        # Normalise the arguments once; everything below uses the normalised
        # attributes so values such as "256" or 3.0 behave like 256 and 3.
        self.size = int(size)
        self.depth = int(depth)
        self.batch_size = int(batch_size)
        self.output_size = self.depth * self.size
        self.keep_prob = float(keep_prob)
        self.max_nsteps = int(max_nsteps)
        self.checkpoint_dir = checkpoint_dir

        start = time.clock()
        print(" [*] Building Deep LSTM...")
        self.cell = LSTMCell(self.size, forget_bias=0.0)
        if not forward_only and self.keep_prob < 1:
            self.cell = rnn_cell.DropoutWrapper(
                self.cell, output_keep_prob=self.keep_prob)
        # The same cell object is repeated `depth` times in the stack.
        self.stacked_cell = MultiRNNCellWithSkipConn([self.cell] * self.depth)

        self.initial_state = self.stacked_cell.zero_state(
            self.batch_size, tf.float32)
label_input_size = sentence_length + 1

train1_input_size = sentence_length
train2_input_size = train_input_size - train1_input_size

graph = tf.Graph()
with graph.as_default():

    # Dropout keep probability, supplied through feed_dict at run time.
    keep_prob = tf.placeholder(tf.float32)

    # Parameters:
    # Definition of the LSTM cells
    lstm = rnn_cell.BasicLSTMCell(num_nodes)
    # keep_prob is a placeholder, so its value is unknown while the graph is
    # being built and cannot drive a Python `if`.  Always add the wrapper;
    # feeding keep_prob = 1.0 disables dropout.
    lstm = rnn_cell.DropoutWrapper(lstm, output_keep_prob=keep_prob)
    stacked_lstm = rnn_cell.MultiRNNCell([lstm] * number_of_layers)

    # Variables saving state across unrollings.
    saved_output = tf.Variable(tf.zeros([batch_size, num_nodes]),
                               trainable=False)
    # Width is 2 * num_nodes per layer.
    saved_state = tf.Variable(tf.zeros(
        [batch_size, num_nodes * (2 * number_of_layers)]),
                              trainable=False)

    # Embedding variables, both initialised uniformly in [-1, 1).
    embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
    x_embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
Example #23
0
    def __init__(self, is_training, config):
        """Build a single-step LSTM graph that predicts answer correctness.

        Args:
            is_training: when true, dropout is applied (if
                ``config.keep_prob < 1``) and the training op is created.
            config: object supplying ``batch_size``, ``min_lr``,
                ``num_skills``, ``hidden_size``, ``keep_prob`` and
                ``max_grad_norm``.
        """
        self._batch_size = batch_size = config.batch_size
        self._min_lr = config.min_lr
        self.num_skills = num_skills = config.num_skills
        self.hidden_size = config.hidden_size
        size = config.hidden_size
        # The one-hot input is twice as wide as the number of skills
        # (presumably skill x correct/incorrect -- confirm with the data feeder).
        input_size = num_skills*2

        # One integer per batch element for both the input and the target id.
        inputs = self._input_data = tf.placeholder(tf.int32, [batch_size])
        self._target_id = target_id = tf.placeholder(tf.int32, [batch_size])
        self._target_correctness = target_correctness = tf.placeholder(tf.float32, [batch_size])

        hidden1 = rnn_cell.LSTMCell(size, input_size)
        #hidden2 = rnn_cell.LSTMCell(size, size)
        #hidden3 = rnn_cell.LSTMCell(size, size)

        #add dropout layer between hidden layers
        if is_training and config.keep_prob < 1:
            hidden1 = rnn_cell.DropoutWrapper(hidden1, output_keep_prob=config.keep_prob)
            #hidden2 = rnn_cell.DropoutWrapper(hidden2, output_keep_prob=config.keep_prob)
            #hidden3 = rnn_cell.DropoutWrapper(hidden3, output_keep_prob=config.keep_prob)

        # Only one layer is active; the other two are commented out above.
        cell = rnn_cell.MultiRNNCell([hidden1])

        # initial state
        self._initial_state = cell.zero_state(batch_size, tf.float32)

        #one-hot encoding
        with tf.device("/cpu:0"):
            # Pair each row index with its input id, then scatter 1.0 into a
            # dense [batch_size, input_size] matrix of zeros.
            labels = tf.expand_dims(self._input_data, 1)
            indices = tf.expand_dims(tf.range(0, batch_size, 1), 1)
            concated = tf.concat(1, [indices, labels])
            inputs = tf.sparse_to_dense(concated, tf.pack([batch_size, input_size]), 1.0, 0.0)
            inputs.set_shape([batch_size, input_size])

        # NOTE(review): `outputs` and `states` are never used below.
        outputs = []
        states = []
        state = self._initial_state
        with tf.variable_scope("RNN"):
            #outputs, states = rnn.rnn(cell, inputs, initial_state=self._initial_state)
            # The cell is applied exactly once; there is no unrolling over time.
            (cell_output, state) = cell(inputs, state)
            #outputs = cell_output
            # NOTE(review): this replaces the zero state stored in
            # self._initial_state with the post-step state -- confirm that the
            # caller relies on this aliasing.
            self._final_state = self._initial_state = state


        # calculate the logits from last hidden layer to output layer
        softmax_w = tf.get_variable("softmax_w", [size, num_skills])
        softmax_b = tf.get_variable("softmax_b", [num_skills])
        logits = tf.matmul(cell_output, softmax_w) + softmax_b

        # from output nodes to pick up the right one we want
        # The logits are flattened to 1-D first, so the gathered ids index the
        # flattened tensor (presumably row * num_skills + skill -- verify
        # against the caller).
        logits = tf.reshape(logits, [-1])
        # NOTE(review): reads self.target_id, while only self._target_id is
        # assigned here; presumably a property defined elsewhere in the class.
        logit_values = tf.gather(logits, self.target_id)

        #make prediction
        self._pred = self._pred_values = pred_values = tf.sigmoid(logit_values)

        # Binary cross-entropy summed over the gathered predictions.
        loss = -tf.reduce_sum(target_correctness*tf.log(pred_values)+(1-target_correctness)*tf.log(1-pred_values))
        # loss function
        #loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logit_values, target_correctness))

        # `loss` is already reduced by reduce_sum above.
        self._cost = cost = tf.reduce_mean(loss)
        #self._cost = cost = loss

        # Evaluation models stop here; no optimizer ops are created.
        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        # apply gradient descent to minimize loss function
        # Gradients are clipped by global norm before being applied.
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), config.max_grad_norm)
        # NOTE(review): self.lr is presumably a property returning self._lr.
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        # Momentum algorithm
        #optimizer = tf.train.MomentumOptimizer(self.lr, config.momentum)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #24
0
    def __init__(self,
                 is_training,
                 config,
                 batch_size=FLAGS.batch_size,
                 do_train=True):
        """Create placeholders, the two stacked LSTMs and the bucketed model.

        Args:
            is_training: when true and ``config.keep_prob < 1`` both LSTM
                cells are wrapped in a DropoutWrapper.
            config: object supplying the hidden sizes, ``vocab_size``,
                ``buckets``, ``lr_decay``, ``max_grad_norm``, ``init_scale``,
                ``keep_prob``, ``num_layers`` and ``learning_rate``.
            batch_size: size of the leading dimension of every placeholder.
            do_train: stored as ``self.do_train``; see the review note next
                to the ``calc_gradients`` call.
        """
        self._batch_size = batch_size

        encoder_size = config.encoder_hidden_size
        vocab_size = config.vocab_size
        # The last bucket supplies the maximum lengths.
        self.max_phrase_num = config.buckets[-1][0]
        self.max_sequence_length = config.buckets[-1][0]
        self.max_phrase_len = config.buckets[-1][1]
        self.buckets = config.buckets
        self._lr_decay = config.lr_decay
        self.max_grad_norm = config.max_grad_norm
        self.global_step = tf.Variable(0, trainable=False)
        self.init_scale = config.init_scale
        self.do_train = do_train

        # One [batch_size] placeholder per refinement token position.  (A 2-D
        # placeholder used to be created here and overwritten on the very next
        # statement; that dead op has been dropped.)
        self._input_refinement = [
            tf.placeholder(tf.int32,
                           shape=[self._batch_size],
                           name="refinement_{0}".format(i))
            for i in xrange(self.max_phrase_len)
        ]

        # Per sequence position: one target placeholder and a list of
        # max_phrase_len token placeholders for that recipe segment.
        self._target = []
        self._input_recipe_segments = []
        for i in xrange(self.max_sequence_length):
            self._target.append(
                tf.placeholder(tf.int32,
                               shape=[self._batch_size],
                               name="target_{0}".format(i)))
            self._input_recipe_segments.append([])
            for j in xrange(self.max_phrase_len):
                self._input_recipe_segments[-1].append(
                    tf.placeholder(tf.int32,
                                   shape=[self._batch_size],
                                   name="recipe_segment{0}/{1}".format(i, j)))

        #ENCODER (1st LSTM Layer)

        encoder_lstm_cell = rnn_cell.BasicLSTMCell(encoder_size,
                                                   forget_bias=0.0)
        if is_training and config.keep_prob < 1:
            encoder_lstm_cell = rnn_cell.DropoutWrapper(
                encoder_lstm_cell, output_keep_prob=config.keep_prob)
        self.encoder = rnn_cell.MultiRNNCell([encoder_lstm_cell] *
                                             config.num_layers)

        self._initial_encoder_state = self.encoder.zero_state(
            self._batch_size, tf.float32
        )  #tf.ones([self._batch_size, config.num_layers * encoder_lstm_cell.state_size])
        # Embeddings are as wide as the full stacked encoder state.
        self._embedding_size = config.num_layers * int(
            encoder_lstm_cell.state_size)
        with tf.device('/cpu:0'):
            self._embedding_matrix = tf.get_variable(
                "embedding_matrix", [vocab_size, self._embedding_size])
            tf.histogram_summary('embedding_matrix', self._embedding_matrix)

        #RECIPE PROCESSOR (2nd LSTM Layer)
        recipe_processor_size = config.recipe_processor_hidden_size

        with tf.variable_scope("recipe_processor_cell"):
            recipe_processor_lstm_cell = rnn_cell.BasicLSTMCell(
                recipe_processor_size, forget_bias=0.0)
            if is_training and config.keep_prob < 1:
                recipe_processor_lstm_cell = rnn_cell.DropoutWrapper(
                    recipe_processor_lstm_cell,
                    output_keep_prob=config.keep_prob)
            # Built unconditionally, mirroring the encoder above.  This used
            # to sit inside the dropout branch, so evaluation models hit an
            # AttributeError on self.recipe_processor just below.
            self.recipe_processor = rnn_cell.MultiRNNCell(
                [recipe_processor_lstm_cell] * config.num_layers)
            self._initial_recipe_processor_state = self.recipe_processor.zero_state(
                self._batch_size, tf.float32
            )  #tf.ones([self._batch_size, recipe_processor_size])

        #FINAL REDUCTION TO DISTRIBUTION OVER INDICES
        self.index_predictor_W = weight_variable([recipe_processor_size, 2])
        tf.histogram_summary('index_predictor_w', self.index_predictor_W)

        self.index_predictor_b = bias_variable([2])
        tf.histogram_summary('index_predictor_b', self.index_predictor_b)
        self._lr = tf.Variable(float(config.learning_rate), trainable=False)
        tf.scalar_summary('lr', self._lr)

        # Running this op multiplies the learning rate by lr_decay.
        self.learning_rate_decay_op = self._lr.assign(self._lr *
                                                      self._lr_decay)

        #BUILD MODEL
        self.outputs, self.losses, self.costs = self.model_with_buckets()
        #CALC GRADIENTS
        # NOTE(review): gradients are built only when do_train is False, which
        # looks inverted; left as-is because calc_gradients is defined
        # elsewhere -- confirm the intended polarity.
        if not self.do_train:
            self.calc_gradients()
        self.saver = tf.train.Saver(tf.all_variables())
Example #25
0
    def __init__(self, is_training, config):
        """Build the unrolled LSTM language-model graph.

        Creates the input/target placeholders, a stacked LSTM, the softmax
        projection and the cost.  When ``is_training`` is true it also
        creates the learning-rate variable and a clipped-gradient SGD op.
        """
        batch_size = config.batch_size
        num_steps = config.num_steps
        self.batch_size = batch_size
        self.num_steps = num_steps
        hidden_size = config.hidden_size
        vocab_size = config.vocab_size

        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        # Dropout is only active while training with keep_prob below 1.
        apply_dropout = is_training and config.keep_prob < 1

        # Slightly better results can be obtained with forget gate biases
        # initialized to 1 but the hyperparameters of the model would need to
        # be different than reported in the paper.
        base_cell = rnn_cell.BasicLSTMCell(hidden_size, forget_bias=0.0)
        if apply_dropout:
            base_cell = rnn_cell.DropoutWrapper(
                base_cell, output_keep_prob=config.keep_prob)
        stacked_cell = rnn_cell.MultiRNNCell([base_cell] * config.num_layers)

        self._initial_state = stacked_cell.zero_state(batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding",
                                        [vocab_size, hidden_size])
            embedded = tf.nn.embedding_lookup(embedding, self._input_data)

        if apply_dropout:
            embedded = tf.nn.dropout(embedded, config.keep_prob)

        # Hand-unrolled recurrence: the cell is applied once per time step
        # and its variables are shared after the first step.
        step_outputs = []
        step_states = []
        current_state = self._initial_state
        with tf.variable_scope("RNN"):
            for step in range(num_steps):
                if step > 0:
                    tf.get_variable_scope().reuse_variables()
                step_output, current_state = stacked_cell(
                    embedded[:, step, :], current_state)
                step_outputs.append(step_output)
                step_states.append(current_state)

        flat_output = tf.reshape(tf.concat(1, step_outputs),
                                 [-1, hidden_size])
        softmax_w = tf.get_variable("softmax_w", [hidden_size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        logits = tf.nn.xw_plus_b(flat_output, softmax_w, softmax_b)

        flat_targets = tf.reshape(self._targets, [-1])
        unit_weights = tf.ones([batch_size * num_steps])
        loss = seq2seq.sequence_loss_by_example(
            [logits], [flat_targets], [unit_weights], vocab_size)
        cost = tf.reduce_sum(loss) / batch_size
        self._cost = cost
        self._final_state = step_states[-1]

        if is_training:
            self._lr = tf.Variable(0.0, trainable=False)
            trainable = tf.trainable_variables()
            raw_grads = tf.gradients(cost, trainable)
            clipped, _ = tf.clip_by_global_norm(raw_grads,
                                                config.max_grad_norm)
            sgd = tf.train.GradientDescentOptimizer(self.lr)
            self._train_op = sgd.apply_gradients(zip(clipped, trainable))
Example #26
0
    def __init__(self, is_training, config):
        """Assemble the LSTM language-model graph, plus training ops if asked."""
        batch_size = config.batch_size
        num_steps = config.num_steps
        self.batch_size = batch_size
        self.num_steps = num_steps
        hidden_size = config.hidden_size
        vocab_size = config.vocab_size

        self._input_data = tf.placeholder(
            tf.int32, [batch_size, num_steps], name="input_data")
        self._targets = tf.placeholder(
            tf.int32, [batch_size, num_steps], name="targets")

        # Dropout is only active while training with keep_prob below 1.
        apply_dropout = is_training and config.keep_prob < 1

        base_cell = rnn_cell.BasicLSTMCell(hidden_size, forget_bias=1.0)
        if apply_dropout:
            base_cell = rnn_cell.DropoutWrapper(
                base_cell, output_keep_prob=config.keep_prob)
        stacked_cell = rnn_cell.MultiRNNCell([base_cell] * config.num_layers)

        self._initial_state = stacked_cell.zero_state(batch_size, tf.float32)

        # Embedding lookup and per-step slicing are pinned to the CPU.
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding",
                                        [vocab_size, hidden_size])
            embedded = tf.nn.embedding_lookup(embedding, self._input_data)
            step_inputs = [
                tf.squeeze(piece, [1])
                for piece in tf.split(1, num_steps, embedded)
            ]

        if apply_dropout:
            step_inputs = [
                tf.nn.dropout(piece, config.keep_prob)
                for piece in step_inputs
            ]

        from tensorflow.models.rnn import rnn
        step_outputs, states = rnn.rnn(
            stacked_cell, step_inputs, initial_state=self._initial_state)

        # Flatten all step outputs into rows of width hidden_size.
        flat_outputs = tf.reshape(tf.concat(1, step_outputs),
                                  [-1, hidden_size])

        softmax_w = tf.get_variable("softmax_W", [hidden_size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        logits = tf.nn.xw_plus_b(flat_outputs, softmax_w, softmax_b)
        # Softmax probabilities are only needed for sampling.
        self._softmax_out = tf.nn.softmax(logits)

        flat_targets = tf.reshape(self._targets, [-1])
        unit_weights = tf.ones([batch_size * num_steps])
        loss = seq2seq.sequence_loss_by_example(
            [logits], [flat_targets], [unit_weights], vocab_size)
        loss_total = tf.reduce_sum(loss)
        cost = tf.div(loss_total, tf.constant(batch_size, dtype=tf.float32))
        self._cost = cost
        self._final_state = states[-1]

        # Optimisation ops are only needed for the training model.
        if is_training:
            self._lr = tf.Variable(0.0, trainable=False)
            trainable = tf.trainable_variables()
            raw_grads = tf.gradients(cost, trainable)
            clipped, _ = tf.clip_by_global_norm(raw_grads,
                                                config.max_grad_norm)
            # FTRL is used here; plain gradient descent is the alternative
            # that was previously tried.
            #optimizer = tf.train.GradientDescentOptimizer(self.lr)
            ftrl = tf.train.FtrlOptimizer(self.lr)
            self._train_op = ftrl.apply_gradients(zip(clipped, trainable))
Example #27
0
    def __init__(self,
                 embedding_mat,
                 non_static,
                 lstm_type,
                 hidden_unit,
                 sequence_length,
                 max_pool_size,
                 num_classes,
                 embedding_size,
                 filter_sizes,
                 num_filters,
                 l2_reg_lambda=0.0):
        """Build a convolution -> max-pool -> recurrent text classifier graph.

        Args:
            embedding_mat: initial embedding matrix; used as a constant when
                ``non_static`` is false and as a trainable Variable otherwise.
            non_static: whether the embedding matrix is trainable.
            lstm_type: ``"gru"`` selects GRUCell, ``"basic"`` BasicLSTMCell,
                anything else LSTMCell with peepholes.
            hidden_unit: number of units of the recurrent cell.
            sequence_length: second dimension of ``input_x``.
            max_pool_size: pooling window and stride along the sequence axis.
            num_classes: width of ``input_y`` and of the output scores.
            embedding_size: width of one embedding vector.
            filter_sizes: iterable of convolution filter heights.
            num_filters: number of filters per filter size.
            l2_reg_lambda: weight of the L2 penalty on the output layer.
        """

        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")
        self.batch_size = tf.placeholder(tf.int32)
        self.pad = tf.placeholder(tf.float32, [None, 1, embedding_size, 1],
                                  name="pad")
        self.real_len = tf.placeholder(tf.int32, [None], name="real_len")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Extend input to a 4D Tensor, because tf.nn.conv2d requires so.
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            if not non_static:
                W = tf.constant(embedding_mat, name="W")
            else:
                W = tf.Variable(embedding_mat, name="W")
            self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
            emb = tf.expand_dims(self.embedded_chars, -1)

        # CNN
        pooled_concat = []
        # Number of positions left along the sequence axis after pooling.
        reduced = np.int32(np.ceil((sequence_length) * 1.0 / max_pool_size))
        for filter_size in filter_sizes:
            with tf.name_scope("conv-maxpool-%s" % filter_size):

                # Zero paddings so that the convolution output have dimension
                # batch x sequence_length x emb_size x channel.  A single
                # concat over one list also works when a side needs no
                # padding (filter sizes 1 and 2), where concatenating an
                # empty list of pads would fail.
                num_prio = (filter_size - 1) // 2
                num_post = (filter_size - 1) - num_prio
                emb_pad = tf.concat(
                    1, [self.pad] * num_prio + [emb] + [self.pad] * num_post)

                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                                name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]),
                                name="b")
                conv = tf.nn.conv2d(emb_pad,
                                    W,
                                    strides=[1, 1, 1, 1],
                                    padding="VALID",
                                    name="conv")

                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(h,
                                        ksize=[1, max_pool_size, 1, 1],
                                        strides=[1, max_pool_size, 1, 1],
                                        padding='SAME',
                                        name="pool")
                pooled = tf.reshape(pooled, [-1, reduced, num_filters])
                pooled_concat.append(pooled)

        # Join the feature maps of all filter sizes along the last axis.
        pooled_concat = tf.concat(2, pooled_concat)
        pooled_concat = tf.nn.dropout(pooled_concat, self.dropout_keep_prob)

        # LSTM
        # NOTE(review): input_size is given as embedding_size although the
        # cell is fed the pooled convolution features -- confirm.
        if lstm_type == "gru":
            lstm_cell = rnn_cell.GRUCell(num_units=hidden_unit,
                                         input_size=embedding_size)
        else:
            if lstm_type == "basic":
                lstm_cell = rnn_cell.BasicLSTMCell(num_units=hidden_unit,
                                                   input_size=embedding_size)
            else:
                lstm_cell = rnn_cell.LSTMCell(num_units=hidden_unit,
                                              input_size=embedding_size,
                                              use_peepholes=True)
        lstm_cell = rnn_cell.DropoutWrapper(
            lstm_cell, output_keep_prob=self.dropout_keep_prob)

        self._initial_state = lstm_cell.zero_state(self.batch_size, tf.float32)
        # One tensor per pooled position, with the position axis removed.
        inputs = [
            tf.squeeze(input_, [1])
            for input_ in tf.split(1, reduced, pooled_concat)
        ]
        outputs, state = rnn.rnn(lstm_cell,
                                 inputs,
                                 initial_state=self._initial_state,
                                 sequence_length=self.real_len)

        # Collect the appropriate last words into variable output
        # (dimension = batch x hidden_unit).  At step i a row keeps its
        # current value when real_len < i + 1 and takes outputs[i] otherwise,
        # so each row ends up holding the output at position real_len - 1.
        output = outputs[0]
        with tf.variable_scope("Output"):
            tf.get_variable_scope().reuse_variables()
            one = tf.ones([1, hidden_unit], tf.float32)
            for i in range(1, len(outputs)):
                ind = self.real_len < (i + 1)
                ind = tf.to_float(ind)
                ind = tf.expand_dims(ind, -1)
                # Broadcast the per-row flag across all hidden units.
                mat = tf.matmul(ind, one)
                output = tf.add(tf.mul(output, mat),
                                tf.mul(outputs[i], 1.0 - mat))

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            self.W = tf.Variable(tf.truncated_normal(
                [hidden_unit, num_classes], stddev=0.1),
                                 name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            # Penalise the output-layer weights.  The local name `W` still
            # refers to the last convolution filter (or the embedding) at
            # this point, which is what used to be regularised by mistake.
            l2_loss += tf.nn.l2_loss(self.W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(output, self.W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # CalculateMean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                self.scores, self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
Example #28
0
    def __init__(self, CellType, is_training, config):
        """Build a language-model graph around a caller-supplied cell class.

        Args:
            CellType: callable invoked as ``CellType(hidden_size)`` to create
                the recurrent cell.
            is_training: when true, dropout may be applied and a clipped SGD
                training op is created; otherwise ``train_op`` is a no-op.
            config: object supplying ``batch_size``, ``num_steps``,
                ``hidden_size``, ``vocab_size``, ``keep_prob``,
                ``num_layers``, ``init_scale`` and ``max_grad_norm``.
        """
        batch_size = config.batch_size
        num_steps = config.num_steps
        self.batch_size = batch_size
        self.num_steps = num_steps
        hidden_size = config.hidden_size
        vocab_size = config.vocab_size

        self.input_data = tf.placeholder(
            tf.int32, [batch_size, num_steps], name="input_data")
        self.targets = tf.placeholder(
            tf.int32, [batch_size, num_steps], name="targets")

        # Dropout is only active while training with keep_prob below 1.
        apply_dropout = is_training and config.keep_prob < 1

        base_cell = CellType(hidden_size)
        if apply_dropout:
            base_cell = rnn_cell.DropoutWrapper(
                base_cell, output_keep_prob=config.keep_prob)
        stacked_cell = rnn_cell.MultiRNNCell([base_cell] * config.num_layers)
        self.initial_state = stacked_cell.zero_state(batch_size, tf.float32)

        # Shared uniform initializer for every variable created through
        # `get_variable` below.
        uniform_init = tf.random_uniform_initializer(-config.init_scale,
                                                     config.init_scale)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding",
                                        [vocab_size, hidden_size],
                                        initializer=uniform_init)
            embedded = tf.nn.embedding_lookup(embedding, self.input_data)

        if apply_dropout:
            embedded = tf.nn.dropout(embedded, config.keep_prob)

        # Hand-unrolled recurrence with variables shared after step 0.
        step_outputs = []
        step_states = []
        current_state = self.initial_state
        with tf.variable_scope("RNN", initializer=uniform_init):
            for step in range(num_steps):
                if step > 0:
                    tf.get_variable_scope().reuse_variables()
                step_output, current_state = stacked_cell(
                    embedded[:, step, :], current_state)
                step_outputs.append(step_output)
                step_states.append(current_state)

        self.final_state = step_states[-1]

        flat_output = tf.reshape(tf.concat(1, step_outputs),
                                 [-1, hidden_size])
        softmax_w = tf.get_variable("softmax_w", [hidden_size, vocab_size],
                                    initializer=uniform_init)
        softmax_b = tf.get_variable("softmax_b", [vocab_size],
                                    initializer=uniform_init)

        logits = tf.nn.xw_plus_b(flat_output, softmax_w, softmax_b)
        flat_targets = tf.reshape(self.targets, [-1])
        # Every position contributes with weight one.
        unit_weights = tf.ones([batch_size * num_steps])

        loss = sequence_loss_by_example([logits], [flat_targets],
                                        [unit_weights], vocab_size)
        cost = tf.div(tf.reduce_sum(loss), batch_size, name="cost")
        self.cost = cost

        if not is_training:
            # Validation/test models get a placeholder op instead of an
            # optimizer step.
            self.train_op = tf.no_op()
            return

        # Learning rate lives in a non-trainable variable.
        self.lr = tf.Variable(1.0, trainable=False)

        # Clip gradients by global norm, then apply plain gradient descent.
        trainable = tf.trainable_variables()
        clipped, _ = tf.clip_by_global_norm(tf.gradients(cost, trainable),
                                            config.max_grad_norm)
        sgd = tf.train.GradientDescentOptimizer(self.lr)
        self.train_op = sgd.apply_gradients(zip(clipped, trainable),
                                            name="train")
Example #29
0


# Fragment of a TensorFlow training script: it reports the approximate epoch
# count, declares the input placeholders and starts unrolling an LSTM.
# batch_size, max_iterations, N, H, W, C, hidden_size and sl are all defined
# outside the lines visible here.

# Approximate number of epochs: samples consumed (batch_size * max_iterations)
# divided by N.
# NOTE(review): N is presumably the number of training samples -- confirm.
epochs = np.floor(batch_size*max_iterations / N)
print('Train with approximately %d epochs' %(epochs))

# Nodes for the input variables
# x: float input of shape [batch_size, H, W, C].
x = tf.placeholder("float", shape=[batch_size, H,W,C], name = 'Input_data')
# y_: one int64 value per batch row.
y_ = tf.placeholder(tf.int64, shape=[batch_size], name = 'Ground_truth')
# keep_prob: float fed at run time; used below as the dropout wrapper's
# output keep probability.
keep_prob = tf.placeholder("float")

with tf.name_scope("LSTM") as scope:
  # A single LSTM cell whose output passes through dropout.
  cell = rnn_cell.LSTMCell(hidden_size)
  #cell = rnn_cell.MultiRNNCell([cell] * num_layers)
  cell = rnn_cell.DropoutWrapper(cell,output_keep_prob=keep_prob)
  # Weight matrix [hidden_size, hidden_size] and bias [hidden_size],
  # presumably for an x*W + b projection (original note: "XW_plus_b").
  # Neither is used within the lines visible here.
  W_a = tf.Variable(tf.random_normal([hidden_size,hidden_size], stddev=0.01))
  b_a = tf.Variable(tf.constant(0.5, shape=[hidden_size]))
  # Initial state: all zeros, one row per batch element.
  initial_state = cell.zero_state(batch_size, tf.float32)
  # The initial input vector is the activation map summed over axes 1 and 2
  # (H and W of x), leaving one value per channel: [batch_size, C].
  x_in = tf.reduce_sum(x,[1,2])
  # Column filled with the constant sl, appended along axis 1 so the cell
  # input becomes [batch_size, C + 1].
  time = sl*tf.ones([batch_size,1])
  x_in = tf.concat(1,[x_in,time])
  # Accumulators; nothing is appended to them within the visible lines.
  outputs = []
  masks = []
  state = initial_state
  # Unroll the cell for sl steps. From the second step on, variables in the
  # current variable scope are reused instead of being created again.
  # NOTE(review): the loop body is cut off after the cell call in this
  # fragment; as shown, every step is fed the same x_in.
  for time_step in range(sl):
    if time_step > 0: tf.get_variable_scope().reuse_variables()
    (cell_output, state) = cell(x_in, state)
Example #30
0
    def __init__(self, dim, args, infer=False):
        """Assemble the graph of an RNN whose outputs parametrize Gaussian mixtures.

        For each of the ``dim`` features the network emits the means and
        spreads of ``args.num_mixture`` univariate Gaussians; all features
        share one set of mixing weights. The cost is the negative
        log-likelihood of the targets, summed over features and rows and
        divided by ``batch_size * seq_length * dim``.

        Args:
            dim: Number of features per time step; it is the last axis of the
                input and target placeholders.
            args: Hyperparameter object. Attributes read here: ``model``,
                ``rnn_size``, ``num_layers``, ``keep_prob``, ``seq_length``,
                ``batch_size``, ``num_mixture``, ``sig_epsilon`` and
                ``grad_clip``.
            infer: When true, ``args.batch_size`` and ``args.seq_length`` are
                overwritten with 1 on the object the caller passed in, and
                no dropout wrapper is added.

        Raises:
            Exception: If ``args.model`` is not 'rnn', 'gru' or 'lstm'.

        Attributes set: ``cell``, ``input_data``, ``target_data``,
        ``initial_state``, ``final_state``, ``num_mixture``, ``pi``, ``mu``,
        ``sig``, ``cost``, ``loss_summary``, ``summary``, ``lr`` and
        ``train_op``.
        """
        self.dim = dim
        self.args = args
        if infer:
            # These writes land on the caller's `args`, not on a copy.
            args.batch_size = 1
            args.seq_length = 1

        # Resolve the cell class named by args.model. The attribute on
        # rnn_cell is only looked up for the entry that matches.
        cell_class_names = (
            ('rnn', 'BasicRNNCell'),
            ('gru', 'GRUCell'),
            ('lstm', 'BasicLSTMCell'),
        )
        for model_name, class_name in cell_class_names:
            if args.model == model_name:
                cell_fn = getattr(rnn_cell, class_name)
                break
        else:
            raise Exception("model type not supported: {}".format(args.model))

        # One cell object repeated num_layers times, then (training only)
        # dropout on the output of the whole stack.
        single_layer = cell_fn(args.rnn_size)
        cell = rnn_cell.MultiRNNCell([single_layer] * args.num_layers)
        apply_dropout = (infer == False) and args.keep_prob < 1
        if apply_dropout:
            cell = rnn_cell.DropoutWrapper(
                cell, output_keep_prob=args.keep_prob)
        self.cell = cell

        # Inputs and targets share one layout: [batch, seq_length, dim].
        sequence_shape = [None, args.seq_length, self.dim]
        self.input_data = tf.placeholder(dtype=tf.float32,
                                         shape=sequence_shape)
        self.target_data = tf.placeholder(dtype=tf.float32,
                                          shape=sequence_shape)
        self.initial_state = cell.zero_state(batch_size=args.batch_size,
                                             dtype=tf.float32)

        # Width of the output layer: num_mixture mixing logits, followed for
        # every feature by num_mixture means and num_mixture raw spreads.
        self.num_mixture = args.num_mixture
        n_out = self.num_mixture * (1 + 2 * self.dim)

        with tf.variable_scope('rnnlm'):
            output_w = tf.get_variable("output_w", [args.rnn_size, n_out])
            output_b = tf.get_variable("output_b", [n_out])

        # Cut the input along the time axis into seq_length tensors of
        # shape [batch, dim], one per step.
        step_inputs = [
            tf.squeeze(step, [1])
            for step in tf.split(1, args.seq_length, self.input_data)
        ]

        step_outputs, last_state = seq2seq.rnn_decoder(step_inputs,
                                                       self.initial_state,
                                                       cell,
                                                       loop_function=None,
                                                       scope='rnnlm')
        hidden = tf.reshape(tf.concat(1, step_outputs), [-1, args.rnn_size])
        mdn_out = tf.nn.xw_plus_b(hidden, output_w, output_b)
        self.final_state = last_state

        # Flatten the targets to [rows, dim] so they line up with mdn_out.
        x_data = tf.reshape(self.target_data, [-1, self.dim])

        def gaussian_pdf(x, mu, sig):
            """Elementwise density of x under a normal with mean mu, std sig."""
            unnormalized = tf.exp(-tf.square(x - mu) / (2 * tf.square(sig)))
            return unnormalized / (sig * tf.sqrt(2 * np.pi))

        # Notes kept from the original on a possible full-covariance
        # ("multi-normal") density, which is NOT implemented here:
        #   use n (n+1) / 2 values to parametrize the covariance matrix
        #   1. http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.31.494&rep=rep1&type=pdf
        #   2. https://en.wikipedia.org/wiki/Triangular_matrix
        #   3. https://makarandtapaswi.wordpress.com/2011/07/08/cholesky-decomposition-for-matrix-inversion/
        #   A = LL' (by 1); det(L) = product of diagonals (by 2);
        #   det(A) = det(L)^2 and A^-1 = (L^-1)'(L^-1) (by 3).
        #   Parametrizing with L^-1: Sigma^-1 = (L^-1)'(L^-1) and
        #   |Sigma| = 1 / (diagonal product of L^-1)^2.

        def mixture_nll(weights, mu, sig, x):
            """Summed negative log-likelihood of x under the mixture.

            The likelihood is floored at 1e-20 before the log is taken.
            """
            component_density = gaussian_pdf(x, mu, sig)
            likelihood = tf.reduce_sum(component_density * weights, 1,
                                       keep_dims=True)
            return tf.reduce_sum(-tf.log(tf.maximum(likelihood, 1e-20)))

        # Softmax over the first num_mixture columns; the row maximum is
        # subtracted before exponentiating.
        pi_logits = mdn_out[:, 0:self.num_mixture]
        pi_shifted = tf.sub(pi_logits,
                            tf.reduce_max(pi_logits, 1, keep_dims=True))
        pi_unnormalized = tf.exp(pi_shifted)
        self.pi = (tf.inv(tf.reduce_sum(pi_unnormalized, 1, keep_dims=True)) *
                   pi_unnormalized)

        # Remaining columns: one equally sized chunk per feature, each chunk
        # being [means | raw spreads].
        per_dim_params = tf.split(1, self.dim, mdn_out[:, self.num_mixture:])

        cost_scale = args.batch_size * args.seq_length * self.dim
        mu_parts = []
        sig_parts = []
        total_cost = 0
        for i in range(self.dim):
            o_mu, raw_sig = tf.split(1, 2, per_dim_params[i])
            # exp keeps the spread positive; sig_epsilon is added on top.
            o_sig = tf.exp(raw_sig) + args.sig_epsilon

            mu_parts.append(o_mu)
            sig_parts.append(o_sig)

            feature_nll = mixture_nll(self.pi, o_mu, o_sig,
                                      x_data[:, i:i + 1])
            total_cost += feature_nll / cost_scale

        self.cost = total_cost
        self.mu = tf.concat(1, mu_parts)
        self.sig = tf.concat(1, sig_parts)

        self.loss_summary = tf.scalar_summary("loss", self.cost)
        self.summary = tf.merge_all_summaries()

        # Learning rate starts at 0.0 and is a non-trainable variable.
        self.lr = tf.Variable(0.0, trainable=False)
        trainable = tf.trainable_variables()
        clipped_grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, trainable), args.grad_clip)
        adam = tf.train.AdamOptimizer(self.lr)
        self.train_op = adam.apply_gradients(zip(clipped_grads, trainable))