Example #1
    def testSharingWeightsWithDifferentNamescope(self):
        num_units = 3
        input_size = 5
        batch_size = 2
        num_proj = 4
        with self.test_session(graph=tf.Graph()) as sess:
            initializer = tf.random_uniform_initializer(-1, 1, seed=self._seed)
            inputs = 10 * [
                tf.placeholder(tf.float32, shape=(None, input_size))
            ]
            cell = rnn_cell.LSTMCell(num_units,
                                     input_size,
                                     use_peepholes=True,
                                     num_proj=num_proj,
                                     initializer=initializer)

            with tf.name_scope("scope0"):
                with tf.variable_scope("share_scope"):
                    outputs0, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
            with tf.name_scope("scope1"):
                with tf.variable_scope("share_scope", reuse=True):
                    outputs1, _ = rnn.rnn(cell, inputs, dtype=tf.float32)

            tf.initialize_all_variables().run()
            input_value = np.random.randn(batch_size, input_size)
            output_values = sess.run(outputs0 + outputs1,
                                     feed_dict={inputs[0]: input_value})
            outputs0_values = output_values[:10]
            outputs1_values = output_values[10:]
            self.assertEqual(len(outputs0_values), len(outputs1_values))
            for out0, out1 in zip(outputs0_values, outputs1_values):
                self.assertAllEqual(out0, out1)
Example #2
  def testDropout(self):
    cell = Plus1RNNCell()
    full_dropout_cell = rnn_cell.DropoutWrapper(
        cell, input_keep_prob=1e-12, seed=0)
    batch_size = 2
    inputs = [tf.placeholder(tf.float32, shape=(batch_size, 5))] * 10
    with tf.variable_scope("share_scope"):
      outputs, states = rnn.rnn(cell, inputs, dtype=tf.float32)
    with tf.variable_scope("drop_scope"):
      dropped_outputs, _ = rnn.rnn(full_dropout_cell, inputs, dtype=tf.float32)
    self.assertEqual(len(outputs), len(inputs))
    for out, inp in zip(outputs, inputs):
      self.assertEqual(out.get_shape().as_list(), inp.get_shape().as_list())
      self.assertEqual(out.dtype, inp.dtype)

    with self.test_session(use_gpu=False) as sess:
      input_value = np.random.randn(batch_size, 5)
      values = sess.run(outputs + [states[-1]],
                        feed_dict={inputs[0]: input_value})
      full_dropout_values = sess.run(dropped_outputs,
                                     feed_dict={inputs[0]: input_value})

      for v in values[:-1]:
        self.assertAllClose(v, input_value + 1.0)
      for d_v in full_dropout_values[:-1]:  # Add 1.0 to dropped_out (all zeros)
        self.assertAllClose(d_v, np.ones_like(input_value))
Example #3
    def testDropout(self):
        cell = Plus1RNNCell()
        full_dropout_cell = rnn_cell.DropoutWrapper(cell,
                                                    input_keep_prob=1e-12,
                                                    seed=0)
        batch_size = 2
        inputs = [tf.placeholder(tf.float32, shape=(batch_size, 5))] * 10
        with tf.variable_scope("share_scope"):
            outputs, states = rnn.rnn(cell, inputs, dtype=tf.float32)
        with tf.variable_scope("drop_scope"):
            dropped_outputs, _ = rnn.rnn(full_dropout_cell,
                                         inputs,
                                         dtype=tf.float32)
        self.assertEqual(len(outputs), len(inputs))
        for out, inp in zip(outputs, inputs):
            self.assertEqual(out.get_shape().as_list(),
                             inp.get_shape().as_list())
            self.assertEqual(out.dtype, inp.dtype)

        with self.test_session(use_gpu=False) as sess:
            input_value = np.random.randn(batch_size, 5)
            values = sess.run(outputs + [states[-1]],
                              feed_dict={inputs[0]: input_value})
            full_dropout_values = sess.run(dropped_outputs,
                                           feed_dict={inputs[0]: input_value})

            for v in values[:-1]:
                self.assertAllClose(v, input_value + 1.0)
            # Add 1.0 to dropped_out (all zeros)
            for d_v in full_dropout_values[:-1]:
                self.assertAllClose(d_v, np.ones_like(input_value))
Example #4
  def testSharingWeightsWithDifferentNamescope(self):
    num_units = 3
    input_size = 5
    batch_size = 2
    num_proj = 4
    with self.test_session(graph=tf.Graph()) as sess:
      initializer = tf.random_uniform_initializer(-1, 1, seed=self._seed)
      inputs = 10 * [
          tf.placeholder(tf.float32, shape=(None, input_size))]
      cell = rnn_cell.LSTMCell(
          num_units, input_size, use_peepholes=True,
          num_proj=num_proj, initializer=initializer)

      with tf.name_scope("scope0"):
        with tf.variable_scope("share_scope"):
          outputs0, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
      with tf.name_scope("scope1"):
        with tf.variable_scope("share_scope", reuse=True):
          outputs1, _ = rnn.rnn(cell, inputs, dtype=tf.float32)

      tf.initialize_all_variables().run()
      input_value = np.random.randn(batch_size, input_size)
      output_values = sess.run(
          outputs0 + outputs1, feed_dict={inputs[0]: input_value})
      outputs0_values = output_values[:10]
      outputs1_values = output_values[10:]
      self.assertEqual(len(outputs0_values), len(outputs1_values))
      for out0, out1 in zip(outputs0_values, outputs1_values):
        self.assertAllEqual(out0, out1)
Example #5
  def testSharingWeightsWithReuse(self):
    num_units = 3
    input_size = 5
    batch_size = 2
    num_proj = 4
    with self.test_session(graph=tf.Graph()) as sess:
      initializer = tf.random_uniform_initializer(-1, 1, seed=self._seed)
      inputs = 10 * [
          tf.placeholder(tf.float32, shape=(None, input_size))]
      cell = rnn_cell.LSTMCell(
          num_units, input_size, use_peepholes=True,
          num_proj=num_proj, initializer=initializer)

      with tf.variable_scope("share_scope"):
        outputs0, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
      with tf.variable_scope("share_scope", reuse=True):
        outputs1, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
      with tf.variable_scope("diff_scope"):
        outputs2, _ = rnn.rnn(cell, inputs, dtype=tf.float32)

      tf.initialize_all_variables().run()
      input_value = np.random.randn(batch_size, input_size)
      output_values = sess.run(
          outputs0 + outputs1 + outputs2, feed_dict={inputs[0]: input_value})
      outputs0_values = output_values[:10]
      outputs1_values = output_values[10:20]
      outputs2_values = output_values[20:]
      self.assertEqual(len(outputs0_values), len(outputs1_values))
      self.assertEqual(len(outputs0_values), len(outputs2_values))
      for o1, o2, o3 in zip(outputs0_values, outputs1_values, outputs2_values):
        # Same weights used by both RNNs so outputs should be the same.
        self.assertAllEqual(o1, o2)
        # Different weights used so outputs should be different.
        self.assertTrue(np.linalg.norm(o1-o3) > 1e-6)
Example #6
    def _testShardNoShardEquivalentOutput(self, use_gpu):
        num_units = 3
        input_size = 5
        batch_size = 2
        num_proj = 4
        num_proj_shards = 4
        num_unit_shards = 2
        with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
            inputs = 10 * [tf.placeholder(tf.float32)]
            initializer = tf.constant_initializer(0.001)

            cell_noshard = rnn_cell.LSTMCell(num_units,
                                             input_size,
                                             num_proj=num_proj,
                                             use_peepholes=True,
                                             initializer=initializer,
                                             num_unit_shards=num_unit_shards,
                                             num_proj_shards=num_proj_shards)

            cell_shard = rnn_cell.LSTMCell(num_units,
                                           input_size,
                                           use_peepholes=True,
                                           initializer=initializer,
                                           num_proj=num_proj)

            with tf.variable_scope("noshard_scope"):
                outputs_noshard, states_noshard = rnn.rnn(cell_noshard,
                                                          inputs,
                                                          dtype=tf.float32)
            with tf.variable_scope("shard_scope"):
                outputs_shard, states_shard = rnn.rnn(cell_shard,
                                                      inputs,
                                                      dtype=tf.float32)

            self.assertEqual(len(outputs_noshard), len(inputs))
            self.assertEqual(len(outputs_noshard), len(outputs_shard))

            tf.initialize_all_variables().run()
            input_value = np.random.randn(batch_size, input_size)
            feeds = dict((x, input_value) for x in inputs)
            values_noshard = sess.run(outputs_noshard, feed_dict=feeds)
            values_shard = sess.run(outputs_shard, feed_dict=feeds)
            state_values_noshard = sess.run(states_noshard, feed_dict=feeds)
            state_values_shard = sess.run(states_shard, feed_dict=feeds)
            self.assertEqual(len(values_noshard), len(values_shard))
            self.assertEqual(len(state_values_noshard),
                             len(state_values_shard))
            for (v_noshard, v_shard) in zip(values_noshard, values_shard):
                self.assertAllClose(v_noshard, v_shard, atol=1e-3)
            for (s_noshard, s_shard) in zip(state_values_noshard,
                                            state_values_shard):
                self.assertAllClose(s_noshard, s_shard, atol=1e-3)
def RNN(X, num_words_in_X, hidden_size, input_vector_size, max_input_size):
    """
  Passes the input data through an RNN and outputs the final states.

  X: Input is a MAX_INPUT_LENGTH X BATCH_SIZE X WORD_VECTOR_LENGTH matrix
  num_words_in_X: Number of words in X, which is needed because X is zero padded
  hidden_size: The dimensionality of the hidden layer of the RNN
  input_vector_size: This is the dimensionality of each input vector, in this case it is WORD_VECTOR_LENGTH
  max_input_size: This is the max number of input vectors that can be passed in to the RNN.

  """

    # Split X into a list of tensors of length max_input_size where each tensor is a BATCH_SIZE x input_vector_size vector
    X = tf.split(0, max_input_size, X)

    squeezed = []
    for i in range(len(X)):
        squeezed.append(tf.squeeze(X[i]))

    gru_cell = rnn_cell.GRUCell(num_units=hidden_size,
                                input_size=input_vector_size)
    output, state = rnn.rnn(gru_cell,
                            squeezed,
                            sequence_length=num_words_in_X,
                            dtype=tf.float32)
    return output, state
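A minimal usage sketch for the helper above, assuming the TF 0.x-era rnn/rnn_cell modules are imported as in the surrounding examples; the batch size, maximum length and word-vector length below are illustrative assumptions, not values from the original code.

MAX_INPUT_LENGTH = 20        # assumed maximum number of words
BATCH_SIZE = 32              # assumed batch size
WORD_VECTOR_LENGTH = 300     # assumed word-vector dimensionality

X = tf.placeholder(tf.float32,
                   [MAX_INPUT_LENGTH, BATCH_SIZE, WORD_VECTOR_LENGTH])
num_words_in_X = tf.placeholder(tf.int64, [BATCH_SIZE])

# `output` is a list of MAX_INPUT_LENGTH tensors of shape (BATCH_SIZE, hidden_size);
# `state` is the final GRU state.
output, state = RNN(X, num_words_in_X, hidden_size=128,
                    input_vector_size=WORD_VECTOR_LENGTH,
                    max_input_size=MAX_INPUT_LENGTH)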
def myRNN(_x, _istate, _weights, _biases):
    ''' input shape: (batch_size, n_steps, n_input) '''
    _x = tf.transpose(_x, [1, 0, 2])  # permute n_steps and batch_size
    ''' _x: (n_steps,batch_size, n_input) '''
    ''' All first row in all batches are aggregate together
        [[all first rows (2d matrix)],
         [all second rows (2d matrix)]
         [all third rows (2d matrix)],
         ....
         ....
         [all 28-th rows (2d matrix)]]

         Take first 2d matrix as example
         [[first row of no.1 image (vector)],
          [first row of no.2 image (vector)],
          .....
          .....
          [first row of no.batch_size image (vector)]]
    '''
    ''' Reshape to prepare input to hidden activation '''
    _x = tf.reshape(_x, [-1, n_input]) # (n_steps*batch_size, n_input)
    ''' Linear activation '''
    _x = tf.matmul(_x, _weights['hidden']) + _biases['hidden'] # (n_steps*batch_size, n_hidden)
    ''' Define a lstm cell with tensorflow '''
    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    ''' Split data because rnn cell needs a list of inputs for the RNN inner loop '''
    _x = tf.split(0, n_steps, _x) # n_steps * (batch_size, n_hidden)
    ''' Get lstm cell output '''
    outputs, states = rnn.rnn(lstm_cell, _x, initial_state=_istate)
    '''
    Linear activation
    Get inner loop last output
    '''
    return tf.matmul(outputs[-1], _weights['out']) + _biases['out']
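To make the shape bookkeeping above concrete, here is a small sketch tracing the same transpose/reshape/split pipeline with assumed sizes (n_steps=28, batch_size=128, n_input=28, n_hidden=128); the shapes in the comments are what each step produces.

n_steps, batch_size, n_input, n_hidden = 28, 128, 28, 128        # assumed sizes

x = tf.placeholder(tf.float32, [batch_size, n_steps, n_input])   # (128, 28, 28)
x_t = tf.transpose(x, [1, 0, 2])                                 # (28, 128, 28)
x_r = tf.reshape(x_t, [-1, n_input])                             # (28*128, 28)
w_hidden = tf.Variable(tf.random_normal([n_input, n_hidden]))
x_h = tf.matmul(x_r, w_hidden)                                   # (28*128, 128)
x_list = tf.split(0, n_steps, x_h)                               # 28 tensors of shape (128, 128)
# x_list[t] holds the (transformed) t-th row of every image in the batch,
# which is exactly the per-time-step input list that rnn.rnn expects.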
Example #9
    def __init__(self, config):
        lstm_cell = rnn_cell.BasicLSTMCell(config.n_hidden, forget_bias=0.0)
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)
        self._train_op = tf.no_op()
        self._input_data = tf.placeholder(tf.int32, [config.batch_size])
        _X = tf.matmul(self._input_data,
                       tf.get_variable("weights_out", [
                           config.n_hidden, 1
                       ])) + tf.get_variable("bias_hidden", [config.n_hidden])
        self._targets = tf.placeholder(tf.int32, [config.batch_size])
        self._initial_state = cell.zero_state(config.batch_size, tf.float32)
        state = self._initial_state

        # rnn.rnn takes (cell, inputs, initial_state=...); feed the split _X as the input list.
        outputs, states = rnn.rnn(cell,
                                  tf.split(0, 1, _X),
                                  initial_state=state)
        pred = tf.matmul(
            outputs[-1],
            tf.get_variable("weights_hidden",
                            [config.n_features, config.n_hidden
                             ])) + tf.get_variable("weights_out", [1])

        self._final_state = states[-1]
        self._cost = cost = tf.reduce_mean(tf.square(pred - self.targets))
        #optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
        if not config.is_training:
            return

        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=config.learning_rate).minimize(cost)
        self._train_op = optimizer
def RNN(_X, _istate, _weights, _biases):

    # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size => (n_steps,batch_size,n_input)
    # Reshape to prepare input to hidden activation
    _X = tf.reshape(_X, [-1, n_input]) # (n_steps*batch_size, n_input) (2D list with 28*128 vectors with 28 features each)
    # Linear activation
    _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden'] # (n_steps*batch_size=128x28,n_hidden=128)

    # Define a lstm cell with tensorflow
    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    #lstm_cell_drop = rnn_cell.DropoutWrapper(lstm_cell)

    #multi_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * 2)

    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(0, n_steps, _X) # n_steps * (batch_size, n_hidden) => step1 (batch_size=128,n_hidden=128)..step28 (batch_size=128,n_hidden=128)
    # It means that RNN receives list with element (batch_size,n_hidden) for each time step

    # Get lstm cell output
    outputs, states = rnn.rnn(lstm_cell, _X, initial_state=_istate)
    # Output is list with element (batch_size,n_hidden) for each time step?
    #for output in outputs:
    #    print(output)
    #exit(0)

    # Linear activation
    # Get inner loop last output
    return tf.matmul(outputs[-1], _weights['out']) + _biases['out']
def RNN(x, weights, biases, init_state):

    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])  #(n_steps , batch_size,  n_input)
    # Reshaping to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_hidden)
    # This input shape is required by `rnn` function
    x = tf.split(0, n_steps, x)

    '''
    Personally, I find the three lines above the hardest to understand; for a concrete
    reshape demo see basic_op in 1_Introduction. In the end, the first row of every image
    is pulled out to form one matrix, which matches exactly the required
    [batch_size, cell.input_size] format; the actual per-step logic lives inside rnn.rnn.
    '''

    # Define a lstm cell with tensorflow
    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    outputs, states = rnn.rnn(lstm_cell, x, initial_state=init_state, dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out'], lstm_cell.state_size
  def build_model(self):
    # Representation Generator
    self.inputs = tf.placeholder(tf.int32, [self.batch_size, self.seq_length])

    embed = tf.get_variable("embed", [self.vocab_size, self.embed_dim])
    word_embeds = tf.nn.embedding_lookup(embed, self.inputs)

    self.cell = rnn_cell.BasicLSTMCell(self.rnn_size)
    self.stacked_cell = rnn_cell.MultiRNNCell([self.cell] * self.layer_depth)

    outputs, _ = rnn.rnn(self.cell,
                         [tf.squeeze(embed_t) for embed_t in tf.split(1, self.seq_length, word_embeds)],
                         dtype=tf.float32)

    output_embed = tf.pack(outputs)
    mean_pool = tf.nn.relu(tf.reduce_mean(output_embed, 1))

    self.num_action = 4
    self.object_size = 4

    # Action scorer. no bias in paper
    self.pred_action = rnn_cell.linear(mean_pool, self.num_action, 0, "action")
    self.object_ = rnn_cell.linear(mean_pool, self.object_size, 0, "object")

    self.true_action = tf.placeholder(tf.int32, [self.batch_size, self.num_action])
  def __init__(self, vocabularySize, config_param):
    self.vocabularySize = vocabularySize
    self.config = config_param

    self._inputX = tf.placeholder(tf.int32, [self.config.batch_size, self.config.sequence_size], "InputsX")
    self._inputTargetsY = tf.placeholder(tf.int32, [self.config.batch_size, self.config.sequence_size], "InputTargetsY")


    #Converting Input in an Embedded form
    with tf.device("/cpu:0"): #Tells Tensorflow what GPU to use specifically
      embedding = tf.get_variable("embedding", [self.vocabularySize, self.config.embeddingSize])
      embeddingLookedUp = tf.nn.embedding_lookup(embedding, self._inputX)
      inputs = tf.split(1, self.config.sequence_size, embeddingLookedUp)
      inputTensorsAsList = [tf.squeeze(input_, [1]) for input_ in inputs]


    #Define Tensor RNN
    singleRNNCell = rnn_cell.BasicRNNCell(self.config.hidden_size)
    self.multilayerRNN =  rnn_cell.MultiRNNCell([singleRNNCell] * self.config.num_layers)
    self._initial_state = self.multilayerRNN.zero_state(self.config.batch_size, tf.float32)

    #Defining Logits
    hidden_layer_output, states = rnn.rnn(self.multilayerRNN, inputTensorsAsList, initial_state=self._initial_state)
    hidden_layer_output = tf.reshape(tf.concat(1, hidden_layer_output), [-1, self.config.hidden_size])
    self._logits = tf.nn.xw_plus_b(hidden_layer_output, tf.get_variable("softmax_w", [self.config.hidden_size, self.vocabularySize]), tf.get_variable("softmax_b", [self.vocabularySize]))
    self._predictionSoftmax = tf.nn.softmax(self._logits)

    #Define the loss
    loss = seq2seq.sequence_loss_by_example([self._logits], [tf.reshape(self._inputTargetsY, [-1])], [tf.ones([self.config.batch_size * self.config.sequence_size])], self.vocabularySize)
    self._cost = tf.div(tf.reduce_sum(loss), self.config.batch_size)

    self._final_state = states[-1]
Example #14
def unidirectional_lstm(inputs, keep_prob, INPUT_SIZE, HIDDEN_SIZE, SEQ_LENGTH):
    initializer = tf.random_uniform_initializer(-0.01, 0.01)
    cell = LSTMCell(HIDDEN_SIZE, INPUT_SIZE, initializer=initializer)
    inputs_ = [tf.nn.dropout(each, keep_prob) for each in inputs]
    outputs, _ = rnn(cell, inputs_, initial_state=None, sequence_length=None, dtype=tf.float32)

    return outputs
 def build_lstm_model(self):
     # r = rnn_cell.LSTMCell(tf.split(0, self.batch_size, self.inputs), self.input_size,
     #                       initializer=tf.contrib.layers.xavier_initializer())
     r = rnn_cell.BasicLSTMCell(self.input_size)
     istate = r.zero_state(1, dtype=tf.float32)
     o, s = rnn.rnn(r, tf.split(0, self.batch_size, self.inputs), istate)
     return o[-1]
Example #16
 def rnn_estimator(X, y):
     """RNN estimator with target predictor function on top."""
     X = input_op_fn(X)
     if cell_type == 'rnn':
         cell_fn = rnn_cell.BasicRNNCell
     elif cell_type == 'gru':
         cell_fn = rnn_cell.GRUCell
     elif cell_type == 'lstm':
         cell_fn = rnn_cell.BasicLSTMCell
     else:
         raise ValueError(
             "cell_type {} is not supported. ".format(cell_type))
     if bidirection:
         # forward direction cell
         rnn_fw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] *
                                             num_layers)
         # backward direction cell
         rnn_bw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] *
                                             num_layers)
         # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
         encoding = rnn.bidirectional_rnn(rnn_fw_cell,
                                          rnn_bw_cell,
                                          sequence_length=sequence_length,
                                          initial_state=initial_state)
     else:
         cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
         _, encoding = rnn.rnn(cell,
                               X,
                               dtype=tf.float32,
                               sequence_length=sequence_length,
                               initial_state=initial_state)
     return target_predictor_fn(encoding[-1], y)
Example #17
    def _testDoubleInput(self, use_gpu):
        num_units = 3
        input_size = 5
        batch_size = 2
        num_proj = 4
        num_proj_shards = 4
        num_unit_shards = 2
        with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
            initializer = tf.random_uniform_initializer(-1, 1, seed=self._seed)
            inputs = 10 * [tf.placeholder(tf.float64)]

            cell = rnn_cell.LSTMCell(num_units,
                                     input_size=input_size,
                                     use_peepholes=True,
                                     num_proj=num_proj,
                                     num_unit_shards=num_unit_shards,
                                     num_proj_shards=num_proj_shards,
                                     initializer=initializer)

            outputs, _ = rnn.rnn(cell,
                                 inputs,
                                 initial_state=cell.zero_state(
                                     batch_size, tf.float64))

            self.assertEqual(len(outputs), len(inputs))

            tf.initialize_all_variables().run()
            input_value = np.asarray(np.random.randn(batch_size, input_size),
                                     dtype=np.float64)
            values = sess.run(outputs, feed_dict={inputs[0]: input_value})
            self.assertEqual(values[0].dtype, input_value.dtype)
Example #18
def tied_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                     loop_function=None, dtype=tf.float32, scope=None):
  """RNN sequence-to-sequence model with tied encoder and decoder parameters.

  This model first runs an RNN to encode encoder_inputs into a state vector, and
  then runs decoder, initialized with the last encoder state, on decoder_inputs.
  Encoder and decoder use the same RNN cell and share parameters.

  Args:
    encoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
    decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    loop_function: if not None, this function will be applied to i-th output
      in order to generate i+1-th input, and decoder_inputs will be ignored,
      except for the first element ("GO" symbol), see rnn_decoder for details.
    dtype: The dtype of the initial state of the rnn cell (default: tf.float32).
    scope: VariableScope for the created subgraph; default: "tied_rnn_seq2seq".

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors with
      shape [batch_size x cell.output_size] containing the generated outputs.
    states: The state of each decoder cell in each time-step. This is a list
      with length len(decoder_inputs) -- one item for each time-step.
      Each item is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with tf.variable_scope("combined_tied_rnn_seq2seq"):
    scope = scope or "tied_rnn_seq2seq"
    _, enc_states = rnn.rnn(
        cell, encoder_inputs, dtype=dtype, scope=scope)
    tf.get_variable_scope().reuse_variables()
    return rnn_decoder(decoder_inputs, enc_states[-1], cell,
                       loop_function=loop_function, scope=scope)
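A hedged usage sketch for tied_rnn_seq2seq with assumed sizes. Because encoder and decoder share parameters, the cell's output size is chosen to match the input size here; rnn_decoder is assumed to be defined in the same module, as in the TF 0.x seq2seq code.

batch_size, input_size, seq_len = 4, 8, 5                  # assumed sizes
cell = rnn_cell.GRUCell(input_size)                        # output size == input size for tying
encoder_inputs = [tf.placeholder(tf.float32, [batch_size, input_size])
                  for _ in range(seq_len)]
decoder_inputs = [tf.placeholder(tf.float32, [batch_size, input_size])
                  for _ in range(seq_len)]
outputs, states = tied_rnn_seq2seq(encoder_inputs, decoder_inputs, cell)
# len(outputs) == len(decoder_inputs); each output has shape [batch_size, cell.output_size].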
Example #19
def RNN(x, input_size, num_hidden):
    weights = {
        'hidden':
        tf.Variable(tf.random_normal([input_size,
                                      num_hidden])),  # Hidden layer weights
        'out': tf.Variable(tf.random_normal([num_hidden, 1]))
    }
    biases = {
        'hidden': tf.Variable(tf.random_normal([num_hidden])),
        'out': tf.Variable(tf.random_normal([1]))
    }

    X_t = tf.transpose(x, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    X_r = tf.reshape(X_t, [-1, input_size])  # (n_steps*batch_size, n_input)

    X_m = tf.matmul(X_r, weights['hidden']) + biases['hidden']

    X_s = tf.split(0, seq_len, X_m)  # n_steps * (batch_size, n_hidden)

    lstm_cell = rnn_cell.BasicLSTMCell(num_hidden, forget_bias=1.0)

    # Note that `outputs` is a list of length seq_len;
    # each element is a tensor of size [batch_size, num_hidden].
    outputs, states = rnn.rnn(lstm_cell, X_s, dtype=tf.float32)

    return tf.matmul(outputs[-1], weights['out']) + biases['out']
Example #20
 def rnn_estimator(X, y):
     """RNN estimator with target predictor function on top."""
     X = input_op_fn(X)
     if cell_type == 'rnn':
         cell_fn = rnn_cell.BasicRNNCell
     elif cell_type == 'gru':
         cell_fn = rnn_cell.GRUCell
     elif cell_type == 'lstm':
         cell_fn = rnn_cell.BasicLSTMCell
     else:
         raise ValueError("cell_type {} is not supported. ".format(cell_type))
     if bidirection:
         # forward direction cell
         rnn_fw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
         # backward direction cell
         rnn_bw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
         # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
         encoding = rnn.bidirectional_rnn(rnn_fw_cell, rnn_bw_cell,
                                          sequence_length=sequence_length,
                                          initial_state=initial_state)
     else:
         cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
         _, encoding = rnn.rnn(cell, X, dtype=tf.float32,
                               sequence_length=sequence_length,
                               initial_state=initial_state)
     return target_predictor_fn(encoding[-1], y)
Example #21
        def _shared_layer(input_data, config):
            """Build the model to decoding

            Args:
                input_data = size batch_size X num_steps X embedding size

            Returns:
                output units
            """
            cell = rnn_cell.BasicLSTMCell(config.encoder_size)

            inputs = [
                tf.squeeze(input_, [1])
                for input_ in tf.split(1, config.num_steps, input_data)
            ]

            if is_training and config.keep_prob < 1:
                cell = rnn_cell.DropoutWrapper(
                    cell, output_keep_prob=config.keep_prob)

            cell = rnn_cell.MultiRNNCell([cell] * config.num_shared_layers)

            initial_state = cell.zero_state(config.batch_size, tf.float32)

            encoder_outputs, encoder_states = rnn.rnn(
                cell, inputs, initial_state=initial_state, scope="encoder_rnn")

            return encoder_outputs, initial_state
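For illustration only, a minimal sketch of how _shared_layer might be called, assuming it and an is_training flag are visible in the enclosing scope; the config fields below and the embedding size of 128 are assumptions, not values from the original code.

class SharedConfig(object):          # hypothetical config container, for illustration only
    encoder_size = 64
    num_steps = 20
    num_shared_layers = 2
    batch_size = 32
    keep_prob = 0.5

config = SharedConfig()
input_data = tf.placeholder(tf.float32,
                            [config.batch_size, config.num_steps, 128])  # embedding size assumed
encoder_outputs, initial_state = _shared_layer(input_data, config)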
def LSTM(x, y):
  x, y  = reshape(x, y, 1)
  W_out = weight_variable([FLAGS.hidden_size, FLAGS.out_size])
  b_out = bias_variable([FLAGS.out_size])
  predictions = list()
  cost_all    = list()
  with tf.variable_scope('lstm1') as scope:
    lstm_cell = rnn_cell.BasicLSTMCell(FLAGS.hidden_size, forget_bias=1.0)
    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
    #
    # print(len(outputs))
    # for i in range(len(outputs)):
    #   print(outputs[i].get_shape()) => (?, 128)
    #
    for i in range(len(outputs)):
      output    = outputs[i]
      pred      = tf.matmul(output, W_out) + b_out
      current_y = y[i]

      # tensorflow.python.pywrap_tensorflow.StatusNotOK:
      # Invalid argument: logits and labels must be same size:
      # logits_size=[9800,3]
      # labels_size=[100,3]
      loss = tf.nn.softmax_cross_entropy_with_logits(pred, current_y)

      cost = tf.reduce_mean(loss)
      cost_all.append(cost)
      predictions.append(pred)
  return predictions, cost_all
Example #23
    def __init__(self, vocab_size, batch_size, sequence_length, embedding_size, num_classes):
        self.hyperParam = {}
        self.hyperParam["hidden_num"] = 20
        self.hyperParam["l2_lambda"] = 3
        self.hyperParam["dropout_keep_prob"] = 0.5
        l2_loss = tf.constant(0.0)

        self.dropout_keep_prob = 0.5
        ##rnnCell = rnn_cell.BasicRNNCell(hidden_num)
        rnnCell = rnn_cell.BasicLSTMCell(self.hyperParam["hidden_num"], forget_bias=1.0)
        self.input_data = tf.placeholder(tf.int32, shape=[None, sequence_length], name="input_data")
        self.weights = tf.placeholder(tf.int32, shape=[None, sequence_length], name="weights")
        self.output_data = tf.placeholder(tf.int32, [None, sequence_length], name="output_data")
        a = tf.shape(self.output_data)[0]

        #self.inputs = []
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, embedding_size])
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)
            #for i, v in enumerate(input_refine):
            #    self.inputs.append(tf.nn.embedding_lookup(embedding, input_refine[i]))
        self.inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, sequence_length, inputs)]
        self.output, self.states = rnn.rnn(rnnCell, self.inputs, dtype=tf.float32)

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = [tf.nn.dropout(p, self.hyperParam["dropout_keep_prob"]) for p in self.output]

        predictions = []
        with tf.name_scope("result"):
            W = tf.Variable(tf.truncated_normal([self.hyperParam["hidden_num"], num_classes], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")

            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            #output = tf.reshape(tf.concat(1, self.output), [-1, hidden_num])
            output = tf.reshape(tf.concat(1, self.h_drop), [-1, self.hyperParam["hidden_num"]])
            logits = tf.matmul(output, W) + b
            self.scores = logits
            #self.new_scores = [tf.squeeze(k, [1]) for k in tf.split(1, sequence_length, tf.reshape(logits, [-1, sequence_length, num_classes]))]

        losses = 0
        accuracy = []
        with tf.name_scope("loss"):
            output_refine = tf.reshape(self.output_data, [-1])
            #output_refine = tf.split(1, sequence_length, self.output_data)
            #weight = tf.ones_like(output_refine, dtype="float32")
            weight = tf.reshape(tf.cast(self.weights, "float32"), [-1])
            loss = seq2seq.sequence_loss_by_example([self.scores], [output_refine], [weight], num_classes)
            self.loss = tf.reduce_sum(loss)/tf.cast(a, "float32") + self.hyperParam["l2_lambda"]*l2_loss
            #self.accuracy = tf.reduce_mean(tf.cast(tf.concat(0, accuracy), "float"))

        with tf.name_scope("accuracy"):
            self.predictions = tf.argmax(tf.reshape(self.scores, [-1, sequence_length, num_classes]), 2)
            #self.kk = tf.cast(tf.equal(self.predictions, tf.cast(self.output_data, "int64")), "int64")
            aa = tf.expand_dims(tf.reshape(tf.cast(tf.equal(self.predictions, tf.cast(self.output_data, "int64")), "float32"), [-1]), 0)
            bb = tf.expand_dims(tf.cast(tf.reshape(self.weights, [-1]), "float32"), 0)
            self.kk = tf.squeeze(tf.matmul(aa, bb, transpose_b=True))/tf.reduce_sum(tf.cast(self.weights, "float32"), [0, 1])

            self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.predictions, tf.cast(self.output_data, "int64")), "float32"), name="accuracy")
def rnn_model(X, y):
    word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words,
        embedding_size=EMBEDDING_SIZE, name='words')
    word_list = [tf.squeeze(w, [1]) for w in tf.split(1, MAX_DOCUMENT_LENGTH, word_vectors)]
    cell = rnn_cell.GRUCell(EMBEDDING_SIZE)
    _, encoding = rnn.rnn(cell, word_list, dtype=tf.float32)
    return skflow.models.logistic_regression(encoding[-1], y)
Example #25
    def __init__(self,
                 vocab_size,
                 size=256,
                 depth=2,
                 learning_rate=1e-4,
                 batch_size=32,
                 keep_prob=0.1,
                 num_steps=100,
                 checkpoint_dir="checkpoint",
                 forward_only=False):
        """Initialize the parameters for an Deep Bidirectional LSTM model.
    
    Args:
      vocab_size: int, The dimensionality of the input vocab
      size: int, The dimensionality of the inputs into the Deep LSTM cell [32, 64, 256]
      learning_rate: float, [1e-3, 5e-4, 1e-4, 5e-5]
      batch_size: int, The size of a batch [16, 32]
      keep_prob: unit Tensor or float between 0 and 1 [0.0, 0.1, 0.2]
      num_steps: int, The max time unit [100]
    """
        super(DeepBiLSTM, self).__init__()

        self.vocab_size = int(vocab_size)
        self.size = int(size)
        self.depth = int(depth)
        self.learning_rate = float(learning_rate)
        self.batch_size = int(batch_size)
        self.keep_prob = float(keep_prob)
        self.num_steps = int(num_steps)

        self.inputs = tf.placeholder(tf.int32,
                                     [self.batch_size, self.num_steps])
        self.input_lengths = tf.placeholder(tf.int64, [self.batch_size])

        with tf.device("/cpu:0"):
            self.emb = tf.Variable(tf.truncated_normal(
                [self.vocab_size, self.size], -0.1, 0.1),
                                   name='emb')
            self.embed_inputs = tf.nn.embedding_lookup(
                self.emb, tf.transpose(self.inputs))

        self.cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
        self.stacked_cell = rnn_cell.MultiRNNCell([self.cell] * depth)

        self.initial_state = self.stacked_cell.zero_state(
            batch_size, tf.float32)

        if not forward_only and self.keep_prob < 1:
            # Wrap the stacked cell so the dropout is actually applied by rnn.rnn below.
            self.stacked_cell = rnn_cell.DropoutWrapper(self.stacked_cell,
                                                        output_keep_prob=keep_prob)

        self.outputs, self.states = rnn.rnn(self.stacked_cell,
                                            tf.unpack(self.embed_inputs),
                                            dtype=tf.float32,
                                            sequence_length=self.input_lengths,
                                            initial_state=self.initial_state)

        output = tf.reduce_sum(tf.pack(self.outputs), 0)
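For illustration only, a hedged sketch of constructing the class above; the argument values are assumptions, and the rest of the class (beyond the constructor fragment shown here) is expected to exist elsewhere.

# Hypothetical construction; all values are illustrative assumptions.
model = DeepBiLSTM(vocab_size=10000,
                   size=256,
                   depth=2,
                   learning_rate=1e-4,
                   batch_size=32,
                   keep_prob=0.1,
                   num_steps=100,
                   forward_only=False)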
def rnn_model(X, y):
    word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words,
        embedding_size=EMBEDDING_SIZE, name='words')
    word_list = [tf.squeeze(w, [1]) for w in tf.split(1, MAX_DOCUMENT_LENGTH, word_vectors)]
    cell = rnn_cell.GRUCell(EMBEDDING_SIZE)
    _, encoding = rnn.rnn(cell, word_list, dtype=tf.float32)
    return skflow.models.logistic_regression(encoding[-1], y)
def RNN(X, num_words_in_X, hidden_size, max_input_size):
    # Reshape `X` as a vector. -1 means "set this dimension automatically".
    X_as_vector = tf.reshape(X, [-1])

    # Create another vector containing zeroes to pad `X` to (MAX_INPUT_LENGTH * WORD_VECTOR_LENGTH) elements.
    zero_padding = tf.zeros([max_input_size * WORD_VECTOR_LENGTH] -
                            tf.shape(X_as_vector),
                            dtype=X.dtype)

    # Concatenate `X_as_vector` with the padding.
    X_padded_as_vector = tf.concat(0, [X_as_vector, zero_padding])

    # Reshape the padded vector to the desired shape.
    X_padded = tf.reshape(X_padded_as_vector,
                          [max_input_size, WORD_VECTOR_LENGTH])

    # Split X into a list of tensors of length MAX_INPUT_LENGTH where each tensor is a 1xWORD_VECTOR_LENGTH vector
    # of the word vectors
    # TODO change input to be a list of tensors of length MAX_INPUT_LENGTH where each tensor is a BATCH_SIZExWORD_VECTOR_LENGTH vector
    X = tf.split(0, max_input_size, X_padded)

    print "Length X: {}".format(len(X))

    gru_cell = rnn_cell.GRUCell(num_units=hidden_size,
                                input_size=WORD_VECTOR_LENGTH)

    output, state = rnn.rnn(gru_cell,
                            X,
                            sequence_length=(num_words_in_X),
                            dtype=tf.float32)

    print "State: {}".format(state)

    return output, state, X_padded
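A small sketch of the zero-padding step above, with assumed sizes (WORD_VECTOR_LENGTH = 4, max_input_size = 6) and a 2-word input; the comments trace how the padded matrix is formed.

WORD_VECTOR_LENGTH = 4                                     # assumed, for illustration only
max_input_size = 6                                         # assumed

X = tf.constant([[1., 2., 3., 4.],
                 [5., 6., 7., 8.]])                        # 2 word vectors -> shape (2, 4)
X_as_vector = tf.reshape(X, [-1])                          # 8 elements
zero_padding = tf.zeros([max_input_size * WORD_VECTOR_LENGTH] - tf.shape(X_as_vector),
                        dtype=X.dtype)                     # 24 - 8 = 16 zeros
X_padded = tf.reshape(tf.concat(0, [X_as_vector, zero_padding]),
                      [max_input_size, WORD_VECTOR_LENGTH])  # (6, 4): 2 real rows, 4 zero rows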
def RNN(x, weights, biases, init_state):

    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])  #(n_steps , batch_size,  n_input)
    # Reshaping to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_hidden)
    # This input shape is required by `rnn` function
    x = tf.split(0, n_steps, x)
    '''
    Personally, I find the three lines above the hardest to understand; for a concrete
    reshape demo see basic_op in 1_Introduction. In the end, the first row of every image
    is pulled out to form one matrix, which matches exactly the required
    [batch_size, cell.input_size] format; the actual per-step logic lives inside rnn.rnn.
    '''

    # Define a lstm cell with tensorflow
    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    outputs, states = rnn.rnn(lstm_cell,
                              x,
                              initial_state=init_state,
                              dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1],
                     weights['out']) + biases['out'], lstm_cell.state_size
def RNN(X, num_words_in_X, hidden_size, max_input_size):
  # Reshape `X` as a vector. -1 means "set this dimension automatically".
  X_as_vector = tf.reshape(X, [-1])

  # Create another vector containing zeroes to pad `X` to (MAX_INPUT_LENGTH * WORD_VECTOR_LENGTH) elements.
  zero_padding = tf.zeros([max_input_size * WORD_VECTOR_LENGTH] - tf.shape(X_as_vector), dtype=X.dtype)

  # Concatenate `X_as_vector` with the padding.
  X_padded_as_vector = tf.concat(0, [X_as_vector, zero_padding])

  # Reshape the padded vector to the desired shape.
  X_padded = tf.reshape(X_padded_as_vector, [max_input_size, WORD_VECTOR_LENGTH])

  # Split X into a list of tensors of length MAX_INPUT_LENGTH where each tensor is a 1xWORD_VECTOR_LENGTH vector
  # of the word vectors
  # TODO change input to be a list of tensors of length MAX_INPUT_LENGTH where each tensor is a BATCH_SIZExWORD_VECTOR_LENGTH vector
  X = tf.split(0, max_input_size, X_padded)

  print "Length X: {}".format(len(X))

  gru_cell = rnn_cell.GRUCell(num_units=hidden_size, input_size=WORD_VECTOR_LENGTH)

  output, state = rnn.rnn(gru_cell, X, sequence_length=(num_words_in_X), dtype=tf.float32)

  print "State: {}".format(state)

  return output, state, X_padded
Example #30
    def _testCellClipping(self, use_gpu):
        num_units = 3
        input_size = 5
        batch_size = 2
        with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
            initializer = tf.random_uniform_initializer(-0.01,
                                                        0.01,
                                                        seed=self._seed)
            cell = rnn_cell.LSTMCell(num_units,
                                     input_size,
                                     use_peepholes=True,
                                     cell_clip=0.0,
                                     initializer=initializer)
            inputs = 10 * [
                tf.placeholder(tf.float32, shape=(batch_size, input_size))
            ]
            outputs, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
            self.assertEqual(len(outputs), len(inputs))
            for out in outputs:
                self.assertEqual(out.get_shape().as_list(),
                                 [batch_size, num_units])

            tf.initialize_all_variables().run()
            input_value = np.random.randn(batch_size, input_size)
            values = sess.run(outputs, feed_dict={inputs[0]: input_value})

        for value in values:
            # if cell c is clipped to 0, tanh(c) = 0 => m==0
            self.assertAllEqual(value, np.zeros((batch_size, num_units)))
    def create_model(self):
        print "Setting up model",
        sys.stdout.flush()
        # placeholders for data + targets
        self._input_data = tf.placeholder(tf.int32, shape=(self.batch_size, self.num_steps))
        self._targets = tf.placeholder(tf.int32, [self.batch_size, self.num_steps])

        # set up lookup function
        self.embedding = tf.constant(self.saved_embedding,name="embedding")
        self.inputs = tf.nn.embedding_lookup(self.embedding, self._input_data)
        # lstm model
        self.lstm_cell = rnn_cell.BasicLSTMCell(self.lstm_size)
        self.cell = rnn_cell.MultiRNNCell([self.lstm_cell] * self.num_layers)


        self._initial_state = self.cell.zero_state(self.batch_size, tf.float32)

        from tensorflow.models.rnn import rnn
        self.inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, self.num_steps, self.inputs)]
        self.outputs, self.states = rnn.rnn(self.cell, self.inputs, initial_state=self._initial_state)

        self.output = tf.reshape(tf.concat(1, self.outputs), [-1, self.lstm_size])
        self.softmax_w = tf.get_variable("softmax_w", [self.lstm_size, self.vocab_size])
        self.softmax_b = tf.get_variable("softmax_b", [self.vocab_size])
        self.logits = tf.matmul(self.output, self.softmax_w) + self.softmax_b
        #print  "self.states.get_shape():",self.states.get_shape()
        #print  "tf.shape(self.states)",tf.shape(self.states)
        self._final_state = self.states
        self.saver = tf.train.Saver()
        
        #delete data to save memory if network is used for sampling only
        if self.only_for_sampling:
            del self.data
            
        print "done"
Example #32
    def __init__(self, session, input_pipeline):
        self.session = session
        self.input_pipeline = input_pipeline

        text_embeddings = weight_init(config.words_count + 2, config.hidden_count)

        embedded = tf.split(1, config.max_len, tf.nn.embedding_lookup(text_embeddings, input_pipeline.text_input))
        inputs = [tf.squeeze(input_, [1]) for input_ in embedded]

        w_image = weight_init(config.image_features_count, config.hidden_count)
        b_image = bias_init([config.hidden_count])

        image_transform = tf.matmul(input_pipeline.image_input, w_image) + b_image
        hidden_start = tf.concat(1, [tf.zeros_like(image_transform), image_transform])

        cell = WordCell(config.hidden_count, config.output_words_count + 1)
        probs_list, self.hidden = rnn.rnn(
            cell=cell,
            inputs=inputs,
            initial_state=hidden_start,
            sequence_length=input_pipeline.lens_input)
        self.probs = tf.concat(1, [tf.expand_dims(prob, 1) for prob in probs_list])

        float_lens = tf.cast(input_pipeline.lens_input, 'float')
        sample_losses = tf.reduce_sum(self.probs * input_pipeline.result_input, [1, 2]) / float_lens
        self.loss = -tf.reduce_mean(sample_losses)
        self.train_task = tf.train.AdamOptimizer(1e-4).minimize(self.loss)
        self.loss_summary = tf.scalar_summary('loss', self.loss)

        self.saver = tf.train.Saver()
Example #33
    def build_generator(self):
        tf.get_variable_scope().reuse_variables()
        video = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps, self.dim_image])
        video_mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])

        video_flat = tf.reshape(video, [-1, self.dim_image])
        image_emb = tf.nn.xw_plus_b(video_flat, self.encode_image_W, self.encode_image_b)
        image_emb = tf.reshape(image_emb, [self.batch_size, self.n_lstm_steps, self.dim_hidden])
        image_emb = tf.transpose(image_emb, [1, 0, 2])

        state2 = tf.zeros([self.batch_size, self.lstm2.state_size])

        generated_HL = []

        _X = tf.reshape(image_emb, [-1, self.dim_hidden])  # (n x b) x h
        _X = tf.split(0, self.n_lstm_steps, _X)  # n x (b x h)
        [output2, state2] = rnn.rnn(self.lstm_HL_net, _X, dtype=tf.float32)  # n x (b x h)
        output2 = tf.transpose(tf.pack(output2), [1, 0, 2])  # b x n x h
        for ii in range(self.batch_size):
            logit_words = tf.nn.xw_plus_b(output2[ii, :, :], self.embed_HL_W, self.embed_HL_b)  # n x 2
            logit_words = tf.nn.softmax(logit_words)  # n x 2
            generated_HL.append(logit_words[:, 1])  # n x 1

        generated_HL = tf.pack(generated_HL)  # b x n
        generated_HL = tf.mul(generated_HL, video_mask)  # b x n
        with tf.variable_scope("RNN") as vs:
            lstmRNN_variables = [v for v in tf.all_variables() if v.name.startswith(vs.name)]
        return video, video_mask, generated_HL, lstmRNN_variables
def RNN(_X, _istate, _weights, _biases):

    # input shape: (batch_size, n_steps, 28, 28, 1)
    _X = tf.transpose(_X, [1, 0, 2, 3, 4])  # permute n_steps and batch_size
    # input shape: (n_steps=3, batch_size=20, 28, 28, 1)
    # Reshape to prepare input to hidden activation
    #_X = tf.reshape(_X, [-1, n_input]) # (n_steps*batch_size, n_input)
    # Linear activation  ==> convolutional net
    #_X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']
    
    A = CNN(_X[0,:,:,:,:])
    B = CNN(_X[1,:,:,:,:])
    C = CNN(_X[2,:,:,:,:])

    # Define a lstm cell with tensorflow
    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    #_X = tf.split(0, n_steps, _X) # n_steps * (batch_size, n_hidden)

    # Get lstm cell output
    outputs, states = rnn.rnn(lstm_cell, [A,B,C], initial_state=_istate)

    # Linear activation
    # Get inner loop last output
    out1 = tf.nn.relu( tf.matmul(outputs[-1], _weights['out1']) + _biases['out1'] )
    out2 = tf.matmul(out1, _weights['out2']) + _biases['out2'] 
    return out2
Example #35
  def _testProjSharding(self, use_gpu):
    num_units = 3
    input_size = 5
    batch_size = 2
    num_proj = 4
    num_proj_shards = 4
    num_unit_shards = 2
    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
      initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=self._seed)

      inputs = 10 * [
          tf.placeholder(tf.float32, shape=(None, input_size))]

      cell = rnn_cell.LSTMCell(
          num_units,
          input_size=input_size,
          use_peepholes=True,
          num_proj=num_proj,
          num_unit_shards=num_unit_shards,
          num_proj_shards=num_proj_shards,
          initializer=initializer)

      outputs, _ = rnn.rnn(cell, inputs, dtype=tf.float32)

      self.assertEqual(len(outputs), len(inputs))

      tf.initialize_all_variables().run()
      input_value = np.random.randn(batch_size, input_size)
      sess.run(outputs, feed_dict={inputs[0]: input_value})
Example #36
  def testDynamicCalculation(self):
    cell = Plus1RNNCell()
    sequence_length = tf.placeholder(tf.int64)
    batch_size = 2
    inputs = [tf.placeholder(tf.float32, shape=(batch_size, 5))] * 10
    with tf.variable_scope("drop_scope"):
      dynamic_outputs, dynamic_states = rnn.rnn(
          cell, inputs, sequence_length=sequence_length, dtype=tf.float32)
    self.assertEqual(len(dynamic_outputs), len(inputs))
    self.assertEqual(len(dynamic_states), len(inputs))

    with self.test_session(use_gpu=False) as sess:
      input_value = np.random.randn(batch_size, 5)
      dynamic_values = sess.run(dynamic_outputs,
                                feed_dict={inputs[0]: input_value,
                                           sequence_length: [2, 3]})
      dynamic_state_values = sess.run(dynamic_states,
                                      feed_dict={inputs[0]: input_value,
                                                 sequence_length: [2, 3]})

      # fully calculated for t = 0, 1, 2
      for v in dynamic_values[:3]:
        self.assertAllClose(v, input_value + 1.0)
      for vi, v in enumerate(dynamic_state_values[:3]):
        self.assertAllEqual(v, 1.0 * (vi + 1) * np.ones((batch_size, 5)))
      # zeros for t = 3+
      for v in dynamic_values[3:]:
        self.assertAllEqual(v, np.zeros_like(input_value))
      for v in dynamic_state_values[3:]:
        self.assertAllEqual(v, np.zeros_like(input_value))
Example #37
def basic_rnn_seq2seq(
    encoder_inputs, decoder_inputs, cell, dtype=tf.float32, scope=None):
  """Basic RNN sequence-to-sequence model.

  This model first runs an RNN to encode encoder_inputs into a state vector, and
  then runs decoder, initialized with the last encoder state, on decoder_inputs.
  Encoder and decoder use the same RNN cell type, but don't share parameters.

  Args:
    encoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
    decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    dtype: The dtype of the initial state of the RNN cell (default: tf.float32).
    scope: VariableScope for the created subgraph; default: "basic_rnn_seq2seq".

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors with
      shape [batch_size x cell.output_size] containing the generated outputs.
    states: The state of each decoder cell in each time-step. This is a list
      with length len(decoder_inputs) -- one item for each time-step.
      Each item is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with tf.variable_scope(scope or "basic_rnn_seq2seq"):
    _, enc_states = rnn.rnn(cell, encoder_inputs, dtype=dtype)
    return rnn_decoder(decoder_inputs, enc_states[-1], cell)
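A hedged usage sketch for basic_rnn_seq2seq with assumed sizes; rnn_decoder is assumed to be defined alongside this function, as in the TF 0.x seq2seq module.

batch_size, input_size, seq_len = 4, 8, 5                  # assumed sizes
cell = rnn_cell.BasicLSTMCell(16)                          # no parameter sharing, so sizes may differ
encoder_inputs = [tf.placeholder(tf.float32, [batch_size, input_size])
                  for _ in range(seq_len)]
decoder_inputs = [tf.placeholder(tf.float32, [batch_size, input_size])
                  for _ in range(seq_len)]
outputs, states = basic_rnn_seq2seq(encoder_inputs, decoder_inputs, cell)
# outputs[i] has shape [batch_size, cell.output_size]; states[-1] is the final decoder state.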
Example #38
  def _testDoubleInput(self, use_gpu):
    num_units = 3
    input_size = 5
    batch_size = 2
    num_proj = 4
    num_proj_shards = 4
    num_unit_shards = 2
    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
      initializer = tf.random_uniform_initializer(-1, 1, seed=self._seed)
      inputs = 10 * [tf.placeholder(tf.float64)]

      cell = rnn_cell.LSTMCell(
          num_units,
          input_size=input_size,
          use_peepholes=True,
          num_proj=num_proj,
          num_unit_shards=num_unit_shards,
          num_proj_shards=num_proj_shards,
          initializer=initializer)

      outputs, _ = rnn.rnn(
          cell, inputs, initial_state=cell.zero_state(batch_size, tf.float64))

      self.assertEqual(len(outputs), len(inputs))

      tf.initialize_all_variables().run()
      input_value = np.asarray(np.random.randn(batch_size, input_size),
                               dtype=np.float64)
      values = sess.run(outputs, feed_dict={inputs[0]: input_value})
      self.assertEqual(values[0].dtype, input_value.dtype)
Example #39
    def _testProjSharding(self, use_gpu):
        num_units = 3
        input_size = 5
        batch_size = 2
        num_proj = 4
        num_proj_shards = 4
        num_unit_shards = 2
        with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
            initializer = tf.random_uniform_initializer(-0.01,
                                                        0.01,
                                                        seed=self._seed)

            inputs = 10 * [
                tf.placeholder(tf.float32, shape=(None, input_size))
            ]

            cell = rnn_cell.LSTMCell(num_units,
                                     input_size=input_size,
                                     use_peepholes=True,
                                     num_proj=num_proj,
                                     num_unit_shards=num_unit_shards,
                                     num_proj_shards=num_proj_shards,
                                     initializer=initializer)

            outputs, _ = rnn.rnn(cell, inputs, dtype=tf.float32)

            self.assertEqual(len(outputs), len(inputs))

            tf.initialize_all_variables().run()
            input_value = np.random.randn(batch_size, input_size)
            sess.run(outputs, feed_dict={inputs[0]: input_value})
Example #40
    def testEmbeddingAttentionDecoder(self):
        with self.test_session() as sess:
            with tf.variable_scope("root",
                                   initializer=tf.constant_initializer(0.5)):
                inp = [tf.constant(0.5, shape=[2, 2]) for _ in xrange(2)]
                cell = rnn_cell.GRUCell(2)
                enc_outputs, enc_states = rnn.rnn(cell, inp, dtype=tf.float32)
                attn_states = tf.concat(1, [
                    tf.reshape(e, [-1, 1, cell.output_size])
                    for e in enc_outputs
                ])
                dec_inp = [
                    tf.constant(i, tf.int32, shape=[2]) for i in xrange(3)
                ]
                dec, mem = seq2seq.embedding_attention_decoder(dec_inp,
                                                               enc_states[-1],
                                                               attn_states,
                                                               cell,
                                                               4,
                                                               output_size=3)
                sess.run([tf.initialize_all_variables()])
                res = sess.run(dec)
                self.assertEqual(len(res), 3)
                self.assertEqual(res[0].shape, (2, 3))

                res = sess.run(mem)
                self.assertEqual(len(res), 4)
                self.assertEqual(res[0].shape, (2, 2))
Example #41
    def build_model(self):
        video = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps, self.dim_image])
        video_mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])

        HLness = tf.placeholder(tf.int32, [self.batch_size, self.n_lstm_steps])
        HLness_mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])

        video_flat = tf.reshape(video, [-1, self.dim_image])
        image_emb = tf.nn.xw_plus_b(video_flat, self.encode_image_W, self.encode_image_b)  # (batch_size*n_lstm_steps, dim_hidden)
        image_emb = tf.reshape(image_emb, [self.batch_size, self.n_lstm_steps, self.dim_hidden])
        image_emb = tf.transpose(image_emb, [1, 0, 2])  # n x b x h

        state2 = tf.zeros([self.batch_size, self.lstm2.state_size])

        loss_HL = 0.0
        _X = tf.reshape(image_emb, [-1, self.dim_hidden])  # (n x b) x h
        _X = tf.split(0, self.n_lstm_steps, _X)  # n x (b x h)
        [output2, state2] = rnn.rnn(self.lstm_HL_net, _X, dtype=tf.float32)  # n x (b x h)
        output2 = tf.transpose(tf.pack(output2), [1, 0, 2])  # b x n x h
        onehot_labels = []
        logit_words = []
        indices = tf.expand_dims(tf.range(0, self.n_lstm_steps, 1), 1)  # n x 1
        for ii in xrange(10):
            labels = tf.expand_dims(HLness[ii, :], 1)  # n x 1
            concated = tf.concat(1, [indices, labels])  # n x 2
            onehot_labels = tf.sparse_to_dense(concated, tf.pack([self.n_lstm_steps, 2]), 1.0, 0.0)  # n x 2
            logit_words = tf.nn.xw_plus_b(output2[ii, :, :], self.embed_HL_W, self.embed_HL_b)  # n x 2
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logit_words, onehot_labels)  # n x 1
            cross_entropy = tf.mul(cross_entropy, HLness_mask[ii, :])  # n x 1
            loss_HL += tf.reduce_sum(cross_entropy)  # 1

        loss_HL = loss_HL / tf.reduce_sum(HLness_mask)
        loss = loss_HL
        return loss, video, video_mask, HLness, HLness_mask
Exemple #42
0
def seq2seq_f(cell, encoder_inputs, decoder_inputs, loop_output):
    '''
    The seq2seq neural network structure.
    
    Args: 
        cell: the RNNCell object
        encoder_inputs: a list of Tensors to feed the encoder
        decoder_inputs: a list of Tensors to feed the decoder
        loop_output: True for using the loop_func to construct the next 
            decoder_input element using the previous output element

    Returns:
        outputs: a list of Tensors generated by the decoder
        states: the hidden states at the final step of the encoder
    '''
    if loop_output:
        def loop_func(prev, i):
            # simplest construction: use the previous output as the next input
            return prev
        # run the encoder with rnn() directly, then drive the decoder with loop_func
        _, enc_states = rnn.rnn(cell, encoder_inputs, dtype=tf.float32)
        # note that the returned states are all hidden states, not just the last one
        outputs,states = seq2seq.rnn_decoder(decoder_inputs, enc_states[-1], cell, loop_func)
    else:
        # using the given decoder inputs
        outputs,states = seq2seq.basic_rnn_seq2seq(
                 encoder_inputs, decoder_inputs, cell)

    # one way to bound the output in [-1, 1], but not used:
    #     outputs = [tf.tanh(x) for x in outputs]

    # the returned states are just the last element of all the hidden states
    return outputs, states
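# --- Usage sketch (not part of the original example) -------------------------
# A minimal, hypothetical wiring of seq2seq_f. The shapes, lengths, and the
# TF 0.x-era import path below are assumptions added for illustration.
import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell, seq2seq

batch_size, dim, enc_len, dec_len = 4, 8, 5, 6
cell = rnn_cell.GRUCell(dim)
encoder_inputs = [tf.placeholder(tf.float32, [batch_size, dim])
                  for _ in xrange(enc_len)]
decoder_inputs = [tf.placeholder(tf.float32, [batch_size, dim])
                  for _ in xrange(dec_len)]

# loop_output=False feeds the given decoder_inputs (teacher forcing);
# loop_output=True would feed each previous decoder output back in instead.
outputs, final_state = seq2seq_f(cell, encoder_inputs, decoder_inputs,
                                 loop_output=False)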
Exemple #43
0
def char_rnn_model(X, y):
    byte_list = skflow.ops.one_hot_matrix(X, 256)
    byte_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, byte_list)
    cell = rnn_cell.GRUCell(HIDDEN_SIZE)
    #cell = rnn_cell.BasicLSTMCell(HIDDEN_SIZE)
    _, encoding = rnn.rnn(cell, byte_list, dtype=tf.float32)
    return skflow.models.logistic_regression(encoding, y)
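# --- Usage sketch (not part of the original example) -------------------------
# A rough, hypothetical way to plug char_rnn_model into the old skflow
# estimator API; the constants, class count, and estimator arguments are
# assumptions and may need adjusting for your skflow version.
import tensorflow as tf
import skflow
from tensorflow.models.rnn import rnn, rnn_cell

MAX_DOCUMENT_LENGTH = 100
HIDDEN_SIZE = 20

classifier = skflow.TensorFlowEstimator(model_fn=char_rnn_model, n_classes=15,
                                        steps=100)
# X_train: int byte ids of shape [n_samples, MAX_DOCUMENT_LENGTH]; y_train: labels
# classifier.fit(X_train, y_train)
# predictions = classifier.predict(X_test)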
Exemple #44
0
def build(graph, input, num_steps, hidden_size, num_layers, num_classes, is_training):
  """
  
  num_steps: the number of unrolled steps of LSTM
  hidden_size: the number of LSTM units
  
  """
  
  input_shape = input.get_shape().as_list()
  batch_size = input_shape[0]
  
  # Add the GRU Cell
  with graph.name_scope("rnn") as scope:
  
    gru_cell = tf.nn.rnn_cell.GRUCell(num_units=hidden_size, input_size=hidden_size)
  
    if is_training:
      gru_cell = tf.nn.rnn_cell.DropoutWrapper(gru_cell, output_keep_prob=1.0)
  
    cell = tf.nn.rnn_cell.MultiRNNCell([gru_cell] * num_layers)
    initial_state = cell.zero_state(batch_size, tf.float32)
    
    # A length T list of inputs, each a tensor of shape [batch_size, input_size].
    inputs = [tf.squeeze(input_) for input_ in tf.split(1, num_steps, input)]
    
    print "Inputs to RNN Cell shape:"
    print "[%d, %s]" % (len(inputs), inputs[0].get_shape().as_list())
    
    outputs, state = rnn.rnn(cell, inputs, initial_state = initial_state)
    
    # [num_steps * batch_size, hidden_size]
    #features = tf.reshape(tf.concat(1, outputs), [-1, hidden_size])
    features = state
    
    print "Outputs from RNN Cell shape:"
    print features.get_shape().as_list()
    
  # Add Softmax
  with graph.name_scope("softmax") as scope:

    weights = _variable_with_weight_decay(
      name='weights',
      shape=[hidden_size, num_classes],
      initializer=tf.contrib.layers.xavier_initializer(uniform=True, seed=None, dtype=tf.float32),
      wd=4e-5
    )
    graph.add_to_collection('softmax_params', weights)
     
    biases = _variable_on_cpu(
      name='biases', 
      shape=[num_classes], 
      initializer=tf.constant_initializer(0.0)
    )
    graph.add_to_collection('softmax_params', biases)

    softmax_linear = tf.nn.xw_plus_b(features, weights, biases, name="logits") 
  
  
  return softmax_linear
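# --- Usage sketch (not part of the original example) -------------------------
# A minimal, hypothetical call site for build(). The graph, shapes, and class
# count are assumptions; the _variable_with_weight_decay / _variable_on_cpu
# helpers used inside build() are presumed to be defined elsewhere.
import tensorflow as tf
from tensorflow.models.rnn import rnn

graph = tf.Graph()
with graph.as_default():
  batch_size, num_steps, hidden_size = 8, 20, 64
  # build() splits its input along dim 1, so the input is expected to be
  # [batch_size, num_steps, hidden_size] with feature size == hidden_size.
  frames = tf.placeholder(tf.float32, [batch_size, num_steps, hidden_size])
  logits = build(graph, frames, num_steps, hidden_size,
                 num_layers=2, num_classes=10, is_training=True)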
Exemple #45
0
 def final_state_of_rnn_over_embedded_sequence(idx, embedded_seq):
     with tf.variable_scope("rnn_%s" % idx):
         gru = rnn_cell.GRUCell(opts.hidden_dim)
         initial_state = gru.zero_state(opts.batch_size, tf.float32)
         outputs, _states = rnn.rnn(gru,
                                    embedded_seq,
                                    initial_state=initial_state)
         return outputs[-1]
Exemple #46
0
  def _testShardNoShardEquivalentOutput(self, use_gpu):
    num_units = 3
    input_size = 5
    batch_size = 2
    num_proj = 4
    num_proj_shards = 4
    num_unit_shards = 2
    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
      inputs = 10 * [tf.placeholder(tf.float32)]
      initializer = tf.constant_initializer(0.001)

      cell_noshard = rnn_cell.LSTMCell(
          num_units, input_size,
          num_proj=num_proj,
          use_peepholes=True,
          initializer=initializer,
          num_unit_shards=num_unit_shards,
          num_proj_shards=num_proj_shards)

      cell_shard = rnn_cell.LSTMCell(
          num_units, input_size, use_peepholes=True,
          initializer=initializer, num_proj=num_proj)

      with tf.variable_scope("noshard_scope"):
        outputs_noshard, states_noshard = rnn.rnn(
            cell_noshard, inputs, dtype=tf.float32)
      with tf.variable_scope("shard_scope"):
        outputs_shard, states_shard = rnn.rnn(
            cell_shard, inputs, dtype=tf.float32)

      self.assertEqual(len(outputs_noshard), len(inputs))
      self.assertEqual(len(outputs_noshard), len(outputs_shard))

      tf.initialize_all_variables().run()
      input_value = np.random.randn(batch_size, input_size)
      feeds = dict((x, input_value) for x in inputs)
      values_noshard = sess.run(outputs_noshard, feed_dict=feeds)
      values_shard = sess.run(outputs_shard, feed_dict=feeds)
      state_values_noshard = sess.run(states_noshard, feed_dict=feeds)
      state_values_shard = sess.run(states_shard, feed_dict=feeds)
      self.assertEqual(len(values_noshard), len(values_shard))
      self.assertEqual(len(state_values_noshard), len(state_values_shard))
      for (v_noshard, v_shard) in zip(values_noshard, values_shard):
        self.assertAllClose(v_noshard, v_shard, atol=1e-3)
      for (s_noshard, s_shard) in zip(state_values_noshard, state_values_shard):
        self.assertAllClose(s_noshard, s_shard, atol=1e-3)
Exemple #47
0
	def __init__ (self):
		# ******* PARAMS *********

		# total vocabulary size
		vocab_size = 50

		# LSTM hidden size (one character is processed per step)
		lstm_size = 2

		# number of characters fed sequentially per example
		num_steps = 40

		# small batch size for simplicity
		batch_size = 3

		# define is training
		is_training = True


		# ********* SET UP *********

		# make the lstm cell, with size lstm_size
		self.lstm_cell = rnn_cell.BasicLSTMCell (lstm_size, forget_bias=0.0)

		# if in training mode, add a dropout layer
		if is_training:
			self.lstm_cell = rnn_cell.DropoutWrapper (self.lstm_cell, output_keep_prob=0.5)

		# set initial state to zeroes
		self.initial_state = self.lstm_cell.zero_state(batch_size, tf.float32)

		# define inputs. has size batch_size X num_steps
		self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps])

		# define the embedding tensor: one row per vocabulary entry
		initial = tf.truncated_normal([vocab_size, lstm_size], stddev=0.1)
		self.embedding = tf.Variable(initial)

		# get the inputs from embedded data
		self.inputs = tf.split (1, num_steps, tf.nn.embedding_lookup (self.embedding, self.input_data))
		self.inputs = [tf.squeeze (input_, [1]) for input_ in self.inputs]

		# define outputs
		self.outputs, self.states = rnn.rnn(self.lstm_cell, self.inputs, initial_state=self.initial_state)

		print self.outputs [0]
		print self.states [0]

		# reshape input into [batch_size * num_steps, lstm_size]
		output = tf.reshape(tf.concat(1, self.outputs), [-1, lstm_size])

		print output

		# ********* TRAINING **********

		# quit if not training
		if not is_training:
			return
Exemple #48
0
    def __init__(self, is_training, config):
        self.batch_size = batch_size = config.batch_size
        size = config.n_hidden
        num_steps = config.num_steps
        self._input_data = tf.placeholder(tf.float32,
                                          (batch_size, config.num_steps))
        self._targets = tf.placeholder(tf.float32, [batch_size, 1])
        lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=2.8)
        # lstm_cell = rnn_cell.LSTMCell(size, 1)
        # cell = lstm_cell
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)
        self._train_op = tf.no_op()
        self._result = -1

        weights_hidden = tf.constant(
            1.0, shape=[config.num_features, config.n_hidden])
        weights_hidden = tf.get_variable(
            "weights_hidden", [config.num_features, config.n_hidden])
        inputs = []
        for k in range(num_steps):
            nextitem = tf.matmul(
                tf.reshape(self._input_data[:, k],
                           [config.batch_size, config.num_features]),
                weights_hidden)
            inputs.append(nextitem)

        outputs, states = rnn.rnn(cell,
                                  inputs,
                                  initial_state=self._initial_state)
        #output = tf.reshape(tf.concat(1, outputs), [-1, config.n_hidden])

        #pred = tf.matmul(outputs[-1], tf.get_variable("weights_out", [config.n_hidden,1])) + tf.get_variable("bias_out", [1])

        output = tf.reshape(tf.concat(1, outputs[-1]), [-1, size])
        #pred = tf.matmul(output, tf.get_variable("weights_out", [config.n_hidden,1])) + tf.get_variable("bias_out", [1])
        pred = tf.sigmoid(
            tf.matmul(outputs[-1],
                      tf.get_variable("weights_out", [config.n_hidden, 1])) +
            tf.get_variable("bias_out", [1]))
        self._pred = pred

        self._final_state = states[-1]
        self._cost = cost = tf.square((pred[:, 0] - self.targets[:, 0]))
        self._result = tf.abs(pred[0, 0] - self.targets[0, 0])

        # self._cost = cost = tf.abs(pred[0, 0] - self.targets[0,0])

        if not config.is_training:
            return

        #optimizer = tf.train.GradientDescentOptimizer(learning_rate = config.learning_rate).minimize(cost)
        optimizer = tf.train.AdamOptimizer().minimize(cost)
        self._train_op = optimizer
        print("top ", self._train_op)
Exemple #49
0
    def get_pred(self, n_input, n_steps, input_val):
        """Perform forward pass and return output of UrlRnn."""
        input_val = Model.reshape_data(n_input, n_steps, input_val)

        outputs, _ = rnn.rnn(self.lstm_cell,
                             input_val,
                             initial_state=self.istate)

        return tf.nn.softmax(
            tf.matmul(outputs[-1], self.weights['out']) + self.biases['out'])
Exemple #50
0
    def _init_seq2seq(self, encoder_inputs, decoder_inputs, cell, feed_previous):

        def inference_loop_function(prev, _):
            prev = tf.nn.xw_plus_b(prev, self.w_softmax, self.b_softmax)
            return tf.to_float(tf.equal(prev, tf.reduce_max(prev, reduction_indices=[1], keep_dims=True)))

        loop_function = inference_loop_function if feed_previous else None

        with variable_scope.variable_scope('seq2seq'):
            _, final_enc_state = rnn.rnn(cell, encoder_inputs, dtype=dtypes.float32)
            return seq2seq.rnn_decoder(decoder_inputs, final_enc_state, cell, loop_function=loop_function)
Exemple #51
0
    def __init__(self, is_training, config):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        # Slightly better results can be obtained with forget gate biases
        # initialized to 1 but the hyperparameters of the model would need to be
        # different than reported in the paper.
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
        if is_training and config.keep_prob < 1:
            lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
                lstm_cell, output_keep_prob=config.keep_prob)
        cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        with tf.device('/cpu:0'):
            embedding = tf.get_variable('embedding', [vocab_size, size])
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        inputs = [
            tf.squeeze(input_, [1])
            for input_ in tf.split(1, num_steps, inputs)
        ]
        outputs, state = rnn.rnn(cell,
                                 inputs,
                                 initial_state=self._initial_state)

        output = tf.reshape(tf.concat(1, outputs), [-1, size])
        softmax_w = tf.get_variable('softmax_w', [size, vocab_size])
        softmax_b = tf.get_variable('softmax_b', [vocab_size])
        self._logits = logits = tf.matmul(output, softmax_w) + softmax_b
        loss = tf.nn.seq2seq.sequence_loss_by_example(
            [logits], [tf.reshape(self._targets, [-1])],
            [tf.ones([batch_size * num_steps])])
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = state

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
Exemple #52
0
 def get_encoded_segment(self, segment, reuse):
     inputs = tf.split(
         0, len(segment),
         tf.nn.embedding_lookup(self._embedding_matrix, tf.pack(segment)))
     inputs = [tf.squeeze(input_, [0]) for input_ in inputs]
     with tf.variable_scope("encoder", reuse=reuse):
         encoder_outputs, encoder_states = rnn.rnn(
             self.encoder,
             inputs,
             initial_state=self._initial_encoder_state)
     return encoder_outputs[-1]
Exemple #53
0
def RNN(x, weight, biases):
    # shape of input x: [batch_size, num_steps, dim_input]
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, dim_input])
    x = tf.split(0, num_steps, x)

    lstm_cell = rnn_cell.BasicLSTMCell(dim_hidden, forget_bias=1.0)

    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)

    return tf.matmul(outputs[-1], weight['out']) + biases['out']
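# --- Usage sketch (not part of the original example) -------------------------
# RNN() reads dim_input, dim_hidden and num_steps as module-level globals;
# the values and the weight/bias dictionaries below are assumptions.
import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell

dim_input, dim_hidden, num_steps, n_classes = 28, 128, 28, 10
x = tf.placeholder(tf.float32, [None, num_steps, dim_input])
weight = {'out': tf.Variable(tf.random_normal([dim_hidden, n_classes]))}
biases = {'out': tf.Variable(tf.zeros([n_classes]))}
logits = RNN(x, weight, biases)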
Exemple #54
0
    def _make_graph(self):
        # Encode sequence.
        # TODO: MultilayerRNN?
        encoder_cell = util.GRUCell(self.input_dim, self.spec.policy_dims[0])
        _, self.encoder_states = rnn.rnn(encoder_cell,
                                         self.inputs,
                                         dtype=tf.float32,
                                         scope="encoder")
        assert len(self.encoder_states) == self.seq_length  # DEV

        # Build an "attention states" tensor of shape
        # `batch_size * seq_length * input_dim` by stacking the inputs
        # (the encoder states could be attended over here instead).
        attn_states = tf.concat(
            1, [tf.expand_dims(state_t, 1) for state_t in self.inputs])

        # Build a simple GRU-powered recurrent decoder cell.
        decoder_cell = util.GRUCell(self.input_dim, self.spec.policy_dims[0])

        # Prepare dummy encoder input. This will only be used on the first
        # timestep; in subsequent timesteps, the `loop_function` we provide
        # will be used to dynamically calculate new input values.
        batch_size = tf.shape(self.inputs[0])[0]
        dec_inp_shape = tf.pack([batch_size, decoder_cell.input_size])
        dec_inp_dummy = tf.zeros(dec_inp_shape, dtype=tf.float32)
        dec_inp_dummy.set_shape((None, decoder_cell.input_size))
        dec_inp = [dec_inp_dummy] * self.seq_length

        # Build pointer-network decoder.
        self.a_pred, dec_states, dec_inputs = ptr_net_decoder(
            dec_inp,
            self.encoder_states[-1],
            attn_states,
            decoder_cell,
            loop_function=self._loop_function(),
            scope="decoder")
        # Store dynamically calculated inputs -- critic may want to use these
        self.decoder_inputs = dec_inputs
        # Again strip the initial state.
        self.decoder_states = dec_states[1:]

        # Use noiser to build exploratory rollouts.
        self.a_explore = self.noiser(self.inputs, self.a_pred)

        # Now "dereference" the soft pointers produced by the policy network.
        a_pred_deref = self._deref_rollout(self.a_pred)
        a_explore_deref = self._deref_rollout(self.a_explore)

        # Build main model: recurrently apply a critic over the entire rollout.
        _, self.critic_on, self.critic_on_track = self._critic(a_pred_deref)
        self.critic_off_pre, self.critic_off, self.critic_off_track = \
            self._critic(a_explore_deref, reuse=True)

        self._make_q_targets()
Exemple #55
0
def unidirectional_lstm(inputs, keep_prob, INPUT_SIZE, HIDDEN_SIZE,
                        SEQ_LENGTH):
    initializer = tf.random_uniform_initializer(-0.01, 0.01)
    cell = LSTMCell(HIDDEN_SIZE, INPUT_SIZE, initializer=initializer)
    inputs_ = [tf.nn.dropout(each, keep_prob) for each in inputs]
    outputs, _ = rnn(cell,
                     inputs_,
                     initial_state=None,
                     sequence_length=None,
                     dtype=tf.float32)

    return outputs
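# --- Usage sketch (not part of the original example) -------------------------
# A minimal, hypothetical call; the shapes and the bare rnn/LSTMCell imports
# (matching how the function refers to them) are assumptions.
import tensorflow as tf
from tensorflow.models.rnn.rnn import rnn
from tensorflow.models.rnn.rnn_cell import LSTMCell

INPUT_SIZE, HIDDEN_SIZE, SEQ_LENGTH, BATCH_SIZE = 16, 32, 10, 4
keep_prob = tf.placeholder(tf.float32)
inputs = [tf.placeholder(tf.float32, [BATCH_SIZE, INPUT_SIZE])
          for _ in xrange(SEQ_LENGTH)]
outputs = unidirectional_lstm(inputs, keep_prob, INPUT_SIZE, HIDDEN_SIZE,
                              SEQ_LENGTH)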
Exemple #56
0
    def __init__(self, args, deterministic=False):
        self.args = args

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        elif args.model == 'bn-lstm':
            cell_fn = BNLSTMCell
        else:
            raise Exception('model type not supported: {}'.format(args.model))

        deterministic = tf.Variable(deterministic, name='deterministic')  # when training, set to False; when testing, set to True
        if args.model == 'bn-lstm':
            cell = cell_fn(args.rnn_size, bn=args.bn_level, deterministic=deterministic)
        else:
            cell = cell_fn(args.rnn_size)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        self.input_data = tf.placeholder(tf.int64, [None, args.seq_length])
        # self.targets = tf.placeholder(tf.int64, [None, args.seq_length])  # seq2seq model
        self.targets = tf.placeholder(tf.int64, [None, ])  # target is class label
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('embeddingLayer'):
            with tf.device('/cpu:0'):
                W = tf.get_variable('W', [args.vocab_size, args.rnn_size])
                embedded = tf.nn.embedding_lookup(W, self.input_data)

                # shape: (batch_size, seq_length, cell.input_size) => (seq_length, batch_size, cell.input_size)
                inputs = tf.split(1, args.seq_length, embedded)
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        outputs, last_state = rnn.rnn(cell, inputs, self.initial_state, scope='rnnLayer')

        with tf.variable_scope('softmaxLayer'):
            softmax_w = tf.get_variable('w', [args.rnn_size, args.label_size])
            softmax_b = tf.get_variable('b', [args.label_size])
            logits = tf.matmul(outputs[-1], softmax_w) + softmax_b
            self.probs = tf.nn.softmax(logits)

        # self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, self.targets))  # Softmax loss
        self.cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits, self.targets))  # Softmax loss
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.cost)  # Adam Optimizer

        self.correct_pred = tf.equal(tf.argmax(self.probs, 1), self.targets)
        self.correct_num = tf.reduce_sum(tf.cast(self.correct_pred, tf.float32))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))
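# --- Usage sketch (not part of the original example) -------------------------
# A hypothetical instantiation. The enclosing class is assumed here to be
# called Model; the Namespace fields simply mirror the attributes the
# constructor reads, and all values are made up.
import tensorflow as tf
from argparse import Namespace

args = Namespace(model='lstm', rnn_size=128, num_layers=2, seq_length=50,
                 vocab_size=5000, label_size=4, batch_size=32, bn_level=1)
model = Model(args, deterministic=False)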
Exemple #57
0
def rnn_model(X, init_state, lstm_size, slicing_tensors):
    # X, input shape: (batch_size, input_vec_size, time_step_size)
    # print "X shape", X.get_shape().as_list()
    XT = tf.transpose(X, [1, 0, 2])  # permute time_step_size and batch_size
    # XT shape: (input_vec_size, batch_szie, time_step_size)
    # print "XT shape", XT.get_shape().as_list()

    XR = tf.reshape(
        XT,
        [-1, lstm_size])  # each row has input for each lstm cell (lstm_size)
    # XR shape: (input vec_size, batch_size)
    # print sess.run(num_steps)
    # print "XR shape", XR.get_shape().as_list()

    X_split = tf.split(0, n_lstm_steps,
                       XR)  # split them to time_step_size (28 arrays)
    # Each array shape: (batch_size, input_vec_size)
    # print "X_split"
    # print len(X_split)
    # print X_split

    # Make lstm with lstm_size (each input vector size)
    lstm = rnn_cell.BasicLSTMCell(lstm_size, forget_bias=1.0)

    # Get lstm cell output, time_step_size (28) arrays with lstm_size output: (batch_size, lstm_size)
    outputs, _states = rnn.rnn(lstm, X_split, initial_state=init_state)
    # print  "outputs", outputs[0].get_shape()
    outputs = tf.reshape(tf.concat(0, outputs),
                         [n_lstm_steps, batch_size, dim_hidden])
    # Linear activation is NOT REQUIRED!!
    # Get the last output.
    # print "outputs"
    # print len(outputs)
    # print outputs

    # Slicing the appropriate output vectors from the <outputs>
    # sliced_outputs = [tf.slice(outputs[break_points[i]-1], slicing_lengths[i][0], slicing_lengths[i][1]) for i in range(batch_size)]
    slicing_tensors = [
        tf.squeeze(tsr) for tsr in tf.split(0, batch_size, slicing_tensors)
    ]
    # print  "slicing_tensors", slicing_tensors[0].get_shape()
    sliced_outputs = [
        tf.slice(outputs, begin=tensor, size=[1, 1, dim_hidden])
        for tensor in slicing_tensors
    ]
    # for begin,size in slicing_lengths:
    # print tf.slice(outputs, begin, size)

    # return outputs[-1], lstm.state_size # State size to initialize the state
    # return tf.squeeze(tf.concat(0, sliced_outputs)), lstm.state_size
    return sliced_outputs, lstm.state_size
Exemple #58
0
    def __init__(self, config):
        self._config = config
        # Create placeholders for the input and the targets
        self._input_data = tf.placeholder(
            tf.int32, [config.batch_size, config.num_steps])
        self._targets = tf.placeholder(tf.int32,
                                       [config.batch_size, config.num_steps])
        self._actual_seq_lengths = tf.placeholder(tf.int32,
                                                  [config.batch_size])
        self._prediction = tf.placeholder(
            tf.int32, [config.batch_size, config.num_steps])
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(
            config.hidden_size)  # Create a basic LSTM cell
        # Now replicate the LSTM cell to create layers for a deep network
        cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        # Make the initial state operator available
        self._initial_state = cell.zero_state(config.batch_size, tf.float32)

        # Map the inputs to their current embedding vectors
        # Embedding lookup must happen on the CPU as it is not currently supported on GPU
        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [config.num_songs, config.embedding_size])
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        inputs = [
            tf.squeeze(input_, [1])
            for input_ in tf.split(1, config.num_steps, inputs)
        ]
        outputs, state = rnn.rnn(cell,
                                 inputs,
                                 initial_state=self._initial_state,
                                 sequence_length=self._actual_seq_lengths)

        output = tf.reshape(tf.concat(1, outputs), [-1, config.hidden_size])
        softmax_w = tf.get_variable("softmax_w",
                                    [config.hidden_size, config.num_songs])
        softmax_b = tf.get_variable("softmax_b", [config.num_songs])
        logits = tf.matmul(output, softmax_w) + softmax_b

        # Compute the cross-entropy loss of the sequence by comparing each prediction with each target
        loss = tf.nn.seq2seq.sequence_loss_by_example(
            [logits], [tf.reshape(self._targets, [-1])],
            [tf.ones([config.batch_size * config.num_steps])])
        # Added prediction
        self._prediction = logits

        # Expose the cost and final_state
        self._cost = tf.reduce_sum(loss) / config.batch_size
        self._final_state = state