Example #1
    def setup_target_encoder(self):
        """
        This sets up an encoder that works on
        target sentence and produce a single label in the end
        encoder has attentions

        Returns
        -------
        """
        if self.num_layers > 1:
            self.tgt_encoder_cell = rnn_cell.GRUCell(self.size, input_size=self.embedding[1])
        self.attn_cell = GRUCellAttn(self.size, self.encoder_output, scope="EncoderAttnCell")

        out = self.decoder_inputs

        with vs.variable_scope("TgtEncoder"):
            inp = self.decoder_inputs
            for i in xrange(self.num_layers - 1):
                with vs.variable_scope("TgtEncoderCell%d" % i) as scope:
                    out, state_output = rnn.dynamic_rnn(self.tgt_encoder_cell, self.dropout(inp), time_major=False,
                                                        dtype=dtypes.float32, sequence_length=self.tgt_steps,
                                                        scope=scope, initial_state=self.tgt_encoder_state_output[i])
                    inp = out
                    self.tgt_encoder_state_output.append(state_output)

            with vs.variable_scope("TgtEncoderAttnCell") as scope:
                out, state_output = rnn.dynamic_rnn(self.attn_cell, self.dropout(inp), time_major=False,
                                                    dtype=dtypes.float32, sequence_length=self.tgt_steps,
                                                    scope=scope, initial_state=self.tgt_encoder_state_output[i + 1])
                self.tgt_encoder_output = out
                self.tgt_encoder_state_output.append(state_output)
Example #2
 def _composition_function(self, inputs, length, init_state=None):
     if self._composition == "GRU":
         cell = GRUCell(self._size)
         return dynamic_rnn(cell, inputs, sequence_length=length, time_major=True,
                            initial_state=init_state, dtype=tf.float32)[0]
     elif self._composition == "LSTM":
         cell = BasicLSTMCell(self._size)
         init_state = tf.concat(1, [tf.zeros_like(init_state, tf.float32), init_state]) if init_state is not None else None
         outs = dynamic_rnn(cell, inputs, sequence_length=length, time_major=True,
                            initial_state=init_state, dtype=tf.float32)[0]
         return outs
     elif self._composition == "BiGRU":
         cell = GRUCell(self._size // 2, self._size)
         init_state_fw, init_state_bw = tf.split(1, 2, init_state) if init_state is not None else (None, None)
         with tf.variable_scope("forward"):
             fw_outs = dynamic_rnn(cell, inputs, sequence_length=length, time_major=True,
                                   initial_state=init_state_fw, dtype=tf.float32)[0]
         with tf.variable_scope("backward"):
             rev_inputs = tf.reverse_sequence(tf.pack(inputs), length, 0, 1)
             rev_inputs = [tf.reshape(x, [-1, self._size]) for x in tf.split(0, len(inputs), rev_inputs)]
             bw_outs = dynamic_rnn(cell, rev_inputs, sequence_length=length, time_major=True,
                                   initial_state=init_state_bw, dtype=tf.float32)[0]
             bw_outs = tf.reverse_sequence(tf.pack(bw_outs), length, 0, 1)
             bw_outs = [tf.reshape(x, [-1, self._size]) for x in tf.split(0, len(inputs), bw_outs)]
         return [tf.concat(1, [fw_out, bw_out]) for fw_out, bw_out in zip(fw_outs, bw_outs)]
     else:
         raise NotImplementedError("Other compositions not implemented yet.")
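
The BiGRU branch above builds the backward pass by hand with tf.reverse_sequence. For comparison, here is a minimal sketch of the same idea using tf.nn.bidirectional_dynamic_rnn from TensorFlow 1.x, which reverses the inputs internally according to sequence_length; the tensor names and sizes below are invented for the illustration.

import tensorflow as tf

# Hypothetical sizes for the sketch.
batch_size, max_time, depth, size = 8, 20, 32, 64

inputs = tf.placeholder(tf.float32, [max_time, batch_size, depth])  # time-major
length = tf.placeholder(tf.int32, [batch_size])

cell_fw = tf.nn.rnn_cell.GRUCell(size // 2)
cell_bw = tf.nn.rnn_cell.GRUCell(size // 2)

# The backward outputs come back already re-aligned with the forward ones.
(out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
    cell_fw, cell_bw, inputs, sequence_length=length,
    time_major=True, dtype=tf.float32)

outputs = tf.concat([out_fw, out_bw], axis=-1)  # [max_time, batch_size, size]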
Example #3
    def setup_decoder(self):
        """
        This sets up a decoder

        but we may need a double-encoder

        Returns
        -------
        """
        if self.num_layers > 1:
            self.decoder_cell = rnn_cell.GRUCell(self.size, input_size=self.embedding[1])
        self.attn_cell = GRUCellAttn(self.size, self.encoder_output, scope="DecoderAttnCell")

        out = self.decoder_inputs

        with vs.variable_scope("Decoder"):
            inp = self.decoder_inputs
            for i in xrange(self.num_layers - 1):
                with vs.variable_scope("DecoderCell%d" % i) as scope:
                    out, state_output = rnn.dynamic_rnn(self.decoder_cell, self.dropout(inp), time_major=False,
                                                        dtype=dtypes.float32, sequence_length=self.tgt_steps,
                                                        scope=scope, initial_state=self.decoder_state_input[i])
                    inp = out
                    self.decoder_state_output.append(state_output)

            with vs.variable_scope("DecoderAttnCell") as scope:
                out, state_output = rnn.dynamic_rnn(self.attn_cell, self.dropout(inp), time_major=False,
                                                    dtype=dtypes.float32, sequence_length=self.tgt_steps,
                                                    scope=scope, initial_state=self.decoder_state_input[i + 1])
                self.decoder_output = out
                self.decoder_state_output.append(state_output)
Example #4
 def testBatchSizeFromInput(self):
   cell = Plus1RNNCell()
   # With static batch size
   inputs = array_ops.placeholder(dtypes.float32, shape=(3, 4, 5))
   # - Without initial_state
   outputs, state = rnn.dynamic_rnn(cell, inputs, dtype=dtypes.float32)
   self.assertEqual(3, outputs.shape[0].value)
   self.assertEqual(3, state.shape[0].value)
   # - With initial_state
   outputs, state = rnn.dynamic_rnn(
       cell,
       inputs,
       initial_state=array_ops.placeholder(dtypes.float32, shape=(3, 5)))
   self.assertEqual(3, outputs.shape[0].value)
   self.assertEqual(3, state.shape[0].value)
   # Without static batch size
   inputs = array_ops.placeholder(dtypes.float32, shape=(None, 4, 5))
   # - Without initial_state
   outputs, state = rnn.dynamic_rnn(cell, inputs, dtype=dtypes.float32)
   self.assertEqual(None, outputs.shape[0].value)
   self.assertEqual(None, state.shape[0].value)
   # - With initial_state
   outputs, state = rnn.dynamic_rnn(
       cell,
       inputs,
       initial_state=array_ops.placeholder(dtypes.float32, shape=(None, 5)))
   self.assertEqual(None, outputs.shape[0].value)
   self.assertEqual(None, state.shape[0].value)
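
The test above checks that dynamic_rnn propagates the static batch size from its inputs. A minimal sketch of the same behaviour with the public TensorFlow 1.x API (the shapes and the cell size are arbitrary):

import tensorflow as tf

# Static batch size: the leading output dimension is known at graph time.
x_static = tf.placeholder(tf.float32, shape=(3, 4, 5))
out_s, state_s = tf.nn.dynamic_rnn(
    tf.nn.rnn_cell.GRUCell(7), x_static, dtype=tf.float32, scope="static_rnn")
print(out_s.shape)    # (3, 4, 7)
print(state_s.shape)  # (3, 7)

# Unknown batch size: the leading dimension stays undefined until run time.
x_dynamic = tf.placeholder(tf.float32, shape=(None, 4, 5))
out_d, state_d = tf.nn.dynamic_rnn(
    tf.nn.rnn_cell.GRUCell(7), x_dynamic, dtype=tf.float32, scope="dynamic_rnn")
print(out_d.shape)    # (?, 4, 7)
print(state_d.shape)  # (?, 7)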
Example #5
def sentence_embedding_rnn(_encoder_inputs, vocab_size, cell,
	embedding_size, mask=None, dtype=dtypes.float32, scope=None, reuse_scope=None):
	"""
	Embeds the input token ids and runs `cell` over each sentence segment,
	returning the list of final encoder states (one per segment given by `mask`).
	"""
	with variable_scope.variable_scope("embedding_rnn", reuse=reuse_scope):
		encoder_cell = rnn_cell.EmbeddingWrapper(
				cell, embedding_classes=vocab_size,
				embedding_size=embedding_size)
		# Divide encoder_inputs according to the given input mask
		if mask is not None:
			encoder_inputs = [[] for _ in mask]
			_mask = 0
			for num in range(len(_encoder_inputs)):
				encoder_inputs[_mask].append(_encoder_inputs[num])
				if num == mask[_mask]:
					_mask += 1
		else:
			encoder_inputs = [_encoder_inputs]
		encoder_state = None
		encoder_states = []
		for encoder_input in encoder_inputs:
			if encoder_state is None:
				_, encoder_state = rnn.dynamic_rnn(encoder_cell, encoder_input, dtype=dtype)
			else:
				_, encoder_state = rnn.dynamic_rnn(encoder_cell, encoder_input,
				                                   initial_state=encoder_state, dtype=dtype)
			encoder_states.append(encoder_state)
		return encoder_states
Example #6
  def testBlockGRUToGRUCellMultiStep(self):
    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
      batch_size = 2
      cell_size = 3
      input_size = 3
      time_steps = 4

      # Random initializers.
      seed = 1994
      initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=seed)
      np.random.seed(seed)

      # Inputs
      concat_x = array_ops.placeholder(
          dtypes.float32, shape=(time_steps, batch_size, input_size))
      h = array_ops.zeros([batch_size, cell_size])

      # Values for the inputs.
      x_values = np.random.rand(time_steps, batch_size, input_size)
      h_value = np.random.rand(batch_size, cell_size)

      # Output from the block GRU cell implementation.
      with vs.variable_scope("block", initializer=initializer):
        cell = gru_ops.GRUBlockCell(cell_size)
        outputs_dynamic, state_dynamic = rnn.dynamic_rnn(
            cell,
            inputs=concat_x,
            initial_state=h,
            time_major=True,
            dtype=dtypes.float32)
        feeds = {concat_x: x_values, h: h_value}
        sess.run([variables.global_variables_initializer()])
        block_res = sess.run([outputs_dynamic, state_dynamic], feeds)

      # Output from the basic GRU cell implementation.
      with vs.variable_scope("basic", initializer=initializer):
        cell = rnn_cell.GRUCell(cell_size)
        outputs_dynamic, state_dynamic = rnn.dynamic_rnn(
            cell,
            inputs=concat_x,
            initial_state=h,
            time_major=True,
            dtype=dtypes.float32)
        feeds = {concat_x: x_values, h: h_value}
        sess.run([variables.global_variables_initializer()])
        basic_res = sess.run([outputs_dynamic, state_dynamic], feeds)

      # Check the lengths of the outputs_dynamic, and states.
      self.assertEqual(len(block_res), len(basic_res))
      self.assertEqual(len(block_res[0]), len(basic_res[0]))
      self.assertEqual(len(block_res[1]), len(basic_res[1]))

      # Check the outputs_dynamic values.
      for block_output, basic_output in zip(block_res[0], basic_res[0]):
        self.assertAllClose(block_output, basic_output)

      # Check the state_dynamic value.
      self.assertAllClose(block_res[1], basic_res[1])
Example #7
 def testInvalidSequenceLengthShape(self):
   cell = Plus1RNNCell()
   inputs = [array_ops.placeholder(dtypes.float32, shape=(3, 4))]
   with self.assertRaisesRegexp(ValueError, "must be a vector"):
     rnn.dynamic_rnn(
         cell,
         array_ops.stack(inputs),
         dtype=dtypes.float32,
         sequence_length=[[4]])
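
For contrast with the invalid shape being tested, here is a minimal sketch of a valid call, where sequence_length is a rank-1 vector with one entry per batch element (the sizes are arbitrary):

import tensorflow as tf

cell = tf.nn.rnn_cell.BasicRNNCell(4)
inputs = tf.placeholder(tf.float32, shape=(3, 4, 4))  # [batch, time, depth]

# sequence_length must be a vector of length batch_size, not a matrix.
outputs, state = tf.nn.dynamic_rnn(
    cell, inputs, sequence_length=[4, 4, 4], dtype=tf.float32)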
Example #8
def inference_gru_block_vs_gru_cell(batch_size,
                                    cell_size,
                                    input_size,
                                    time_steps,
                                    use_gpu=False,
                                    iters=30):
  """Benchmark inference speed between GRUBlockCell vs GRUCell."""
  ops.reset_default_graph()
  with session.Session(graph=ops.Graph()) as sess:
    with benchmarking.device(use_gpu):

      # Random initializers.
      seed = 1994
      initializer = init_ops.random_uniform_initializer(-1, 1, seed=seed)
      np.random.seed(seed)

      # Inputs
      concat_x = vs.get_variable("concat_x",
                                 [time_steps, batch_size, input_size])
      h = vs.get_variable("h", [batch_size, cell_size])

      # Output from the basic GRU cell implementation.
      with vs.variable_scope("basic", initializer=initializer):
        cell = rnn_cell.GRUCell(cell_size)
        outputs_dynamic, _ = rnn.dynamic_rnn(
            cell,
            inputs=concat_x,
            initial_state=h,
            time_major=True,
            dtype=dtypes.float32)
        sess.run([variables.global_variables_initializer()])
        basic_time_inference = benchmarking.seconds_per_run(
            outputs_dynamic, sess, iters)

      # Output from the block GRU cell implementation.
      with vs.variable_scope("block", initializer=initializer):
        cell = gru_ops.GRUBlockCell(cell_size)
        outputs_dynamic, _ = rnn.dynamic_rnn(
            cell,
            inputs=concat_x,
            initial_state=h,
            time_major=True,
            dtype=dtypes.float32)
        sess.run([variables.global_variables_initializer()])
        block_time_inference = benchmarking.seconds_per_run(
            outputs_dynamic, sess, iters)

    performance_inference = (basic_time_inference - block_time_inference
                            ) * 100 / basic_time_inference
    print(",".join([
        str(batch_size), str(cell_size), str(input_size), str(time_steps), str(
            use_gpu), str(basic_time_inference), str(block_time_inference), str(
                performance_inference)
    ]))

    return basic_time_inference, block_time_inference
Example #9
def crf_decode(potentials, transition_params, sequence_length):
  """Decode the highest scoring sequence of tags in TensorFlow.

  This is a graph-mode implementation that operates on tensors.

  Args:
    potentials: A [batch_size, max_seq_len, num_tags] tensor of
              unary potentials.
    transition_params: A [num_tags, num_tags] matrix of
              binary potentials.
    sequence_length: A [batch_size] vector of true sequence lengths.

  Returns:
    decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`.
                Contains the highest scoring tag indices.
    best_score: A [batch_size] vector, containing the score of `decode_tags`.
  """
  # For simplicity, in shape comments, denote:
  # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output).
  num_tags = potentials.get_shape()[2].value

  # Computes forward decoding. Get last score and backpointers.
  crf_fwd_cell = CrfDecodeForwardRnnCell(transition_params)
  initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
  initial_state = array_ops.squeeze(initial_state, axis=[1])      # [B, O]
  inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1])   # [B, T-1, O]
  backpointers, last_score = rnn.dynamic_rnn(
      crf_fwd_cell,
      inputs=inputs,
      sequence_length=sequence_length - 1,
      initial_state=initial_state,
      time_major=False,
      dtype=dtypes.int32)             # [B, T - 1, O], [B, O]
  backpointers = gen_array_ops.reverse_sequence(
      backpointers, sequence_length - 1, seq_dim=1)               # [B, T-1, O]

  # Computes backward decoding. Extract tag indices from backpointers.
  crf_bwd_cell = CrfDecodeBackwardRnnCell(num_tags)
  initial_state = math_ops.cast(math_ops.argmax(last_score, axis=1),
                                dtype=dtypes.int32)               # [B]
  initial_state = array_ops.expand_dims(initial_state, axis=-1)   # [B, 1]
  decode_tags, _ = rnn.dynamic_rnn(
      crf_bwd_cell,
      inputs=backpointers,
      sequence_length=sequence_length - 1,
      initial_state=initial_state,
      time_major=False,
      dtype=dtypes.int32)           # [B, T - 1, 1]
  decode_tags = array_ops.squeeze(decode_tags, axis=[2])           # [B, T - 1]
  decode_tags = array_ops.concat([initial_state, decode_tags], axis=1)  # [B, T]
  decode_tags = gen_array_ops.reverse_sequence(
      decode_tags, sequence_length, seq_dim=1)                     # [B, T]

  best_score = math_ops.reduce_max(last_score, axis=1)             # [B]
  return decode_tags, best_score
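
A minimal sketch of driving this decoder through the public wrapper, assuming the tf.contrib.crf module of TensorFlow 1.x is available; all shapes and values below are made up for the illustration.

import numpy as np
import tensorflow as tf

batch_size, max_seq_len, num_tags = 2, 5, 4

potentials = tf.constant(
    np.random.rand(batch_size, max_seq_len, num_tags).astype(np.float32))
transition_params = tf.constant(
    np.random.rand(num_tags, num_tags).astype(np.float32))
sequence_length = tf.constant([5, 3], dtype=tf.int32)

# tf.contrib.crf.crf_decode wraps the dynamic_rnn-based Viterbi decoding above.
decode_tags, best_score = tf.contrib.crf.crf_decode(
    potentials, transition_params, sequence_length)

with tf.Session() as sess:
    tags, score = sess.run([decode_tags, best_score])
    print(tags.shape)   # (2, 5)
    print(score.shape)  # (2,)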
Example #10
 def testInvalidSequenceLengthShape(self):
   cell = Plus1RNNCell()
   if context.in_graph_mode():
     inputs = [array_ops.placeholder(dtypes.float32, shape=(3, 4))]
   else:
     inputs = [constant_op.constant(np.ones((3, 4)))]
   with self.assertRaisesRegexp(ValueError, "must be a vector"):
     rnn.dynamic_rnn(
         cell,
         array_ops.stack(inputs),
         dtype=dtypes.float32,
         sequence_length=[[4]])
Example #11
def RNN(inputs, lens, name, reuse):
    print ("Building network " + name)
    # Define weights
    inputs = tf.gather(one_hots, inputs)
    weights = tf.Variable(tf.random_normal([__n_hidden, n_output]), name=name+"_weights")
    biases = tf.Variable(tf.random_normal([n_output]), name=name+"_biases")

    # Define a lstm cell with tensorflow

    enc_outputs, enc_states = rnn.dynamic_rnn(
        __cell_kind(__n_hidden),
        inputs,
        sequence_length=lens,
        dtype=tf.float32,
        scope=name + "_enc",  # separate scope so the two dynamic_rnn calls do not collide
        time_major=False)

    dec_outputs, dec_states = rnn.dynamic_rnn(
        __cell_kind(__n_hidden),
        enc_outputs,
        sequence_length=lens,
        dtype=tf.float32,
        scope=name + "_dec",
        time_major=False)

    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (__batch_size, __n_steps, n_input)
    # Required shape: '__n_steps' tensors list of shape (__batch_size, n_input)

    '''dec_outputs, dec_states = rnn.rnn(
        __cell_kind(__n_hidden),
        tf.unpack(tf.transpose(inputs, [1, 0, 2])),
        sequence_length=lens,
        dtype=tf.float32,
        scope=name)
    outputs = tf.transpose(tf.pack(outputs), [1, 0, 2])'''
    print ("Done building network " + name)

    # Asserts are actually documentation: they can't be out of date
    assert dec_outputs.get_shape() == (__batch_size, __n_steps, __n_hidden)
    # Linear activation, using rnn output for each char
    # Reshaping here for a `batch` matrix multiply
    # It's faster than `batch_matmul` probably because it can guarantee a
    # static shape
    outputs = tf.reshape(dec_outputs, [__batch_size * __n_steps, __n_hidden])
    finals = tf.matmul(outputs, weights)
    finals = tf.reshape(finals, [__batch_size, __n_steps, n_output]) + biases
    return finals[:, :__n_steps-1, :]
Example #12
  def testCustomizedAttention(self):
    batch_size = 2
    max_time = 3
    num_units = 2
    memory = constant_op.constant([[[1., 1.], [2., 2.], [3., 3.]],
                                   [[4., 4.], [5., 5.], [6., 6.]]])
    memory_sequence_length = constant_op.constant([3, 2])
    attention_mechanism = wrapper.BahdanauAttention(num_units, memory,
                                                    memory_sequence_length)

    # Sets all returned values to be all ones.
    def _customized_attention(unused_attention_mechanism, unused_cell_output,
                              unused_attention_state, unused_attention_layer):
      """Customized attention.

      Returns:
        attention: `Tensor` of shape [batch_size, num_units], attention output.
        alignments: `Tensor` of shape [batch_size, max_time], sigma value for
          each input memory (prob. function of input keys).
        next_attention_state: A `Tensor` representing the next state for the
          attention.
      """
      attention = array_ops.ones([batch_size, num_units])
      alignments = array_ops.ones([batch_size, max_time])
      next_attention_state = alignments
      return attention, alignments, next_attention_state

    attention_cell = wrapper.AttentionWrapper(
        rnn_cell.LSTMCell(2),
        attention_mechanism,
        attention_layer_size=None,  # don't use attention layer.
        output_attention=False,
        alignment_history=(),
        attention_fn=_customized_attention,
        name='attention')
    self.assertEqual(num_units, attention_cell.output_size)

    initial_state = attention_cell.zero_state(
        batch_size=2, dtype=dtypes.float32)
    source_input_emb = array_ops.ones([2, 3, 2])
    source_input_length = constant_op.constant([3, 2])

    # 'state' is a tuple of
    # (cell_state, h, attention, alignments, alignment_history, attention_state)
    output, state = rnn.dynamic_rnn(
        attention_cell,
        inputs=source_input_emb,
        sequence_length=source_input_length,
        initial_state=initial_state,
        dtype=dtypes.float32)

    with self.session() as sess:
      sess.run(variables.global_variables_initializer())
      output_value, state_value = sess.run([output, state], feed_dict={})
      self.assertAllEqual(np.array([2, 3, 2]), output_value.shape)
      self.assertAllClose(np.array([[1., 1.], [1., 1.]]), state_value.attention)
      self.assertAllClose(
          np.array([[1., 1., 1.], [1., 1., 1.]]), state_value.alignments)
      self.assertAllClose(
          np.array([[1., 1., 1.], [1., 1., 1.]]), state_value.attention_state)
Example #13
def crf_log_norm(inputs, sequence_lengths, transition_params):
  """Computes the normalization for a CRF.

  Args:
    inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials
        to use as input to the CRF layer.
    sequence_lengths: A [batch_size] vector of true sequence lengths.
    transition_params: A [num_tags, num_tags] transition matrix.
  Returns:
    log_norm: A [batch_size] vector of normalizers for a CRF.
  """
  # Split up the first and rest of the inputs in preparation for the forward
  # algorithm.
  first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1])
  first_input = array_ops.squeeze(first_input, [1])
  rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])

  # Compute the alpha values in the forward algorithm in order to get the
  # partition function.
  forward_cell = CrfForwardRnnCell(transition_params)
  _, alphas = rnn.dynamic_rnn(
      cell=forward_cell,
      inputs=rest_of_input,
      sequence_length=sequence_lengths - 1,
      initial_state=first_input,
      dtype=dtypes.float32)
  log_norm = math_ops.reduce_logsumexp(alphas, [1])
  return log_norm
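
crf_log_norm is normally consumed through crf_log_likelihood, which subtracts the normalizer from the sequence score to form a training objective. A minimal sketch, assuming tf.contrib.crf from TensorFlow 1.x; the placeholder names are invented.

import tensorflow as tf

num_tags = 5

unary_scores = tf.placeholder(tf.float32, [None, None, num_tags])  # [B, T, O]
tag_indices = tf.placeholder(tf.int32, [None, None])               # [B, T]
sequence_lengths = tf.placeholder(tf.int32, [None])                # [B]

# log_likelihood = sequence_score - crf_log_norm; minimizing its negative
# mean is the usual CRF training loss.
log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
    unary_scores, tag_indices, sequence_lengths)
loss = tf.reduce_mean(-log_likelihood)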
Example #14
  def testRNNWithKerasGRUCell(self):
    with self.cached_session() as sess:
      input_shape = 10
      output_shape = 5
      timestep = 4
      batch = 100
      (x_train, y_train), _ = testing_utils.get_test_data(
          train_samples=batch,
          test_samples=0,
          input_shape=(timestep, input_shape),
          num_classes=output_shape)
      y_train = keras.utils.to_categorical(y_train)
      cell = keras.layers.GRUCell(output_shape)

      inputs = array_ops.placeholder(
          dtypes.float32, shape=(None, timestep, input_shape))
      predict = array_ops.placeholder(
          dtypes.float32, shape=(None, output_shape))

      outputs, state = rnn.dynamic_rnn(
          cell, inputs, dtype=dtypes.float32)
      self.assertEqual(outputs.shape.as_list(), [None, timestep, output_shape])
      self.assertEqual(state.shape.as_list(), [None, output_shape])
      loss = losses.softmax_cross_entropy(predict, state)
      train_op = training.GradientDescentOptimizer(0.001).minimize(loss)

      sess.run([variables_lib.global_variables_initializer()])
      _, outputs, state = sess.run(
          [train_op, outputs, state], {inputs: x_train, predict: y_train})

      self.assertEqual(len(outputs), batch)
      self.assertEqual(len(state), batch)
Example #15
  def testTensorArrayStateIsAccepted(self):
    cell = TensorArrayStateRNNCell()
    in_graph_mode = context.in_graph_mode()

    if in_graph_mode:
      inputs = array_ops.placeholder(dtypes.float32, shape=(1, 4, 1))
    else:
      inputs = np.array([[[1], [2], [3], [4]]], dtype=np.float32)

    with self.test_session() as sess:
      outputs, state = rnn.dynamic_rnn(
          cell, inputs, dtype=dtypes.float32, sequence_length=[4])
      state = (state[0], state[1].stack())
      if in_graph_mode:
        outputs, state = sess.run(
            [outputs, state], feed_dict={
                inputs: [[[1], [2], [3], [4]]]
            })

    if in_graph_mode:
      self.assertAllEqual(outputs, np.array([[[1], [2], [3], [4]]]))
      self.assertEqual(state[0], 4)
      self.assertAllEqual(state[1], np.array([[[1]], [[2]], [[3]], [[4]]]))
    else:
      self.assertAllEqual(outputs.numpy(), np.array([[[1], [2], [3], [4]]]))
      self.assertEqual(state[0].numpy(), 4)
      self.assertAllEqual(state[1].numpy(),
                          np.array([[[1]], [[2]], [[3]], [[4]]]))
Example #16
  def testDeviceWrapperDynamicExecutionNodesAreAllProperlyLocated(self):
    if not test.is_gpu_available():
      # Can't perform this test w/o a GPU
      return

    with self.test_session(use_gpu=True) as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        x = array_ops.zeros([1, 1, 3])
        cell = rnn_cell_impl.DeviceWrapper(rnn_cell_impl.GRUCell(3), "/gpu:0")
        with ops.device("/cpu:0"):
          outputs, _ = rnn.dynamic_rnn(
              cell=cell, inputs=x, dtype=dtypes.float32)
        run_metadata = config_pb2.RunMetadata()
        opts = config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE)

        sess.run([variables_lib.global_variables_initializer()])
        _ = sess.run(outputs, options=opts, run_metadata=run_metadata)

      step_stats = run_metadata.step_stats
      ix = 0 if "gpu" in step_stats.dev_stats[0].device else 1
      gpu_stats = step_stats.dev_stats[ix].node_stats
      cpu_stats = step_stats.dev_stats[1 - ix].node_stats
      self.assertFalse([s for s in cpu_stats if "gru_cell" in s.node_name])
      self.assertTrue([s for s in gpu_stats if "gru_cell" in s.node_name])
Example #17
  def testKerasAndTFRNNLayerOutputComparison(self):
    input_shape = 10
    output_shape = 5
    timestep = 4
    batch = 20
    (x_train, _), _ = testing_utils.get_test_data(
        train_samples=batch,
        test_samples=0,
        input_shape=(timestep, input_shape),
        num_classes=output_shape)
    fix_weights_generator = keras.layers.SimpleRNNCell(output_shape)
    fix_weights_generator.build((None, input_shape))
    weights = fix_weights_generator.get_weights()

    with self.session(graph=ops_lib.Graph()) as sess:
      inputs = array_ops.placeholder(
          dtypes.float32, shape=(None, timestep, input_shape))
      cell = keras.layers.SimpleRNNCell(output_shape)
      tf_out, tf_state = rnn.dynamic_rnn(
          cell, inputs, dtype=dtypes.float32)
      cell.set_weights(weights)
      [tf_out, tf_state] = sess.run([tf_out, tf_state], {inputs: x_train})
    with self.session(graph=ops_lib.Graph()) as sess:
      k_input = keras.Input(shape=(timestep, input_shape),
                            dtype=dtypes.float32)
      cell = keras.layers.SimpleRNNCell(output_shape)
      layer = keras.layers.RNN(cell, return_sequences=True, return_state=True)
      keras_out = layer(k_input)
      cell.set_weights(weights)
      k_out, k_state = sess.run(keras_out, {k_input: x_train})
    self.assertAllClose(tf_out, k_out)
    self.assertAllClose(tf_state, k_state)
Example #18
  def _create_equivalent_canonical_rnn(self,
                                       cudnn_model,
                                       inputs,
                                       use_block_cell,
                                       scope="rnn"):
    if cudnn_model.rnn_mode != "lstm":
      raise ValueError("%s is not supported!" % cudnn_model.rnn_mode)

    num_units = cudnn_model.num_units
    num_layers = cudnn_model.num_layers

    # To reuse cuDNN-trained models, must set
    # forget_bias, clip_cell = 0, False
    # In LSTMCell and LSTMBlockCell, forget_bias is added in addition to learned
    # bias, whereas cuDNN does not apply the additional bias.
    if use_block_cell:
      # pylint: disable=g-long-lambda
      single_cell = lambda: lstm_ops.LSTMBlockCell(num_units, forget_bias=0,
                                                   clip_cell=False)
      # pylint: enable=g-long-lambda
    else:
      single_cell = lambda: rnn_cell_impl.LSTMCell(num_units, forget_bias=0)
    cell = rnn_cell_impl.MultiRNNCell(
        [single_cell() for _ in range(num_layers)])
    return rnn.dynamic_rnn(
        cell, inputs, dtype=dtypes.float32, time_major=True, scope=scope)
Example #19
def _create_multi_lstm_cell_ops(batch_size, num_units, input_depth,
                                num_layers, max_time, compiled):
  with variable_scope.variable_scope(
      "root",
      initializer=init_ops.random_uniform_initializer(-0.1, 0.1, seed=2)):
    inputs = variable_scope.get_variable(
        "inputs", initializer=random_ops.random_uniform(
            (max_time, batch_size, input_depth), seed=1))
    maybe_xla = lambda c: rnn_cell.CompiledWrapper(c) if compiled else c
    cell = core_rnn_cell_impl.MultiRNNCell(
        [maybe_xla(core_rnn_cell_impl.LSTMCell(num_units))
         for _ in range(num_layers)])
    initial_state = cell.zero_state(
        batch_size=batch_size, dtype=dtypes.float32)
    outputs, final_state = rnn.dynamic_rnn(
        cell=cell, inputs=inputs, initial_state=initial_state,
        time_major=True)
    flat_final_state = nest.flatten(final_state)
    trainable_variables = variables.trainable_variables()
    outputs_grad = gradients_impl.gradients(
        [outputs],
        trainable_variables + [inputs] + nest.flatten(initial_state))
    final_state_grad = gradients_impl.gradients(
        flat_final_state,
        trainable_variables + [inputs] + nest.flatten(initial_state))

    return {"outputs": outputs,
            "final_state": flat_final_state,
            "outputs_grad": outputs_grad,
            "final_state_grad": final_state_grad}
def RNN(inputs, lens, name, reuse):
    print ("Building network " + name)
    # Define weights
    weights = tf.Variable(tf.random_normal([__n_hidden, n_output]), name=name+"_weights")
    biases = tf.Variable(tf.random_normal([n_output]), name=name+"_biases")

    # Define a lstm cell with tensorflow
    outputs, states = rnn.dynamic_rnn(
        __cell_kind(__n_hidden),
        inputs,
        sequence_length=lens,
        dtype=tf.float32,
        scope=name,
        time_major=False)
    assert outputs.get_shape() == (__batch_size, __n_steps, __n_hidden)
    print ("Done building network " + name)

    #
    # All these asserts are actually documentation: they can't be out of date
    #

    outputs = tf.expand_dims(outputs, 2)
    assert outputs.get_shape() == (__batch_size, __n_steps, 1, __n_hidden)

    tiled_weights = tf.tile(tf.expand_dims(tf.expand_dims(weights, 0), 0), [__batch_size, __n_steps, 1, 1])
    assert tiled_weights.get_shape() == (__batch_size, __n_steps, __n_hidden, n_output)
    #assert tiled_weights.get_shape() == (1, 1, __n_hidden, n_output)
    # Linear activation, using rnn inner loop output for each char
    finals = tf.batch_matmul(outputs, tiled_weights) + biases
    assert finals.get_shape() == (__batch_size, __n_steps, 1, n_output)
    return tf.squeeze(finals)
Example #21
  def __call__(self,
               inputs,
               initial_state=None,
               dtype=None,
               sequence_length=None,
               scope=None):
    is_list = isinstance(inputs, list)
    if self._use_dynamic_rnn:
      if is_list:
        inputs = array_ops.pack(inputs)
      outputs, state = rnn.dynamic_rnn(
          self._cell,
          inputs,
          sequence_length=sequence_length,
          initial_state=initial_state,
          dtype=dtype,
          time_major=True,
          scope=scope)
      if is_list:
        # Convert outputs back to list
        outputs = array_ops.unpack(outputs)
    else:  # non-dynamic rnn
      if not is_list:
        inputs = array_ops.unpack(inputs)
      outputs, state = rnn.rnn(self._cell,
                               inputs,
                               initial_state=initial_state,
                               dtype=dtype,
                               sequence_length=sequence_length,
                               scope=scope)
      if not is_list:
        # Convert outputs back to tensor
        outputs = array_ops.pack(outputs)

    return outputs, state
Example #22
  def bidirectional_rnn(self, cell, inputs, lengths, scope=None):
    name = scope.name or "BiRNN"
    # Forward direction
    with vs.variable_scope(name + "_FW") as fw_scope:
      output_fw, output_state_fw = rnn.dynamic_rnn(cell, inputs, time_major=True, dtype=dtypes.float32,
                                                   sequence_length=lengths, scope=fw_scope)
    # Backward direction
    with vs.variable_scope(name + "_BW") as bw_scope:
      output_bw, output_state_bw = rnn.dynamic_rnn(cell, inputs, time_major=True, dtype=dtypes.float32,
                                                   sequence_length=lengths, scope=bw_scope)
    output_bw = tf.reverse_sequence(output_bw, tf.to_int64(lengths), seq_dim=0, batch_dim=1)

    outputs = output_fw + output_bw
    output_state = output_state_fw + output_state_bw

    return (outputs, output_state)
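
The helper above runs the same cell in both directions and reverses the backward outputs by hand before summing. A sketch of equivalent wiring with tf.nn.bidirectional_dynamic_rnn (TensorFlow 1.x), which handles the reversal internally; it assumes two separate cells whose states are plain tensors (e.g. GRU), and the function name is invented.

import tensorflow as tf

def bidirectional_sum(cell_fw, cell_bw, inputs, lengths):
    # The backward outputs come back already re-aligned with the forward ones.
    (out_fw, out_bw), (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
        cell_fw, cell_bw, inputs, sequence_length=lengths,
        time_major=True, dtype=tf.float32)
    return out_fw + out_bw, state_fw + state_bw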
Example #23
def ndlstm_base_dynamic(inputs, noutput, scope=None, reverse=False):
  """Run an LSTM, either forward or backward.

  This is a 1D LSTM implementation using dynamic_rnn and
  the TensorFlow LSTM op.

  Args:
    inputs: input sequence (length, batch_size, ninput)
    noutput: depth of output
    scope: optional scope name
    reverse: run LSTM in reverse

  Returns:
    Output sequence (length, batch_size, noutput)
  """
  with variable_scope.variable_scope(scope, "SeqLstm", [inputs]):
    # TODO(tmb) make batch size, sequence_length dynamic
    # example: sequence_length = tf.shape(inputs)[0]
    _, batch_size, _ = _shape(inputs)
    lstm_cell = core_rnn_cell_impl.BasicLSTMCell(noutput, state_is_tuple=False)
    state = array_ops.zeros([batch_size, lstm_cell.state_size])
    sequence_length = int(inputs.get_shape()[0])
    sequence_lengths = math_ops.to_int64(
        array_ops.fill([batch_size], sequence_length))
    if reverse:
      inputs = array_ops.reverse_v2(inputs, [0])
    outputs, _ = rnn.dynamic_rnn(
        lstm_cell, inputs, sequence_lengths, state, time_major=True)
    if reverse:
      outputs = array_ops.reverse_v2(outputs, [0])
    return outputs
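
A short usage sketch for the helper above, assuming ndlstm_base_dynamic is importable in the current scope; the sequence length, batch size and depths are arbitrary.

import tensorflow as tf

# Time-major input: (length, batch_size, ninput).
sequences = tf.placeholder(tf.float32, [20, 8, 32])

# Forward LSTM over the sequence dimension, 64 output channels.
forward = ndlstm_base_dynamic(sequences, 64)

# The same structure run in reverse, under its own variable scope.
backward = ndlstm_base_dynamic(sequences, 64, scope="reverse_lstm", reverse=True)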
Example #24
 def _testDropoutWrapper(self, batch_size=None, time_steps=None,
                         parallel_iterations=None, **kwargs):
   with self.test_session() as sess:
     with variable_scope.variable_scope(
         "root", initializer=init_ops.constant_initializer(0.5)):
       if batch_size is None and time_steps is None:
         # 2 time steps, batch size 1, depth 3
         batch_size = 1
         time_steps = 2
         x = constant_op.constant(
             [[[2., 2., 2.]], [[1., 1., 1.]]], dtype=dtypes.float32)
         m = rnn_cell_impl.LSTMStateTuple(
             *[constant_op.constant([[0.1, 0.1, 0.1]], dtype=dtypes.float32)
              ] * 2)
       else:
         x = constant_op.constant(
             np.random.randn(time_steps, batch_size, 3).astype(np.float32))
         m = rnn_cell_impl.LSTMStateTuple(*[
             constant_op.constant(
                 [[0.1, 0.1, 0.1]] * batch_size, dtype=dtypes.float32)
         ] * 2)
       outputs, final_state = rnn.dynamic_rnn(
           cell=rnn_cell_impl.DropoutWrapper(
               rnn_cell_impl.LSTMCell(3), dtype=x.dtype, **kwargs),
           time_major=True,
           parallel_iterations=parallel_iterations,
           inputs=x,
           initial_state=m)
       sess.run([variables_lib.global_variables_initializer()])
       res = sess.run([outputs, final_state])
       self.assertEqual(res[0].shape, (time_steps, batch_size, 3))
       self.assertEqual(res[1].c.shape, (batch_size, 3))
       self.assertEqual(res[1].h.shape, (batch_size, 3))
       return res
Example #25
  def benchmarkLSTMBlockCellBpropWithDynamicRNN(self):
    print("BlockLSTMCell backward propagation via dynamic_rnn().")
    print("--------------------------------------------------------------")
    print("LSTMBlockCell Seconds per inference.")
    print("batch_size,cell_size,input_size,time_steps,use_gpu,wall_time")
    iters = 10
    for config in benchmarking.dict_product({
        "batch_size": [1, 8, 13, 32, 67, 128],
        "cell_size": [128, 250, 512, 650, 1024, 1350],
        "time_steps": [40],
        "use_gpu": [True, False]
    }):
      with ops.Graph().as_default():
        with benchmarking.device(use_gpu=config["use_gpu"]):
          time_steps = config["time_steps"]
          batch_size = config["batch_size"]
          cell_size = input_size = config["cell_size"]
          inputs = variable_scope.get_variable(
              "x", [time_steps, batch_size, cell_size],
              trainable=False,
              dtype=dtypes.float32)
          with variable_scope.variable_scope(
              "rnn", reuse=variable_scope.AUTO_REUSE):
            w = variable_scope.get_variable(
                "rnn/lstm_cell/kernel",
                shape=[input_size + cell_size, cell_size * 4],
                dtype=dtypes.float32)
            b = variable_scope.get_variable(
                "rnn/lstm_cell/bias",
                shape=[cell_size * 4],
                dtype=dtypes.float32,
                initializer=init_ops.zeros_initializer())
            cell = lstm_ops.LSTMBlockCell(cell_size)
            outputs = rnn.dynamic_rnn(
                cell, inputs, time_major=True, dtype=dtypes.float32)
          grads = gradients_impl.gradients(outputs, [inputs, w, b])
          init_op = variables.global_variables_initializer()

        with session.Session() as sess:
          sess.run(init_op)
          wall_time = benchmarking.seconds_per_run(grads, sess, iters)

        # Print to stdout. If the TEST_REPORT_FILE_PREFIX environment variable
        # is set, this will produce a copy-paste-able CSV file.
        print(",".join(
            map(str, [
                batch_size, cell_size, cell_size, time_steps, config["use_gpu"],
                wall_time
            ])))
        benchmark_name_template = "_".join([
            "LSTMBlockCell_bprop", "BS%(batch_size)i", "CS%(cell_size)i",
            "IS%(cell_size)i", "TS%(time_steps)i", "gpu_%(use_gpu)s"
        ])

        self.report_benchmark(
            name=benchmark_name_template % config,
            iters=iters,
            wall_time=wall_time,
            extras=config)
Example #26
  def testBatchSizeFromInput(self):
    cell = Plus1RNNCell()
    in_graph_mode = context.in_graph_mode()
    # With static batch size
    if in_graph_mode:
      inputs = array_ops.placeholder(dtypes.float32, shape=(3, 4, 5))
      initial_state = array_ops.placeholder(dtypes.float32, shape=(3, 5))
    else:
      inputs = np.zeros((3, 4, 5), dtype=np.float32)
      initial_state = np.zeros((3, 5), dtype=np.float32)

    # - Without initial_state
    outputs, state = rnn.dynamic_rnn(cell, inputs, dtype=dtypes.float32)
    if in_graph_mode:
      self.assertEqual(3, outputs.shape[0].value)
      self.assertEqual(3, state.shape[0].value)
    else:
      self.assertEqual(3, outputs.shape[0])
      self.assertEqual(3, state.shape[0])

    # - With initial_state
    outputs, state = rnn.dynamic_rnn(
        cell, inputs, initial_state=initial_state)
    if in_graph_mode:
      self.assertEqual(3, outputs.shape[0].value)
      self.assertEqual(3, state.shape[0].value)
    else:
      self.assertEqual(3, outputs.shape[0])
      self.assertEqual(3, state.shape[0])

    # Without static batch size
    # Tensor shapes are fully determined in Eager mode, so only run this
    # test in graph mode.
    if in_graph_mode:
      inputs = array_ops.placeholder(dtypes.float32, shape=(None, 4, 5))
      # - Without initial_state
      outputs, state = rnn.dynamic_rnn(cell, inputs, dtype=dtypes.float32)
      self.assertEqual(None, outputs.shape[0].value)
      self.assertEqual(None, state.shape[0].value)
      # - With initial_state
      outputs, state = rnn.dynamic_rnn(
          cell,
          inputs,
          initial_state=array_ops.placeholder(dtypes.float32, shape=(None, 5)))
      self.assertEqual(None, outputs.shape[0].value)
      self.assertEqual(None, state.shape[0].value)
Example #27
  def _multi_seq_fn():
    """Decoding of highest scoring sequence."""

    # For simplicity, in shape comments, denote:
    # 'batch_size' by 'B', 'max_seq_len' by 'T' , 'num_tags' by 'O' (output).
    num_tags = potentials.get_shape()[2].value

    # Computes forward decoding. Get last score and backpointers.
    crf_fwd_cell = CrfDecodeForwardRnnCell(transition_params)
    initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
    initial_state = array_ops.squeeze(initial_state, axis=[1])  # [B, O]
    inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1])  # [B, T-1, O]
    # Sequence length is not allowed to be less than zero.
    sequence_length_less_one = math_ops.maximum(
        constant_op.constant(0, dtype=sequence_length.dtype),
        sequence_length - 1)
    backpointers, last_score = rnn.dynamic_rnn(  # [B, T - 1, O], [B, O]
        crf_fwd_cell,
        inputs=inputs,
        sequence_length=sequence_length_less_one,
        initial_state=initial_state,
        time_major=False,
        dtype=dtypes.int32)
    backpointers = gen_array_ops.reverse_sequence(  # [B, T - 1, O]
        backpointers, sequence_length_less_one, seq_dim=1)

    # Computes backward decoding. Extract tag indices from backpointers.
    crf_bwd_cell = CrfDecodeBackwardRnnCell(num_tags)
    initial_state = math_ops.cast(math_ops.argmax(last_score, axis=1),  # [B]
                                  dtype=dtypes.int32)
    initial_state = array_ops.expand_dims(initial_state, axis=-1)  # [B, 1]
    decode_tags, _ = rnn.dynamic_rnn(  # [B, T - 1, 1]
        crf_bwd_cell,
        inputs=backpointers,
        sequence_length=sequence_length_less_one,
        initial_state=initial_state,
        time_major=False,
        dtype=dtypes.int32)
    decode_tags = array_ops.squeeze(decode_tags, axis=[2])  # [B, T - 1]
    decode_tags = array_ops.concat([initial_state, decode_tags],   # [B, T]
                                   axis=1)
    decode_tags = gen_array_ops.reverse_sequence(  # [B, T]
        decode_tags, sequence_length, seq_dim=1)

    best_score = math_ops.reduce_max(last_score, axis=1)  # [B]
    return decode_tags, best_score
Example #28
  def testInvalidDtype(self):
    if context.executing_eagerly():
      inputs = np.zeros((3, 4, 5), dtype=np.int32)
    else:
      inputs = array_ops.placeholder(dtypes.int32, shape=(3, 4, 5))

    cells = [
        rnn_cell_impl.BasicRNNCell,
        rnn_cell_impl.GRUCell,
        rnn_cell_impl.BasicLSTMCell,
        rnn_cell_impl.LSTMCell,
    ]
    for cell_cls in cells:
      with self.cached_session():
        with self.assertRaisesRegexp(
            ValueError, "RNN cell only supports floating"):
          cell = cell_cls(2, dtype=dtypes.int32)
          rnn.dynamic_rnn(cell, inputs, dtype=dtypes.int32)
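
The test above asserts that the standard cells reject integer dtypes. A minimal sketch of the usual workaround, casting integer features to float32 before handing them to dynamic_rnn (the shapes are arbitrary):

import tensorflow as tf

int_inputs = tf.placeholder(tf.int32, shape=(3, 4, 5))

# RNN cells only support floating-point types, so cast before the RNN.
float_inputs = tf.cast(int_inputs, tf.float32)
cell = tf.nn.rnn_cell.GRUCell(2)
outputs, state = tf.nn.dynamic_rnn(cell, float_inputs, dtype=tf.float32)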
Example #29
 def testScalarStateIsAccepted(self):
   cell = ScalarStateRNNCell()
   inputs = array_ops.placeholder(dtypes.float32, shape=(1, 4, 1))
   with self.test_session() as sess:
     outputs, state = rnn.dynamic_rnn(
         cell, inputs, dtype=dtypes.float32, sequence_length=[4])
     outputs, state = sess.run(
         [outputs, state], feed_dict={inputs: [[[1], [2], [3], [4]]]})
   self.assertAllEqual(outputs, [[[1], [2], [3], [4]]])
   self.assertEqual(state, 4)
Example #30
    def __init__(self, max_words, num_classes, vocab_size, 
            embedding_size, num_hidden):

        # input, output, dropout placeholders
        self.text = tf.placeholder(tf.int32, [None, max_words], name="input_text")
        self.extra = tf.placeholder(tf.int32, [None, max_words], name="input_extra")
        self.output = tf.placeholder(tf.float32, [None, num_classes], name="output_y")
        self.sequence_lengths = tf.placeholder(tf.int32, [None], name="sequence_lengths")
        self.dropout_prob = tf.placeholder(tf.float32, name="dropout_probability")

        # Word embedding layer
        with tf.device("/cpu:0"), tf.name_scope("word_embedding"):
            embedding_matrix = tf.Variable(
                    tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), # random numbers between -1 and 1
                    name="embedding_matrix")
            self.lookup = tf.nn.embedding_lookup(embedding_matrix, self.text)

        # GRU
        with tf.name_scope("GRU"):
            output, state = rnn.dynamic_rnn(
                    rnn_cell.GRUCell(num_hidden),
                    self.lookup,
                    dtype=tf.float32,
                    sequence_length=self.sequence_lengths)
            output = tf.transpose(output, [1, 0, 2])
            self.gru = tf.gather(output, int(output.get_shape()[0]) - 1)

        # Add dropout
        with tf.name_scope("dropout"):
            self.dropout = tf.nn.dropout(self.gru, self.dropout_prob)

        # add in extra data and relu layer
        with tf.name_scope("extra_data"):
            # Cast the integer extra features to float32 before concatenating, and
            # size the weight matrix to the combined width (num_hidden + max_words).
            combined = tf.concat(1, [self.dropout, tf.cast(self.extra, tf.float32)])
            weights_e = tf.Variable(tf.truncated_normal([num_hidden + max_words, num_hidden], stddev=0.1), name="weights_extra")
            biases_e = tf.Variable(tf.constant(0.1, shape=[num_hidden]), name="biases_extra")
            processed = tf.nn.relu(tf.matmul(combined, weights_e) + biases_e)

        # Final output
        with tf.name_scope("output"):
            weights = tf.Variable(tf.truncated_normal([num_hidden, num_classes], stddev=0.1), name="weights")
            biases = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="biases")
            unscaled = tf.matmul(processed, weights) + biases
            self.scores = tf.nn.softmax(unscaled, name="scores")
            self.predictions = tf.argmax(self.scores, dimension=1, name="predictions")

        # calculate loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=unscaled, labels=self.output)
            self.loss = tf.reduce_mean(losses)

        # calculate accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.output, 1))
            self.accuracy = 100 * tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
Example #31
  def testSimpleRNNCellAndBasicRNNCellComparison(self):
    input_shape = 10
    output_shape = 5
    timestep = 4
    batch = 20
    (x_train, _), _ = testing_utils.get_test_data(
        train_samples=batch,
        test_samples=0,
        input_shape=(timestep, input_shape),
        num_classes=output_shape)
    fix_weights_generator = keras.layers.SimpleRNNCell(output_shape)
    fix_weights_generator.build((None, input_shape))
    # The SimpleRNNCell contains 3 weights: kernel, recurrent_kernel, and bias
    # The BasicRNNCell contains 2 weight: kernel and bias, where kernel is
    # zipped [kernel, recurrent_kernel] in SimpleRNNCell.
    keras_weights = fix_weights_generator.get_weights()
    kernel, recurrent_kernel, bias = keras_weights
    tf_weights = [np.concatenate((kernel, recurrent_kernel)), bias]

    with self.session(graph=ops_lib.Graph()) as sess:
      inputs = array_ops.placeholder(
          dtypes.float32, shape=(None, timestep, input_shape))
      cell = keras.layers.SimpleRNNCell(output_shape)
      k_out, k_state = rnn.dynamic_rnn(
          cell, inputs, dtype=dtypes.float32)
      cell.set_weights(keras_weights)
      [k_out, k_state] = sess.run([k_out, k_state], {inputs: x_train})
    with self.session(graph=ops_lib.Graph()) as sess:
      inputs = array_ops.placeholder(
          dtypes.float32, shape=(None, timestep, input_shape))
      cell = rnn_cell_impl.BasicRNNCell(output_shape)
      tf_out, tf_state = rnn.dynamic_rnn(
          cell, inputs, dtype=dtypes.float32)
      cell.set_weights(tf_weights)
      [tf_out, tf_state] = sess.run([tf_out, tf_state], {inputs: x_train})

    self.assertAllClose(tf_out, k_out, atol=1e-5)
    self.assertAllClose(tf_state, k_state, atol=1e-5)
Example #32
  def testBatchSizeFromInput(self):
    cell = Plus1RNNCell()
    in_eager_mode = context.executing_eagerly()
    # With static batch size
    if in_eager_mode:
      inputs = np.zeros((3, 4, 5), dtype=np.float32)
      initial_state = np.zeros((3, 5), dtype=np.float32)
    else:
      inputs = array_ops.placeholder(dtypes.float32, shape=(3, 4, 5))
      initial_state = array_ops.placeholder(dtypes.float32, shape=(3, 5))

    # - Without initial_state
    outputs, state = rnn.dynamic_rnn(cell, inputs, dtype=dtypes.float32)
    self.assertEqual(3, outputs.shape[0])
    self.assertEqual(3, state.shape[0])

    # - With initial_state
    outputs, state = rnn.dynamic_rnn(
        cell, inputs, initial_state=initial_state)
    self.assertEqual(3, outputs.shape[0])
    self.assertEqual(3, state.shape[0])

    # Without static batch size
    # Tensor shapes are fully determined with eager execution enabled,
    # so only run this test for graph construction.
    if not in_eager_mode:
      inputs = array_ops.placeholder(dtypes.float32, shape=(None, 4, 5))
      # - Without initial_state
      outputs, state = rnn.dynamic_rnn(cell, inputs, dtype=dtypes.float32)
      self.assertEqual(None, outputs.shape.dims[0].value)
      self.assertEqual(None, state.shape.dims[0].value)
      # - With initial_state
      outputs, state = rnn.dynamic_rnn(
          cell,
          inputs,
          initial_state=array_ops.placeholder(dtypes.float32, shape=(None, 5)))
      self.assertEqual(None, outputs.shape.dims[0].value)
      self.assertEqual(None, state.shape.dims[0].value)
Example #33
 def time_aware_gru_net(self, hidden_units, input_data, input_length, type):
     if type == 'T-SeqRec':
         cell = self.build_time_aware_gru_cell_sigmoid(hidden_units)
     elif type == 'new':
         cell = self.build_time_aware_gru_cell_new(hidden_units)
     elif type == 'T_Gru_Extend':
         cell = self.build_time_aware_gru_cell_extend(hidden_units)
     #cell = self.build_cell(hidden_units)
     self.input_length = tf.reshape(input_length, [-1])
     outputs, _ = dynamic_rnn(cell,
                              inputs=input_data,
                              sequence_length=self.input_length,
                              dtype=tf.float32)
     return outputs
Example #34
 def force():
     force_embed = tf.nn.embedding_lookup(embedding_tensor,
                                          force_tensor[:, :-1])
     decoder_input = tf.concat([
         input_tensor,
         tf.concat([tf.tile(init_tensor, [batch_size, 1, 1]), force_embed],
                   1)
     ], 2)
     decoder_input = tf.nn.dropout(decoder_input, keep_prob=keep_prob)
     force_decoder, _ = dynamic_rnn(decoder_cell,
                                    decoder_input,
                                    sequence_length=sequence_length,
                                    dtype=tf.float32)
     return force_decoder
Example #35
    def add_lstm(self, inputs):

        if 'lstm_conf' in self.nn_config:
            lstm_conf = self.nn_config['lstm_conf']

            activation = None if lstm_conf['batch_norm'] else lstm_conf[
                'lstm_activation']

            return_seq = True if '1dCNN' in self.nn_config else False

            cell = LSTMCell(units=lstm_conf['lstm_units'],
                            activation=activation)

            if lstm_conf['method'] == 'dynamic_rnn':
                rnn_outputs1, states = dynamic_rnn(cell,
                                                   inputs,
                                                   dtype=tf.float32)
                lstm_outputs = tf.reshape(rnn_outputs1[:, -1, :],
                                          [-1, lstm_conf['lstm_units']])

            elif lstm_conf['method'] == 'keras_lstm_layer':
                lstm_outputs = LSTM(lstm_conf['lstm_units'],
                                    name="first_lstm",
                                    activation=activation,
                                    input_shape=(self.data_config['lookback'],
                                                 self.ins),
                                    return_sequences=return_seq)(inputs)

            else:
                rnn_layer = RNN(cell, return_sequences=return_seq)
                lstm_outputs = rnn_layer(inputs)  # [batch_size, neurons]
                if self.verbose > 0:
                    print(lstm_outputs.shape, 'before reshaping',
                          K.eval(tf.rank(lstm_outputs)))
                lstm_outputs = tf.reshape(lstm_outputs[:, :],
                                          [-1, lstm_conf['lstm_units']])
                if self.verbose > 0:
                    print(lstm_outputs.shape, 'after reshaping',
                          K.eval(tf.rank(lstm_outputs)))

            if lstm_conf['batch_norm']:
                rnn_outputs3 = BatchNormalization()(lstm_outputs)
                lstm_outputs = Activation('relu')(rnn_outputs3)

            if lstm_conf['dropout'] is not None:
                lstm_outputs = Dropout(lstm_conf['dropout'])(lstm_outputs)
        else:
            lstm_outputs = inputs

        return lstm_outputs
Example #36
  def setup_decoder(self):
    self.decoder_state_output = []
    if self.num_layers > 1:
      self.decoder_cell = rnn_cell.GRUCell(self.size)
      # self.decoder_cell = tf.contrib.rnn.GRUCell(self.size,kernel_initializer = tf.contrib.layers.xavier_initializer(dtype=tf.float32))
    self.attn_cell = GRUCellAttn(self.size, self.encoder_output, scope="DecoderAttnCell")

    with vs.variable_scope("Decoder"):
      inp = self.decoder_inputs
      for i in xrange(self.num_layers - 1):
        with vs.variable_scope("DecoderCell%d" % i) as scope:
          out, state_output = rnn.dynamic_rnn(self.decoder_cell, inp, time_major=True,
                                              dtype=dtypes.float32, sequence_length=self.target_length,
                                              scope=scope)
          inp = self.dropout(out)
          self.decoder_state_output.append(state_output)

      with vs.variable_scope("DecoderAttnCell") as scope:
        out, state_output = rnn.dynamic_rnn(self.attn_cell, inp, time_major=True,
                                            dtype=dtypes.float32, sequence_length=self.target_length,
                                            scope=scope)
        self.decoder_output = self.dropout(out)
        self.decoder_state_output.append(state_output)
Example #37
def define_two_layer_LSTM(inputX, seqLengths, nHidden):
    # Build a fresh LSTMCell per layer so the two layers do not share weights.
    cell = rnn_cell.MultiRNNCell(
        [rnn_cell.LSTMCell(nHidden, state_is_tuple=True) for _ in range(2)])

    initial = cell.zero_state(tf.shape(inputX)[0], tf.float32)

    outputs, _ = dynamic_rnn(cell,
                             inputX,
                             dtype=tf.float32,
                             sequence_length=seqLengths,
                             initial_state=initial,
                             time_major=False)

    return outputs, nHidden  # per-step output size stays nHidden with two stacked layers
Example #38
 def model(self):
     """
     :param x: inputs of size [T, batch_size, input_size]
     :param W: matrix of fully-connected output layer weights
     :param b: vector of fully-connected output layer biases
     """
     cell = rnn_cell.BasicLSTMCell(self.hidden_dim)
     outputs, states = rnn.dynamic_rnn(cell, self.x, dtype=tf.float32)
     num_examples = tf.shape(self.x)[0]
     W_repeated = tf.tile(tf.expand_dims(self.W_out, 0),
                          [num_examples, 1, 1])
     out = tf.batch_matmul(outputs, W_repeated) + self.b_out
     out = tf.squeeze(out)
     return out
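
The model above uses tf.batch_matmul, which only exists in pre-1.0 TensorFlow; in 1.x the same batched product is written with tf.matmul, which batches over the leading dimension. A sketch of the output projection in that newer style; the helper name is invented and the shapes mirror the method above.

import tensorflow as tf

def project_outputs(outputs, W_out, b_out):
    # outputs: [batch, time, hidden_dim]; W_out: [hidden_dim, out_dim].
    num_examples = tf.shape(outputs)[0]
    W_repeated = tf.tile(tf.expand_dims(W_out, 0), [num_examples, 1, 1])
    # In TF 1.x, tf.matmul performs a batched matmul for 3-D inputs.
    return tf.squeeze(tf.matmul(outputs, W_repeated) + b_out)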
Example #39
    def benchmarkLSTMBlockCellFpropWithDynamicRNN(self):
        print("BlockLSTMCell forward propagation via dynamic_rnn().")
        print("--------------------------------------------------------------")
        print("LSTMBlockCell Seconds per inference.")
        print("batch_size,cell_size,input_size,time_steps,use_gpu,wall_time")
        iters = 10
        for config in benchmarking.dict_product({
                "batch_size": [1, 32, 128],
                "cell_size": [32, 128, 512],
                "input_size": [128, 512],
                "time_steps": [10, 25, 100],
                "use_gpu": [True, False]
        }):
            with ops.Graph().as_default():
                with benchmarking.device(use_gpu=config["use_gpu"]):
                    inputs = variable_scope.get_variable(
                        "x", [
                            config["time_steps"], config["batch_size"],
                            config["input_size"]
                        ])
                    cell = lstm_ops.LSTMBlockCell(config["cell_size"])
                    outputs = rnn.dynamic_rnn(cell,
                                              inputs,
                                              time_major=True,
                                              dtype=dtypes.float32)
                    init_op = variables.global_variables_initializer()

                with session.Session() as sess:
                    sess.run(init_op)
                    wall_time = benchmarking.seconds_per_run(
                        outputs, sess, iters)

                # Print to stdout. If the TEST_REPORT_FILE_PREFIX environment variable
                # is set, this will produce a copy-paste-able CSV file.
                print(",".join(
                    map(str, [
                        config["batch_size"], config["cell_size"],
                        config["input_size"], config["time_steps"],
                        config["use_gpu"], wall_time
                    ])))
                benchmark_name_template = "_".join([
                    "LSTMBlockCell_fprop", "BS%(batch_size)i",
                    "CS%(cell_size)i", "IS%(input_size)i", "TS%(time_steps)i",
                    "gpu_%(use_gpu)s"
                ])

                self.report_benchmark(name=benchmark_name_template % config,
                                      iters=iters,
                                      wall_time=wall_time,
                                      extras=config)
Beispiel #40
0
def stacked_lstm(cell_fn,
                 input_tensor,
                 num_cells,
                 num_lstm_layers=1,
                 return_only_last_output=True):
    (x, t) = input_tensor
    for i in range(num_lstm_layers):
        x, _ = dynamic_rnn(cell=cell_fn(num_cells),
                           inputs=(x, t),
                           dtype=tf.float32,
                           scope='LSTM_' + str(i))
    if return_only_last_output:
        return tf.squeeze(x[:, -1, :])
    return x
Beispiel #41
0
    def modified_grnn_net(self,
                          hidden_units,
                          input_data,
                          input_length,
                          scope='modified_grnn'):

        cell = self.build_modified_grnn_cell(hidden_units)
        self.input_length = tf.reshape(input_length, [-1])
        outputs, _ = dynamic_rnn(cell,
                                 inputs=input_data,
                                 sequence_length=self.input_length,
                                 dtype=tf.float32,
                                 scope=scope)
        return outputs
Beispiel #42
0
    def _multi_seq_fn():
        """Forward computation of alpha values."""
        rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])

        # Compute the alpha values in the forward algorithm in order to get the
        # partition function.
        forward_cell = CrfForwardRnnCell(transition_params)
        _, alphas = rnn.dynamic_rnn(cell=forward_cell,
                                    inputs=rest_of_input,
                                    sequence_length=sequence_lengths - 1,
                                    initial_state=first_input,
                                    dtype=dtypes.float32)
        log_norm = math_ops.reduce_logsumexp(alphas, [1])
        return log_norm
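
This fragment closes over inputs, first_input, sequence_lengths and transition_params from the enclosing CRF log-partition function; in the standard tf.contrib.crf implementation, first_input is the unary score slice of the first time step, roughly (shown as an assumption about the surrounding code, not part of this snippet):

    first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1])
    first_input = array_ops.squeeze(first_input, [1])  # [batch_size, num_tags]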
Beispiel #43
0
 def __init__(self,
              num_symbols,
              num_embed_units,
              num_units,
              num_labels,
              embed,
              learning_rate=0.001,
              max_gradient_norm=5.0):
     self.texts = tf.placeholder(tf.int32, [None, None]) # shape: sentence*max_word
     self.text_length = tf.placeholder(tf.int32, [None]) # shape: sentence
     self.labels = tf.placeholder(tf.int32, [None])      # shape: sentence
     self.keep_prob = tf.placeholder(tf.float32)
     
     self.learning_rate = tf.Variable(float(learning_rate), trainable=False, dtype=tf.float32)
     self.global_step = tf.Variable(0, trainable=False)
     self.epoch = tf.Variable(0, trainable=False)
     self.epoch_add_op = self.epoch.assign(self.epoch + 1)
     
     # build the embedding table (index to vector)
     self.embed = tf.get_variable('embed', dtype=tf.float32, initializer=embed)
     
     self.embed_inputs = tf.nn.embedding_lookup(self.embed, self.texts)   # shape: sentence*max_word*num_embed_units
     fw_cell = DropoutWrapper(BasicLSTMCell(num_units), output_keep_prob=self.keep_prob)
     bw_cell = DropoutWrapper(BasicLSTMCell(num_units), output_keep_prob=self.keep_prob)
     
     middle_outputs, middle_states = bidirectional_dynamic_rnn(fw_cell, bw_cell, self.embed_inputs, self.text_length, dtype=tf.float32, scope="word_rnn")
     middle_outputs = tf.concat(middle_outputs, 2)   # shape: sentence*max_word*(2*num_units)
     
     middle_inputs = tf.expand_dims(tf.reduce_max(middle_outputs, axis=1), 0)    # shape: 1*sentence*(2*num_units)
     top_cell = DropoutWrapper(BasicLSTMCell(num_units), output_keep_prob=self.keep_prob)
     
     outputs, states = dynamic_rnn(top_cell, middle_inputs, dtype=tf.float32, scope="sentence_rnn")
     self.outputs = outputs[0]   # shape: sentence*num_units
     logits = tf.layers.dense(self.outputs, num_labels)
     
     self.loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels, logits=logits), name='loss')
     mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0], dtype=tf.float32)
     self.predict_labels = tf.argmax(logits, 1, 'predict_labels', output_type=tf.int32)
     self.accuracy = tf.reduce_sum(tf.cast(tf.equal(self.labels, self.predict_labels), tf.int32), name='accuracy')
     
     self.params = tf.trainable_variables()
         
     # calculate the gradient of parameters
     opt = tf.train.AdamOptimizer(self.learning_rate)
     gradients = tf.gradients(mean_loss, self.params)
     clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
     self.update = opt.apply_gradients(zip(clipped_gradients, self.params), global_step=self.global_step)
     
     self.saver = tf.train.Saver(max_to_keep=3, pad_step_number=True)
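
A hypothetical training step for the class above, where model is an instance of this class and sess plus the batch_* variables are placeholders, not part of the original:

# One document = one batch of sentences; run a single parameter update.
feed = {model.texts: batch_texts,          # [num_sentences, max_word] token ids
        model.text_length: batch_lengths,  # [num_sentences]
        model.labels: batch_labels,        # [num_sentences]
        model.keep_prob: 0.8}
_, loss, acc = sess.run([model.update, model.loss, model.accuracy], feed_dict=feed)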
Beispiel #44
0
    def setup_decoder(self):  # decoder setup
        # vs.variable_scope names variables so that parameters stay consistent (shared); see https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-12-scope/
        with vs.variable_scope("Decoder"):
            inp = tf.nn.dropout(self.decoder_inputs, self.keep_prob)
            if self.num_layers > 1:
                with vs.variable_scope("RNN"):  # 理解成调用RNN
                    decoder_cell = rnn_cell.GRUCell(self.size)
                    decoder_cell = rnn_cell.DropoutWrapper(
                        decoder_cell, output_keep_prob=self.keep_prob)
                    self.decoder_cell = rnn_cell.MultiRNNCell(
                        [decoder_cell] * (self.num_layers - 1),
                        state_is_tuple=True)
                    inp, _ = rnn.dynamic_rnn(
                        self.decoder_cell,
                        inp,
                        self.tgt_len,
                        dtype=tf.float32,
                        time_major=True,
                        initial_state=self.decoder_cell.zero_state(
                            self.batch_size, dtype=tf.float32))

            with vs.variable_scope("Attn"):  # 理解为调用attention
                self.attn_cell = GRUCellAttn(self.size, self.len_inp,
                                             self.encoder_output,
                                             self.src_mask, self.decode_method)
                # set up the attention cell
                self.decoder_output, _ = rnn.dynamic_rnn(
                    self.attn_cell,
                    inp,
                    self.tgt_len,
                    dtype=tf.float32,
                    time_major=True,
                    initial_state=self.attn_cell.zero_state(
                        self.batch_size,
                        dtype=tf.float32,
                    ))
Beispiel #45
0
def BuildFullModel():
  """Build the full model with conv,rnn,opt."""
  seq = []
  for i in range(4):
    with variable_scope.variable_scope('inp_%d' % i):
      seq.append(array_ops.reshape(BuildSmallModel(), [2, 1, -1]))

  cell = rnn_cell.BasicRNNCell(16)
  out = rnn.dynamic_rnn(
      cell, array_ops.concat(seq, axis=1), dtype=dtypes.float32)[0]

  target = array_ops.ones_like(out)
  loss = nn_ops.l2_loss(math_ops.reduce_mean(target - out))
  sgd_op = gradient_descent.GradientDescentOptimizer(1e-2)
  return sgd_op.minimize(loss)
Beispiel #46
0
    def _make_graph(self):
        x = tf.placeholder("float", [self.batch_size, self.seq_len, self.num_nodes, self.input_dim], name='input')
        y = tf.placeholder("float", [self.batch_size, self.num_nodes, self.output_dim], name='label')

        self.input = x
        self.label = y
        x = tf.reshape(x, [self.batch_size, self.seq_len, self.num_nodes * self.input_dim])
        outputs, state = dynamic_rnn(self.cell, x, dtype=tf.float32)
        outputs = tf.transpose(outputs, [1, 0, 2])
        pred = tf.contrib.layers.fully_connected(outputs[-1], self.num_nodes * self.output_dim, activation_fn=None)

        # Define loss and optimizer
        pred = tf.reshape(pred, [self.batch_size, self.num_nodes, self.output_dim], name='predictions')

        self.prediction = pred
Beispiel #47
0
    def generate_signal(self, signal_key, context, **kwargs):
        if signal_key == "loss":
            obs = context.get_signal('obs')
            get_actions_cell = GetActionsCell(self.policy)

            initial_state = get_actions_cell.zero_state(tf.shape(obs)[1], tf.float32)

            actions, _ = dynamic_rnn(
                get_actions_cell, obs, initial_state=initial_state,
                parallel_iterations=1, swap_memory=False, time_major=True)

            loss = self.env.build_trajectory_loss(actions, obs)
            return loss
        else:
            raise Exception("NotImplemented")
Beispiel #48
0
 def prediction(self):
     # Recurrent network.
     network = rnn_cell.GRUCell(self._num_hidden)
     network = rnn_cell.DropoutWrapper(network,
                                       output_keep_prob=self.dropout)
     network = rnn_cell.MultiRNNCell([network] * self._num_layers)
     output, _ = rnn.dynamic_rnn(network, self.data, dtype=tf.float32)
     # Select last output.
     output = tf.transpose(output, [1, 0, 2])
     last = tf.gather(output, int(output.get_shape()[0]) - 1)
     # Softmax layer.
     weight, bias = self._weight_and_bias(self._num_hidden,
                                          int(self.target.get_shape()[1]))
     prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
     return prediction
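
prediction relies on a _weight_and_bias helper that is not part of this snippet; one plausible sketch (an assumption, not necessarily the original implementation) builds truncated-normal weights and a small constant bias:

 @staticmethod
 def _weight_and_bias(in_size, out_size):
     # Softmax-layer parameters: small random weights, small positive bias.
     weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
     bias = tf.constant(0.1, shape=[out_size])
     return tf.Variable(weight), tf.Variable(bias)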
Beispiel #49
0
    def __call__(self, inputs, seq_len=None):
        if self.call_cnt == 0:
            self.cell = LSTMCell(
                self.output_dim,
                initializer=self.initializer(dtype=inputs.dtype))

        with tf.variable_scope(self.scope) as scope:
            self.check_reuse(scope)
            #if self.call_cnt ==0:
            #self.cell = LSTMCell(self.output_dim,initializer = self.initializer)
            #cell = BasicLSTMCell(self.output_dim)
            return rnn.dynamic_rnn(self.cell,
                                   inputs,
                                   seq_len,
                                   dtype=inputs.dtype)
Beispiel #50
0
def rnn_model(data):
	feature_columns_count = 3

	layer = {
		'weights': tf.Variable(tf.random_uniform([rnn_size, 1], -1, 1)),
		'biases': tf.Variable(tf.random_uniform([1]))
	}

	length = [feature_columns_count for _ in range(batch_size)]
	lstm_cell = rnn_cell.LSTMCell(rnn_size)
	outputs, _ = rnn.dynamic_rnn(lstm_cell, data, dtype=tf.float32, sequence_length=length)

	output = tf.matmul(last_relevant(outputs, feature_columns_count), layer['weights']) + layer['biases']

	return output
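
rnn_model calls a last_relevant helper that is not shown here; a common sketch of such a helper (an assumption, not the original) gathers the output at the last valid time step of each sequence:

def last_relevant(outputs, length):
    # outputs: [batch, time, units]; length: scalar or [batch] vector of valid lengths.
    batch_size = tf.shape(outputs)[0]
    max_len = tf.shape(outputs)[1]
    units = int(outputs.get_shape()[2])
    index = tf.range(batch_size) * max_len + (length - 1)
    flat = tf.reshape(outputs, [-1, units])
    return tf.gather(flat, index)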
Beispiel #51
0
    def __init__(self, nact, rnn_units=256):
        cells = [GRUCell(rnn_units, kernel_initializer=orthogonal()) for _ in range(2)]
        self.gru = MultiRNNCell(cells)
        self.state = self.gru.zero_state(batch_size=1, dtype=tf.float32)

        self.obs_ph = tf.placeholder(dtype=tf.float32)
        fc1 = dense(self.obs_ph, rnn_units, activation=elu, kernel_initializer=orthogonal(), name='fc1')
        expand = tf.expand_dims(fc1, axis=0, name='expand')
        rnn_out, self.state = dynamic_rnn(self.gru, expand, initial_state=self.state)
        reshape = tf.reshape(rnn_out, shape=[-1, rnn_units], name='reshape')

        self.logits = dense(reshape, nact, kernel_initializer=orthogonal(0.01), name='logits')
        self.pi = tf.nn.softmax(self.logits, name='pi')
        self.action = boltzmann(self.pi)
        self.value = dense(self.logits, 1, kernel_initializer=orthogonal(), name='value')
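
The action above is drawn by a boltzmann helper that is not defined in this snippet; a minimal sketch (an assumption) samples one action index from the softmax distribution:

def boltzmann(pi):
    # pi: [batch, n_actions] action probabilities; returns one sampled action id per row.
    return tf.squeeze(tf.multinomial(tf.log(pi), num_samples=1), axis=1)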
Beispiel #52
0
  def _construct_rnn(self, features):
    """Apply an RNN to `features`.

    The `features` dict must contain `self._inputs_key`, and the corresponding
    input should be a `Tensor` of shape `[batch_size, padded_length, k]`
    where `k` is the dimension of the input for each element of a sequence.

    `activations` has shape `[batch_size, sequence_length, n]` where `n` is
    `self._target_column.num_label_columns`. In the case of a multiclass
    classifier, `n` is the number of classes.

    `final_state` has shape determined by `self._cell` and its dtype must match
    `self._dtype`.

    Args:
      features: a `dict` containing the input for the RNN and (optionally) an
        initial state and information about sequence lengths.

    Returns:
      activations: the output of the RNN, projected to the appropriate number of
        dimensions.
      final_state: the final state output by the RNN.

    Raises:
      KeyError: if `features` does not contain `self._inputs_key`.
    """
    with ops.name_scope('RNN'):
      inputs = features.get(self._inputs_key)
      if inputs is None:
        raise KeyError('features must contain the key {}'.format(
            self._inputs_key))
      if inputs.dtype != self._dtype:
        inputs = math_ops.cast(inputs, self._dtype)
      initial_state = features.get(self._initial_state_key)
      rnn_outputs, final_state = rnn.dynamic_rnn(
          cell=self._cell,
          inputs=inputs,
          initial_state=initial_state,
          dtype=self._dtype,
          parallel_iterations=self._parallel_iterations,
          swap_memory=self._swap_memory,
          time_major=False)
      activations = layers.fully_connected(
          inputs=rnn_outputs,
          num_outputs=self._target_column.num_label_columns,
          activation_fn=None,
          trainable=False)
      return activations, final_state
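
A caller-side sketch of the features dict this method expects, assuming self._inputs_key == 'inputs' and self._initial_state_key == 'initial_state' (both names are assumptions; the real keys come from the estimator's configuration), with estimator standing for the surrounding object:

  features = {
      'inputs': array_ops.placeholder(dtypes.float32, [None, None, 16]),
      # 'initial_state': optional; if present it is forwarded to dynamic_rnn unchanged.
  }
  activations, final_state = estimator._construct_rnn(features)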
Beispiel #53
0
    def testDynamicDecodeRNNWithBasicTrainingSamplerMatchesDynamicRNN(self):
        sequence_length = [3, 4, 3, 1, 0]
        batch_size = 5
        max_time = 8
        input_depth = 7
        cell_depth = 10
        max_out = max(sequence_length)

        with self.test_session() as sess:
            inputs = np.random.randn(batch_size, max_time,
                                     input_depth).astype(np.float32)

            cell = core_rnn_cell.LSTMCell(cell_depth)
            zero_state = cell.zero_state(dtype=dtypes.float32,
                                         batch_size=batch_size)
            sampler = sampling_decoder.BasicTrainingSampler(
                inputs, sequence_length)
            my_decoder = sampling_decoder.BasicSamplingDecoder(
                cell=cell, sampler=sampler, initial_state=zero_state)

            # Match the variable scope of dynamic_rnn below so we end up
            # using the same variables
            with vs.variable_scope("rnn"):
                final_decoder_outputs, final_decoder_state = decoder.dynamic_decode_rnn(
                    my_decoder)

            with vs.variable_scope(vs.get_variable_scope(), reuse=True):
                final_rnn_outputs, final_rnn_state = rnn.dynamic_rnn(
                    cell,
                    inputs,
                    sequence_length=sequence_length,
                    initial_state=zero_state)

            sess.run(variables.global_variables_initializer())
            sess_results = sess.run({
                "final_decoder_outputs": final_decoder_outputs,
                "final_decoder_state": final_decoder_state,
                "final_rnn_outputs": final_rnn_outputs,
                "final_rnn_state": final_rnn_state
            })

            # Decoder only runs out to max_out; ensure values are identical
            # to dynamic_rnn, which also zeros out outputs and passes along state.
            self.assertAllClose(
                sess_results["final_decoder_outputs"].rnn_output,
                sess_results["final_rnn_outputs"][:, 0:max_out, :])
            self.assertAllClose(sess_results["final_decoder_state"],
                                sess_results["final_rnn_state"])
Beispiel #54
0
    def rnn_logit_fn(features, mode):
        """Recurrent Neural Network logit_fn.

    Args:
      features: This is the first item returned from the `input_fn`
                passed to `train`, `evaluate`, and `predict`. This should be a
                single `Tensor` or `dict` of same.
      mode: Optional. Specifies if this is training, evaluation or prediction. See
            `ModeKeys`.

    Returns:
      A tuple of `Tensor` objects representing the logits and the sequence
      length mask.
    """
        with variable_scope.variable_scope(
                'sequence_input_layer',
                values=tuple(six.itervalues(features)),
                partitioner=input_layer_partitioner):
            sequence_input, sequence_length = seq_fc.sequence_input_layer(
                features=features, feature_columns=sequence_feature_columns)
            summary.histogram('sequence_length', sequence_length)

            if context_feature_columns:
                context_input = feature_column_lib.input_layer(
                    features=features, feature_columns=context_feature_columns)
                sequence_input = _concatenate_context_input(
                    sequence_input, context_input)

        cell = rnn_cell_fn(mode)
        # Ignore output state.
        rnn_outputs, _ = rnn.dynamic_rnn(cell=cell,
                                         inputs=sequence_input,
                                         sequence_length=sequence_length,
                                         dtype=dtypes.float32,
                                         time_major=False)

        if not return_sequences:
            rnn_outputs = _select_last_activations(rnn_outputs,
                                                   sequence_length)

        with variable_scope.variable_scope('logits', values=(rnn_outputs, )):
            logits = core_layers.dense(
                rnn_outputs,
                units=output_units,
                activation=None,
                kernel_initializer=init_ops.glorot_uniform_initializer())
        sequence_length_mask = array_ops.sequence_mask(sequence_length)
        return logits, sequence_length_mask
Beispiel #55
0
def run_lstm_mnist(lstm_cell=BasicLSTMCell, hidden_size=32, batch_size=256, steps=1000):
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    learning_rate = 0.001
    file_logger = FileLogger('log.tsv', ['step', 'training_loss', 'training_accuracy',
                                         'testing_loss', 'testing_accuracy'])
    x = tf.placeholder('float32', [batch_size, 784, 2 if lstm_cell == PhasedLSTMCell else 1])
    y_ = tf.placeholder('float32', [batch_size, 10])
    initial_states = (tf.random_normal([batch_size, hidden_size], stddev=0.1),
                      tf.random_normal([batch_size, hidden_size], stddev=0.1))
    outputs, _ = dynamic_rnn(lstm_cell(hidden_size), x, initial_state=initial_states, dtype=tf.float32)
    rnn_out = tf.squeeze(outputs[:, -1, :])

    fc0_w = create_weight_variable('fc0_w', [hidden_size, 10])
    fc0_b = create_bias_variable('fc0_b', [10])
    y = tf.matmul(rnn_out, fc0_w) + fc0_b

    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
    grad_update = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    sess.run(tf.global_variables_initializer())

    def transform_x(_x_):
        if lstm_cell == PhasedLSTMCell:
            t = np.reshape(np.tile(np.array(range(784)), (batch_size, 1)), (batch_size, 784))
            return np.squeeze(np.stack([_x_, t], axis=2))
        t_x = np.expand_dims(_x_, axis=2)
        return t_x

    test_images = mnist.test.images[0:256]
    test_labels = mnist.test.labels[0:256]

    for i in range(steps):
        batch = mnist.train.next_batch(batch_size)

        st = time()
        tr_feed_dict = {x: transform_x(batch[0]), y_: batch[1]}
        tr_loss, tr_acc, _ = sess.run([cross_entropy, accuracy, grad_update], feed_dict=tr_feed_dict)

        te_feed_dict = {x: transform_x(test_images), y_: test_labels}
        te_loss, te_acc = sess.run([cross_entropy, accuracy], feed_dict=te_feed_dict)

        print('Forward-Backward pass took {0:.2f}s to complete.'.format(time() - st))
        file_logger.write([i, tr_loss, tr_acc, te_loss, te_acc])

    file_logger.close()
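
run_lstm_mnist also depends on create_weight_variable, create_bias_variable and FileLogger helpers that are not shown; a plausible sketch of the two variable helpers (an assumption, not the original) is:

def create_weight_variable(name, shape):
    # Truncated-normal initialisation for the output-layer weights.
    return tf.get_variable(name, shape, initializer=tf.truncated_normal_initializer(stddev=0.1))

def create_bias_variable(name, shape):
    return tf.get_variable(name, shape, initializer=tf.constant_initializer(0.1))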
Beispiel #56
0
def _tfGRU(x, initial_state, y):
  gru_cell = rnn_cell.GRUCell(
      num_hidden,
      name='gru_cell',
      kernel_initializer=init_ops.zeros_initializer(dtype=dataType),
      bias_initializer=init_ops.zeros_initializer(dtype=dataType))
  outputs, _ = rnn.dynamic_rnn(gru_cell,
                               x,
                               dtype=dataType,
                               initial_state=initial_state,
                               time_major=True)
  softmax = nn.softmax_cross_entropy_with_logits_v2(
      logits=outputs[-1], labels=array_ops.stop_gradient(y))
  loss = math_ops.reduce_mean(softmax)
  train = gradient_descent.GradientDescentOptimizer(lr).minimize(loss)
  return [loss, train]
Beispiel #57
0
    def time_gru_net(self,
                     hidden_units,
                     input_data,
                     input_length,
                     type,
                     scope='gru'):

        cell = self.build_time_aware_gru_cell_time(hidden_units)
        # cell = self.build_cell(hidden_units)
        self.input_length = tf.reshape(input_length, [-1])
        outputs, _ = dynamic_rnn(cell,
                                 inputs=input_data,
                                 sequence_length=self.input_length,
                                 dtype=tf.float32,
                                 scope=scope)
        return outputs
Beispiel #58
0
 def apply(self, is_train, x, mask=None):
     state = dynamic_rnn(self.cell_spec(is_train),
                         x,
                         mask,
                         dtype=tf.float32)[1]
     if isinstance(self.output, int):
         return state[self.output]
     else:
         if self.output is None:
             if not isinstance(state, tf.Tensor):
                 raise ValueError()
             return state
         for i, x in enumerate(state._fields):
             if x == self.output:
                 return state[i]
         raise ValueError()
Beispiel #59
0
  def testNoneDimsWithDynamicRNN(self):
    with self.test_session(use_gpu=self._use_gpu, graph=ops.Graph()) as sess:
      batch_size = 4
      cell_size = 5
      input_size = 6
      num_steps = 7

      cell = gru_ops.GRUBlockCell(cell_size)

      x = array_ops.placeholder(dtypes.float32, shape=(None, None, input_size))
      _, output = rnn.dynamic_rnn(
          cell, x, time_major=True, dtype=dtypes.float32)
      sess.run(variables.global_variables_initializer())
      feed = {}
      feed[x] = np.random.randn(num_steps, batch_size, input_size)
      sess.run(output, feed)
Beispiel #60
0
def dynamic_lstm_model_fn(batch_size, state_size, max_steps):
  # We make inputs and sequence_length constant so that multiple session.run
  # calls produce the same result.
  inputs = constant_op.constant(
      np.random.rand(batch_size, max_steps, state_size), dtype=dtypes.float32)
  sequence_length = constant_op.constant(
      np.random.randint(0, size=[batch_size], high=max_steps + 1),
      dtype=dtypes.int32)

  cell = rnn_cell.BasicLSTMCell(state_size)
  initial_state = cell.zero_state(batch_size, dtypes.float32)
  return inputs, rnn.dynamic_rnn(
      cell,
      inputs,
      sequence_length=sequence_length,
      initial_state=initial_state)
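
A short driving sketch for the model function above (the sizes and the session/variables module names follow the style of the other examples; they are illustrative, not part of the original):

inputs, (outputs, final_state) = dynamic_lstm_model_fn(
    batch_size=8, state_size=16, max_steps=20)
with session.Session() as sess:
  sess.run(variables.global_variables_initializer())
  out_vals = sess.run(outputs)
  # out_vals has shape (8, 20, 16); steps beyond each sequence_length are zeroed.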