def __call__(self,
               inputs,
               initial_state=None,
               dtype=None,
               sequence_length=None,
               scope=None):
    is_list = isinstance(inputs, list)
    if self._use_dynamic_rnn:
      if is_list:
        inputs = array_ops.pack(inputs)
      outputs, state = rnn.dynamic_rnn(
          self._cell,
          inputs,
          sequence_length=sequence_length,
          initial_state=initial_state,
          dtype=dtype,
          time_major=True,
          scope=scope)
      if is_list:
        # Convert outputs back to list
        outputs = array_ops.unpack(outputs)
    else:  # non-dynamic rnn
      if not is_list:
        inputs = array_ops.unpack(inputs)
      outputs, state = rnn.rnn(self._cell,
                               inputs,
                               initial_state=initial_state,
                               dtype=dtype,
                               sequence_length=sequence_length,
                               scope=scope)
      if not is_list:
        # Convert outputs back to tensor
        outputs = array_ops.pack(outputs)

    return outputs, state
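This wrapper accepts either a single time-major tensor or a Python list of per-timestep tensors, and uses pack/unpack (renamed stack/unstack in later TensorFlow releases) to convert between the two representations around the RNN call. A minimal NumPy sketch of that round trip, purely illustrative:

import numpy as np

time_len, batch_size, input_size = 3, 2, 4
as_list = [np.random.randn(batch_size, input_size) for _ in range(time_len)]

# "pack" the list into one time-major array of shape (time_len, batch_size, input_size)
packed = np.stack(as_list, axis=0)

# "unpack" it back into a list of (batch_size, input_size) arrays
unpacked = [packed[t] for t in range(time_len)]
assert all(np.array_equal(a, b) for a, b in zip(as_list, unpacked))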
Example #2
def seq2seq_inputs(x, y, input_length, output_length, sentinel=None, name=None):
  """Processes inputs for Sequence to Sequence models.

  Args:
    x: Input Tensor [batch_size, input_length, embed_dim].
    y: Output Tensor [batch_size, output_length, embed_dim].
    input_length: length of input x.
    output_length: length of output y.
    sentinel: optional first input to the decoder and the final expected output.
      If sentinel is not provided, zeros are used. Because y is not available
      at sampling time, the shape of the sentinel is inferred from x.
    name: Operation name.

  Returns:
    Encoder input from x, and decoder inputs and outputs from y.
  """
  with ops.name_scope(name, "seq2seq_inputs", [x, y]):
    in_x = array_ops_.unpack(x, axis=1)
    y = array_ops_.unpack(y, axis=1)
    if sentinel is None:
      # Set to zeros of shape of y[0], using x for batch size.
      sentinel_shape = array_ops_.pack(
          [array_ops_.shape(x)[0], y[0].get_shape()[1]])
      sentinel = array_ops_.zeros(sentinel_shape)
      sentinel.set_shape(y[0].get_shape())
    in_y = [sentinel] + y
    out_y = y + [sentinel]
    return in_x, in_y, out_y
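The helper above builds the decoder inputs by prepending the sentinel to the unpacked targets, and the expected decoder outputs by appending it. A toy NumPy sketch of that shift (batch size, length, and embedding size are made-up values):

import numpy as np

batch_size, embed_dim = 2, 4
y_steps = [np.full((batch_size, embed_dim), t, dtype=np.float32) for t in range(3)]
sentinel = np.zeros((batch_size, embed_dim), dtype=np.float32)  # default when none is given

decoder_inputs = [sentinel] + y_steps    # sentinel, y[0], y[1], y[2]
decoder_outputs = y_steps + [sentinel]   # y[0], y[1], y[2], sentinel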
Example #3
 def testCannotInferNumFromNoneShape(self):
   x = array_ops.placeholder(np.float32, shape=(None,))
   with self.assertRaisesRegexp(ValueError,
                                r'Cannot infer num from shape \(\?,\)'):
     array_ops.unpack(x)
   with self.assertRaisesRegexp(ValueError,
                                r'Cannot infer num from shape \(\?,\)'):
     array_ops.unstack(x)
Example #4
 def testAxisOutOfNegativeRange(self):
     a = constant_op.constant([[1, 2, 3], [4, 5, 6]], name='a')
     with self.assertRaisesRegexp(ValueError,
                                  r'axis = -3 not in \[-2, 2\)'):
         array_ops.unpack(a, axis=-3)
     with self.assertRaisesRegexp(ValueError,
                                  r'axis = -3 not in \[-2, 2\)'):
         array_ops.unstack(a, axis=-3)
Example #5
 def testCannotInferNumFromNoneShape(self):
     x = array_ops.placeholder(np.float32, shape=(None, ))
     with self.assertRaisesRegexp(ValueError,
                                  r'Cannot infer num from shape \(\?,\)'):
         array_ops.unpack(x)
     with self.assertRaisesRegexp(ValueError,
                                  r'Cannot infer num from shape \(\?,\)'):
         array_ops.unstack(x)
Example #6
 def testCannotInferNumFromUnknownShape(self):
     x = array_ops.placeholder(np.float32)
     with self.assertRaisesRegexp(ValueError,
                                  r'Cannot infer num from shape <unknown>'):
         array_ops.unpack(x)
     with self.assertRaisesRegexp(ValueError,
                                  r'Cannot infer num from shape <unknown>'):
         array_ops.unstack(x)
Example #7
 def testCannotInferNumFromUnknownShape(self):
   x = array_ops.placeholder(np.float32)
   with self.assertRaisesRegexp(ValueError,
                                r'Cannot infer num from shape <unknown>'):
     array_ops.unpack(x)
   with self.assertRaisesRegexp(ValueError,
                                r'Cannot infer num from shape <unknown>'):
     array_ops.unstack(x)
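The two tests above verify that unpack/unstack refuse to guess the number of output tensors when the size along the split axis is not statically known. In that case the caller has to pass num explicitly; a hedged sketch, assuming a TF 1.x-style public API with tf.placeholder and tf.unstack:

import tensorflow as tf  # assumes a TF 1.x-style API

x = tf.placeholder(tf.float32, shape=(None, 5))
# tf.unstack(x) would raise ValueError here: the leading dimension is unknown,
# so the number of output tensors cannot be inferred from the static shape.
pieces = tf.unstack(x, num=3)  # supplying num explicitly resolves the ambiguity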
Example #8
        def attention(query, prev_states, b_a):
            """Put attention masks on hidden using hidden_features and query."""
            ds = []  # Results of attention reads will be stored here.
            if nest.is_sequence(query):  # If the query is a tuple, flatten it.
                query_list = nest.flatten(query)
                for q in query_list:  # Check that ndims == 2 if specified.
                    ndims = q.get_shape().ndims
                    if ndims:
                        assert ndims == 2
                query = array_ops.concat(1, query_list)
            for a in xrange(num_heads):
                with variable_scope.variable_scope("Attention_%d" % a):
                    y = linear(query, attention_vec_size_state, True)
                    y = array_ops.reshape(y,
                                          [-1, 1, 1, attention_vec_size_state])
                    # Attention mask is a softmax of v^T * tanh(...).

                    temp = hidden_features_states[a] + y
                    new_states = array_ops.squeeze(temp, [2])

                    new_states_list = array_ops.unpack(new_states, axis=1)
                    #print(temp.get_shape(), new_states.get_shape(), len(new_states_list), new_states_list[0].get_shape())
                    distract_states_list = []
                    for i, _ in enumerate(new_states_list):
                        temp = array_ops.reshape(prev_states[i], [-1, 1])
                        t1 = math_ops.matmul(temp, b_a)
                        print("b_a size and prev_states size",
                              temp.get_shape(), prev_states[i].get_shape(),
                              b_a.get_shape(), t1.get_shape())
                        distract_states_list.append(new_states_list[i] - t1)

                    distract_states = array_ops.pack(distract_states_list,
                                                     axis=1)

                    print(len(distract_states_list),
                          distract_states.get_shape())
                    s = math_ops.reduce_sum(
                        v_state[a] * math_ops.tanh(distract_states), [2])

                    print(s.get_shape())
                    a = nn_ops.softmax(s)
                    prev_states = array_ops.pack(prev_states, axis=1)
                    prev_states = prev_states + a

                    # Now calculate the attention-weighted vector d.
                    d = math_ops.reduce_sum(
                        array_ops.reshape(a, [-1, attn_length_state, 1, 1]) *
                        hidden_states, [1, 2])
                    ds.append(array_ops.reshape(d, [-1, attn_size_state]))
            return ds, array_ops.unpack(prev_states, axis=1)
Example #9
 def testSimple(self):
     np.random.seed(7)
     with self.test_session(use_gpu=True):
         for shape in (2, ), (3, ), (2, 3), (3, 2), (4, 3, 2):
             data = np.random.randn(*shape)
             # Convert data to a single tensorflow tensor
             x = constant_op.constant(data)
             # Unpack into a list of tensors
             cs_unpacked = array_ops.unpack(x, num=shape[0])
             cs_unstacked = array_ops.unstack(x, num=shape[0])
             for cs in (cs_unpacked, cs_unstacked):
                 self.assertEqual(type(cs), list)
                 self.assertEqual(len(cs), shape[0])
                 cs = [c.eval() for c in cs]
                 self.assertAllEqual(cs, data)
Example #10
 def testSimple(self):
   np.random.seed(7)
   with self.test_session(use_gpu=True):
     for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
       data = np.random.randn(*shape)
       # Convert data to a single tensorflow tensor
       x = constant_op.constant(data)
       # Unpack into a list of tensors
       cs_unpacked = array_ops.unpack(x, num=shape[0])
       cs_unstacked = array_ops.unstack(x, num=shape[0])
       for cs in (cs_unpacked, cs_unstacked):
         self.assertEqual(type(cs), list)
         self.assertEqual(len(cs), shape[0])
         cs = [c.eval() for c in cs]
         self.assertAllEqual(cs, data)
Example #11
 def _cat_probs(self, log_probs):
   """Get a list of num_components batchwise probabilities."""
   which_softmax = nn_ops.log_softmax if log_probs else nn_ops.softmax
   cat_probs = which_softmax(self.cat.logits)
   cat_probs = array_ops.unpack(
       cat_probs, num=self.num_components, axis=-1)
   return cat_probs
Example #12
 def __call__(self, inputs, state, scope=None):
   """Run this multi-layer cell on inputs, starting from state."""
   with vs.variable_scope(scope or type(self).__name__):  # "MultiRNNCell"
     cur_state_pos = 0
     cur_inp = inputs
     new_states = []
     for i, cell in enumerate(self._cells):
       with vs.variable_scope("Cell%d" % i):
         if self._state_is_tuple:
           if not nest.is_sequence(state):
             raise ValueError(
                 "Expected state to be a tuple of length %d, but received: %s"
                 % (len(self.state_size), state))
           cur_state = state[i]
         else:
           # print("STATE",state)
           """
           cur_state = array_ops.slice(
               state, [0, cur_state_pos], [-1, cell.state_size])
           """
           cur_state = array_ops.unpack(state)[i]
           # cur_state_pos += cell.state_size
         cur_inp, new_state = cell(cur_inp, cur_state)
         new_states.append(new_state)
   """
   new_states = (tuple(new_states) if self._state_is_tuple
                 else array_ops.concat(1, new_states))
   """
   new_states = array_ops.pack(new_states)
   return cur_inp, new_states
Example #13
def _reverse_seq(input_seq, lengths):
    """Reverse a list of Tensors up to specified lengths.

  Args:
    input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features)
    lengths:   A tensor of dimension batch_size, containing lengths for each
               sequence in the batch. If "None" is specified, simply reverses
               the list.

  Returns:
    time-reversed sequence
  """
    if lengths is None:
        return list(reversed(input_seq))

    input_shape = tensor_shape.unknown_shape(ndims=input_seq[0].get_shape().ndims)
    for input_ in input_seq:
        input_shape.merge_with(input_.get_shape())
        input_.set_shape(input_shape)

    # Join into (time, batch_size, depth)
    s_joined = array_ops.pack(input_seq)

    # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32
    if lengths is not None:
        lengths = math_ops.to_int64(lengths)

    # Reverse along dimension 0
    s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
    # Split again into list
    result = array_ops.unpack(s_reversed)
    for r in result:
        r.set_shape(input_shape)
    return result
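reverse_sequence reverses each batch element only up to its own length and leaves any padding in place. An illustrative NumPy version of the same semantics for a tiny time-major batch:

import numpy as np

time_len, batch_size, depth = 4, 2, 1
seq = np.arange(time_len * batch_size * depth).reshape(time_len, batch_size, depth)
lengths = np.array([3, 2])

reversed_seq = seq.copy()
for b, length in enumerate(lengths):
    # Reverse only the first `length` steps of batch element b; padding stays put.
    reversed_seq[:length, b] = seq[:length, b][::-1]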
Example #14
 def _cat_probs(self, log_probs):
   """Get a list of num_components batchwise probabilities."""
   which_softmax = nn_ops.log_softmax if log_probs else nn_ops.softmax
   cat_probs = which_softmax(self.cat.logits)
   cat_probs = array_ops.unpack(
       cat_probs, num=self.num_components, axis=-1)
   return cat_probs
Example #15
def _reverse_seq(input_seq, lengths):
  """Reverse a list of Tensors up to specified lengths.

  Args:
    input_seq: Sequence of seq_len tensors of dimension (batch_size, depth)
    lengths:   A tensor of dimension batch_size, containing lengths for each
               sequence in the batch. If "None" is specified, simply
               reverses the list.

  Returns:
    time-reversed sequence
  """
  if lengths is None:
    return list(reversed(input_seq))

  for input_ in input_seq:
    input_.set_shape(input_.get_shape().with_rank(2))

  # Join into (time, batch_size, depth)
  s_joined = array_ops_.pack(input_seq)

  # Reverse along dimension 0
  s_reversed = array_ops_.reverse_sequence(s_joined, lengths, 0, 1)
  # Split again into list
  result = array_ops_.unpack(s_reversed)
  return result
Example #16
    def testZeroLengthDim(self):
        with self.test_session():
            x = array_ops.zeros(shape=(0, 1, 2))
            y = array_ops.unpack(x, axis=1)[0].eval()
            self.assertEqual(y.shape, (0, 2))

            y = array_ops.unstack(x, axis=1)[0].eval()
            self.assertEqual(y.shape, (0, 2))
Example #17
  def testZeroLengthDim(self):
    with self.test_session():
      x = array_ops.zeros(shape=(0, 1, 2))
      y = array_ops.unpack(x, axis=1)[0].eval()
      self.assertEqual(y.shape, (0, 2))

      y = array_ops.unstack(x, axis=1)[0].eval()
      self.assertEqual(y.shape, (0, 2))
Example #18
    def testInferNum(self):
        with self.test_session():
            for shape in (2, ), (3, ), (2, 3), (3, 2), (4, 3, 2):
                x = array_ops.placeholder(np.float32, shape=shape)
                cs = array_ops.unpack(x)
                self.assertEqual(type(cs), list)
                self.assertEqual(len(cs), shape[0])

                cs = array_ops.unstack(x)
                self.assertEqual(type(cs), list)
                self.assertEqual(len(cs), shape[0])
Example #19
  def testInferNum(self):
    with self.test_session():
      for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
        x = array_ops.placeholder(np.float32, shape=shape)
        cs = array_ops.unpack(x)
        self.assertEqual(type(cs), list)
        self.assertEqual(len(cs), shape[0])

        cs = array_ops.unstack(x)
        self.assertEqual(type(cs), list)
        self.assertEqual(len(cs), shape[0])
Example #20
  def testAxis0Default(self):
    with self.test_session() as sess:
      a = constant_op.constant([[1, 2, 3], [4, 5, 6]], name='a')

      unpacked = sess.run(array_ops.unpack(a))
      unstacked = sess.run(array_ops.unstack(a))

    self.assertEqual(len(unpacked), 2)
    self.assertAllEqual(unpacked[0], [1, 2, 3])
    self.assertAllEqual(unpacked[1], [4, 5, 6])
    self.assertEqual(len(unstacked), 2)
    self.assertAllEqual(unstacked[0], [1, 2, 3])
    self.assertAllEqual(unstacked[1], [4, 5, 6])
Example #21
    def testAxis0Default(self):
        with self.test_session() as sess:
            a = constant_op.constant([[1, 2, 3], [4, 5, 6]], name='a')

            unpacked = sess.run(array_ops.unpack(a))
            unstacked = sess.run(array_ops.unstack(a))

        self.assertEqual(len(unpacked), 2)
        self.assertAllEqual(unpacked[0], [1, 2, 3])
        self.assertAllEqual(unpacked[1], [4, 5, 6])
        self.assertEqual(len(unstacked), 2)
        self.assertAllEqual(unstacked[0], [1, 2, 3])
        self.assertAllEqual(unstacked[1], [4, 5, 6])
Example #22
    def build(self):

        self.input_0 = tf.placeholder(
            tf.float32,
            [self.config.max_length_0_input, 1, self.config.embedding_size])
        self.input_0_length = tf.placeholder(tf.int32)

        self.input_1 = tf.placeholder(
            tf.float32,
            [self.config.max_length_0_input, 1, self.config.embedding_size])
        self.input_1_length = tf.placeholder(tf.int32)

        input_0 = array_ops.unpack(self.input_0)
        input_1 = array_ops.unpack(self.input_1)

        # bidirectional rnn
        cell = rnn_cell.GRUCell(self.config.embedding_size)

        initial_state_fw = array_ops.zeros(array_ops.pack([1,
                                                           cell.state_size]),
                                           dtype=tf.float32)
        initial_state_fw.set_shape([1, cell.state_size])
        initial_state_bw = array_ops.zeros(array_ops.pack([1,
                                                           cell.state_size]),
                                           dtype=tf.float32)
        initial_state_bw.set_shape([1, cell.state_size])

        states = bidirectional_rnn(
            cell,
            cell,
            input_0,
            initial_state_fw=initial_state_fw,
            initial_state_bw=initial_state_bw,
            dtype=tf.float32,
            # sequence_length=3
        )

        self.test = array_ops.pack(states)
Example #23
 def _sample_n(self, n, seed=None):
     # We use 2 uniform random floats to generate polar random variates.
     # http://dl.acm.org/citation.cfm?id=179631
     # Theorem 2. Let G, H be iid variates, uniformly distributed on [0,1].
     # Let theta = 2*pi*H, let R = sqrt(df*(G^(-2/df) - 1)) for df > 0.
     # Let X = R*cos(theta), and let Y = R*sin(theta).
     # Then X ~ t_df and Y ~ t_df.
     # The variates X and Y are not independent.
     shape = array_ops.concat(0, ([2, n], self.batch_shape()))
     uniform = random_ops.random_uniform(shape=shape, dtype=self.dtype, seed=seed)
     samples_g, samples_h = array_ops.unpack(uniform, num=2)
     theta = (2.0 * math.pi) * samples_h
     r = math_ops.sqrt(self.df * (math_ops.pow(samples_g, -2 / self.df) - 1))
     samples = r * math_ops.cos(theta)
     return samples * self.sigma + self.mu
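The sampler follows the polar method quoted in the comments: from two uniforms G and H it forms theta = 2*pi*H and R = sqrt(df*(G^(-2/df) - 1)), and R*cos(theta) is t-distributed with df degrees of freedom. A plain NumPy sketch of the same recipe (the parameter values are arbitrary):

import numpy as np

df, mu, sigma, n = 3.0, 0.0, 1.0, 100000
g = np.random.uniform(size=n)
h = np.random.uniform(size=n)

theta = 2.0 * np.pi * h
r = np.sqrt(df * (g ** (-2.0 / df) - 1.0))
samples = r * np.cos(theta) * sigma + mu  # X = R*cos(theta) ~ t_df, then scale and shift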
Example #24
    def testAgainstNumpy(self):
        # For 1 to 5 dimensions.
        for i in range(1, 6):
            a = np.random.random(np.random.permutation(i) + 1)

            # For all the possible axis to split it, including negative indices.
            for j in range(-i, i):
                expected = np_split_squeeze(a, j)

                with self.test_session() as sess:
                    actual_unpack = sess.run(array_ops.unpack(a, axis=j))
                    actual_unstack = sess.run(array_ops.unstack(a, axis=j))

                self.assertAllEqual(expected, actual_unpack)
                self.assertAllEqual(expected, actual_unstack)
Example #25
  def testAgainstNumpy(self):
    # For 1 to 5 dimensions.
    for i in range(1, 6):
      a = np.random.random(np.random.permutation(i) + 1)

      # For all the possible axis to split it, including negative indices.
      for j in range(-i, i):
        expected = np_split_squeeze(a, j)

        with self.test_session() as sess:
          actual_unpack = sess.run(array_ops.unpack(a, axis=j))
          actual_unstack = sess.run(array_ops.unstack(a, axis=j))

        self.assertAllEqual(expected, actual_unpack)
        self.assertAllEqual(expected, actual_unstack)
Example #26
  def testGradientsAxis0(self):
    for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
      data = np.random.randn(*shape)
      shapes = [shape[1:]] * shape[0]
      for i in xrange(shape[0]):
        with self.test_session(use_gpu=True):
          x = constant_op.constant(data)
          cs = array_ops.unpack(x, num=shape[0])
          err = gradient_checker.compute_gradient_error(x, shape, cs[i],
                                                        shapes[i])
          self.assertLess(err, 1e-6)

          cs = array_ops.unstack(x, num=shape[0])
          err = gradient_checker.compute_gradient_error(x, shape, cs[i],
                                                        shapes[i])
          self.assertLess(err, 1e-6)
Example #27
    def testGradientsAxis0(self):
        for shape in (2, ), (3, ), (2, 3), (3, 2), (4, 3, 2):
            data = np.random.randn(*shape)
            shapes = [shape[1:]] * shape[0]
            for i in xrange(shape[0]):
                with self.test_session(use_gpu=True):
                    x = constant_op.constant(data)
                    cs = array_ops.unpack(x, num=shape[0])
                    err = gradient_checker.compute_gradient_error(
                        x, shape, cs[i], shapes[i])
                    self.assertLess(err, 1e-6)

                    cs = array_ops.unstack(x, num=shape[0])
                    err = gradient_checker.compute_gradient_error(
                        x, shape, cs[i], shapes[i])
                    self.assertLess(err, 1e-6)
Example #28
def call_rnn_bidir_dynamic(cell_encoder_fw, cell_encoder_bw, embeddings,
                           sequence_length, dtype):

    embeddings = array_ops.pack(embeddings, axis=1)
    encoder_outputs, encoder_state = rnn.bidirectional_dynamic_rnn(
        cell_encoder_fw,
        cell_encoder_bw,
        embeddings,
        sequence_length,
        dtype=dtype)

    encoder_outputs = array_ops.concat(2, encoder_outputs)
    encoder_state = array_ops.concat(1, encoder_state)

    encoder_outputs = array_ops.unpack(encoder_outputs, axis=1)
    return encoder_outputs, encoder_state
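The helper above concatenates the forward and backward outputs along the feature axis (and the two final states along the state axis) before unpacking the result per time step. A NumPy sketch of those two steps, with illustrative shapes:

import numpy as np

batch_size, time_len, units = 2, 5, 8
fw_out = np.random.randn(batch_size, time_len, units)
bw_out = np.random.randn(batch_size, time_len, units)

outputs = np.concatenate([fw_out, bw_out], axis=2)       # (batch, time, 2 * units)
per_step = [outputs[:, t, :] for t in range(time_len)]   # "unpack" along the time axis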
Example #29
def _reverse_seq(input_seq, lengths):
  """Reverse a list of Tensors up to specified lengths.

  Args:
    input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features)
               or nested tuples of tensors.
    lengths:   A tensor of dimension batch_size, containing lengths for each
               sequence in the batch. If "None" is specified, simply reverses
               the list.

  Returns:
    time-reversed sequence
  """
  if lengths is None:
    return list(reversed(input_seq))

  input_is_tuple = nest.is_sequence(input_seq[0])
  flat_input_seq = (nest.flatten(input_) if input_is_tuple else [input_]
                    for input_ in input_seq)

  flat_results = [[] for _ in range(len(input_seq))]
  for sequence in zip(*flat_input_seq):
    input_shape = tensor_shape.unknown_shape(
        ndims=sequence[0].get_shape().ndims)
    for input_ in sequence:
      input_shape.merge_with(input_.get_shape())
      input_.set_shape(input_shape)

    # Join into (time, batch_size, depth)
    s_joined = array_ops.pack(sequence)

    # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32
    if lengths is not None:
      lengths = math_ops.to_int64(lengths)

    # Reverse along dimension 0
    s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
    # Split again into list
    result = array_ops.unpack(s_reversed)
    for r, flat_result in zip(result, flat_results):
      r.set_shape(input_shape)
      flat_result.append(r)

  results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result)
             if input_is_tuple else flat_result[0]
             for input_, flat_result in zip(input_seq, flat_results)]
  return results
Example #30
  def testGradientsAxis1(self):
    for shape in (2, 3), (3, 2), (4, 3, 2):
      data = np.random.randn(*shape)
      out_shape = list(shape)
      del out_shape[1]
      for i in xrange(shape[1]):
        with self.test_session(use_gpu=True):
          x = constant_op.constant(data)
          cs = array_ops.unpack(x, num=shape[1], axis=1)
          err = gradient_checker.compute_gradient_error(x, shape, cs[i],
                                                        out_shape)
          self.assertLess(err, 1e-6)

          cs = array_ops.unstack(x, num=shape[1], axis=1)
          err = gradient_checker.compute_gradient_error(x, shape, cs[i],
                                                        out_shape)
          self.assertLess(err, 1e-6)
Example #31
    def testGradientsAxis1(self):
        for shape in (2, 3), (3, 2), (4, 3, 2):
            data = np.random.randn(*shape)
            out_shape = list(shape)
            del out_shape[1]
            for i in xrange(shape[1]):
                with self.test_session(use_gpu=True):
                    x = constant_op.constant(data)
                    cs = array_ops.unpack(x, num=shape[1], axis=1)
                    err = gradient_checker.compute_gradient_error(
                        x, shape, cs[i], out_shape)
                    self.assertLess(err, 1e-6)

                    cs = array_ops.unstack(x, num=shape[1], axis=1)
                    err = gradient_checker.compute_gradient_error(
                        x, shape, cs[i], out_shape)
                    self.assertLess(err, 1e-6)
Example #32
def dense_to_sparse_tensor(dense_tensor, ignore_value=None):
    """Converts a dense Tensor to a SparseTensor, dropping ignore_value cells.

  Args:
    dense_tensor: An `Output`.
    ignore_value: Entries in `dense_tensor` equal to this value will be
      absent from the returned `SparseTensor`. If `None`, the default value of
      dense_tensor's dtype will be used (e.g. '' for `str`, 0 for `int`).

  Returns:
    A `SparseTensor` with the same shape as `dense_tensor`.

  Raises:
    ValueError: when `dense_tensor`'s rank is `None`.
  """
    with ops.name_scope("DenseToSparseTensor"):
        dense_t = ops.convert_to_tensor(dense_tensor)
        if dense_t.get_shape().ndims is None:
            # TODO(b/32318825): Implement dense_to_sparse_tensor for undefined rank.
            raise ValueError(
                "dense_tensor.get_shape() should be defined, got None.")
        if ignore_value is None:
            if dense_t.dtype == dtypes.string:
                # Special case: TF strings are converted to numpy objects by default.
                ignore_value = ""
            else:
                ignore_value = dense_t.dtype.as_numpy_dtype()
        dense_shape = math_ops.cast(array_ops.shape(dense_t), dtypes.int64)
        indices = array_ops.where(
            math_ops.not_equal(dense_t,
                               math_ops.cast(ignore_value, dense_t.dtype)))
        index_dims = len(dense_t.get_shape())
        # Flattens the tensor and indices for use with gather.
        flat_tensor = array_ops.reshape(dense_t, [-1])
        flat_indices = indices[:, index_dims - 1]
        # Computes the correct flattened indices for 2d (or higher) tensors.
        if index_dims > 1:
            higher_dims = indices[:, :index_dims - 1]
            shape_multipliers = array_ops.pack(
                _multiplier_helper(array_ops.unpack(dense_shape)[1:]))
            offsets = math_ops.reduce_sum(math_ops.mul(higher_dims,
                                                       shape_multipliers),
                                          reduction_indices=[1])
            flat_indices = math_ops.add(flat_indices, offsets)
        values = array_ops.gather(flat_tensor, flat_indices)
        return sparse_tensor.SparseTensor(indices, values, dense_shape)
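The offset computation above turns multi-dimensional indices into positions in the flattened tensor: each leading index is multiplied by the product of the trailing dimension sizes and added to the last index. A small NumPy check of that arithmetic on a 2-D example:

import numpy as np

dense = np.array([[0, 7, 0],
                  [3, 0, 5]])
indices = np.argwhere(dense != 0)            # [[0, 1], [1, 0], [1, 2]]
shape = np.array(dense.shape)

multipliers = np.array([shape[1:].prod()])   # product of trailing dims, here [3]
offsets = (indices[:, :-1] * multipliers).sum(axis=1)
flat_indices = indices[:, -1] + offsets      # [1, 3, 5]
values = dense.reshape(-1)[flat_indices]     # [7, 3, 5]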
Example #33
def _ImageDimensions(image):
    """Returns the dimensions of an image tensor.

  Args:
    image: A 3-D Tensor of shape `[height, width, channels]`.

  Returns:
    A list of `[height, width, channels]` corresponding to the dimensions of the
    input image.  Dimensions that are statically known are python integers,
    otherwise they are integer scalar tensors.
  """
    if image.get_shape().is_fully_defined():
        return image.get_shape().as_list()
    else:
        static_shape = image.get_shape().with_rank(3).as_list()
        dynamic_shape = array_ops.unpack(array_ops.shape(image), 3)
        return [s if s is not None else d for s, d in zip(static_shape, dynamic_shape)]
Example #34
def _ImageDimensions(images, dynamic_shape=False):
  """Returns the dimensions of an image tensor.
  Args:
    images: 4-D Tensor of shape [batch, height, width, channels]
    dynamic_shape: Whether the input image has undetermined shape. If set to
      `True`, shape information will be retrieved at run time. Defaults to
      `False`.

  Returns:
    list of integers [batch, height, width, channels]
  """
  # A simple abstraction to provide names for each dimension. This abstraction
  # should make it simpler to switch dimensions in the future (e.g. if we ever
  # want to switch height and width.)
  if dynamic_shape:
    return array_ops.unpack(array_ops.shape(images))
  else:
    return images.get_shape().as_list()
Example #35
def dense_to_sparse_tensor(dense_tensor, ignore_value=None):
  """Converts a dense Tensor to a SparseTensor, dropping ignore_value cells.

  Args:
    dense_tensor: A `Tensor`.
    ignore_value: Entries in `dense_tensor` equal to this value will be
      absent from the returned `SparseTensor`. If `None`, the default value of
      dense_tensor's dtype will be used (e.g. '' for `str`, 0 for `int`).

  Returns:
    A `SparseTensor` with the same shape as `dense_tensor`.

  Raises:
    ValueError: when `dense_tensor`'s rank is `None`.
  """
  with ops.name_scope("DenseToSparseTensor"):
    dense_t = ops.convert_to_tensor(dense_tensor)
    if dense_t.get_shape().ndims is None:
      # TODO(b/32318825): Implement dense_to_sparse_tensor for undefined rank.
      raise ValueError("dense_tensor.get_shape() should be defined, got None.")
    if ignore_value is None:
      if dense_t.dtype == dtypes.string:
        # Special case: TF strings are converted to numpy objects by default.
        ignore_value = ""
      else:
        ignore_value = dense_t.dtype.as_numpy_dtype()
    dense_shape = math_ops.cast(array_ops.shape(dense_t), dtypes.int64)
    indices = array_ops.where(
        math_ops.not_equal(dense_t, math_ops.cast(ignore_value, dense_t.dtype)))
    index_dims = len(dense_t.get_shape())
    # Flattens the tensor and indices for use with gather.
    flat_tensor = array_ops.reshape(dense_t, [-1])
    flat_indices = indices[:, index_dims - 1]
    # Computes the correct flattened indices for 2d (or higher) tensors.
    if index_dims > 1:
      higher_dims = indices[:, :index_dims - 1]
      shape_multipliers = array_ops.pack(
          _multiplier_helper(array_ops.unpack(dense_shape)[1:]))
      offsets = math_ops.reduce_sum(
          math_ops.mul(higher_dims, shape_multipliers), reduction_indices=[1])
      flat_indices = math_ops.add(flat_indices, offsets)
    values = array_ops.gather(flat_tensor, flat_indices)
    return sparse_tensor.SparseTensor(indices, values, dense_shape)
Example #36
def _ImageDimensions(images, static_only=True):
  """Returns the dimensions of an image tensor.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]`
    static_only: Boolean, whether to return only static shape.

  Returns:
    list of integers `[batch, height, width, channels]`, when static shape is
    fully defined or `static_only` is `True`.
    list of integer scalar tensors `[batch, height, width, channels]`, when
    static shape is not fully defined.
  """
  # A simple abstraction to provide names for each dimension. This abstraction
  # should make it simpler to switch dimensions in the future (e.g. if we ever
  # want to switch height and width.)
  if static_only or images.get_shape().is_fully_defined():
    return images.get_shape().as_list()
  else:
    return array_ops.unpack(array_ops.shape(images))
Example #37
def _ImageDimensions(images, static_only=True):
    """Returns the dimensions of an image tensor.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]`
    static_only: Boolean, whether to return only static shape.

  Returns:
    list of integers `[batch, height, width, channels]`, when static shape is
    fully defined or `static_only` is `True`.
    list of integer scalar tensors `[batch, height, width, channels]`, when
    static shape is not fully defined.
  """
    # A simple abstraction to provide names for each dimension. This abstraction
    # should make it simpler to switch dimensions in the future (e.g. if we ever
    # want to switch height and width.)
    if static_only or images.get_shape().is_fully_defined():
        return images.get_shape().as_list()
    else:
        return array_ops.unpack(array_ops.shape(images))
Example #38
  def sample(self, n, seed=None, name="sample"):
    """Sample `n` observations from the Student t Distributions.

    Args:
      n: `Scalar`, type int32, the number of observations to sample.
      seed: Python integer, the random seed.
      name: The name to give this op.

    Returns:
      samples: a `Tensor` of shape `(n,) + self.batch_shape + self.event_shape`
          with values of type `self.dtype`.
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self._df, self._mu, self._sigma, n], name):
        n = ops.convert_to_tensor(n, name="n")
        n_val = tensor_util.constant_value(n)

        # We use 2 uniform random floats to generate polar random variates.
        # http://dl.acm.org/citation.cfm?id=179631
        # Theorem 2. Let G, H be iid variates, uniformly distributed on [0,1].
        # Let theta = 2*pi*H, let R = sqrt(df*(G^(-2/df) - 1)) for df > 0.
        # Let X = R*cos(theta), and let Y = R*sin(theta).
        # Then X ~ t_df and Y ~ t_df.
        # The variates X and Y are not independent.
        shape = array_ops.concat(0, [array_ops.pack([2, n]),
                                     self.batch_shape()])
        uniform = random_ops.random_uniform(shape=shape,
                                            dtype=self.dtype,
                                            seed=seed)
        samples_g, samples_h = array_ops.unpack(uniform, num=2)
        theta = (2 * np.pi) * samples_h
        r = math_ops.sqrt(self._df *
                          (math_ops.pow(samples_g, -2 / self._df) - 1))
        samples = r * math_ops.cos(theta)

        # Provide some hints to shape inference
        inferred_shape = tensor_shape.vector(n_val).concatenate(
            self.get_batch_shape())
        samples.set_shape(inferred_shape)

        return samples * self._sigma + self._mu
Example #39
  def testBatch(self):
    # Build an arbitrary RGB image
    np.random.seed(7)
    batch_size = 5
    shape = (batch_size, 2, 7, 3)
    inp = np.random.rand(*shape).astype(np.float32)

    # Convert to HSV and back, as a batch and individually
    with self.test_session() as sess:
      batch0 = constant_op.constant(inp)
      batch1 = image_ops.rgb_to_hsv(batch0)
      batch2 = image_ops.hsv_to_rgb(batch1)
      split0 = array_ops.unpack(batch0)
      split1 = map(image_ops.rgb_to_hsv, split0)
      split2 = map(image_ops.hsv_to_rgb, split1)
      join1 = array_ops.pack(split1)
      join2 = array_ops.pack(split2)
      batch1, batch2, join1, join2 = sess.run([batch1, batch2, join1, join2])

    # Verify that processing batch elements together is the same as separate
    self.assertAllClose(batch1, join1)
    self.assertAllClose(batch2, join2)
    self.assertAllClose(batch2, inp)
Example #40
    def call(self, inputs):
        shape = inputs.get_shape().as_list()
        input_dim = shape[-1]
        output_shape = shape[:-1] + [self.units]
        if len(output_shape) > 2:
            # Reshape the input to 2D.
            output_shape_tensors = array_ops.unpack(array_ops.shape(inputs))
            output_shape_tensors[-1] = self.units
            output_shape_tensor = array_ops.pack(output_shape_tensors)
            inputs = array_ops.reshape(inputs, [-1, input_dim])

        outputs = standard_ops.matmul(inputs, self.w)
        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)

        if len(output_shape) > 2:
            # Reshape the output back to the original ndim of the input.
            outputs = array_ops.reshape(outputs, output_shape_tensor)
            outputs.set_shape(output_shape)

        if self.activation is not None:
            return self.activation(outputs)  # pylint: disable=not-callable
        return outputs
Example #41
def hessians(ys, xs, name="hessians", colocate_gradients_with_ops=False,
            gate_gradients=False, aggregation_method=None):
  """Constructs the Hessian of sum of `ys` with respect to `x` in `xs`.

  `hessians()` adds ops to the graph to output the Hessian matrix of `ys`
  with respect to `xs`.  It returns a list of `Tensor` of length `len(xs)`
  where each tensor is the Hessian of `sum(ys)`. This function currently
  only supports evaluating the Hessian with respect to (a list of) one-
  dimensional tensors.

  The Hessian is a matrix of second-order partial derivatives of a scalar
  tensor (see https://en.wikipedia.org/wiki/Hessian_matrix for more details).

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    name: Optional name to use for grouping all the gradient ops together.
      defaults to 'hessians'.
    colocate_gradients_with_ops: See `gradients()` documentation for details.
    gate_gradients: See `gradients()` documentation for details.
    aggregation_method: See `gradients()` documentation for details.

  Returns:
    A list of Hessian matrices of `sum(y)` for each `x` in `xs`.

  Raises:
    LookupError: if one of the operations between `xs` and `ys` does not
      have a registered gradient function.
    ValueError: if the arguments are invalid or not supported. Currently,
      this function only supports one-dimensional `x` in `xs`.
  """
  xs = _AsList(xs)
  kwargs = {
      'colocate_gradients_with_ops': colocate_gradients_with_ops,
      'gate_gradients': gate_gradients,
      'aggregation_method': aggregation_method
    }
  # Compute a hessian matrix for each x in xs
  hessians = []
  for i, x in enumerate(xs):
    # Check dimensions
    ndims = x.get_shape().ndims
    if ndims is None:
      raise ValueError('Cannot compute Hessian because the dimensionality of '
                       'element number %d of `xs` cannot be determined' % i)
    elif ndims != 1:
      raise ValueError('Computing hessians is currently only supported for '
                       'one-dimensional tensors. Element number %d of `xs` has '
                       '%d dimensions.' % (i, ndims))
    with ops.name_scope(name + '_first_derivative'):
      # Compute the partial derivatives of the input with respect to all
      # elements of `x`
      _gradients = gradients(ys, x, **kwargs)[0]
      # Unpack the gradients into a list so we can take derivatives with
      # respect to each element
      _gradients = array_ops.unpack(_gradients)
    with ops.name_scope(name + '_second_derivative'):
      # Compute the partial derivatives with respect to each element of the list
      _hess = [gradients(_gradient, x, **kwargs)[0] for _gradient in _gradients]
      # Pack the list into a matrix and add to the list of hessians
      hessians.append(array_ops.stack(_hess, name=name))
  return hessians
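For a one-dimensional x the routine above assembles the Hessian row by row: it unpacks the gradient of sum(ys) into scalars and differentiates each scalar again with respect to x. As a small analytic sanity check of what that produces, for a quadratic form f(x) = x^T A x the gradient is (A + A^T) x and the Hessian is the constant matrix A + A^T:

import numpy as np

A = np.array([[2.0, 1.0],
              [0.0, 3.0]])
x = np.array([1.0, -2.0])

grad = (A + A.T) @ x   # one first derivative per element of x
hessian = A + A.T      # row i is the gradient of grad[i] with respect to x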
Example #42
def _PackGrad(op, grad):
  """Gradient for pack op."""
  return array_ops.unpack(grad, num=op.get_attr("N"))
Example #43
def legacy_fully_connected(x,
                           num_output_units,
                           activation_fn=None,
                           weight_init=initializers.xavier_initializer(),
                           bias_init=init_ops.zeros_initializer,
                           name=None,
                           weight_collections=(ops.GraphKeys.WEIGHTS,),
                           bias_collections=(ops.GraphKeys.BIASES,),
                           output_collections=(ops.GraphKeys.ACTIVATIONS,),
                           trainable=True,
                           weight_regularizer=None,
                           bias_regularizer=None):
  # pylint: disable=anomalous-backslash-in-string
  r"""Adds the parameters for a fully connected layer and returns the output.
  A fully connected layer is generally defined as a matrix multiply:
  `y = f(w * x + b)` where `f` is given by `activation_fn`. If
  `activation_fn` is `None`, the result of `y = w * x + b` is
  returned.
  If `x` has shape [\\\(\\text{dim}_0, \\text{dim}_1, ..., \\text{dim}_n\\\)]
  with more than 2 dimensions (\\\(n > 1\\\)), then we repeat the matrix
  multiply along the first dimensions. The result r is a tensor of shape
  [\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`],
  where \\\( r_{i_0, ..., i_{n-1}, k} =
  \\sum_{0 \\leq j < \\text{dim}_n} x_{i_0, ... i_{n-1}, j} \cdot w_{j, k}\\\).
  This is accomplished by reshaping `x` to 2-D
  [\\\(\\text{dim}_0 \\cdot ... \\cdot \\text{dim}_{n-1}, \\text{dim}_n\\\)]
  before the matrix multiply and afterwards reshaping it to
  [\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`].
  This op creates `w` and optionally `b`. Bias (`b`) can be disabled by setting
  `bias_init` to `None`.
  The variable creation is compatible with `tf.variable_scope` and so can be
  reused with `tf.variable_scope` or `tf.make_template`.
  Most of the details of variable creation can be controlled by specifying the
  initializers (`weight_init` and `bias_init`) and in which collections to place
  the created variables (`weight_collections` and `bias_collections`; note that
  the variables are always added to the `VARIABLES` collection). The output of
  the layer can be placed in custom collections using `output_collections`.
  The collections arguments default to `WEIGHTS`, `BIASES` and `ACTIVATIONS`,
  respectively.
  A per layer regularization can be specified by setting `weight_regularizer`
  and `bias_regularizer`, which are applied to the weights and biases
  respectively, and whose output is added to the `REGULARIZATION_LOSSES`
  collection.
  Args:
    x: The input `Tensor`.
    num_output_units: The size of the output.
    activation_fn: A function that requires a single Tensor that is applied as a
      non-linearity. If None is used, do not apply any activation.
    weight_init: An optional weight initialization, defaults to
      `xavier_initializer`.
    bias_init: An initializer for the bias, defaults to 0. Set to `None` in
      order to disable bias.
    name: The name for this operation is used to name operations and to find
      variables. If specified it must be unique for this scope, otherwise a
      unique name starting with "fully_connected" will be created.  See
      `tf.variable_op_scope` for details.
    weight_collections: List of graph collections to which weights are added.
    bias_collections: List of graph collections to which biases are added.
    output_collections: List of graph collections to which outputs are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    weight_regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`. Used for weights.
    bias_regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`. Used for biases.
  Returns:
    The output of the fully connected layer.
  Raises:
    ValueError: if x has rank less than 2 or if its last dimension is not set.
  """
  with variable_scope.variable_op_scope([x], name, 'fully_connected'):
    dims = x.get_shape().dims
    if dims is None:
      raise ValueError('dims of x must be known but is None')
    if len(dims) < 2:
      raise ValueError('rank of x must be at least 2 not: %d' % len(dims))
    num_input_units = dims[-1].value
    if num_input_units is None:
      raise ValueError('last dimension of x must be known but is None')
    dtype = x.dtype.base_dtype

    weight_collections = set(list(weight_collections or []) +
                             [ops.GraphKeys.VARIABLES])
    w = variable_scope.get_variable('weights',
                                    shape=[num_input_units, num_output_units],
                                    dtype=dtype,
                                    initializer=weight_init,
                                    collections=weight_collections,
                                    regularizer=weight_regularizer,
                                    trainable=trainable)
    x_2_dim = x if len(dims) <= 2 else array_ops.reshape(x,
                                                         [-1, num_input_units])
    y = standard_ops.matmul(x_2_dim, w)

    if bias_init is not None:
      bias_collections = set(list(bias_collections or []) +
                             [ops.GraphKeys.VARIABLES])
      b = variable_scope.get_variable('bias',
                                      shape=[num_output_units],
                                      dtype=dtype,
                                      initializer=bias_init,
                                      collections=bias_collections,
                                      regularizer=bias_regularizer,
                                      trainable=trainable)

      y = nn.bias_add(y, b)

    if len(dims) > 2:
      out_shape = array_ops.unpack(array_ops.shape(x))
      out_shape[-1] = num_output_units

      y = array_ops.reshape(y, array_ops.pack(out_shape))

      static_shape = x.get_shape().as_list()
      static_shape[-1] = num_output_units
      y.set_shape(static_shape)

    return _apply_activation(y, activation_fn, output_collections)
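The docstring above describes how inputs with more than two dimensions are handled: collapse all leading dimensions, perform a single 2-D matrix multiply, then restore the leading dimensions with the new output size. A NumPy sketch of that reshape trick (the shapes are arbitrary):

import numpy as np

x = np.random.randn(5, 7, 16)   # [dim_0, dim_1, num_input_units]
w = np.random.randn(16, 32)     # [num_input_units, num_output_units]

y = (x.reshape(-1, 16) @ w).reshape(5, 7, 32)
assert np.allclose(y, np.einsum('abj,jk->abk', x, w))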
Example #44
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer,
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None):
  """Adds a fully connected layer.
  `fully_connected` creates a variable called `weights`, representing a fully
  connected weight matrix, which is multiplied by the `inputs` to produce a
  `Tensor` of hidden units. If a `normalizer_fn` is provided (such as
  `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
  None and a `biases_initializer` is provided then a `biases` variable would be
  created and added the hidden units. Finally, if `activation_fn` is not `None`,
  it is applied to the hidden units as well.
  Note: if `inputs` has a rank greater than 2, then `inputs` is flattened
  prior to the initial matrix multiply by `weights`.
  Args:
    inputs: A tensor with at least rank 2 and a known value for the last
      dimension, i.e. `[batch_size, depth]`, `[None, None, None, channels]`.
    num_outputs: Integer, the number of output units in the layer.
    activation_fn: activation function.
    normalizer_fn: normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are neither created nor added.
    normalizer_params: normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables or
      a dictionary containing a different list of collections per variable.
    outputs_collections: collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_op_scope.
  Returns:
     the tensor variable representing the result of the series of operations.
  Raises:
    ValueError: if x has rank less than 2 or if its last dimension is not set.
  """
  if not isinstance(num_outputs, int):
    raise ValueError('num_outputs should be integer, got %s.' % (num_outputs,))
  with variable_scope.variable_op_scope([inputs],
                                        scope,
                                        'fully_connected',
                                        reuse=reuse) as sc:
    dtype = inputs.dtype.base_dtype
    num_input_units = utils.last_dimension(inputs.get_shape(), min_rank=2)

    static_shape = inputs.get_shape().as_list()
    static_shape[-1] = num_outputs

    out_shape = array_ops.unpack(array_ops.shape(inputs))
    out_shape[-1] = num_outputs

    weights_shape = [num_input_units, num_outputs]
    weights_collections = utils.get_variable_collections(
        variables_collections, 'weights')
    weights = variables.model_variable('weights',
                                       shape=weights_shape,
                                       dtype=dtype,
                                       initializer=weights_initializer,
                                       regularizer=weights_regularizer,
                                       collections=weights_collections,
                                       trainable=trainable)
    if len(static_shape) > 2:
      # Reshape inputs
      inputs = array_ops.reshape(inputs, [-1, num_input_units])
    outputs = standard_ops.matmul(inputs, weights)
    if normalizer_fn:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)
    else:
      if biases_initializer is not None:
        biases_collections = utils.get_variable_collections(
            variables_collections, 'biases')
        biases = variables.model_variable('biases',
                                          shape=[num_outputs,],
                                          dtype=dtype,
                                          initializer=biases_initializer,
                                          regularizer=biases_regularizer,
                                          collections=biases_collections,
                                          trainable=trainable)
        outputs = nn.bias_add(outputs, biases)
    if len(static_shape) > 2:
      # Reshape back outputs
      outputs = array_ops.reshape(outputs, array_ops.pack(out_shape))
      outputs.set_shape(static_shape)
    if activation_fn:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
Example #45
    def __init__(self):

        self.max_len = 300
        self.seg_len = 5
        self.batch_size = 100
        self.number_of_layers = 1
        self.dim = 59 + 2
        self.num_epoch = 10000
        self.num_hidden_1 = 2
        self.num_hidden_2 = 2

        self.input = tf.placeholder(tf.float32, [self.max_len, None, self.dim])
        self.target = tf.placeholder(tf.float32, [None, 2])
        self.keep_prob = tf.placeholder(tf.float32)
        input = array_ops.unpack(self.input)
        batch_size = tf.shape(self.input)[1]

        def _rnn(cell, inputs):
            with tf.variable_scope("GRU_RNN") as scope:
                state = cell.zero_state(batch_size, tf.float32)
                for time, input_ in enumerate(inputs):
                    if time > 0: scope.reuse_variables()
                    output, state = cell(input_, state)
                return state

        def h_rnn(input):
            i=0
            num_layer=0
            layer=[input]
            while True:
                print(num_layer)
                layer.append([])
                _input=layer[num_layer]
                length = len(_input)
                with tf.variable_scope("RNN_"+str(num_layer)) as scope:
                    cell=rnn_cell.BasicLSTMCell(self.dim)
                    cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=self.keep_prob)
                    stacked_cell = rnn_cell.MultiRNNCell([cell] * self.number_of_layers)
                    i=0
                    while i<length:
                        state = _rnn(stacked_cell, _input[i:min(i+self.seg_len,length)])
                        layer[num_layer+1].append(state)
                        scope.reuse_variables()
                        i+=self.seg_len
                num_layer+=1
                if length<=self.seg_len:
                    break

            return layer[num_layer][0]



        state = h_rnn(input)

        with tf.variable_scope("NN", initializer=tf.random_uniform_initializer()):
            self.W_1 = tf.get_variable("W_1", [state.get_shape()[1],self.num_hidden_1])
            self.b_1 = tf.get_variable("b_1", [self.num_hidden_1])
            # self.W_2 = tf.get_variable("W_2", [self.num_hidden_1,self.num_hidden_2])
            # self.b_2 = tf.get_variable("b_2", [self.num_hidden_2])

        y_1 = tf.matmul(state, self.W_1)+self.b_1
        # y_1 = tf.nn.sigmoid(tf.matmul(state, self.W_1)+self.b_1)
        # y_2 = tf.matmul(y_1, self.W_2)+self.b_2
        self.y_pred = tf.nn.softmax(y_1)
        self.cross_entropy = -tf.reduce_mean(self.target*tf.log(self.y_pred))

        correct_prediction = tf.equal(tf.argmax(self.target, 1), tf.argmax(self.y_pred, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        accuracy_summary = tf.scalar_summary("accuracy", self.accuracy)
        ce_summ = tf.scalar_summary("cross entropy", self.cross_entropy)
        self.merged = tf.merge_all_summaries()


        # Optimizer.
        global_step = tf.Variable(0)
        # optimizer = tf.train.GradientDescentOptimizer(0.1)
        optimizer = tf.train.AdamOptimizer(0.01)
        gradients, v = zip(*optimizer.compute_gradients(self.cross_entropy))
        gradients, _ = tf.clip_by_global_norm(gradients, 10)
        self.optimizer= optimizer.apply_gradients(zip(gradients, v), global_step=global_step)
Example #46
  def __call__(self,
               inputs,
               initial_state=None,
               dtype=None,
               sequence_length=None,
               scope=None):
    """Run this LSTM on inputs, starting from the given state.

    Args:
      inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`
        or a list of `time_len` tensors of shape `[batch_size, input_size]`.
      initial_state: a tuple `(initial_cell_state, initial_output)` with tensors
        of shape `[batch_size, self._num_units]`. If this is not provided, the
        cell is expected to create a zero initial state of type `dtype`.
      dtype: The data type for the initial state and expected output. Required
        if `initial_state` is not provided or RNN state has a heterogeneous
        dtype.
      sequence_length: Specifies the length of each sequence in inputs. An
        `int32` or `int64` vector (tensor) size `[batch_size]`, values in `[0,
        time_len).`
        Defaults to `time_len` for each element.
      scope: `VariableScope` for the created subgraph; defaults to class name.

    Returns:
      A pair containing:

      - Output: A `3-D` tensor of shape `[time_len, batch_size, output_size]`
        or a list of time_len tensors of shape `[batch_size, output_size]`,
        to match the type of the `inputs`.
      - Final state: a tuple `(cell_state, output)` matching `initial_state`.

    Raises:
      ValueError: in case of shape mismatches
    """
    with vs.variable_scope(scope or type(self).__name__):
      is_list = isinstance(inputs, list)
      if is_list:
        inputs = array_ops.pack(inputs)
      inputs_shape = inputs.get_shape().with_rank(3)
      if not inputs_shape[2]:
        raise ValueError("Expecting inputs_shape[2] to be set: %s" %
                         inputs_shape)
      batch_size = inputs_shape[1].value
      if batch_size is None:
        batch_size = array_ops.shape(inputs)[1]
      time_len = inputs_shape[0].value
      if time_len is None:
        time_len = array_ops.shape(inputs)[0]

      # Provide default values for initial_state and dtype
      if initial_state is None:
        if dtype is None:
          raise ValueError(
              "Either initial_state or dtype needs to be specified")
        z = array_ops.zeros(
            array_ops.pack([batch_size, self.num_units]), dtype=dtype)
        initial_state = z, z
      else:
        if len(initial_state) != 2:
          raise ValueError(
              "Expecting initial_state to be a tuple with length 2 or None")
        if dtype is None:
          dtype = initial_state[0].dtype

      # create the actual cell
      if sequence_length is not None:
        sequence_length = ops.convert_to_tensor(sequence_length)
      initial_cell_state, initial_output = initial_state  # pylint: disable=unpacking-non-sequence
      cell_states, outputs = self._call_cell(inputs, initial_cell_state,
                                             initial_output, dtype,
                                             sequence_length)

      if sequence_length is not None:
        # Mask out the part beyond sequence_length
        mask = array_ops.transpose(
            array_ops.sequence_mask(
                sequence_length, time_len, dtype=dtype), [1, 0])
        mask = array_ops.tile(
            array_ops.expand_dims(mask, [-1]), [1, 1, self.num_units])
        outputs *= mask
        # Prepend initial states to cell_states and outputs for indexing to work
        # correctly, since we want to access the last valid state at
        # sequence_length - 1, which can even be -1, corresponding to the
        # initial state.
        mod_cell_states = array_ops.concat(
            0, [array_ops.expand_dims(initial_cell_state, [0]), cell_states])
        mod_outputs = array_ops.concat(
            0, [array_ops.expand_dims(initial_output, [0]), outputs])
        final_cell_state = self._gather_states(mod_cell_states, sequence_length,
                                               batch_size)
        final_output = self._gather_states(mod_outputs, sequence_length,
                                           batch_size)
      else:
        # No sequence_lengths used: final state is the last state
        final_cell_state = cell_states[-1]
        final_output = outputs[-1]

      if is_list:
        # Input was a list, so return a list
        outputs = array_ops.unpack(outputs)

      return outputs, (final_cell_state, final_output)
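When sequence_length is given, the code above builds a (time, batch) mask, broadcasts it over the unit dimension to zero out outputs past each sequence's end, and then gathers the state at index sequence_length - 1. A NumPy sketch of that masking and gathering step, with illustrative shapes:

import numpy as np

time_len, batch_size, num_units = 4, 2, 3
outputs = np.ones((time_len, batch_size, num_units))
sequence_length = np.array([2, 4])

# sequence_mask: mask[t, b] is True while t < sequence_length[b]
mask = np.arange(time_len)[:, None] < sequence_length[None, :]
outputs = outputs * mask[:, :, None]           # broadcast the mask over the units

# The last valid output per batch element sits at index sequence_length - 1.
final_output = outputs[sequence_length - 1, np.arange(batch_size)]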