コード例 #1
0
 def testBasicLSTMCell(self):
   """Smoke-tests a stacked pair of BasicLSTMCells in float16 and float32.

   For each dtype a fresh graph is built. The two-layer MultiRNNCell is
   checked for dtype inference and variable naming, and its outputs are
   compared against previously recorded values. A second scope then runs a
   single BasicLSTMCell whose input width differs from its unit count.
   """
   for dtype in [dtypes.float16, dtypes.float32]:
     np_dtype = dtype.as_numpy_dtype
     with self.test_session(graph=ops.Graph()) as sess:
       root_init = init_ops.constant_initializer(0.5)
       with variable_scope.variable_scope("root", initializer=root_init):
         inputs = array_ops.zeros([1, 2], dtype=dtype)
         state = array_ops.zeros([1, 8], dtype=dtype)
         stacked_cells = [
             rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
             for _ in range(2)
         ]
         cell = rnn_cell_impl.MultiRNNCell(stacked_cells, state_is_tuple=False)
         # Before the first call the cell reports no dtype.
         self.assertEqual(cell.dtype, None)
         output, new_state = cell(inputs, state)
         # After the call the dtype matches the input's.
         self.assertEqual(cell.dtype, dtype.name)

         # Per layer: first the weights variable, then the bias variable.
         expected_variable_names = [
             "root/multi_rnn_cell/cell_%d/basic_lstm_cell/%s:0" % (layer, suffix)
             for layer in range(2)
             for suffix in (rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
                            rnn_cell_impl._BIAS_VARIABLE_NAME)
         ]
         trainable_names = [v.name for v in cell.trainable_variables]
         self.assertEqual(expected_variable_names, trainable_names)
         self.assertFalse(cell.non_trainable_variables)

         sess.run([variables_lib.global_variables_initializer()])
         feed = {
             inputs.name: np.array([[1., 1.]]),
             state.name: 0.1 * np.ones([1, 8])
         }
         res = sess.run([output, new_state], feed)
         self.assertEqual(len(res), 2)
         global_names = [v.name for v in variables_lib.global_variables()]
         self.assertEqual(expected_variable_names, global_names)

         # The expected numbers were recorded, not derived: this is only a
         # smoke test.
         expected_out = np.array([[0.240, 0.240]], dtype=np_dtype)
         self.assertAllClose(res[0], expected_out, 1e-2)
         expected_mem = np.array(
             [[0.689, 0.689, 0.448, 0.448, 0.398, 0.398, 0.240, 0.240]],
             dtype=np_dtype)
         self.assertAllClose(res[1], expected_mem, 1e-2)

       other_init = init_ops.constant_initializer(0.5)
       with variable_scope.variable_scope("other", initializer=other_init):
         # BasicLSTMCell with input_size != num_units.
         inputs = array_ops.zeros([1, 3], dtype=dtype)
         state = array_ops.zeros([1, 4], dtype=dtype)
         single_cell = rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
         output, new_state = single_cell(inputs, state)
         sess.run([variables_lib.global_variables_initializer()])
         feed = {
             inputs.name: np.array([[1., 1., 1.]], dtype=np_dtype),
             state.name: 0.1 * np.ones([1, 4], dtype=np_dtype)
         }
         res = sess.run([output, new_state], feed)
         self.assertEqual(len(res), 2)
コード例 #2
0
ファイル: topn.py プロジェクト: AliMiraftab/tensorflow
 def __init__(self, max_id, shortlist_size=100, name_prefix=''):
   """Creates a new TopN.

   Args:
     max_id: Length of the `id_to_score` variable.
     shortlist_size: Number of shortlist slots; the two shortlist variables
       each hold one extra leading element.
     name_prefix: String prepended to the name of every variable created.
   """
   self.shortlist_size = shortlist_size
   # The shortlist variables carry one extra slot at index 0.
   shortlist_len = shortlist_size + 1

   # Scores for every tracked id, initialised to the float32 minimum.
   self.id_to_score = variable_scope.get_variable(
       name=name_prefix + 'id_to_score',
       dtype=dtypes.float32,
       shape=[max_id],
       initializer=init_ops.constant_initializer(dtypes.float32.min))

   # Invariants jointly maintained by sl_ids and sl_scores:
   # 1) If sl_ids[i] != -1, then
   #    id_to_score[sl_ids[i]] = sl_scores[i] >= sl_scores[0]
   # 2) sl_ids[0] is the number of i > 0 for which sl_ids[i] != -1.
   # 3) If id_to_score[i] > sl_scores[0], then
   #    sl_ids[j] = i for some j.
   # 4) If sl_ids[i] == -1, then sl_scores[i] = tf.float32.min.
   self.sl_ids = variable_scope.get_variable(
       name=name_prefix + 'shortlist_ids',
       dtype=dtypes.int64,
       shape=[shortlist_len],
       initializer=init_ops.constant_initializer(-1))

   # Setting sl_ids[0] = 0 here would be ideal, but passing that control
   # dependency on to the other Ops is hard. Instead insert, remove and
   # get_best all treat sl_ids[0] == -1 as "the shortlist size is 0".
   self.sl_scores = variable_scope.get_variable(
       name=name_prefix + 'shortlist_scores',
       dtype=dtypes.float32,
       shape=[shortlist_len],
       initializer=init_ops.constant_initializer(dtypes.float32.min))

   # TopN tracks its own internal data dependencies so callers need not.
   self.last_ops = []
コード例 #3
0
  def testTraining(self):
    """Runs one gradient-descent step on a tiny linear model and checks it."""
    initial_w = np.array([[1, 2], [3, 4], [5, 6], [7, 8]], dtype=np.float32)
    initial_b = np.array([2, 3], dtype=np.float32)
    expected_w = np.array(
        [[0.3, 1.3], [2.7, 3.7], [4.5, 5.5], [6.1, 7.1]], dtype=np.float32)
    expected_b = np.array([1.9, 2.9], dtype=np.float32)

    with self.test_session() as session:
      with self.test_scope():
        with variable_scope.variable_scope("ascope", use_resource=True):
          w = variable_scope.get_variable(
              "w",
              shape=[4, 2],
              dtype=dtypes.float32,
              initializer=init_ops.constant_initializer(initial_w))
          b = variable_scope.get_variable(
              "b",
              shape=[2],
              dtype=dtypes.float32,
              initializer=init_ops.constant_initializer(initial_b))

          # loss = sum(x @ w + b), minimised with learning rate 0.1.
          x = array_ops.placeholder(dtypes.float32, shape=[1, 4])
          y = math_ops.matmul(x, w) + b
          loss = math_ops.reduce_sum(y)
          train = GradientDescentOptimizer(0.1).minimize(loss)

      session.run(variables.global_variables_initializer())
      batch = np.array([[7, 3, 5, 9]], dtype=np.float32)
      session.run(train, {x: batch})
      w_after, b_after = session.run([w, b])
      self.assertAllClose(expected_w, w_after, rtol=1e-4)
      self.assertAllClose(expected_b, b_after, rtol=1e-4)
コード例 #4
0
ファイル: rnn.py プロジェクト: imito/odin
def _init_input2hidden(ops, rnn_mode, input_mode, W_init, input_dims, hidden_dims):
  # N represent the number of gates
  if 'rnn' in rnn_mode:
    N = 1
    msg = '(W_hid)'
  elif rnn_mode == 'gru':
    N = 3
    msg = '(W_input_to_updategate, W_input_to_resetgate, W_input_to_hiddenupdate)'
  elif rnn_mode == 'lstm':
    N = 4
    msg = '(W_input_to_inputgate, W_input_to_forgetgate, W_input_to_hidden, W_input_to_outputgate)'
  # ====== check input ====== #
  if input_mode != 'skip':
    ops.get_variable_nnop(initializer=W_init, shape=(input_dims, hidden_dims * N),
                     name='W_in', roles=Weight)
    if input_mode == 'norm':
      ops.get_variable_nnop(initializer=init_ops.constant_initializer(0.), shape=(hidden_dims * N,),
                            name='beta', roles=BatchNormShiftParameter)
      ops.get_variable_nnop(initializer=init_ops.constant_initializer(1.), shape=(hidden_dims * N,),
                            name='gamma', roles=BatchNormScaleParameter)
      ops.get_variable_nnop(initializer=init_ops.constant_initializer(0.), shape=(hidden_dims * N,),
                            name='mean', roles=BatchNormPopulationMean)
      ops.get_variable_nnop(initializer=init_ops.constant_initializer(1.), shape=(hidden_dims * N,),
                            name='inv_std', roles=BatchNormPopulationInvStd)
  # skip input mode
  elif input_dims != hidden_dims and \
  input_dims != hidden_dims * N: # 3 gates + 1 hid_update
    raise Exception('Skip input mode, input trailing_dimension=%d '
                    '(the final dim) must equal to the number of hidden '
                    'units (tied input connection), or %d-th the number '
                    'of hidden units = %d, which include: ' + msg %
                    (input_dims, N, hidden_dims * N))
コード例 #5
0
  def test_works_correctly_side_vars(self):
    """Checks custom_gradient when f and g also depend on a side variable."""
    with self.test_session() as sess:
      x_init = np.float32(2.1)  # Extra tenth added to force imprecision.
      y_init = np.float32(3.1)
      x = variable_scope.get_variable(
          name="x",
          shape=[],
          dtype=dtypes.float32,
          initializer=init_ops.constant_initializer(x_init))
      y = variable_scope.get_variable(
          name="y",
          shape=[],
          dtype=dtypes.float32,
          initializer=init_ops.constant_initializer(y_init))
      sess.run([variables.global_variables_initializer()])

      def f(x):
        return x * y

      def g(z):
        # Deliberately ignores its argument and reads the outer `x`.
        return math_ops.square(x) * y

      fx = cg.custom_gradient(f(x), g(x), x)
      grads = gradients_impl.gradients(fx, variables.trainable_variables())
      x_val, fx_val, grad_x_val = sess.run([x, fx, grads[0]])
      # The gradient entry for y is inspected as a graph object, not evaluated.
      grad_y = grads[1]

      self.assertEqual(x_val * y_init, fx_val)
      self.assertEqual(np.square(x_val) * y_init, grad_x_val)
      self.assertEqual(None, grad_y)
コード例 #6
0
  def _TestOptimizerSupportHelper(self, opt):
    """Builds a CudnnLSTM model and runs one training step with `opt`.

    Args:
      opt: Value handed to `self._GetOptimizer` to obtain the optimizer.
    """
    num_layers = 4
    num_units = 2
    batch_size = 8
    direction = CUDNN_RNN_UNIDIRECTION
    dir_count = 1
    input_shape = [num_layers * dir_count, batch_size, num_units]

    with ops.Graph().as_default() as g:
      zero_kernel = init_ops.constant_initializer(0.)
      zero_bias = init_ops.constant_initializer(0.)
      inputs = random_ops.random_uniform(input_shape, dtype=dtypes.float32)

      lstm = cudnn_rnn.CudnnLSTM(
          num_layers,
          num_units,
          direction=direction,
          kernel_initializer=zero_kernel,
          bias_initializer=zero_bias,
          name="awesome_lstm")
      outputs, _ = lstm(inputs)
      loss = math_ops.reduce_sum(outputs)
      train_op = self._GetOptimizer(opt).minimize(loss)

    # Running initialisation and the train op without error is the whole test.
    with self.test_session(use_gpu=True, graph=g) as sess:
      sess.run(variables.global_variables_initializer())
      sess.run(train_op)
コード例 #7
0
 def testGRUCell(self):
   """Smoke-tests GRUCell for matching and non-matching input widths."""
   with self.test_session() as sess:
     root_init = init_ops.constant_initializer(0.5)
     with variable_scope.variable_scope("root", initializer=root_init):
       inputs = array_ops.zeros([1, 2])
       state = array_ops.zeros([1, 2])
       output, _ = rnn_cell_impl.GRUCell(2)(inputs, state)
       sess.run([variables_lib.global_variables_initializer()])
       feed = {
           inputs.name: np.array([[1., 1.]]),
           state.name: np.array([[0.1, 0.1]])
       }
       res = sess.run([output], feed)
       # Smoke test against recorded values.
       self.assertAllClose(res[0], [[0.175991, 0.175991]])

     other_init = init_ops.constant_initializer(0.5)
     with variable_scope.variable_scope("other", initializer=other_init):
       # GRUCell with input_size != num_units.
       inputs = array_ops.zeros([1, 3])
       state = array_ops.zeros([1, 2])
       output, _ = rnn_cell_impl.GRUCell(2)(inputs, state)
       sess.run([variables_lib.global_variables_initializer()])
       feed = {
           inputs.name: np.array([[1., 1., 1.]]),
           state.name: np.array([[0.1, 0.1]])
       }
       res = sess.run([output], feed)
       # Smoke test against recorded values.
       self.assertAllClose(res[0], [[0.156736, 0.156736]])
コード例 #8
0
 def testInvalidGlobalStep(self):
   """Checks that optimize_loss rejects three kinds of bad `global_step`."""
   with ops.Graph().as_default() as g, self.test_session(graph=g):
     x = array_ops.placeholder(dtypes.float32, [])
     var = variable_scope.get_variable(
         "test", [], initializer=init_ops.constant_initializer(10))
     loss = math_ops.abs(var * x)

     # A constant tensor as global_step is expected to raise AttributeError.
     with self.assertRaises(AttributeError):
       tensor_step = constant_op.constant(43, dtype=dtypes.int64)
       optimizers_lib.optimize_loss(
           loss,
           global_step=tensor_step,
           learning_rate=0.1,
           optimizer="SGD")

     # A float64 scalar variable is expected to raise TypeError.
     with self.assertRaises(TypeError):
       float_init = init_ops.constant_initializer(0.0, dtype=dtypes.float64)
       float_step = variable_scope.get_variable(
           "global_step", [],
           trainable=False,
           dtype=dtypes.float64,
           initializer=float_init)
       optimizers_lib.optimize_loss(
           loss,
           global_step=float_step,
           learning_rate=0.1,
           optimizer="SGD")

     # A shape-[1] int64 variable is expected to raise ValueError. The
     # variable is created inside the context, as in the calls above.
     with self.assertRaises(ValueError):
       vector_init = init_ops.constant_initializer([0], dtype=dtypes.int64)
       vector_step = variable_scope.get_variable(
           "global_step", [1],
           trainable=False,
           dtype=dtypes.int64,
           initializer=vector_init)
       optimizers_lib.optimize_loss(
           loss,
           global_step=vector_step,
           learning_rate=0.1,
           optimizer="SGD")
コード例 #9
0
  def test_works_correctly_vector_of_vars(self):
    """Checks custom_gradient when the argument is a stack of two variables."""
    with self.test_session() as sess:
      x = variable_scope.get_variable(
          name="x",
          shape=[],
          dtype=dtypes.float32,
          initializer=init_ops.constant_initializer(2))
      y = variable_scope.get_variable(
          name="y",
          shape=[],
          dtype=dtypes.float32,
          initializer=init_ops.constant_initializer(3))
      sess.run([variables.global_variables_initializer()])

      # f and g are applied both to the stacked tensor and, below, to its
      # evaluated value.
      def f(z):
        return z[0] * z[1]

      def g(z):
        return z[0]**2 * z[1]**2 / 2

      z = array_ops.stack([x, y])
      fz = cg.custom_gradient(f(z), g(z), z, axis=0)
      grads = gradients_impl.gradients(fz, variables.trainable_variables())
      z_val, fz_val, grad_x_val, grad_y_val = sess.run(
          [z, fz, grads[0], grads[1]])

      self.assertEqual(f(z_val), fz_val)
      self.assertEqual(g(z_val), grad_x_val)
      self.assertEqual(g(z_val), grad_y_val)
コード例 #10
0
  def test_works_correctly_fx_gx_manually_stopped(self):
    """Checks custom_gradient with `fx_gx_manually_stopped=True`."""
    with self.test_session() as sess:
      x_init = np.float32(2.1)  # Extra tenth added to force imprecision.
      y_init = np.float32(3.1)
      x = variable_scope.get_variable(
          name="x",
          shape=[],
          dtype=dtypes.float32,
          initializer=init_ops.constant_initializer(x_init))
      y = variable_scope.get_variable(
          name="y",
          shape=[],
          dtype=dtypes.float32,
          initializer=init_ops.constant_initializer(y_init))
      sess.run([variables.global_variables_initializer()])

      stop = array_ops.stop_gradient  # Short alias for readability.

      # The `x` portion of `f` has to be stopped. The argument handed to
      # `custom_gradient` has the complement stopped instead, i.e. the `y`
      # part.
      def f(x):
        return stop(x) * y

      def g(x):
        return stop(math_ops.square(x)) * y

      fx = cg.custom_gradient(
          f(x), g(x), x + stop(y), fx_gx_manually_stopped=True)

      grads = gradients_impl.gradients(fx, variables.trainable_variables())
      x_val, fx_val, grad_x_val, grad_y_val = sess.run(
          [x, fx, grads[0], grads[1]])

      self.assertEqual(x_val * y_init, fx_val)
      self.assertEqual(np.square(x_val) * y_init, grad_x_val)
      self.assertEqual(x_val, grad_y_val)
コード例 #11
0
ファイル: gru_ops.py プロジェクト: wchan/tensorflow
def gru(cell_size, sequence_len, xs, name=None, scope=None):
  r"""Creates the GRU parameters and calls the generated `_gru` op.

  Args:
    cell_size: An `int`.
    sequence_len: A `Tensor` of type `int64`.
    xs: A list of at least 1 `Tensor` objects of type `float32`.
    name: A name for the operation (optional).
    scope: Variable scope for the parameters (optional); defaults to "Gru".

  Returns:
    A tuple of `Tensor` objects (rs, zs, rhs, gs, hs).
    rs: A list with the same number of `Tensor` objects as `xs` of `Tensor` objects of type `float32`.
    zs: A list with the same number of `Tensor` objects as `xs` of `Tensor` objects of type `float32`.
    rhs: A list with the same number of `Tensor` objects as `xs` of `Tensor` objects of type `float32`.
    gs: A list with the same number of `Tensor` objects as `xs` of `Tensor` objects of type `float32`.
    hs: A list with the same number of `Tensor` objects as `xs` of `Tensor` objects of type `float32`.
  """
  with vs.variable_scope(scope or "Gru"):
    input_size = xs[0].get_shape()[1].value

    # (variable name, row count) in creation order; every weight has
    # `cell_size` columns.
    weight_rows = (
        ("wxr", input_size), ("whr", cell_size),
        ("wxz", input_size), ("whz", cell_size),
        ("wxh", input_size), ("whh", cell_size),
    )
    # (variable name, constant initial value) in creation order.
    bias_fills = (("br", 1.0), ("bz", 1.0), ("bh", 0.0))

    params = {}
    for var_name, rows in weight_rows:
      params[var_name] = vs.get_variable(var_name, [rows, cell_size])
    for var_name, fill in bias_fills:
      params[var_name] = vs.get_variable(
          var_name, [cell_size],
          initializer=init_ops.constant_initializer(fill))

    return gen_gru_ops._gru(
        cell_size=cell_size, sequence_len=sequence_len, xs=xs, name=name,
        **params)
コード例 #12
0
ファイル: gru_ops.py プロジェクト: wchan/tensorflow
def gru_cell(cell_size, sequence_len, h_prev, x, name=None, scope=None, time_idx=None):
  r"""Creates the GRU cell parameters and calls the generated `_gru_cell` op.

  Args:
    cell_size: An `int`.
    sequence_len: A `Tensor` of type `int64`.
    h_prev: A `Tensor` of type `float32`.
    x: A `Tensor` of type `float32`.
    name: A name for the operation (optional).
    scope: Variable scope for the parameters (optional); defaults to
      "GruCell".
    time_idx: Forwarded unchanged to the op (optional).

  Returns:
    A tuple of `Tensor` objects (r, z, rh, g, h).
    r: A `Tensor` of type `float32`.
    z: A `Tensor` of type `float32`.
    rh: A `Tensor` of type `float32`.
    g: A `Tensor` of type `float32`.
    h: A `Tensor` of type `float32`.
  """
  with vs.variable_scope(scope or "GruCell"):
    input_size = x.get_shape()[1].value

    # (variable name, row count) in creation order; every weight has
    # `cell_size` columns.
    weight_rows = (
        ("wxr", input_size), ("whr", cell_size),
        ("wxz", input_size), ("whz", cell_size),
        ("wxh", input_size), ("whh", cell_size),
    )
    # (variable name, constant initial value) in creation order.
    bias_fills = (("br", 1.0), ("bz", 1.0), ("bh", 0.0))

    params = {}
    for var_name, rows in weight_rows:
      params[var_name] = vs.get_variable(var_name, [rows, cell_size])
    for var_name, fill in bias_fills:
      params[var_name] = vs.get_variable(
          var_name, [cell_size],
          initializer=init_ops.constant_initializer(fill))

    return gen_gru_ops._gru_cell(
        cell_size=cell_size, sequence_len=sequence_len, h_prev=h_prev, x=x,
        name=name, time_idx=time_idx, **params)
コード例 #13
0
 def testIndyGRUCell(self):
   """Smoke-tests IndyGRUCell for matching and non-matching input widths."""
   with self.test_session() as sess:
     root_init = init_ops.constant_initializer(0.5)
     with variable_scope.variable_scope("root", initializer=root_init):
       inputs = array_ops.zeros([1, 2])
       state = array_ops.zeros([1, 2])
       output, _ = contrib_rnn_cell.IndyGRUCell(2)(inputs, state)
       sess.run([variables_lib.global_variables_initializer()])
       feed = {
           inputs.name: np.array([[1., 1.]]),
           state.name: np.array([[0.1, 0.1]])
       }
       res = sess.run([output], feed)
       # Smoke test against recorded values.
       self.assertAllClose(res[0], [[0.185265, 0.17704]])

     other_init = init_ops.constant_initializer(0.5)
     with variable_scope.variable_scope("other", initializer=other_init):
       # IndyGRUCell with input_size != num_units.
       inputs = array_ops.zeros([1, 3])
       state = array_ops.zeros([1, 2])
       output, _ = contrib_rnn_cell.IndyGRUCell(2)(inputs, state)
       sess.run([variables_lib.global_variables_initializer()])
       feed = {
           inputs.name: np.array([[1., 1., 1.]]),
           state.name: np.array([[0.1, 0.1]])
       }
       res = sess.run([output], feed)
       # Smoke test against recorded values.
       self.assertAllClose(res[0], [[0.155127, 0.157328]])
コード例 #14
0
ファイル: gru_ops.py プロジェクト: Crazyonxh/tensorflow
  def __call__(self, x, h_prev, scope=None):
    """Runs one GRU step through the `gru_block_cell` op.

    Args:
      x: Input tensor; `with_rank(2)` is applied to its static shape.
      h_prev: Previous hidden state; `with_rank(2)` is applied to its static
        shape, and its second dimension must equal `self._cell_size`.
      scope: Optional variable scope; defaults to the class name.

    Returns:
      A pair `(new_h, new_h)`: the new hidden state, returned twice.

    Raises:
      ValueError: If the input size or the state size is None, or if the
        state size differs from `self._cell_size`.
    """
    with vs.variable_scope(scope or type(self).__name__):
      input_size = x.get_shape().with_rank(2)[1]

      # Check if the input size exist.
      if input_size is None:
        raise ValueError("Expecting input_size to be set.")

      cell_size = h_prev.get_shape().with_rank(2)[1]
      # Reject a missing state size before comparing it. Otherwise a None
      # value is reported as a size mismatch and this message is never seen.
      if cell_size is None:
        raise ValueError("cell_size from `h_prev` should not be None.")

      # Check cell_size == state_size from h_prev.
      if cell_size != self._cell_size:
        raise ValueError("Shape of h_prev[1] incorrect: cell_size %i vs %s" %
                         (self._cell_size, cell_size))

      w_ru = vs.get_variable("w_ru", [input_size + self._cell_size,
                                      self._cell_size * 2])
      b_ru = vs.get_variable(
          "b_ru", [self._cell_size * 2],
          initializer=init_ops.constant_initializer(1.0))
      w_c = vs.get_variable("w_c",
                            [input_size + self._cell_size, self._cell_size])
      b_c = vs.get_variable(
          "b_c", [self._cell_size],
          initializer=init_ops.constant_initializer(0.0))

      _gru_block_cell = gen_gru_ops.gru_block_cell  # pylint: disable=invalid-name
      # Only the last of the op's four outputs (the new state) is used.
      _, _, _, new_h = _gru_block_cell(
          x=x, h_prev=h_prev, w_ru=w_ru, w_c=w_c, b_ru=b_ru, b_c=b_c)

      return new_h, new_h
コード例 #15
0
  def testRenorm(self):
    """Compares BatchNormalization(renorm=True) with a NumPy reference.

    Five random batches are fed through the layer in training and then
    inference mode; both outputs must match values computed by hand from
    running statistics that are updated alongside the layer.
    """
    shape = (4, 3)
    xt = array_ops.placeholder(dtypes.float32, shape)
    momentum = 0.99
    renorm_momentum = 0.8
    rmax = 1.1
    rmin = 0.9
    dmax = 0.1
    gamma = 2.
    beta = 3.
    epsilon = 0.001
    bn = normalization_layers.BatchNormalization(
        axis=1,
        gamma_initializer=init_ops.constant_initializer(gamma),
        beta_initializer=init_ops.constant_initializer(beta),
        epsilon=epsilon,
        momentum=momentum,
        renorm=True,
        renorm_clipping={'rmax': rmax, 'rmin': rmin, 'dmax': dmax},
        renorm_momentum=renorm_momentum)
    training = array_ops.placeholder(dtypes.bool)
    yt = bn.apply(xt, training=training)

    # Reference copies of the running statistics, updated by hand below.
    moving_mean = 0.
    moving_variance = 1.
    renorm_mean = renorm_stddev = 0.
    renorm_weight = 0.
    with self.test_session(use_gpu=True) as sess:
      sess.run(variables.global_variables_initializer())
      for _ in range(5):
        x = np.random.random(shape)

        # Batch statistics over axis 0, and the renorm correction terms r
        # and d clipped to [rmin, rmax] and [-dmax, dmax].
        mean = x.mean(0)
        stddev = np.sqrt(x.var(0) + epsilon)
        adj_mean = renorm_mean + (1. - renorm_weight) * mean
        adj_stddev = renorm_stddev + (1. - renorm_weight) * stddev
        r = (stddev / adj_stddev).clip(rmin, rmax)
        d = ((mean - adj_mean) / adj_stddev).clip(-dmax, dmax)
        # Expected training output uses the statistics from BEFORE this
        # step's updates, so it must be computed ahead of the lines below.
        y_train = ((x - mean) / stddev * r + d) * gamma + beta
        renorm_mean += (mean - renorm_mean) * (1. - renorm_momentum)
        renorm_stddev += (stddev - renorm_stddev) * (1. - renorm_momentum)
        renorm_weight += (1. - renorm_weight) * (1. - renorm_momentum)
        moving_mean += (renorm_mean / renorm_weight -
                        moving_mean) * (1. - momentum)
        moving_variance += ((renorm_stddev / renorm_weight) ** 2 - epsilon -
                            moving_variance) * (1. - momentum)

        # Expected inference output uses the freshly updated moving averages.
        y_test = ((x - moving_mean) / (moving_variance + epsilon) ** 0.5 *
                  gamma) + beta

        # The layer's update ops are fetched together with the output.
        yt_val_train, _, _ = sess.run([yt] + bn.updates,
                                      feed_dict={xt: x, training: True})
        yt_val_test, _, _ = sess.run([yt] + bn.updates,
                                     feed_dict={xt: x, training: False})

        self.assertAllClose(y_train, yt_val_train, atol=1e-5)
        self.assertAllClose(y_test, yt_val_test, atol=1e-5)
コード例 #16
0
ファイル: init_ops_test.py プロジェクト: HughKu/tensorflow
 def testInvalidValueTypeForConstantInitializerCausesTypeError(self):
   """constant_initializer must reject Tensor and Variable initial values."""
   tensor_value = constant_op.constant([1.0, 2.0, 3.0])
   tensor_pattern = r"Invalid type for initial value: .*Tensor.*"
   with self.assertRaisesRegexp(TypeError, tensor_pattern):
     init_ops.constant_initializer(tensor_value, dtype=dtypes.float32)

   variable_value = variables.Variable([3.0, 2.0, 1.0])
   variable_pattern = r"Invalid type for initial value: .*Variable.*"
   with self.assertRaisesRegexp(TypeError, variable_pattern):
     init_ops.constant_initializer(variable_value, dtype=dtypes.float32)
コード例 #17
0
ファイル: rnn_cell.py プロジェクト: KalraA/tensorflow
 def _norm(self, inp, scope):
   """Applies layer_norm to `inp` with gamma/beta pre-created under `scope`.

   Args:
     inp: Tensor to normalize; gamma and beta take its last dimension.
     scope: Variable scope (or name) under which the variables are created.

   Returns:
     The result of `layers.layer_norm` called with `reuse=True`.
   """
   with vs.variable_scope(scope) as norm_scope:
     param_shape = inp.get_shape()[-1:]
     # Create gamma and beta up front with this cell's initial values
     # (self._g, self._b); layer_norm is then called with reuse=True.
     vs.get_variable(
         "gamma",
         shape=param_shape,
         initializer=init_ops.constant_initializer(self._g))
     vs.get_variable(
         "beta",
         shape=param_shape,
         initializer=init_ops.constant_initializer(self._b))
     return layers.layer_norm(inp, reuse=True, scope=norm_scope)
コード例 #18
0
ファイル: models.py プロジェクト: codealphago/ML-KWS-for-MCU
 def _norm(self, inp, scope):
   """Applies layer_norm to `inp` with gamma/beta pre-created under `scope`.

   Args:
     inp: Tensor to normalize; gamma and beta take its last dimension.
     scope: Variable scope (or name) used both for creating the variables
       and for the layer_norm call.

   Returns:
     The result of `layers.layer_norm` called with `reuse=True`.
   """
   param_shape = inp.get_shape()[-1:]
   initializers = (
       ("gamma", init_ops.constant_initializer(self._g)),
       ("beta", init_ops.constant_initializer(self._b)),
   )
   with vs.variable_scope(scope):
     # Create gamma, then beta, so that layer_norm can reuse them.
     for var_name, initializer in initializers:
       vs.get_variable(var_name, shape=param_shape, initializer=initializer)
   return layers.layer_norm(inp, reuse=True, scope=scope)
コード例 #19
0
 def testBasicLSTMCell(self):
   """Smoke-tests a stacked pair of BasicLSTMCells and a single odd-width one.

   The two-layer MultiRNNCell is checked for variable naming and its outputs
   are compared against previously recorded values. A second scope runs a
   single BasicLSTMCell whose input width differs from its unit count.
   """
   with self.test_session() as sess:
     root_init = init_ops.constant_initializer(0.5)
     with variable_scope.variable_scope("root", initializer=root_init):
       inputs = array_ops.zeros([1, 2])
       state = array_ops.zeros([1, 8])
       stacked_cells = [
           rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
           for _ in range(2)
       ]
       cell = rnn_cell_impl.MultiRNNCell(stacked_cells, state_is_tuple=False)
       output, new_state = cell(inputs, state)

       # Per layer: first the weights variable, then the bias variable.
       expected_variable_names = [
           "root/multi_rnn_cell/cell_%d/basic_lstm_cell/%s:0" % (layer, suffix)
           for layer in range(2)
           for suffix in (rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
                          rnn_cell_impl._BIAS_VARIABLE_NAME)
       ]
       trainable_names = [v.name for v in cell.trainable_variables]
       self.assertEqual(expected_variable_names, trainable_names)
       self.assertFalse(cell.non_trainable_variables)

       sess.run([variables_lib.global_variables_initializer()])
       feed = {
           inputs.name: np.array([[1., 1.]]),
           state.name: 0.1 * np.ones([1, 8])
       }
       res = sess.run([output, new_state], feed)
       self.assertEqual(len(res), 2)
       global_names = [v.name for v in variables_lib.global_variables()]
       self.assertEqual(expected_variable_names, global_names)

       # The expected numbers were recorded, not derived: this is only a
       # smoke test.
       self.assertAllClose(res[0], [[0.24024698, 0.24024698]])
       expected_mem = np.array([[
           0.68967271, 0.68967271, 0.44848421, 0.44848421, 0.39897051,
           0.39897051, 0.24024698, 0.24024698
       ]])
       self.assertAllClose(res[1], expected_mem)

     other_init = init_ops.constant_initializer(0.5)
     with variable_scope.variable_scope("other", initializer=other_init):
       # BasicLSTMCell with input_size != num_units.
       inputs = array_ops.zeros([1, 3])
       state = array_ops.zeros([1, 4])
       single_cell = rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
       output, new_state = single_cell(inputs, state)
       sess.run([variables_lib.global_variables_initializer()])
       feed = {
           inputs.name: np.array([[1., 1., 1.]]),
           state.name: 0.1 * np.ones([1, 4])
       }
       res = sess.run([output, new_state], feed)
       self.assertEqual(len(res), 2)
コード例 #20
0
def _setup_model():
  """Builds a one-variable model and an int64 global step.

  Returns:
    A tuple `(x, var, loss, global_step)`: a scalar float32 placeholder, a
    scalar variable "test" initialised to 10, the loss `abs(var * x)`, and a
    non-trainable scalar int64 variable "global_step" initialised to 0.
  """
  x = array_ops.placeholder(dtypes.float32, [])
  var_init = init_ops.constant_initializer(10)
  var = variable_scope.get_variable("test", [], initializer=var_init)
  loss = math_ops.abs(var * x)

  step_init = init_ops.constant_initializer(0, dtype=dtypes.int64)
  global_step = variable_scope.get_variable(
      "global_step", [],
      trainable=False,
      dtype=dtypes.int64,
      initializer=step_init)
  return x, var, loss, global_step
コード例 #21
0
  def testLayerBasic(self):
    """Checks that CudnnLSTM reuses one opaque kernel across three calls."""
    num_layers = 4
    num_units = 2
    batch_size = 8
    direction = CUDNN_RNN_UNIDIRECTION
    dir_count = 1
    input_shape = [num_layers * dir_count, batch_size, num_units]

    with vs.variable_scope("main"):
      # Both layer instances below are constructed from these arguments.
      lstm_kwargs = dict(
          direction=direction,
          kernel_initializer=init_ops.constant_initializer(0.),
          bias_initializer=init_ops.constant_initializer(0.),
          name="awesome_lstm")
      inputs = random_ops.random_uniform(input_shape, dtype=dtypes.float32)

      lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units, **lstm_kwargs)

      # The first call builds the layer; the second reuses it.
      outputs1, _ = lstm(inputs)
      outputs2, _ = lstm(inputs)

      total_sum1 = math_ops.reduce_sum(outputs1)
      total_sum2 = math_ops.reduce_sum(outputs2)

    with vs.variable_scope("main", reuse=True):
      # A second instance with the same name, created under reuse=True.
      lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units, **lstm_kwargs)

      outputs3, _ = lstm(inputs)
      total_sum3 = math_ops.reduce_sum(outputs3)

    # Exactly one trainable variable and one saveable object must exist.
    trainable = variables.trainable_variables()
    self.assertEqual(1, len(trainable))
    self.assertEqual(1, len(ops.get_collection(ops.GraphKeys.SAVEABLE_OBJECTS)))
    self.assertEqual("main/awesome_lstm/opaque_kernel",
                     variables.trainable_variables()[0].op.name)

    with self.test_session(use_gpu=True) as sess:
      sess.run(variables.global_variables_initializer())
      sums = sess.run([total_sum1, total_sum2, total_sum3])
      # With zero kernel and bias initializers, every sum is expected to be 0.
      for total in sums:
        self.assertEqual(0, total)
コード例 #22
0
  def testGhostBN4DimsAxis1(self):
    """Compares BatchNormalization with virtual batches to a NumPy reference.

    A 4-D input normalized over axis 1 with `num_virtual_batches=3` is run
    for five random batches; the training and inference outputs must match
    values computed by hand on the input reshaped into virtual batches.
    """
    shape = [6, 3, 10, 10]
    num_virtual_batches = 3
    beta = 2.
    gamma = 3.
    momentum = 0.8
    epsilon = 1e-3
    # Reference moving statistics, updated by hand inside the loop.
    moving_means = np.zeros([1, 3, 3, 1, 1], dtype=np.float32)
    moving_vars = np.ones([1, 3, 3, 1, 1], dtype=np.float32)

    inp = array_ops.placeholder(dtypes.float32, shape)
    is_training = array_ops.placeholder(dtypes.bool)
    bn = normalization_layers.BatchNormalization(
        axis=1,
        momentum=momentum,
        epsilon=epsilon,
        beta_initializer=init_ops.constant_initializer(beta),
        gamma_initializer=init_ops.constant_initializer(gamma),
        num_virtual_batches=num_virtual_batches,
        fused=False)      # NCHW is unsupported by CPU fused batch norm
    out = bn.apply(inp, training=is_training)
    # Shape used by the reference: the leading dimension is split into
    # [shape[0] // num_virtual_batches, num_virtual_batches].
    ghost_shape = ([shape[0] // num_virtual_batches, num_virtual_batches] +
                   shape[1:])

    with self.test_session(use_gpu=True) as sess:
      sess.run(variables.global_variables_initializer())
      for _ in range(5):
        x = np.random.random(shape)

        # Statistics are reduced over axes 0, 3 and 4 of the reshaped input,
        # then folded into the reference moving averages.
        sub_batched = np.reshape(x, ghost_shape)
        means = np.mean(sub_batched, axis=(0, 3, 4), keepdims=True)
        variances = np.var(sub_batched, axis=(0, 3, 4), keepdims=True)
        moving_means = moving_means * momentum + means * (1. - momentum)
        moving_vars = moving_vars * momentum + variances * (1. - momentum)

        # Expected outputs: batch statistics for training, moving statistics
        # for inference.
        y_train = ((sub_batched - means) /
                   (variances + epsilon) ** 0.5 * gamma) + beta
        y_test = ((sub_batched - moving_means) /
                  (moving_vars + epsilon) ** 0.5 * gamma) + beta

        # Back to the original input shape for comparison with the layer.
        y_train = np.reshape(y_train, shape)
        y_test = np.reshape(y_test, shape)

        # The training run also fetches the layer's update ops.
        y_val_train, _, _ = sess.run([out] + bn.updates,
                                     feed_dict={inp: x, is_training: True})
        y_val_test = sess.run(out, feed_dict={inp: x, is_training: False})

        self.assertAllClose(y_train, y_val_train, atol=1e-2)
        self.assertAllClose(y_test, y_val_test, atol=1e-2)
コード例 #23
0
ファイル: tf_lstm.py プロジェクト: ScartleRoy/TF_LSTM_seq_bn
def batch_norm(x, deterministic, alpha=0.9, shift=True, scope='bn'):
    """Batch-normalize `x` over every dimension except the last one.

    Creates, under `vs.variable_scope(scope)`, the variables
    `<scope>_beta`, `<scope>_gamma`, `<scope>_mean`, `<scope>_var`,
    `<scope>_counter` and `<scope>_zero_cnt`, computes the batch moments of
    `x` and normalizes `x` with an epsilon of 1e-8.

    Args:
      x: input tensor; its static shape is read with `get_shape().as_list()`
        and the last entry is used as the feature dimension.
      deterministic: predicate handed to `cond`. When true the selected
        statistics are used unchanged; otherwise they are blended with the
        batch statistics (presumably a scalar boolean tensor -- confirm
        against callers).
      alpha: weight given to the previous statistics in the blend; the batch
        statistics receive `1 - alpha`.
      shift: when falsy, `beta` is created with `trainable=False`.
      scope: variable-scope name, also used as the prefix of every variable
        and of the moments op.

    Returns:
      The normalized tensor produced by `batch_normalization`.

    NOTE(review): nothing in this function assigns the blended statistics or
    the incremented counter back to their variables, so as written `counter`
    and `zero_cnt` only change if some other code updates them -- confirm
    whether that is intended.
    """
    with vs.variable_scope(scope):
        dtype = x.dtype
        static_shape = x.get_shape().as_list()
        feat_dim = static_shape[-1]
        reduce_axes = range(len(static_shape) - 1)

        def _frozen_var(suffix, shape, initializer, var_dtype):
            # Non-trainable variable named "<scope><suffix>".
            return vs.get_variable(scope + suffix, shape=shape,
                                   initializer=initializer,
                                   dtype=var_dtype, trainable=False)

        # beta is only marked non-trainable when shifting is disabled;
        # otherwise get_variable's own default applies.
        beta_kwargs = dict(shape=[feat_dim],
                           initializer=init_ops.zeros_initializer,
                           dtype=dtype)
        if not shift:
            beta_kwargs["trainable"] = False
        beta = vs.get_variable(scope + "_beta", **beta_kwargs)

        gamma = vs.get_variable(
            scope + "_gamma", shape=[feat_dim],
            initializer=init_ops.constant_initializer(0.1), dtype=dtype)

        stored_mean = _frozen_var("_mean", [feat_dim],
                                  init_ops.zeros_initializer, dtype)
        stored_var = _frozen_var("_var", [feat_dim],
                                 init_ops.ones_initializer, dtype)
        counter = _frozen_var("_counter", [],
                              init_ops.constant_initializer(0), tf.int64)
        zero_cnt = _frozen_var("_zero_cnt", [],
                               init_ops.constant_initializer(0), tf.int64)

        batch_mean, batch_var = moments(x, reduce_axes,
                                        name=scope + '_moments')

        def _batch_stats():
            return batch_mean, batch_var

        def _stored_stats():
            return stored_mean, stored_var

        # While the counter still equals zero_cnt, fall back to the batch
        # statistics instead of the stored ones.
        counter_untouched = math_ops.equal(counter, zero_cnt)
        cur_mean, cur_var = cond(counter_untouched, _batch_stats,
                                 _stored_stats)

        def _keep_stats():
            return cur_mean, cur_var, counter

        def _blend_stats():
            return ((1 - alpha) * batch_mean + alpha * cur_mean,
                    (1 - alpha) * batch_var + alpha * cur_var,
                    counter + 1)

        out_mean, out_var, _ = cond(deterministic, _keep_stats, _blend_stats)
        normed = batch_normalization(x, out_mean, out_var, beta, gamma, 1e-8)
    return normed
コード例 #24
0
  def testMultiRNNCellWithStateTuple(self):
    """Checks tuple-state handling of a two-layer GRU `MultiRNNCell`.

    A non-tuple state must be rejected with a ValueError, and a tuple state
    must produce the hard-coded smoke-test values below.
    """

    def _stacked_gru():
      # Two GRU layers of size 2, with tuple state.
      return core_rnn_cell_impl.MultiRNNCell(
          [core_rnn_cell_impl.GRUCell(2) for _ in range(2)],
          state_is_tuple=True)

    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        x = array_ops.zeros([1, 2])
        m_bad = array_ops.zeros([1, 4])
        m_good = (array_ops.zeros([1, 2]), array_ops.zeros([1, 2]))

        # A flat state tensor is not acceptable when state_is_tuple=True.
        with self.assertRaisesRegexp(ValueError, "Expected state .* a tuple"):
          _stacked_gru()(x, m_bad)

        _, ml = _stacked_gru()(x, m_good)

        sess.run([variables.global_variables_initializer()])
        layer_state = np.array([[0.1, 0.1]])
        feed = {
            x.name: np.array([[1., 1.]]),
            m_good[0].name: layer_state,
            m_good[1].name: np.array([[0.1, 0.1]]),
        }
        res = sess.run(ml, feed)

        # Smoke test only: the expected numbers were not derived by hand, but
        # they should match those of the test testMultiRNNCell.
        self.assertAllClose(res[0], [[0.175991, 0.175991]])
        self.assertAllClose(res[1], [[0.13248, 0.13248]])
コード例 #25
0
 def _testDropoutWrapper(self, batch_size=None, time_steps=None,
                         parallel_iterations=None, **kwargs):
   """Runs a dropout-wrapped 3-unit LSTMCell through `dynamic_rnn`.

   Args:
     batch_size: batch size of the generated input; if both this and
       `time_steps` are None a fixed input (2 time steps, batch size 1,
       depth 3) is used.
     time_steps: number of time steps of the generated input.
     parallel_iterations: forwarded to `rnn.dynamic_rnn`.
     **kwargs: forwarded to `rnn_cell_impl.DropoutWrapper`.

   Returns:
     The evaluated `[outputs, final_state]` list.
   """
   with self.test_session() as sess:
     with variable_scope.variable_scope(
         "root", initializer=init_ops.constant_initializer(0.5)):
       use_fixed_input = batch_size is None and time_steps is None
       if use_fixed_input:
         # 2 time steps, batch size 1, depth 3
         batch_size, time_steps = 1, 2
         x = constant_op.constant(
             [[[2., 2., 2.]], [[1., 1., 1.]]], dtype=dtypes.float32)
         state_half = constant_op.constant(
             [[0.1, 0.1, 0.1]], dtype=dtypes.float32)
       else:
         random_input = np.random.randn(time_steps, batch_size, 3)
         x = constant_op.constant(random_input.astype(np.float32))
         state_half = constant_op.constant(
             [[0.1, 0.1, 0.1]] * batch_size, dtype=dtypes.float32)
       # The same constant serves as both the c and the h part of the state.
       m = rnn_cell_impl.LSTMStateTuple(state_half, state_half)

       wrapped_cell = rnn_cell_impl.DropoutWrapper(
           rnn_cell_impl.LSTMCell(3), dtype=x.dtype, **kwargs)
       outputs, final_state = rnn.dynamic_rnn(
           cell=wrapped_cell,
           time_major=True,
           parallel_iterations=parallel_iterations,
           inputs=x,
           initial_state=m)
       sess.run([variables_lib.global_variables_initializer()])
       res = sess.run([outputs, final_state])
       state_shape = (batch_size, 3)
       self.assertEqual(res[0].shape, (time_steps, batch_size, 3))
       self.assertEqual(res[1].c.shape, state_shape)
       self.assertEqual(res[1].h.shape, state_shape)
       return res
コード例 #26
0
 def testCoupledInputForgetGateLSTMCell(self):
   """Smoke test pinning the output/state of CoupledInputForgetGateLSTMCell."""
   num_units = 2
   batch_size = 3
   input_size = 4
   state_size = num_units * 2
   expected_output = np.array(
       [[0.121753, 0.121753],
        [0.103349, 0.103349],
        [0.100178, 0.100178]],
       dtype=np.float32)
   expected_state = np.array(
       [[0.137523, 0.137523, 0.121753, 0.121753],
        [0.105450, 0.105450, 0.103349, 0.103349],
        [0.100742, 0.100742, 0.100178, 0.100178]],
       dtype=np.float32)
   with self.test_session() as sess:
     with variable_scope.variable_scope(
         "root", initializer=init_ops.constant_initializer(0.5)):
       x = array_ops.zeros([batch_size, input_size])
       m = array_ops.zeros([batch_size, state_size])
       cell = rnn_cell.CoupledInputForgetGateLSTMCell(
           num_units=num_units, forget_bias=1.0)
       output, state = cell(x, m)
       sess.run([variables.global_variables_initializer()])
       # Row i of the input is filled with the value i + 1.
       input_rows = np.array(
           [[float(value)] * input_size for value in (1, 2, 3)])
       feed = {
           x.name: input_rows,
           m.name: 0.1 * np.ones((batch_size, state_size)),
       }
       res = sess.run([output, state], feed)
       # This is a smoke test: Only making sure expected values didn't change.
       self.assertEqual(len(res), 2)
       self.assertAllClose(res[0], expected_output)
       self.assertAllClose(res[1], expected_state)
コード例 #27
0
ファイル: cudnn_rnn_ops.py プロジェクト: Dr4KK/tensorflow
 def call(self, inputs, state):
   """Gated recurrent unit (GRU) with nunits cells.

   The candidate activation is built from two separate linear projections
   (one of `inputs`, one of `state`); the reset gate is applied to the
   hidden projection only.

   Args:
     inputs: input tensor for this step.
     state: previous hidden state tensor.

   Returns:
     A pair `(new_h, new_h)`: the new hidden state, used both as the cell
     output and as the next state.
   """
   # The gates start with a bias of 1.0 (to not reset and not update) unless
   # an explicit bias initializer was configured.
   gate_bias_init = self._bias_initializer
   if gate_bias_init is None:
     gate_bias_init = init_ops.constant_initializer(1.0, dtype=inputs.dtype)

   with vs.variable_scope("gates"):  # Reset gate and update gate.
     # pylint: disable=protected-access
     gate_logits = rnn_cell_impl._linear(
         [inputs, state], 2 * self._num_units, True, gate_bias_init,
         self._kernel_initializer)
     # pylint: enable=protected-access
     gates = math_ops.sigmoid(gate_logits)
     r, u = array_ops.split(value=gates, num_or_size_splits=2, axis=1)

   with vs.variable_scope("candidate"):
     # pylint: disable=protected-access
     with vs.variable_scope("input_projection"):
       input_term = rnn_cell_impl._linear(
           inputs, self._num_units, True, self._bias_initializer,
           self._kernel_initializer)
     with vs.variable_scope("hidden_projection"):
       hidden_linear = rnn_cell_impl._linear(
           state, self._num_units, True, self._bias_initializer,
           self._kernel_initializer)
       hidden_term = r * hidden_linear
     # pylint: enable=protected-access
     c = self._activation(input_term + hidden_term)

   # Interpolate between the previous state and the candidate.
   new_h = u * state + (1 - u) * c
   return new_h, new_h
コード例 #28
0
 def testLSTMCell(self):
   """Smoke-tests a projected LSTMCell with a concatenated (non-tuple) state.

   Only shapes are checked, plus the fact that different input rows yield
   different outputs and states.
   """
   num_units, num_proj = 8, 6
   batch_size, input_size = 3, 2
   state_size = num_units + num_proj
   with self.test_session() as sess:
     with variable_scope.variable_scope(
         "root", initializer=init_ops.constant_initializer(0.5)):
       x = array_ops.zeros([batch_size, input_size])
       m = array_ops.zeros([batch_size, state_size])
       cell = rnn_cell_impl.LSTMCell(
           num_units=num_units,
           num_proj=num_proj,
           forget_bias=1.0,
           state_is_tuple=False)
       output, state = cell(x, m)
       sess.run([variables_lib.global_variables_initializer()])
       feed = {
           x.name: np.array([[1., 1.], [2., 2.], [3., 3.]]),
           m.name: 0.1 * np.ones((batch_size, state_size)),
       }
       res = sess.run([output, state], feed)
       self.assertEqual(len(res), 2)
       out_val, state_val = res
       # The numbers in results were not calculated, this is mostly just a
       # smoke test.
       self.assertEqual(out_val.shape, (batch_size, num_proj))
       self.assertEqual(state_val.shape, (batch_size, state_size))
       # Different inputs so different outputs and states
       for row in range(1, batch_size):
         out_gap = np.linalg.norm(out_val[0, :] - out_val[row, :])
         self.assertTrue(float(out_gap) > 1e-6)
         state_gap = np.linalg.norm(state_val[0, :] - state_val[row, :])
         self.assertTrue(float(state_gap) > 1e-6)
コード例 #29
0
  def testDeviceWrapperDynamicExecutionNodesAreAllProperlyLocated(self):
    """Checks that a GPU DeviceWrapper keeps all gru_cell nodes off the CPU.

    The RNN is built under a "/cpu:0" device scope while the cell is wrapped
    for "/gpu:0"; the traced step stats must show gru_cell nodes on the GPU
    device only. The test is a no-op when no GPU is available.
    """
    if not test.is_gpu_available():
      # Can't perform this test w/o a GPU
      return

    def _gru_nodes(node_stats):
      # Node stats whose name mentions the GRU cell.
      return [s for s in node_stats if "gru_cell" in s.node_name]

    with self.test_session(use_gpu=True) as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        x = array_ops.zeros([1, 1, 3])
        cell = rnn_cell_impl.DeviceWrapper(rnn_cell_impl.GRUCell(3), "/gpu:0")
        with ops.device("/cpu:0"):
          outputs, _ = rnn.dynamic_rnn(
              cell=cell, inputs=x, dtype=dtypes.float32)
        run_metadata = config_pb2.RunMetadata()
        opts = config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE)

        sess.run([variables_lib.global_variables_initializer()])
        _ = sess.run(outputs, options=opts, run_metadata=run_metadata)

      dev_stats = run_metadata.step_stats.dev_stats
      # The two device entries may come in either order; find the GPU one.
      gpu_index = 0 if "gpu" in dev_stats[0].device else 1
      gpu_stats = dev_stats[gpu_index].node_stats
      cpu_stats = dev_stats[1 - gpu_index].node_stats
      self.assertFalse(_gru_nodes(cpu_stats))
      self.assertTrue(_gru_nodes(gpu_stats))
コード例 #30
0
  def testBasicLSTMCellWithDropout(self):
    """Checks recurrent dropout in LayerNormBasicLSTMCell (layer_norm off).

    Every entry of the new cell state must equal either `c_high` (not
    dropped out) or `c_low` (dropped out), the matching activation must equal
    `h_high` / `h_low`, and the number of dropped entries must be one of
    `allowed_low`.
    """

    def _is_close(x, y, digits=4):
      # Compare the magnitude of the difference: a signed difference would
      # treat any x smaller than y as "close", however far apart they are.
      return abs(x - y) < 10**(-digits)

    def _is_close_in(x, items, digits=4):
      # True if x is close to at least one of the candidate values.
      return any(_is_close(x, i, digits) for i in items)

    keep_prob = 0.5
    c_high = 2.9998924946
    c_low = 0.999983298578
    h_low = 0.761552567265
    h_high = 0.995008519604
    num_units = 5
    allowed_low = [2, 3]

    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "other", initializer=init_ops.constant_initializer(1)):
        x = array_ops.zeros([1, 5])
        c = array_ops.zeros([1, 5])
        h = array_ops.zeros([1, 5])
        state = core_rnn_cell_impl.LSTMStateTuple(c, h)
        cell = rnn_cell.LayerNormBasicLSTMCell(
            num_units, layer_norm=False, dropout_keep_prob=keep_prob)

        g, s = cell(x, state)
        sess.run([variables.global_variables_initializer()])
        res = sess.run([g, s], {
            x.name: np.ones([1, 5]),
            c.name: np.ones([1, 5]),
            h.name: np.ones([1, 5]),
        })

        # Since the returned tensors are of size [1,n]
        # get the first component right now.
        actual_h = res[0][0]
        actual_state_c = res[1].c[0]
        actual_state_h = res[1].h[0]

        # For each item in `c` (the cell inner state) check that
        # it is equal to one of the allowed values `c_high` (not
        # dropped out) or `c_low` (dropped out) and verify that the
        # corresponding item in `h` (the cell activation) is coherent.
        # Count the dropped activations and check that their number is
        # coherent with the dropout probability.
        dropped_count = 0
        self.assertTrue((actual_h == actual_state_h).all())
        for citem, hitem in zip(actual_state_c, actual_state_h):
          self.assertTrue(_is_close_in(citem, [c_low, c_high]))
          if _is_close(citem, c_low):
            self.assertTrue(_is_close(hitem, h_low))
            dropped_count += 1
          elif _is_close(citem, c_high):
            self.assertTrue(_is_close(hitem, h_high))
        self.assertIn(dropped_count, allowed_low)
コード例 #31
0
ファイル: optimizers.py プロジェクト: Utsal20/poGANmon
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  gradient_noise_scale=None,
                  gradient_multipliers=None,
                  clip_gradients=None,
                  learning_rate_decay_fn=None,
                  update_ops=None,
                  variables=None,
                  name=None,
                  summaries=None,
                  colocate_gradients_with_ops=False,
                  increment_global_step=True):
  """Given loss and parameters for optimizer, returns a training op.

  Various ways of passing optimizers include:

  - by string specifying the name of the optimizer. See OPTIMIZER_CLS_NAMES
      for full list. E.g. `optimize_loss(..., optimizer='Adam')`.
  - by function taking learning rate `Tensor` as argument and returning an
      `Optimizer` instance. E.g. `optimize_loss(...,
      optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.5))`.
    Alternatively, if `learning_rate` is `None`, the function takes no
    arguments. E.g. `optimize_loss(..., learning_rate=None,
      optimizer=lambda: tf.train.MomentumOptimizer(0.5, momentum=0.5))`.
  - by a subclass of `Optimizer` having a single-argument constructor
      (the argument is the learning rate), such as AdamOptimizer or
      AdagradOptimizer. E.g. `optimize_loss(...,
      optimizer=tf.train.AdagradOptimizer)`.
  - by an instance of a subclass of `Optimizer`.
      E.g., `optimize_loss(..., optimizer=tf.train.AdagradOptimizer(0.5))`.

  Args:
    loss: Scalar `Tensor`.
    global_step: Scalar int `Tensor`, step counter to update on each step
                 unless `increment_global_step` is `False`. If not supplied,
                 it will be fetched from the default graph (see
                 `tf.train.get_global_step` for details). If it has
                 not been created, no step will be incremented with each weight
                 update. `learning_rate_decay_fn` requires `global_step`.
    learning_rate: float or `Tensor`, magnitude of update per each training
                   step. Can be `None`.
    optimizer: string, class or optimizer instance, used as trainer.
               string should be name of optimizer, like 'SGD',
                 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant.
               class should be sub-class of `tf.Optimizer` that implements
                 `compute_gradients` and `apply_gradients` functions.
               optimizer instance should be instantiation of `tf.Optimizer`
                 sub-class and have `compute_gradients` and `apply_gradients`
                 functions.
    gradient_noise_scale: float or None, adds 0-mean normal noise scaled by this
                          value.
    gradient_multipliers: dict of variables or variable names to floats.
                          If present, gradients for specified
                          variables will be multiplied by given constant.
    clip_gradients: float, callable or `None`. If float, is provided, a global
      clipping is applied to prevent the norm of the gradient to exceed this
      value. Alternatively, a callable can be provided e.g.: adaptive_clipping.
      This callable takes a `list` of `(gradients, variables)` `tuple`s and
      returns the same thing with the gradients modified.
    learning_rate_decay_fn: function, takes `learning_rate` and `global_step`
                            `Tensor`s, returns `Tensor`.
                            Can be used to implement any learning rate decay
                            functions.
                            For example: `tf.train.exponential_decay`.
                            Ignored if `learning_rate` is not supplied.
    update_ops: list of update `Operation`s to execute at each step. If `None`,
                uses elements of UPDATE_OPS collection. The order of execution
                between `update_ops` and `loss` is non-deterministic.
    variables: list of variables to optimize or
               `None` to use all trainable variables.
    name: The name for this operation is used to scope operations and summaries.
    summaries: List of internal quantities to visualize on tensorboard. If not
               set, the loss, the learning rate, and the global norm of the
               gradients will be reported. The complete list of possible values
               is in OPTIMIZER_SUMMARIES.
    colocate_gradients_with_ops: If True, try colocating gradients with the
                                 corresponding op.
    increment_global_step: Whether to increment `global_step`. If your model
      calls `optimize_loss` multiple times per training step (e.g. to optimize
      different parts of the model), use this arg to avoid incrementing
      `global_step` more times than necessary.

  Returns:
    Training op.

  Raises:
    ValueError: if:
        * `loss` is an invalid type or shape.
        * `global_step` is an invalid type or shape.
        * `learning_rate` is an invalid type or value.
        * `optimizer` has the wrong type.
        * `clip_gradients` is neither float nor callable.
        * `learning_rate` and `learning_rate_decay_fn` are supplied, but no
          `global_step` is available.
        * `gradients` is empty.
  """
  loss = ops.convert_to_tensor(loss)
  contrib_framework.assert_scalar(loss)
  if global_step is None:
    global_step = train.get_global_step()
  else:
    train.assert_global_step(global_step)
  with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
    # Update ops take UPDATE_OPS collection if not provided.
    if update_ops is None:
      update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
    # Make sure update ops are ran before computing loss.
    if update_ops:
      loss = control_flow_ops.with_dependencies(list(update_ops), loss)

    # Learning rate variable, with possible decay.
    lr = None
    if learning_rate is not None:
      if (isinstance(learning_rate, ops.Tensor) and
          learning_rate.get_shape().ndims == 0):
        lr = learning_rate
      elif isinstance(learning_rate, float):
        if learning_rate < 0.0:
          # Format the value into the message; passing it as a second
          # exception argument would leave the "%s" placeholder unfilled.
          raise ValueError("Invalid learning_rate %s." % learning_rate)
        lr = vs.get_variable(
            "learning_rate", [],
            trainable=False,
            initializer=init_ops.constant_initializer(learning_rate))
      else:
        raise ValueError("Learning rate should be 0d Tensor or float. "
                         "Got %s of type %s" % (str(learning_rate),
                                                str(type(learning_rate))))
    if summaries is None:
      summaries = ["loss", "learning_rate", "global_gradient_norm"]
    else:
      for summ in summaries:
        if summ not in OPTIMIZER_SUMMARIES:
          raise ValueError("Summaries should be one of [%s], you provided %s." %
                           (", ".join(OPTIMIZER_SUMMARIES), summ))
    if learning_rate is not None and learning_rate_decay_fn is not None:
      if global_step is None:
        raise ValueError("global_step is required for learning_rate_decay_fn.")
      lr = learning_rate_decay_fn(lr, global_step)
      if "learning_rate" in summaries:
        summary.scalar("learning_rate", lr)

    # Create optimizer, given specified parameters.
    if isinstance(optimizer, six.string_types):
      if lr is None:
        raise ValueError("Learning rate is None, but should be specified if "
                         "optimizer is string (%s)." % optimizer)
      if optimizer not in OPTIMIZER_CLS_NAMES:
        raise ValueError(
            "Optimizer name should be one of [%s], you provided %s." %
            (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
      opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
    elif (isinstance(optimizer, type) and
          issubclass(optimizer, optimizer_.Optimizer)):
      if lr is None:
        raise ValueError("Learning rate is None, but should be specified if "
                         "optimizer is class (%s)." % optimizer)
      opt = optimizer(learning_rate=lr)
    elif isinstance(optimizer, optimizer_.Optimizer):
      opt = optimizer
    elif callable(optimizer):
      # A factory receives the learning rate only when one was supplied.
      if learning_rate is not None:
        opt = optimizer(lr)
      else:
        opt = optimizer()
      if not isinstance(opt, optimizer_.Optimizer):
        raise ValueError("Unrecognized optimizer: function should return "
                         "subclass of Optimizer. Got %s." % str(opt))
    else:
      raise ValueError("Unrecognized optimizer: should be string, "
                       "subclass of Optimizer, instance of "
                       "subclass of Optimizer or function with one argument. "
                       "Got %s." % str(optimizer))

    # All trainable variables, if specific variables are not specified.
    if variables is None:
      variables = vars_.trainable_variables()

    # Compute gradients.
    gradients = opt.compute_gradients(
        loss,
        variables,
        colocate_gradients_with_ops=colocate_gradients_with_ops)

    # Optionally add gradient noise.
    if gradient_noise_scale is not None:
      gradients = _add_scaled_noise_to_gradients(gradients,
                                                 gradient_noise_scale)

    # Multiply some gradients.
    if gradient_multipliers is not None:
      gradients = _multiply_gradients(gradients, gradient_multipliers)
      if not gradients:
        raise ValueError(
            "Empty list of (gradient, var) pairs encountered. This is most "
            "likely to be caused by an improper value of gradient_multipliers.")

    # Global norm of the gradients before any clipping is applied.
    if "global_gradient_norm" in summaries or "gradient_norm" in summaries:
      summary.scalar("global_norm/gradient_norm",
                     clip_ops.global_norm(list(zip(*gradients))[0]))

    # Optionally clip gradients by global norm.
    if isinstance(clip_gradients, float):
      gradients = _clip_gradients_by_norm(gradients, clip_gradients)
    elif callable(clip_gradients):
      gradients = clip_gradients(gradients)
    elif clip_gradients is not None:
      raise ValueError(
          "Unknown type %s for clip_gradients" % type(clip_gradients))

    # Add scalar summary for loss.
    if "loss" in summaries:
      summary.scalar("loss", loss)

    # Add histograms for variables, gradients and gradient norms.
    for gradient, variable in gradients:
      if isinstance(gradient, ops.IndexedSlices):
        grad_values = gradient.values
      else:
        grad_values = gradient

      if grad_values is not None:
        # ":" is replaced so the variable name can be used in a summary tag.
        var_name = variable.name.replace(":", "_")
        if "gradients" in summaries:
          summary.histogram("gradients/%s" % var_name, grad_values)
        if "gradient_norm" in summaries:
          summary.scalar("gradient_norm/%s" % var_name,
                         clip_ops.global_norm([grad_values]))

    # Global norm again, this time after clipping.
    if clip_gradients is not None and ("global_gradient_norm" in summaries or
                                       "gradient_norm" in summaries):
      summary.scalar("global_norm/clipped_gradient_norm",
                     clip_ops.global_norm(list(zip(*gradients))[0]))

    # Create gradient updates.
    grad_updates = opt.apply_gradients(
        gradients,
        global_step=global_step if increment_global_step else None,
        name="train")

    # Ensure the train_tensor computes grad_updates.
    train_tensor = control_flow_ops.with_dependencies([grad_updates], loss)

    return train_tensor
コード例 #32
0
def optimize_loss(losses,
                  global_step,
                  learning_rate,
                  optimizer,
                  num_gpus=1,
                  gradient_noise_scale=None,
                  gradient_multipliers=None,
                  clip_gradients=None,
                  learning_rate_decay_fn=None,
                  update_ops=None,
                  variables=None,
                  name=None,
                  summaries=None):
    """Given per-tower losses and optimizer parameters, returns a training op.

    Gradients are computed separately for each of `num_gpus` towers (tower
    `i` uses `losses[i]` under device `/gpu:i`), combined with
    `average_gradients`, and applied once.

    Args:
      losses: list of loss tensors, indexed by tower; it is concatenated with
        `[global_step]`, so it must be a `list`.
      global_step: Tensor, step counter for each update.
      learning_rate: float or 0-d Tensor, magnitude of update per each
        training step.
      optimizer: string, class or optimizer instance, used as trainer.
        string should be name of optimizer, like 'SGD', 'Adam', 'Adagrad'.
          Full list in OPTIMIZER_CLS_NAMES constant.
        class should be sub-class of tf.Optimizer that implements
          `compute_gradients` and `apply_gradients` functions.
        optimizer instance should be instantiation of tf.Optimizer sub-class
          and have `compute_gradients` and `apply_gradients` functions.
      num_gpus: number of towers; `losses[0]` .. `losses[num_gpus - 1]` are
        used.
      gradient_noise_scale: float or None, adds 0-mean normal noise scaled by
        this value.
      gradient_multipliers: dict of variables or variable names to floats.
        If present, gradients for specified variables will be multiplied by
        given constant.
      clip_gradients: float or `None`, clips gradients by this value.
      learning_rate_decay_fn: function, takes `learning_rate` and
        `global_step` `Tensor`s, returns `Tensor`. Can be used to implement
        any learning rate decay functions.
        For example: tf.train.exponential_decay.
      update_ops: currently unused; the code that handled it is commented
        out below.
      variables: currently unused; gradients are computed without an explicit
        variable list.
      name: The name for this operation is used to scope operations and
        summaries.
      summaries: List of internal quantities to visualize on tensorboard.
        Defaults to `["loss", "learning_rate"]`.

    Returns:
      The op returned by `opt.apply_gradients` for the averaged gradients.

    Raises:
      ValueError: if `learning_rate` is neither a 0-d Tensor nor a float, or
        if `optimizer` is an unknown name or has the wrong type.
    """
    with vs.variable_scope(name, "OptimizeLoss", losses + [global_step]):
        # # Update ops take UPDATE_OPS collection if not provided.
        # if update_ops is None:
        #   update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
        # # Make sure update ops are ran before computing loss.
        # if update_ops:
        #   #loss = control_flow_ops.with_dependencies(list(update_ops), loss)
        #   raise ValueError('update ops not supported yet for multi gpu')

        # Learning rate variable, with possible decay.
        if (isinstance(learning_rate, ops.Tensor)
                and learning_rate.get_shape().ndims == 0):
            lr = learning_rate
        elif isinstance(learning_rate, float):
            lr = vs.get_variable(
                "learning_rate", [],
                trainable=False,
                initializer=init_ops.constant_initializer(learning_rate))
        else:
            raise ValueError("Learning rate should be 0d Tensor or float. "
                             "Got %s of type %s" %
                             (str(learning_rate), str(type(learning_rate))))
        if summaries is None:
            summaries = ["loss", "learning_rate"]
        if learning_rate_decay_fn is not None:
            lr = learning_rate_decay_fn(lr, global_step)
            if "learning_rate" in summaries:
                summary.scalar("learning_rate", lr)

        # Create optimizer, given specified parameters.
        if isinstance(optimizer, six.string_types):
            if optimizer not in OPTIMIZER_CLS_NAMES:
                raise ValueError(
                    "Optimizer name should be one of [%s], you provided %s." %
                    (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
            opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
        elif isinstance(optimizer, type) and issubclass(
                optimizer, optimizer_.Optimizer):
            opt = optimizer(learning_rate=lr)
        elif isinstance(optimizer, optimizer_.Optimizer):
            opt = optimizer
        else:
            raise ValueError("Unrecognized optimizer: should be string, "
                             "subclass of Optimizer or instance of "
                             "subclass of Optimizer. Got %s." % str(optimizer))

        # Calculate the gradients for each model tower.
        tower_grads = []
        for i in range(num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % ('tower', i)) as scope:
                    # All trainable variables, if specific variables are not specified.

                    #if variables is None:
                    #  variables = vars_.trainable_variables()
                    # Compute gradients.
                    loss = losses[i]
                    #gradients = opt.compute_gradients(loss, variables)
                    gradients = opt.compute_gradients(loss)

                    # Optionally add gradient noise.
                    if gradient_noise_scale is not None:
                        gradients = _add_scaled_noise_to_gradients(
                            gradients, gradient_noise_scale)
                    # Multiply some gradients.
                    if gradient_multipliers is not None:
                        gradients = _multiply_gradients(
                            gradients, gradient_multipliers)
                    # Optionally clip gradients by global norm.
                    if clip_gradients is not None:
                        gradients = _clip_gradients_by_norm(
                            gradients, clip_gradients)

                    tower_grads.append(gradients)

        # NOTE(review): this branch is keyed on "loss" but records the
        # learning rate under the "learning_rate" tag; no loss summary is
        # written anywhere in this function. Confirm whether that is
        # intentional.
        if "loss" in summaries:
            summary.scalar("learning_rate", lr)

        #@TODO chg now just remove below  TODO FIXME add gradient monitor
        ## Add histograms for variables, gradients and gradient norms.
        #for gradient, variable in gradients:
        #  if isinstance(gradient, ops.IndexedSlices):
        #    grad_values = gradient.values
        #  else:
        #    grad_values = gradient

        #  if grad_values is not None:
        #    if "gradients" in summaries:
        #      logging_ops.histogram_summary(variable.name + "/gradients",
        #                                    grad_values)
        #    if "gradient_norm" in summaries:
        #      logging_ops.histogram_summary(variable.name + "/gradient_norm",
        #                                    clip_ops.global_norm([grad_values]))

        #if FLAGS.monitor_level > 1 and FLAGS.num_gpus == 0:
        #  melt.monitor_gradients_from_loss(loss)

        # Combine the per-tower (gradient, variable) lists into a single list.
        gradients = average_gradients(tower_grads)

        # Create gradient updates.
        grad_updates = opt.apply_gradients(gradients,
                                           global_step=global_step,
                                           name="train")
        # # Make sure total_loss is valid.
        # final_loss = array_ops.check_numerics(loss, "Loss is inf or nan")

        # # Ensure the train_tensor computes grad_updates.
        # train_tensor = control_flow_ops.with_dependencies([grad_updates], final_loss)

        #return train_tensor
        return grad_updates
コード例 #33
0
ファイル: lstm_ops.py プロジェクト: HowieYang0/notmnist-ex
    def _call_cell(self, inputs, initial_cell_state, initial_output, dtype,
                   sequence_length):
        """Run this LSTM on inputs, starting from the given state.

        Args:
          inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`.
          initial_cell_state: initial value for cell state, shape
            `[batch_size, self._num_units]`.
          initial_output: initial value of cell output, shape
            `[batch_size, self._num_units]`.
          dtype: The data type for the initial state and expected output. It is
            also the dtype of every variable created by this method.
          sequence_length: Specifies the length of each sequence in inputs. An
            `int32` or `int64` vector (tensor) size `[batch_size]`, values in
            `[0, time_len)` or None. When None, `time_len` itself is used as
            the maximum sequence length.

        Returns:
          A pair containing:

          - Cell state (cs): A `3-D` tensor of shape `[time_len, batch_size,
                             output_size]`
          - Output (h): A `3-D` tensor of shape `[time_len, batch_size,
                        output_size]`
        """

        inputs_shape = inputs.get_shape().with_rank(3)
        time_len = inputs_shape[0].value
        if time_len is None:
            # Time dimension is not known statically; read it at run time.
            time_len = array_ops.shape(inputs)[0]
        input_size = inputs_shape[2].value

        # One fused kernel for all four gates, applied to [input, h_prev].
        w = vs.get_variable(
            "weights", [input_size + self._num_units, self._num_units * 4],
            dtype=dtype)
        b = vs.get_variable("biases", [w.get_shape().with_rank(2)[1]],
                            initializer=init_ops.constant_initializer(0.0),
                            dtype=dtype)
        if self._use_peephole:
            wci = vs.get_variable("w_i_diag", [self._num_units], dtype=dtype)
            wco = vs.get_variable("w_o_diag", [self._num_units], dtype=dtype)
            wcf = vs.get_variable("w_f_diag", [self._num_units], dtype=dtype)
        else:
            # The op still takes peephole inputs; feed zeros when unused.
            wci = wco = wcf = array_ops.zeros([self._num_units], dtype=dtype)

        # Cast in both branches so seq_len_max always has the same dtype.
        # Previously a dynamic time_len (an int32 tensor coming from
        # array_ops.shape) was forwarded uncast, unlike the sequence_length
        # branch which was already converted to int64.
        if sequence_length is None:
            max_seq_len = math_ops.to_int64(time_len)
        else:
            max_seq_len = math_ops.to_int64(
                math_ops.reduce_max(sequence_length))

        # Only the cell-state and output sequences are returned; the other
        # five outputs of the op are discarded.
        _, cs, _, _, _, _, h = _lstm_ops_so.block_lstm(
            seq_len_max=max_seq_len,
            x=inputs,
            cs_prev=initial_cell_state,
            h_prev=initial_output,
            w=w,
            wci=wci,
            wco=wco,
            wcf=wcf,
            b=b,
            forget_bias=self._forget_bias,
            cell_clip=self._cell_clip,
            use_peephole=self._use_peephole)
        return cs, h
コード例 #34
0
    def __call__(self, input, state, scope, reuse=True):
        """Advance the memory-augmented cell by one step.

        The flat `state` is split along axis 1 into four pieces
        `(h0, r, w, M)`:

          - h0: controller internal state (size controller_state_size)
          - r:  read address weights (size memory_address_size)
          - w:  write address weights (size memory_address_size)
          - M:  memory contents (size memory_address_size *
                memory_content_size), viewed as a matrix whose rows index
                memory locations.

        All pieces are batched along axis 0.

        Args:
          input: tensor multiplied against the `U` variable, which has shape
            `[self._input_size, controller_state_size]`.
          state: flat state tensor laid out as described above.
          scope: variable scope under which the weights are looked up.
          reuse: forwarded to `tf.variable_scope`.

        Returns:
          A pair `(h0_new, state_new)` where `state_new` is the concatenation
          of the updated `(h0, r, w, M)` along axis 1.
        """
        # The scope gives a namespace to the weight variable names.
        with tf.variable_scope(scope, reuse=reuse):
            ctrl_size = self._controller_state_size
            n_addr = self._memory_address_size
            n_content = self._memory_content_size
            powers = self._powers  # allowed powers of the rotation matrix
            n_powers = len(powers)

            h0, r, w, M = tf.split(
                state, [ctrl_size, n_addr, n_addr, n_addr * n_content], 1)

            def _softmax_head(weight_name, bias_name, width):
                # Affine map of the controller state followed by a softmax.
                weight = tf.get_variable(weight_name, [ctrl_size, width])
                offset = tf.get_variable(bias_name, [width])
                return tf.nn.softmax(tf.matmul(h0, weight) + offset)

            # s and q mix the rotation powers for the write / read addresses;
            # e and a are the erase and add vectors over memory content.
            s = _softmax_head("W_s", "B_s", n_powers)
            q = _softmax_head("W_q", "B_q", n_powers)
            e = _softmax_head("W_e", "B_e", n_content)
            a = _softmax_head("W_a", "B_a", n_content)

            # Erase-then-add update of the memory matrix.
            # TODO: not sure if matrix_diag is slow
            M = tf.reshape(M, [-1, n_addr, n_content])
            erase_term = tf.matmul(M, tf.matrix_diag(e))
            write_column = tf.reshape(w, [-1, n_addr, 1])
            add_row = tf.reshape(a, [-1, 1, n_content])
            add_term = tf.matmul(write_column, add_row)
            M_new = tf.reshape(M - erase_term + add_term,
                               [-1, n_addr * n_content])

            # Rotate the read and write addresses. Contracting q (or s) with
            # the rotation tensor gives, per batch element, the mixture
            # sum_i q_i R^i, which is then applied to the address row vector.
            # tf.matmul batches over the leading index.
            Rtensor = rotation_tensor(n_addr, powers)

            read_row = tf.reshape(r, [-1, 1, n_addr])
            read_rotation = tf.tensordot(q, Rtensor, [[1], [0]])
            r_new = tf.matmul(read_row, read_rotation)

            write_row = tf.reshape(w, [-1, 1, n_addr])
            write_rotation = tf.tensordot(s, Rtensor, [[1], [0]])
            w_new = tf.matmul(write_row, write_rotation)

            r_new = tf.reshape(r_new, [-1, n_addr])
            w_new = tf.reshape(w_new, [-1, n_addr])

            H = tf.get_variable("H", [ctrl_size, ctrl_size])
            U = tf.get_variable("U", [self._input_size, ctrl_size])
            B = tf.get_variable(
                "B", [ctrl_size],
                initializer=init_ops.constant_initializer(0.0))
            # V converts a memory read into controller-state space.
            V = tf.get_variable("V", [n_content, ctrl_size])

            # Read from the *previous* memory with the *previous* read address.
            read_column = tf.reshape(r, [-1, n_addr, 1])
            Mr = tf.matmul(M, read_column, transpose_a=True)
            Mr = tf.reshape(Mr, [-1, n_content])

            h0_new = self._activation(
                tf.matmul(h0, H) + tf.matmul(Mr, V) + tf.matmul(input, U) + B)

            state_new = tf.concat([h0_new, r_new, w_new, M_new], 1)
        return h0_new, state_new
コード例 #35
0
        def func():
            """Build a greedy seq2seq decoder graph with fixed weights.

            Every weight is a hard-coded constant so the graph is
            deterministic. Returns four identity-wrapped tensors named
            "rnn_output", "sample_id", "state" and "sequence_lengths".
            """
            all_ones = np.ones([vocab_size, embedding_size], dtype=np.float32)
            embedding = tf.constant(all_ones)

            # Row i of the initial state is filled with the value i.
            per_example_rows = [
                np.ones([num_units], dtype=np.float32) * i
                for i in range(batch_size)
            ]
            state_val = np.reshape(per_example_rows, [batch_size, num_units])
            encoder_state = LSTMStateTuple(state_val, state_val)

            # Fixed LSTM weights (7 rows x 16 columns).
            cell_weights = np.array(
                [
                    [
                        -0.9592235, 0.42451382, 0.7437744, -0.54485345,
                        -0.80763197, 0.19663906, -0.22738314, 0.7762785,
                        0.7464578, 0.27227187, 0.7661047, 0.3596425,
                        -0.8528242, -0.89316916, -0.48946142, 0.87882376
                    ],
                    [
                        0.86586094, -0.75018406, 0.25992537, -0.69368935,
                        0.2515502, -0.26379275, 0.8954313, 0.5759742,
                        -0.7753072, -0.4388857, 0.95751476, -0.82085776,
                        -0.9467752, -0.37055635, -0.18570113, -0.86504984
                    ],
                    [
                        0.02305841, 0.3850248, 0.893692, -0.6866486,
                        -0.83703446, -0.9828961, 0.3989377, -0.59993076,
                        0.5330808, 0.6916566, 0.98468065, -0.6047034,
                        0.10823512, 0.34599304, -0.7834821, -0.7852347
                    ],
                    [
                        0.81643987, 0.31507468, -0.51369476, -0.12273741,
                        0.9701307, -0.79669356, -0.34496522, -0.88750815,
                        -0.17995334, 0.34707904, -0.09201193, 0.5363934,
                        -0.87229705, -0.5073328, -0.95894027, 0.5481839
                    ],
                    [
                        -0.84093595, -0.2341497, -0.86047816, 0.43370056,
                        -0.39073753, 0.37730122, 0.48026466, 0.3004985,
                        -0.60727096, 0.9043884, -0.37619448, 0.22490788,
                        -0.03739262, 0.61672115, 0.478899, -0.40780973
                    ],
                    [
                        0.31202435, -0.22045255, -0.6087918, 0.95115066,
                        0.00199413, -0.688287, -0.1103518, 0.4169519,
                        0.7913246, -0.9844644, -0.6193857, 0.38659644,
                        -0.4726901, -0.44781208, -0.5174744, -0.605911
                    ],
                    [
                        0.66771054, 0.34912825, 0.22297978, -0.4990945,
                        0.24057317, -0.5540829, 0.92277217, 0.74939895,
                        -0.35278273, -0.21587133, -0.28613377, -0.8794241,
                        -0.40119147, 0.67175174, -0.22741508, 0.37898326
                    ],
                ],
                dtype=np.float32)
            cell_initializer = init_ops.constant_initializer(cell_weights)

            # Fixed output-projection weights (4 rows x 5 columns).
            dense_weights = np.array(
                [
                    [0.56177187, -0.6233454, 0.73997784, 0.35032558,
                     0.6479795],
                    [0.6831174, -0.34233975, 0.39330363, 0.45177555,
                     -0.49649096],
                    [-0.98890066, 0.6175642, 0.09800482, -0.6721206,
                     0.48805737],
                    [0.19671416, 0.2623148, 0.742548, 0.13555217,
                     0.56009054],
                ],
                dtype=np.float32)
            dense_initializer = init_ops.constant_initializer(dense_weights)

            cell = LSTMCell(num_units=num_units,
                            initializer=cell_initializer,
                            state_is_tuple=True)

            # Every sequence in the batch starts from the go token.
            start_tokens = tf.tile([go_token], [batch_size])
            helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding=embedding,
                start_tokens=start_tokens,
                end_token=end_token)

            output_layer = tf.layers.Dense(
                vocab_size, kernel_initializer=dense_initializer)
            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=cell,
                helper=helper,
                initial_state=encoder_state,
                output_layer=output_layer)

            decoded = tf.contrib.seq2seq.dynamic_decode(
                decoder=decoder, maximum_iterations=6)
            outputs, state, sequence_lengths = decoded

            return (
                tf.identity(outputs.rnn_output, name="rnn_output"),
                tf.identity(outputs.sample_id, name="sample_id"),
                tf.identity(state, name="state"),
                tf.identity(sequence_lengths, name="sequence_lengths"),
            )
def _conv(args,
          filter_size,
          output_channels,
          bias,
          inh_mult=1.5,
          exc_mult=3,
          bias_start=0.0,
          activation=None,
          initializers=None,
          dtype=tf.float32):
    """Run the input and hidden-state 2-D convolutions of a recurrent cell.

    Four kernels are created and applied with stride 1 and "SAME" padding:
    one on the input and three on the hidden state (gates, inhibitory,
    excitatory). The inhibitory and excitatory kernels use spatially scaled
    filter sizes and carry an L1 regularizer.

    Args:
      args: a pair `(input, hidden)` of 4-D Tensors of form [n, h, w, c].
      filter_size: pair of ints (list or tuple), filter height and width.
      output_channels: int, base number of convolutional kernels. The input
        kernel produces `4 * output_channels` channels, the gate kernel
        `3 * output_channels`, the inhibitory/excitatory kernels
        `output_channels` each.
      bias: Whether to add biases to the convolution outputs.
      inh_mult: multiplier applied to `filter_size` (then truncated with
        `int`) to get the inhibitory kernel size.
      exc_mult: multiplier applied to `filter_size` (then truncated with
        `int`) to get the excitatory kernel size.
      bias_start: starting value to initialize the biases; 0 by default.
      activation: unused; kept for interface compatibility.
      initializers: unused; kept for interface compatibility.
      dtype: dtype handed to the bias constant initializers. The variables
        themselves are created as `tf.float32`.

    Returns:
      A 4-tuple `(res_input, res_hidden_gates, res_hidden_exc,
      res_hidden_inh)` of 4-D Tensors. When `bias` is false the same tuple is
      returned without the bias additions.

    Raises:
      ValueError: if `args` is not exactly two tensors or any of them is not
        4-D.
    """
    def get_initializer():
        return tf.glorot_uniform_initializer(
            seed=None,
            dtype=tf.float32,
        )

    # Validate arity *before* unpacking so the caller gets the intended
    # message instead of a generic unpacking error.
    if len(args) != 2:
        raise ValueError("Expected only two " "arguments (input, hidden)")
    input, hidden = args

    shapes = [a.get_shape().as_list() for a in args]
    for shape in shapes:
        if len(shape) != 4:
            raise ValueError("Expected only 4-D arrays of "
                             "form [n,h,w,c] for performing 2D convolutions")

    x_arg_depth = shapes[0][-1]
    h_arg_depth = shapes[1][-1]
    conv_op = nn_ops.conv2d
    strides = len(shapes[0]) * [1]

    # Accept both lists and tuples for filter_size.
    filter_size = list(filter_size)
    # TODO: Check extent of long-range inhibition
    f_h, f_w = filter_size
    filter_size_inh = [int(f_h * inh_mult), int(f_w * inh_mult)]
    filter_size_exc = [int(f_h * exc_mult), int(f_w * exc_mult)]

    # Input kernel.
    x_kernel = vs.get_variable("input_kernel",
                               filter_size +
                               [x_arg_depth, output_channels * 4],
                               initializer=get_initializer(),
                               dtype=tf.float32)

    # Hidden-state kernels. Their input-channel dimension must match the
    # hidden tensor's depth, not the input tensor's.
    h_kernel_gates = vs.get_variable("hidden_kernel_g",
                                     filter_size +
                                     [h_arg_depth, output_channels * 3],
                                     initializer=get_initializer(),
                                     dtype=tf.float32)
    # TODO: find optimal l1 strength
    h_kernel_inh = vs.get_variable(
        "hidden_kernel_inh",
        filter_size_inh + [h_arg_depth, output_channels],
        initializer=get_initializer(),
        regularizer=tf.contrib.layers.l1_regularizer(1e-2),
        dtype=tf.float32)
    h_kernel_exc = vs.get_variable(
        "hidden_kernel_exc",
        filter_size_exc + [h_arg_depth, output_channels],
        initializer=get_initializer(),
        regularizer=tf.contrib.layers.l1_regularizer(1e-2),
        dtype=tf.float32)

    res_x = conv_op(input, x_kernel, strides, padding="SAME")
    res_h_gates = conv_op(hidden, h_kernel_gates, strides, padding="SAME")
    res_h_inh = conv_op(hidden, h_kernel_inh, strides, padding="SAME")
    res_h_exc = conv_op(hidden, h_kernel_exc, strides, padding="SAME")

    if not bias:
        # Same ordering as the biased return below. The original code
        # returned an undefined name here.
        return (res_x, res_h_gates, res_h_exc, res_h_inh)

    bias_input = vs.get_variable("biases_input", [output_channels * 4],
                                 dtype=tf.float32,
                                 initializer=init_ops.constant_initializer(
                                     bias_start, dtype=dtype))
    bias_hidden_gates = vs.get_variable(
        "biases_hidden_g", [output_channels * 3],
        dtype=tf.float32,
        initializer=init_ops.constant_initializer(bias_start, dtype=dtype))
    bias_hidden_exc = vs.get_variable(
        "biases_hidden_e", [output_channels],
        dtype=tf.float32,
        initializer=init_ops.constant_initializer(bias_start, dtype=dtype))
    bias_hidden_inh = vs.get_variable(
        "biases_hidden_i", [output_channels],
        dtype=tf.float32,
        initializer=init_ops.constant_initializer(bias_start, dtype=dtype))

    res_input = tf.math.add(res_x, bias_input, name='conv_input_gates')
    res_hidden_gates = tf.math.add(res_h_gates,
                                   bias_hidden_gates,
                                   name='conv_hidden_gates')
    res_hidden_inh = tf.math.add(res_h_inh,
                                 bias_hidden_inh,
                                 name='conv_hidden_inh')
    res_hidden_exc = tf.math.add(res_h_exc,
                                 bias_hidden_exc,
                                 name='conv_hidden_exc')
    return (res_input, res_hidden_gates, res_hidden_exc, res_hidden_inh)
コード例 #37
0
 def testBasicLSTMCell(self):
   """Smoke-test BasicLSTMCell, stacked and standalone, in float16/float32."""
   for dtype in [dtypes.float16, dtypes.float32]:
     np_dtype = dtype.as_numpy_dtype
     with self.test_session(graph=ops.Graph()) as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         inputs = array_ops.zeros([1, 2], dtype=dtype)
         state = array_ops.zeros([1, 8], dtype=dtype)
         lstm_layers = [
             rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
             for _ in range(2)
         ]
         stacked = rnn_cell_impl.MultiRNNCell(
             lstm_layers, state_is_tuple=False)
         # No dtype is set until the cell has been called once.
         self.assertEqual(stacked.dtype, None)
         output, next_state = stacked(inputs, state)
         # Layer infers the input type.
         self.assertEqual(stacked.dtype, dtype.name)

         # Weights then bias, for cell_0 then cell_1.
         name_patterns = (
             "root/multi_rnn_cell/cell_0/basic_lstm_cell/%s:0",
             "root/multi_rnn_cell/cell_1/basic_lstm_cell/%s:0",
         )
         variable_kinds = (
             rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
             rnn_cell_impl._BIAS_VARIABLE_NAME,
         )
         expected_variable_names = [
             pattern % kind
             for pattern in name_patterns
             for kind in variable_kinds
         ]
         trainable_names = [v.name for v in stacked.trainable_variables]
         self.assertEqual(expected_variable_names, trainable_names)
         self.assertFalse(stacked.non_trainable_variables)

         sess.run([variables_lib.global_variables_initializer()])
         feed = {
             inputs.name: np.array([[1., 1.]]),
             state.name: 0.1 * np.ones([1, 8]),
         }
         res = sess.run([output, next_state], feed)
         self.assertEqual(len(res), 2)
         global_names = [v.name for v in variables_lib.global_variables()]
         self.assertEqual(expected_variable_names, global_names)
         # The numbers in results were not calculated, this is just a
         # smoke test.
         expected_output = np.array([[0.240, 0.240]], dtype=np_dtype)
         self.assertAllClose(res[0], expected_output, 1e-2)
         expected_mem = np.array(
             [[0.689, 0.689, 0.448, 0.448, 0.398, 0.398, 0.240, 0.240]],
             dtype=np_dtype)
         self.assertAllClose(res[1], expected_mem, 1e-2)
       with variable_scope.variable_scope(
           "other", initializer=init_ops.constant_initializer(0.5)):
         # Test BasicLSTMCell with input_size != num_units.
         inputs = array_ops.zeros([1, 3], dtype=dtype)
         state = array_ops.zeros([1, 4], dtype=dtype)
         single_cell = rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
         output, next_state = single_cell(inputs, state)
         sess.run([variables_lib.global_variables_initializer()])
         feed = {
             inputs.name: np.array([[1., 1., 1.]], dtype=np_dtype),
             state.name: 0.1 * np.ones([1, 4], dtype=np_dtype),
         }
         res = sess.run([output, next_state], feed)
         self.assertEqual(len(res), 2)
コード例 #38
0
def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2'):
  """Build the AlexNet v2 graph.

  Described in: http://arxiv.org/pdf/1404.5997v2.pdf
  Parameters from:
  github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
  layers-imagenet-1gpu.cfg

  Note: the fully connected layers are expressed as conv2d layers (fc6, fc7,
        fc8). To use in classification mode, resize input to 224x224. To use
        in fully convolutional mode, set spatial_squeeze to false.
        The LRN layers have been removed and the initializers changed from
        random_normal_initializer to xavier_initializer.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of output channels of the final `fc8` layer.
    is_training: forwarded to the two dropout layers.
    dropout_keep_prob: keep probability forwarded to the two dropout layers.
    spatial_squeeze: if true, axes 1 and 2 of the `fc8` output are squeezed
      away and the squeezed tensor is stored in the end-points dict.
    scope: Optional scope for the variables.

  Returns:
    A pair `(net, end_points)`: the output of the last layer (squeezed when
    `spatial_squeeze` is set) and a dict built from the outputs collection.
  """
  with variable_scope.variable_scope(
      scope, 'alexnet_v2', [inputs]) as var_scope:
    collection_name = var_scope.original_name_scope + '_end_points'
    # Layer types whose outputs are gathered into the end-points collection.
    collected_layers = [
        layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d
    ]
    with arg_scope(collected_layers, outputs_collections=[collection_name]):
      # Convolutional trunk.
      features = layers.conv2d(
          inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
      features = layers_lib.max_pool2d(features, [3, 3], 2, scope='pool1')
      features = layers.conv2d(features, 192, [5, 5], scope='conv2')
      features = layers_lib.max_pool2d(features, [3, 3], 2, scope='pool2')
      features = layers.conv2d(features, 384, [3, 3], scope='conv3')
      features = layers.conv2d(features, 384, [3, 3], scope='conv4')
      features = layers.conv2d(features, 256, [3, 3], scope='conv5')
      features = layers_lib.max_pool2d(features, [3, 3], 2, scope='pool5')

      # Classifier head: conv2d stands in for fully_connected layers.
      head_weights_init = trunc_normal(0.005)
      head_biases_init = init_ops.constant_initializer(0.1)
      with arg_scope([layers.conv2d],
                     weights_initializer=head_weights_init,
                     biases_initializer=head_biases_init):
        features = layers.conv2d(
            features, 4096, [5, 5], padding='VALID', scope='fc6')
        features = layers_lib.dropout(
            features,
            dropout_keep_prob,
            is_training=is_training,
            scope='dropout6')
        features = layers.conv2d(features, 4096, [1, 1], scope='fc7')
        features = layers_lib.dropout(
            features,
            dropout_keep_prob,
            is_training=is_training,
            scope='dropout7')
        # Final layer: no activation or normalizer, zero-initialized biases.
        features = layers.conv2d(
            features,
            num_classes, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            biases_initializer=init_ops.zeros_initializer(),
            scope='fc8')

      # Turn the outputs collection into a name -> tensor dict.
      end_points = utils.convert_collection_to_dict(collection_name)
      if spatial_squeeze:
        features = array_ops.squeeze(features, [1, 2], name='fc8/squeezed')
        end_points[var_scope.name + '/fc8'] = features
      return features, end_points
コード例 #39
0
ファイル: optimizers.py プロジェクト: instadeep/Mobile-ai
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  gradient_noise_scale=None,
                  gradient_multipliers=None,
                  clip_gradients=None,
                  moving_average_decay=None,
                  learning_rate_decay_fn=None,
                  update_ops=None,
                  variables=None,
                  name=None,
                  summaries=None):
    """Build a training op for `loss` using the requested optimizer.

    Args:
      loss: Tensor, 0 dimensional.
      global_step: Tensor, step counter for each update.
      learning_rate: float or 0-d Tensor, magnitude of update per training
        step. A float is wrapped in a non-trainable "learning_rate" variable.
      optimizer: string, class or optimizer instance, used as trainer.
        A string must be a key of OPTIMIZER_CLS_NAMES (e.g. 'SGD', 'Adam',
        'Adagrad'). A class must be a sub-class of the optimizer base class
        and is instantiated with `learning_rate=`. An instance is used as is.
      gradient_noise_scale: float or None, adds 0-mean normal noise scaled by
        this value.
      gradient_multipliers: dict of variables or variable names to floats.
        If present, gradients for specified variables will be multiplied by
        given constant.
      clip_gradients: float or `None`, clips gradients by this value.
      moving_average_decay: Deprecated. float or None, takes into account
        previous loss to make learning smoother due to outliers.
      learning_rate_decay_fn: function, takes `learning_rate` and
        `global_step` `Tensor`s, returns `Tensor`. Can be used to implement
        any learning rate decay functions, for example
        tf.train.exponential_decay.
      update_ops: list of update `Operation`s to execute at each step. If
        `None`, uses elements of UPDATE_OPS collection.
      variables: list of variables to optimize or `None` to use all trainable
        variables.
      name: The name for this operation is used to scope operations and
        summaries.
      summaries: List of internal quantities to visualize on tensorboard. If
        not set, defaults to ["loss", "learning_rate"]. The complete list is
        in OPTIMIZER_SUMMARIES.

    Returns:
      Training op: the loss tensor, checked for inf/nan, with a control
      dependency on the gradient update.

    Raises:
      ValueError: if the learning rate or the optimizer has a wrong type, or
        the optimizer name is unknown.
    """
    with vs.variable_op_scope([loss, global_step], name, "OptimizeLoss"):
        # Fall back to the graph's UPDATE_OPS collection.
        if update_ops is None:
            update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
        # Force the update ops to run before the loss is evaluated.
        if update_ops:
            with ops.control_dependencies(update_ops):
                barrier = control_flow_ops.no_op(name="update_barrier")
            loss = control_flow_ops.with_dependencies([barrier], loss)

        # Deprecated: exponential moving average of the loss.
        # TODO(b/30439864): moving_average_decay should be removed.
        if moving_average_decay is not None:
            logging.warn("'moving_average_decay' is deprecated. Please use "
                         "tensorboard's builtin averaging instead.")
            averager = train.ExponentialMovingAverage(
                moving_average_decay, name="avg")
            update_average = averager.apply([loss])
            logging_ops.scalar_summary("loss/mean", averager.average(loss))
            loss = control_flow_ops.with_dependencies([update_average], loss)

        # Resolve the learning rate to a scalar tensor or variable.
        is_scalar_tensor = (isinstance(learning_rate, ops.Tensor)
                            and learning_rate.get_shape().ndims == 0)
        if is_scalar_tensor:
            rate = learning_rate
        elif isinstance(learning_rate, float):
            rate = vs.get_variable(
                "learning_rate", [],
                trainable=False,
                initializer=init_ops.constant_initializer(learning_rate))
        else:
            raise ValueError("Learning rate should be 0d Tensor or float. "
                             "Got %s of type %s" %
                             (str(learning_rate), str(type(learning_rate))))

        if summaries is None:
            summaries = ["loss", "learning_rate"]

        # The learning-rate summary is only emitted when a decay is applied.
        if learning_rate_decay_fn is not None:
            rate = learning_rate_decay_fn(rate, global_step)
            if "learning_rate" in summaries:
                logging_ops.scalar_summary("learning_rate", rate)

        # Resolve `optimizer` (name, class or instance) to an instance.
        if isinstance(optimizer, six.string_types):
            if optimizer not in OPTIMIZER_CLS_NAMES:
                raise ValueError(
                    "Optimizer name should be one of [%s], you provided %s." %
                    (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
            trainer = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=rate)
        elif isinstance(optimizer, type) and issubclass(
                optimizer, optimizer_.Optimizer):
            trainer = optimizer(learning_rate=rate)
        elif isinstance(optimizer, optimizer_.Optimizer):
            trainer = optimizer
        else:
            raise ValueError("Unrecognized optimizer: should be string, "
                             "subclass of Optimizer or instance of "
                             "subclass of Optimizer. Got %s." % str(optimizer))

        # Default to every trainable variable.
        if variables is None:
            variables = vars_.trainable_variables()

        grads_and_vars = trainer.compute_gradients(loss, variables)

        # Optional post-processing, applied in order: noise, scaling, clipping.
        if gradient_noise_scale is not None:
            grads_and_vars = _add_scaled_noise_to_gradients(
                grads_and_vars, gradient_noise_scale)
        if gradient_multipliers is not None:
            grads_and_vars = _multiply_gradients(grads_and_vars,
                                                 gradient_multipliers)
        if clip_gradients is not None:
            grads_and_vars = _clip_gradients_by_norm(grads_and_vars,
                                                     clip_gradients)

        if "loss" in summaries:
            logging_ops.scalar_summary("loss", loss)

        # Per-variable histograms of gradients and gradient norms.
        for grad, var in grads_and_vars:
            # Sparse gradients expose their dense payload through `.values`.
            dense_grad = (grad.values
                          if isinstance(grad, ops.IndexedSlices) else grad)
            if dense_grad is None:
                continue
            if "gradients" in summaries:
                logging_ops.histogram_summary(var.name + "/gradients",
                                              dense_grad)
            if "gradient_norm" in summaries:
                logging_ops.histogram_summary(
                    var.name + "/gradient_norm",
                    clip_ops.global_norm([dense_grad]))

        apply_op = trainer.apply_gradients(grads_and_vars,
                                           global_step=global_step,
                                           name="train")
        # Fail loudly if the loss is inf or nan.
        checked_loss = array_ops.check_numerics(loss, "Loss is inf or nan")

        # Evaluating the returned tensor triggers the gradient update.
        return control_flow_ops.with_dependencies([apply_op], checked_loss)
コード例 #40
0
ファイル: model_functions.py プロジェクト: thoklei/rnn_memory
def _identity_kernel_initializer(config):
    """Return a constant initializer for an identity-style kernel.

    The kernel stacks, along axis 0, a ``(config.input_dim, config.layer_dim)``
    block sampled from a normal distribution (mean 0, standard deviation
    0.001) on top of a ``config.layer_dim`` identity matrix. Every call draws
    a fresh random block.
    """
    input_block = np.random.normal(loc=0.0,
                                   scale=0.001,
                                   size=(config.input_dim, config.layer_dim))
    kernel = np.concatenate((input_block, np.identity(config.layer_dim)), 0)
    return init_ops.constant_initializer(value=kernel, dtype=config.dtype)


def _identity_fast_weight_cell(config, activation):
    """Build a FastWeightCell that uses the identity-style kernel initializer."""
    return FastWeightCell(
        num_units=config.layer_dim,
        lam=config.fw_lambda,
        eta=config.fw_eta,
        layer_norm=config.fw_layer_norm,
        norm_gain=config.norm_gain,
        norm_shift=config.norm_shift,
        activation=activation,
        dtype=config.dtype,
        kernel_initializer=_identity_kernel_initializer(config))


def get_rnn_cell(cell_type, config):
    """Construct the recurrent cell selected by ``cell_type``.

    Args:
      cell_type: one of 'rnn', 'multi_rnn', 'lstm', 'multi_lstm', 'irnn',
        'multi_irnn', 'fast_weights', 'multi_fw', 'identity_fw',
        'hybrid_front', 'hybrid_back', 'dynamic_fast_weights' or
        'autoconceptor'.
      config: configuration object; only the attributes needed by the chosen
        branch are read (e.g. ``layer_dim``, ``dtype``, ``fw_lambda``).

    Returns:
      The constructed cell; the 'multi_*' and 'hybrid_*' types return a
      ``MultiRNNCell`` wrapping several cells.

    Raises:
      ValueError: if ``cell_type`` is not one of the names listed above.
    """
    if cell_type == 'rnn':
        cell = tf.contrib.rnn.BasicRNNCell(config.layer_dim,
                                           dtype=config.dtype)
    elif cell_type == 'multi_rnn':
        cell = tf.nn.rnn_cell.MultiRNNCell([
            tf.contrib.rnn.BasicRNNCell(config.layer_dim, dtype=config.dtype)
            for _ in range(4)
        ])
    elif cell_type == 'lstm':
        cell = tf.contrib.rnn.BasicLSTMCell(config.layer_dim,
                                            dtype=config.dtype)
    elif cell_type == 'multi_lstm':
        cell = tf.nn.rnn_cell.MultiRNNCell([
            tf.contrib.rnn.DropoutWrapper(
                tf.nn.rnn_cell.LSTMCell(config.layer_dim, dtype=config.dtype),
                output_keep_prob=config.dropout_keep_prob) for _ in range(2)
        ])
    elif cell_type == 'irnn':
        cell = IRNNCell(config.layer_dim, dtype=config.dtype)
    elif cell_type == 'multi_irnn':
        cell = tf.nn.rnn_cell.MultiRNNCell(
            [IRNNCell(config.layer_dim, dtype=config.dtype) for _ in range(4)])
    elif cell_type == 'fast_weights':
        cell = FastWeightCell(num_units=config.layer_dim,
                              lam=config.fw_lambda,
                              eta=config.fw_eta,
                              layer_norm=config.fw_layer_norm,
                              norm_gain=config.norm_gain,
                              norm_shift=config.norm_shift,
                              activation=config.fw_activation,
                              dtype=config.dtype)
    elif cell_type == 'multi_fw':
        # One independently initialized cell per layer (fresh random block
        # for each of them).
        cell = tf.nn.rnn_cell.MultiRNNCell([
            _identity_fast_weight_cell(config, tf.nn.relu)
            for _ in range(config.layers)
        ])
    elif cell_type == 'identity_fw':
        cell = _identity_fast_weight_cell(config, tf.nn.tanh)
    elif cell_type == 'hybrid_front':
        # Fast-weight cell first, followed by two IRNN cells.
        first_cell = _identity_fast_weight_cell(config, tf.nn.relu)
        cell = tf.nn.rnn_cell.MultiRNNCell([
            first_cell,
            IRNNCell(config.layer_dim),
            IRNNCell(config.layer_dim)
        ])
    elif cell_type == 'hybrid_back':
        # Two IRNN cells first, fast-weight cell last. The fast-weight cell
        # is still constructed before the IRNN cells.
        first_cell = _identity_fast_weight_cell(config, tf.nn.relu)
        cell = tf.nn.rnn_cell.MultiRNNCell([
            IRNNCell(config.layer_dim),
            IRNNCell(config.layer_dim), first_cell
        ])
    elif cell_type == 'dynamic_fast_weights':
        cell = DynamicFastWeightCell(num_units=config.layer_dim,
                                     sequence_length=config.input_length,
                                     lam=config.fw_lambda,
                                     eta=config.fw_eta,
                                     layer_norm=config.fw_layer_norm,
                                     norm_gain=config.norm_gain,
                                     norm_shift=config.norm_shift,
                                     activation=config.fw_activation,
                                     batch_size=config.batchsize,
                                     num_inner_loops=config.fw_inner_loops,
                                     dtype=config.dtype)
    elif cell_type == 'autoconceptor':
        cell = Autoconceptor(num_units=config.layer_dim,
                             alpha=config.c_alpha,
                             lam=config.c_lambda,
                             batchsize=config.batchsize,
                             activation=config.c_activation,
                             layer_norm=config.c_layer_norm,
                             dtype=config.dtype)
    else:
        raise ValueError("Cell type not understood.")

    return cell
コード例 #41
0
 def fc(x, num_units_out):
     """Apply a Dense layer with constant 0.1 kernel and 0.0 bias to `x`."""
     dense_layer = layers.Dense(
         num_units_out,
         kernel_initializer=init_ops.constant_initializer(0.1),
         bias_initializer=init_ops.constant_initializer(0.0))
     return dense_layer(x)
コード例 #42
0
    def __init__(self, params, tree_num, training):
        """Create the variables that hold the state of tree `tree_num`.

        Args:
          params: object providing `max_nodes`, `max_fertile_nodes`,
            `num_splits_to_consider`, `num_output_columns` and `regression`.
          tree_num: passed to `self.get_tree_name` to build variable names.
          training: when true, the accumulator, candidate-split and
            second-order statistics are created as well.
        """

        def filled_variable(label, fill_value, shape, **extra):
            # Variable named after the tree, initialized to a constant value.
            return variable_scope.get_variable(
                name=self.get_tree_name(label, tree_num),
                shape=shape,
                initializer=init_ops.constant_initializer(fill_value),
                **extra)

        def zero_constant(label):
            # Scalar 0.0 placeholder used when second-order stats are unused.
            return constant_op.constant(
                0.0, name=self.get_tree_name(label, tree_num))

        node_shape = [params.max_nodes]

        self.tree = filled_variable(
            'tree', -2, [params.max_nodes, 2], dtype=dtypes.int32)
        self.tree_thresholds = filled_variable(
            'tree_thresholds', -1.0, node_shape)
        # Initialized from a constant tensor rather than a shape + fill value.
        self.end_of_tree = variable_scope.get_variable(
            name=self.get_tree_name('end_of_tree', tree_num),
            dtype=dtypes.int32,
            initializer=constant_op.constant([1]))
        self.start_epoch = filled_variable(
            'start_epoch', 0, node_shape, dtype=dtypes.int32)

        if training:
            split_shape = [
                params.max_fertile_nodes, params.num_splits_to_consider
            ]
            self.node_to_accumulator_map = filled_variable(
                'node_to_accumulator_map', -1, node_shape,
                dtype=dtypes.int32)
            self.accumulator_to_node_map = filled_variable(
                'accumulator_to_node_map', -1, [params.max_fertile_nodes],
                dtype=dtypes.int32)

            self.candidate_split_features = filled_variable(
                'candidate_split_features', -1, split_shape,
                dtype=dtypes.int32)
            self.candidate_split_thresholds = filled_variable(
                'candidate_split_thresholds', 0.0, split_shape)

        # Statistics shared by classification and regression.
        self.node_sums = filled_variable(
            'node_sums', 0.0, [params.max_nodes, params.num_output_columns])

        if not training:
            return

        self.candidate_split_sums = filled_variable(
            'candidate_split_sums', 0.0, [
                params.max_fertile_nodes, params.num_splits_to_consider,
                params.num_output_columns
            ])
        self.accumulator_sums = filled_variable(
            'accumulator_sums', -1.0,
            [params.max_fertile_nodes, params.num_output_columns])

        # Regression also tracks second order stats.
        if params.regression:
            self.node_squares = filled_variable(
                'node_squares', 0.0,
                [params.max_nodes, params.num_output_columns])

            self.candidate_split_squares = filled_variable(
                'candidate_split_squares', 0.0, [
                    params.max_fertile_nodes,
                    params.num_splits_to_consider,
                    params.num_output_columns
                ])

            self.accumulator_squares = filled_variable(
                'accumulator_squares', -1.0,
                [params.max_fertile_nodes, params.num_output_columns])
        else:
            self.node_squares = zero_constant('node_squares')
            self.candidate_split_squares = zero_constant(
                'candidate_split_squares')
            self.accumulator_squares = zero_constant('accumulator_squares')
コード例 #43
0
def _my_linear(args,
               output_size,
               cell_init="random",
               bias=True,
               bias_start=0.0,
               scope=None):
    """BK: added a new option "cell_init" to _linear.

  args: It is assumed to take the form of [inputs, state]. state can be r*state in case of GRU and h in case of BasicLSTM. 
  output_size: It is usually the same as the state size, or n_units, but in case of LSTM it is 4*n_units. I will assume output_size is always
  an integer multiple of state size.
  """

    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    # Calculate the total size of arguments on dimension 1.
    arg_sizes = []
    shapes = [a.get_shape().as_list() for a in args]
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError("Linear is expecting 2D arguments: %s" %
                             str(shapes))
        if not shape[1]:
            raise ValueError("Linear expects shape[1] of arguments: %s" %
                             str(shapes))
        else:
            arg_sizes += [shape[1]]

    dtype = [a.dtype for a in args][0]

    #Check if output_size is an integer multiple of state size.
    if (output_size % arg_sizes[1]) != 0:
        raise ValueError("output_size must be an integer multiple of n_units.")
    r = int(
        output_size / arg_sizes[1]
    )  #Even though both numerator and denominator are integers, their ratio is float type. So, we need to cast it to int.

    total_arg_size = sum(arg_sizes)
    # Now the computation.
    with vs.variable_scope(scope or "Linear"):
        if cell_init == "random":
            with vs.variable_scope(scope or "random"):
                matrix = vs.get_variable("Matrix",
                                         [total_arg_size, output_size],
                                         dtype=dtype)
                if len(args) == 1:
                    res = math_ops.matmul(args[0], matrix)
                else:
                    res = math_ops.matmul(array_ops.concat(1, args), matrix)
                if not bias:
                    return res
                bias_term = vs.get_variable(
                    "Bias", [output_size],
                    dtype=dtype,
                    initializer=init_ops.constant_initializer(bias_start,
                                                              dtype=dtype))
                return res + bias_term
        elif cell_init == "identity":
            with vs.variable_scope(scope or "identity"):
                #Below is an obsolete code that didn't concatenate matrices to make computations faster.

                # inputs_matrix = vs.get_variable("Inputs_Matrix", [arg_sizes[0], output_size], dtype=dtype)
                # if arg_sizes[1] == output_size:
                #   init = tf.constant_initializer(np.identity(output_size))
                #   state_matrix = vs.get_variable("State_Matrix", [output_size, output_size], initializer=init, dtype=dtype)
                # else:
                #   raise ValueError("state size and output size don't match.")
                # #Both inputs_result and state_results are tensors of shape (n_batch, output_size)
                # inputs_result = math_ops.matmul(args[0], inputs_matrix)
                # state_result = math_ops.matmul(args[1], state_matrix)
                # res = inputs_result + state_result

                list_id = [np.identity(arg_sizes[1]) for _ in range(r)]
                concat_id = np.concatenate(list_id,
                                           1)  # (arg_sizes[1], output_size)

                epsilon = np.sqrt(6.0 / (arg_sizes[0] + output_size))
                xavier_part = np.random.uniform(-epsilon, epsilon,
                                                (arg_sizes[0], output_size))

                total_init_matrix = np.concatenate(
                    [xavier_part, concat_id],
                    0)  # (arg_sizes[0]+arg_sizes[1], output_size)
                init = tf.constant_initializer(total_init_matrix)

                my_matrix = vs.get_variable("my_matrix",
                                            [total_arg_size, output_size],
                                            initializer=init,
                                            dtype=dtype)
                if len(args) == 1:
                    res = math_ops.matmul(args[0], my_matrix)
                else:
                    res = math_ops.matmul(array_ops.concat(1, args), my_matrix)

                if not bias:
                    return res
                bias_term = vs.get_variable(
                    "Bias", [output_size],
                    dtype=dtype,
                    initializer=init_ops.constant_initializer(bias_start,
                                                              dtype=dtype))
                return res + bias_term
コード例 #44
0
def _line_sep(args,
              output_size,
              bias,
              bias_initializer=None,
              kernel_initializer=None):
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    # Calculate the total size of arguments on dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape() for a in args]
    for shape in shapes:
        if shape.ndims != 2:
            raise ValueError("linear is expecting 2D arguments: %s" % shapes)
        if shape[1].value is None:
            raise ValueError("linear expects shape[1] to \
                             be provided for shape %s, "
                             "but saw %s" % (shape, shape[1]))
        else:
            total_arg_size += shape[1].value

    dtype = [a.dtype for a in args][0]

    # Now the computation.
    scope = vs.get_variable_scope()
    with vs.variable_scope(scope) as outer_scope:
        [x, h] = args

        x_size = x.get_shape().as_list()[1]
        W_xh = tf.get_variable(
            'W_xh', [x_size, h_size * 4], initializer=weights_initializer
           )
        W_ih = tf.get_variable(
            'W_ih', [h_size, h_size], initializer=weights_initializer
            )
        W_jh = tf.get_variable(
            'W_jh', [h_size, h_size], initializer=weights_initializer
            )
        W_fh = tf.get_variable(
            'W_fh', [h_size, h_size], initializer=weights_initializer
            )
        W_oh = tf.get_variable(
            'W_oh', [h_size, h_size], initializer=weights_initializer
            )

        xh = tf.matmul(x, W_xh)

        ih = tf.matmul(h, W_ih) + cn_xh[:, :h_size]
        jh = tf.matmul(h, W_jh) + cn_xh[:, h_size:h_size * 2]
        fh = tf.matmul(h, W_fh) + cn_xh[:, h_size * 2:h_size * 3]
        oh = tf.matmul(h, W_oh) + cn_xh[:, h_size * 3:]

        if not bias:
            return ih, jh, fh, oh
        with vs.variable_scope(outer_scope) as inner_scope:
            inner_scope.set_partitioner(None)
            if bias_initializer is None:
                bias_initializer = init_ops.constant_initializer(
                    0.0, dtype=dtype)
            biases = vs.get_variable(
                _BIAS_VARIABLE_NAME, [output_size],
                dtype=dtype,
                initializer=bias_initializer)
        return nn_ops.bias_add(res, biases)
コード例 #45
0
def LastValueQuantize(inputs,
                      per_channel=False,
                      init_min=-6.0,
                      init_max=6.0,
                      vars_collection=ops.GraphKeys.MOVING_AVERAGE_VARIABLES,
                      name_prefix='LastValueQuant',
                      reuse=None,
                      is_training=True,
                      num_bits=8,
                      narrow_range=False):
    """Fake-quantizes `inputs` using the range of the most recent batch.

  Two non-trainable variables, 'min' and 'max', hold the quantization
  interval. In training mode they are overwritten with the minimum and
  maximum of the current `inputs` (each widened to include 0.0) and the
  assigned values are used for quantization; otherwise the stored values are
  used as they are.

  Args:
    inputs: a tensor containing values to be quantized.
    per_channel: (Optional) a boolean specifying whether to use different
      quantization ranges per output channel (last dimension).
    init_min: a float scalar, the initial value for variable min.
    init_max: a float scalar, the initial value for variable max.
    vars_collection: (Optional) collection where to store variables for
      quantization interval ends.
    name_prefix: name_prefix for created nodes.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    is_training: Whether the op is applied to a training or eval graph.
    num_bits: Number of bits to use for quantization, must be between 2 and 8.
    narrow_range: Whether to use the narrow quantization range
      [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
  Returns:
    a tensor containing quantized values.
  """
    with variable_scope.variable_scope(None,
                                       default_name=name_prefix,
                                       values=[inputs],
                                       reuse=reuse) as scope:
        scope.set_partitioner(None)
        input_shape = inputs.get_shape()
        input_dim = len(input_shape)

        # Scalar range by default; one entry per channel when requested.
        min_max_shape = []
        if per_channel:
            # Only support quantizing 1-, 2- and 4-dimensional tensors.
            assert input_dim in [1, 2, 4
                                 ], ('Expected 1D, 2D or 4D input, was: %s in '
                                     ' scope: %s' % (input_shape, name_prefix))
            min_max_shape = [input_shape[-1]]

        def _range_variable(var_name, initial_value):
            # Non-trainable interval end stored in `vars_collection`.
            return model_variable(
                var_name,
                shape=min_max_shape,
                initializer=init_ops.constant_initializer(initial_value),
                collections=[vars_collection],
                trainable=False)

        min_var = _range_variable('min', init_min)
        max_var = _range_variable('max', init_max)

        def _quantize(range_min, range_max):
            return _FakeQuantWithMinMaxVars(inputs,
                                            range_min,
                                            range_max,
                                            per_channel=per_channel,
                                            num_bits=num_bits,
                                            narrow_range=narrow_range)

        if not is_training:
            return _quantize(min_var, max_var)

        # Per-channel statistics reduce over every dimension but the last.
        if per_channel:
            if input_dim == 2:
                reduce_dims = [0]
            elif input_dim == 4:
                reduce_dims = [0, 1, 2]

        def _batch_extreme(reduce_fn, op_name):
            if not per_channel:
                return reduce_fn(inputs, name=op_name)
            if input_dim >= 2:
                return reduce_fn(inputs,
                                 reduction_indices=reduce_dims,
                                 name=op_name)
            # 1D per-channel input is used as is.
            return inputs

        # TFLite requires that 0.0 is always in the [min; max] range.
        batch_min = math_ops.minimum(
            _batch_extreme(math_ops.reduce_min, 'BatchMin'), 0.0)
        assign_min = state_ops.assign(min_var, batch_min, name='AssignMinLast')

        # Same zero-inclusion requirement for the upper end.
        batch_max = math_ops.maximum(
            _batch_extreme(math_ops.reduce_max, 'BatchMax'), 0.0)
        assign_max = state_ops.assign(max_var, batch_max, name='AssignMaxLast')

        return _quantize(assign_min, assign_max)
コード例 #46
0
ファイル: dau_conv.py プロジェクト: skokec/DAU-ConvNet-TF
    def __init__(self, filters,
                 dau_units,
                 max_kernel_size,
                 strides=1,
                 data_format='channels_first',
                 activation=None,
                 use_bias=True,
                 weight_initializer=init_ops.random_normal_initializer(stddev=0.1),
                 mu1_initializer=None,
                 mu2_initializer=None,
                 sigma_initializer=None,
                 bias_initializer=init_ops.zeros_initializer(),
                 weight_regularizer=None,
                 mu1_regularizer=None,
                 mu2_regularizer=None,
                 sigma_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 weight_constraint=None,
                 mu1_constraint=None,
                 mu2_constraint=None,
                 sigma_constraint=None,
                 bias_constraint=None,
                 trainable=True,
                 mu_learning_rate_factor=500,
                 dau_unit_border_bound=0.01,
                 dau_sigma_trainable=False,
                 name=None,
                 **kwargs):
        """Store the layer configuration; no variables are created here.

        Args:
          filters: stored as `self.filters`.
          dau_units: normalized with `utils.normalize_tuple` to `self.rank`
            (2) entries; their product is stored as `num_dau_units_all`.
          max_kernel_size: stored; `self.padding` is floor(max_kernel_size / 2).
          strides, activation, use_bias: stored unchanged.
          data_format: normalized with `utils.normalize_data_format`.
          weight_*, mu1_*, mu2_*, sigma_*, bias_* (initializer, regularizer,
            constraint): stored unchanged. `mu1_initializer` and
            `mu2_initializer` are required; `sigma_initializer` defaults to a
            constant 0.5.
          activity_regularizer, trainable, name, **kwargs: forwarded to the
            base class constructor.
          mu_learning_rate_factor, dau_unit_border_bound, dau_sigma_trainable:
            stored unchanged for use elsewhere in the class.

        Raises:
          ValueError: if `mu1_initializer` or `mu2_initializer` is None.
        """
        super(DAUConv2dTF, self).__init__(trainable=trainable, name=name,
                                    activity_regularizer=activity_regularizer,
                                    **kwargs)
        self.rank = 2
        self.filters = filters
        self.dau_units = utils.normalize_tuple(dau_units, self.rank, 'dau_components')
        self.max_kernel_size = max_kernel_size
        self.padding = np.floor(self.max_kernel_size/2.0)
        self.strides = strides
        self.data_format = utils.normalize_data_format(data_format)
        self.activation = activation
        self.use_bias = use_bias
        self.bias_initializer = bias_initializer
        self.bias_regularizer = bias_regularizer
        self.bias_constraint = bias_constraint

        self.weight_initializer = weight_initializer
        self.weight_regularizer = weight_regularizer
        self.weight_constraint = weight_constraint

        self.mu1_initializer = mu1_initializer
        self.mu1_regularizer = mu1_regularizer
        self.mu1_constraint = mu1_constraint

        self.mu2_initializer = mu2_initializer
        self.mu2_regularizer = mu2_regularizer
        self.mu2_constraint = mu2_constraint

        self.sigma_initializer = sigma_initializer
        self.sigma_regularizer = sigma_regularizer
        self.sigma_constraint = sigma_constraint

        # The unit offsets have no usable default, so both initializers are
        # mandatory. ValueError is still caught by `except Exception`.
        if self.mu1_initializer is None:
            raise ValueError("Must initialize MU1")
        if self.mu2_initializer is None:
            raise ValueError("Must initialize MU2")

        if self.sigma_initializer is None:
            self.sigma_initializer = init_ops.constant_initializer(0.5)

        self.mu_learning_rate_factor = mu_learning_rate_factor

        self.input_spec = base.InputSpec(ndim=self.rank + 2)

        self.dau_unit_border_bound = dau_unit_border_bound
        self.num_dau_units_all = np.int32(np.prod(self.dau_units))

        # Variable slots, left empty by the constructor.
        self.dau_weights = None
        self.dau_mu1 = None
        self.dau_mu2 = None
        self.dau_sigma = None

        self.dau_sigma_trainable = dau_sigma_trainable
コード例 #47
0
 def initialize_graph(self, input_statistics=None):
   """Run the base class initialization, then create the scalar "prior" variable."""
   super(StubTimeSeriesModel, self).initialize_graph(
       input_statistics=input_statistics)
   zero_initializer = init_ops.constant_initializer(0.)
   self.prior_var = variable_scope.get_variable(
       "prior", [], initializer=zero_initializer)
コード例 #48
0
def overfeat(inputs,
             num_classes=1000,
             is_training=True,
             dropout_keep_prob=0.5,
             spatial_squeeze=True,
             scope='overfeat'):
    """Builds the OverFeat network.

  Reference:
    OverFeat: Integrated Recognition, Localization and Detection using
    Convolutional Networks
    Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and
    Yann LeCun, 2014
    http://arxiv.org/abs/1312.6229

  Note: The fully connected layers are implemented as conv2d layers. For
        classification, resize the input to 231x231. For fully convolutional
        use, set spatial_squeeze to false.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.

  """
    conv = layers.conv2d
    pool = layers_lib.max_pool2d
    dropout = layers_lib.dropout

    with variable_scope.variable_scope(scope, 'overfeat', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Outputs of conv2d, fully_connected and max_pool2d are collected.
        collected_ops = [
            layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d
        ]
        with arg_scope(collected_ops,
                       outputs_collections=end_points_collection):
            # Feature extractor.
            net = conv(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
            net = pool(net, [2, 2], scope='pool1')
            net = conv(net, 256, [5, 5], padding='VALID', scope='conv2')
            net = pool(net, [2, 2], scope='pool2')
            net = conv(net, 512, [3, 3], scope='conv3')
            net = conv(net, 1024, [3, 3], scope='conv4')
            net = conv(net, 1024, [3, 3], scope='conv5')
            net = pool(net, [2, 2], scope='pool5')

            # Classifier head: conv2d layers stand in for fully connected ones.
            with arg_scope(
                [layers.conv2d],
                    weights_initializer=trunc_normal(0.005),
                    biases_initializer=init_ops.constant_initializer(0.1)):
                net = conv(net, 3072, [6, 6], padding='VALID', scope='fc6')
                net = dropout(net,
                              dropout_keep_prob,
                              is_training=is_training,
                              scope='dropout6')
                net = conv(net, 4096, [1, 1], scope='fc7')
                net = dropout(net,
                              dropout_keep_prob,
                              is_training=is_training,
                              scope='dropout7')
                net = conv(net,
                           num_classes, [1, 1],
                           activation_fn=None,
                           normalizer_fn=None,
                           biases_initializer=init_ops.zeros_initializer(),
                           scope='fc8')

            # Turn the collected outputs into an end_points dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            if spatial_squeeze:
                net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
コード例 #49
0
ファイル: loss_test.py プロジェクト: evan01/Paw-sitive
    def testSequenceLoss(self):
        """Checks sequence_loss for every combination of averaging flags.

        The logits at each timestep are identical across the 5 classes, so
        every per-step cross-entropy equals ln(5) ~= 1.60944.
        """
        with self.test_session() as sess:
            with variable_scope.variable_scope(
                    'root', initializer=init_ops.constant_initializer(0.5)):
                batch_size = 2
                sequence_length = 3
                number_of_classes = 5
                expected_loss = 1.60944

                logits = array_ops.stack([
                    constant_op.constant(step + 0.5,
                                         shape=[batch_size, number_of_classes])
                    for step in range(sequence_length)
                ], axis=1)
                targets = array_ops.stack([
                    constant_op.constant(step, dtypes.int32,
                                         shape=[batch_size])
                    for step in range(sequence_length)
                ], axis=1)
                weights = array_ops.stack([
                    constant_op.constant(1.0, shape=[batch_size])
                    for _ in range(sequence_length)
                ], axis=1)

                # (average_across_timesteps, average_across_batch, expected)
                cases = [
                    (True, True, expected_loss),
                    (False, True,
                     np.ones((sequence_length)) * expected_loss),
                    (True, False, np.ones((batch_size)) * expected_loss),
                    (False, False,
                     np.ones((batch_size, sequence_length)) * expected_loss),
                ]
                for across_timesteps, across_batch, expected in cases:
                    loss_op = loss.sequence_loss(
                        logits,
                        targets,
                        weights,
                        average_across_timesteps=across_timesteps,
                        average_across_batch=across_batch)
                    res = sess.run(loss_op)
                    self.assertAllClose(expected, res)
コード例 #50
0
def convolve_inputs(inputs, batch_size, height, width, channels, filters):
    """Apply a 1x1x1 3-D convolution plus bias, then flatten trailing axes.

    Fetches a 'Weights' kernel of shape [1, 1, 1, channels, filters] and a
    'Biases' vector of length `filters` (constant-initialised to 0.0) via
    `get_variable`, convolves with unit strides and 'SAME' padding, adds the
    bias and reshapes the result.

    Args:
      inputs: tensor handed to `conv3d` (presumably rank 5 with `channels`
        as its last axis -- confirm against callers).
      batch_size: leading dimension of the returned tensor.
      height: combined with `width` and `filters` to size the last axis.
      width: see `height`.
      channels: input-channel dimension of the kernel.
      filters: output-channel dimension of the kernel and the bias length.

    Returns:
      The biased convolution output reshaped to
      [batch_size, -1, height * width * filters].
    """
    kernel_shape = [1, 1, 1, channels, filters]
    kernel = get_variable('Weights', kernel_shape)
    bias = get_variable('Biases', [filters],
                        initializer=constant_initializer(0.0))

    unit_strides = [1, 1, 1, 1, 1]
    biased = conv3d(inputs, kernel, unit_strides, 'SAME') + bias

    flat_size = height * width * filters
    return reshape(biased, [batch_size, -1, flat_size])
コード例 #51
0
ファイル: calib.py プロジェクト: pyjennings/tf_pg
def _InsertCalibOp(context,
                   name,
                   producer,
                   consumers,
                   vars_collection=ops.GraphKeys.GLOBAL_VARIABLES,
                   producer_scope=None,
                   consumer_scope=None):
    """Builds ops tracking the running min/max of a producer's first output.

    Args:
      context: Context where producer and consumer operations are nested.
      name: Name for the new calibration op within the context.
      producer: Operation whose first output is observed.
      consumers: Consumer operations of the producer. They are only inspected
        for the `consumer_scope` check.
      vars_collection: Passed (wrapped in a list) to `_ModelVariable` as the
        collections of the 'min' and 'max' variables; a falsy value results
        in an empty list.
      producer_scope: If set, nothing is built unless the producer's name
        starts with this prefix.
      consumer_scope: If set, nothing is built unless every consumer's name
        starts with this prefix.

    Returns:
      A `(range_min, range_max)` pair of tensors, or `None` when one of the
      scope restrictions causes the insertion to be skipped.
    """
    if producer_scope and not producer.name.startswith(producer_scope):
        logging.info(
            '_InsertCalibOp ignores context="%s" name="%s" '
            'because producer "%s" is not in scope "%s"', context, name,
            producer.name, producer_scope)
        return None

    if consumer_scope:
        # A single out-of-scope consumer is enough to skip the insertion.
        for candidate in consumers:
            if not candidate.name.startswith(consumer_scope):
                logging.info(
                    '_InsertCalibOp context="%s" name="%s" ignores '
                    'consumer "%s" because it is not in scope "%s"', context,
                    name, candidate.name, consumer_scope)
                return None

    name_prefix = _AddContextToName(context, name)

    # Strip the enclosing name scope so it is not repeated in the op names.
    enclosing_scope = ops.get_name_scope()
    if enclosing_scope:
        name_prefix = common.DropStringPrefix(name_prefix,
                                              enclosing_scope + '/')

    inputs = producer.outputs[0]
    # NOTE: the guard against inserting duplicate calibration ops (an early
    # return when `_FollowedByFakeQuant(inputs)` holds) is disabled here.

    vars_collections = []
    if vars_collection:
        vars_collections.append(vars_collection)

    with variable_scope.variable_scope(None,
                                       default_name=name_prefix,
                                       values=[inputs]):
        # Currently no per channel: a single scalar min and max.
        min_max_shape = []
        # The +inf / -inf starting values mean the first observed batch
        # always replaces them in the minimum/maximum below.
        min_var = _ModelVariable(
            'min',
            shape=min_max_shape,
            initializer=init_ops.constant_initializer(float('inf')),
            collections=vars_collections,
            trainable=False)
        max_var = _ModelVariable(
            'max',
            shape=min_max_shape,
            initializer=init_ops.constant_initializer(-float('inf')),
            collections=vars_collections,
            trainable=False)

        batch_min = math_ops.reduce_min(inputs, name='BatchMin')
        batch_max = math_ops.reduce_max(inputs, name='BatchMax')

        range_min = math_ops.minimum(batch_min,
                                     min_var,
                                     name=name_prefix + '/range_min')
        range_max = math_ops.maximum(batch_max,
                                     max_var,
                                     name=name_prefix + '/range_max')

    return range_min, range_max
コード例 #52
0
    def _batchnorm(self, input_x, scope,
                   gamma_value, beta_value,
                   moving_mean_value, moving_variance_value,
                   is_training):
        """Batch-normalize `input_x` with variables seeded from given values.

        Four float32 variables ('gamma', 'beta', 'moving_mean',
        'moving_variance') are obtained through `gap_finetune.get_variable`
        inside `scope`, each initialised from the matching `*_value`
        argument and shaped like it.

        Args:
          input_x: tensor to normalize; batch moments are taken over
            axes [0, 1, 2].
          scope: variable scope under which the variables are created.
          gamma_value: initial scale; must expose `.shape`.
          beta_value: initial offset; must expose `.shape`.
          moving_mean_value: initial moving mean; must expose `.shape`.
          moving_variance_value: initial moving variance; must expose
            `.shape`.
          is_training: predicate given to `cond`. When true, batch moments
            are used and the moving averages are updated with decay 0.9;
            otherwise the stored moving statistics are used.

        Returns:
          The normalized tensor (variance epsilon 0.001).
        """

        def _seeded_variable(var_name, initial_value):
            # One float32 variable initialised to, and shaped like, the value.
            seed = init_ops.constant_initializer(initial_value,
                                                 dtypes.float32)
            return gap_finetune.get_variable(
                name=var_name,
                shape=initial_value.shape,
                dtype=dtypes.float32,
                initializer=seed,
                gap=self.gap,
                gap_vars=self.gap_vars)

        with variable_scope.variable_scope(scope):
            gamma = _seeded_variable('gamma', gamma_value)
            beta = _seeded_variable('beta', beta_value)
            moving_mean = _seeded_variable('moving_mean', moving_mean_value)
            moving_variance = _seeded_variable('moving_variance',
                                               moving_variance_value)

            def _batch_stats_with_refresh():
                # Batch statistics, returned only after both moving averages
                # have been refreshed.
                batch_mean, batch_variance = nn_impl.moments(
                    input_x, [0, 1, 2], name='moments')
                refresh_ops = [
                    moving_averages.assign_moving_average(
                        moving_mean, batch_mean, 0.9),
                    moving_averages.assign_moving_average(
                        moving_variance, batch_variance, 0.9),
                ]
                with ops.control_dependencies(refresh_ops):
                    return (array_ops.identity(batch_mean),
                            array_ops.identity(batch_variance))

            def _stored_stats():
                return moving_mean, moving_variance

            mean, variance = control_flow_ops.cond(
                is_training, _batch_stats_with_refresh, _stored_stats)

            return nn_impl.batch_normalization(input_x, mean, variance, beta,
                                               gamma, 0.001)
コード例 #53
0
 def _create_slots(self, var_list):
     """Add an 'accumulator' slot for every variable in `var_list`.

     Each slot uses a constant initializer holding
     `self._initial_accumulator_value`, typed with the variable's base dtype.
     """
     for variable in var_list:
         accumulator_init = init_ops.constant_initializer(
             self._initial_accumulator_value,
             dtype=variable.dtype.base_dtype)
         self.add_slot(variable, 'accumulator', accumulator_init)
コード例 #54
0
def alexnet_v2(inputs,
               is_training=True,
               dropout_keep_prob=0.5,
               scope='alexnet_v2'):
    """Modified AlexNet v2 with a transposed-convolution expanding path.

    Based on the network described in http://arxiv.org/pdf/1404.5997v2.pdf.
    The fully connected layers are expressed as conv2d layers ('fc6'-'fc8')
    and are followed by a stack of conv2d_transpose layers
    ('convt9'-'convt18') for semantic segmentation.

    Args:
      inputs: tensor fed to the first convolution (documented upstream as
        size [batch_size, 227, 227, 3]).
      is_training: forwarded to both dropout layers.
      dropout_keep_prob: keep probability forwarded to both dropout layers.
      scope: not referenced by the body; kept so the signature is unchanged.

    Returns:
      The output of the last transposed convolution ('convt18').
    """
    # Contracting path as (positional conv2d arguments, layer scope).
    # Arguments stay positional so that any enclosing arg_scope interacts
    # with these calls exactly as it would with spelled-out calls.
    encoder_layers = (
        ((96, [11, 11], 4), 'conv1'),
        ((192, 3, 2), 'pconv1'),
        ((192, [5, 5]), 'conv2'),
        ((384, 3, 2), 'pconv2'),
        ((384, [3, 3]), 'conv3'),
        ((384, [3, 3]), 'conv4'),
        ((256, [3, 3]), 'conv5'),
    )
    net = inputs
    for conv_args, layer_scope in encoder_layers:
        net = layers.conv2d(net,
                            *conv_args,
                            padding='VALID',
                            scope=layer_scope)

    # Fully connected layers written as convolutions, with dropout between.
    with arg_scope([layers.conv2d],
                   weights_initializer=trunc_normal(0.005),
                   biases_initializer=init_ops.constant_initializer(0.1)):
        net = layers.conv2d(net, 4096, [5, 5], padding='VALID', scope='fc6')
        net = layers_lib.dropout(net,
                                 dropout_keep_prob,
                                 is_training=is_training,
                                 scope='dropout6')
        net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
        net = layers_lib.dropout(net,
                                 dropout_keep_prob,
                                 is_training=is_training,
                                 scope='dropout7')
        # Two outputs: prediction is either 'car' or 'background' (Carvana).
        net = layers.conv2d(net,
                            2, [1, 1],
                            padding='VALID',
                            activation_fn=tf.nn.sigmoid,
                            biases_initializer=init_ops.zeros_initializer(),
                            scope='fc8')

    # Expanding path as (positional conv2d_transpose arguments, layer scope).
    decoder_layers = (
        ((4096, 1), 'convt9'),
        ((4096, 1), 'convt10'),
        ((256, 5), 'convt11'),
        ((384, 3), 'convt12'),
        ((384, 3), 'convt13'),
        ((384, 3), 'convt14'),
        ((192, 3, 2), 'convt15'),
        ((192, 5), 'convt16'),
        ((96, 3, 2), 'convt17'),
    )
    with arg_scope([layers.conv2d_transpose],
                   padding='VALID',
                   activation_fn=nn_ops.relu):
        for deconv_args, layer_scope in decoder_layers:
            net = layers.conv2d_transpose(net,
                                          *deconv_args,
                                          scope=layer_scope)
        # The final layer overrides the scoped ReLU with a sigmoid.
        net = layers.conv2d_transpose(net,
                                      2,
                                      11,
                                      4,
                                      activation_fn=tf.nn.sigmoid,
                                      scope='convt18')

    return net
コード例 #55
0
  def __init__(self, hparams, item, cluster, controller_id=0):
    """HierarchicalController class initializer.

    Stores the hyper-parameters, derives the group and embedding sizes,
    creates the controller variables under the `hparams.name` variable scope
    (opened with AUTO_REUSE) and builds the default seq2seq input
    placeholder.

    Args:
      hparams: All hyper-parameters.
      item: The metagraph to place.
      cluster: The cluster of hardware devices to optimize for.
      controller_id: the id of the controller in a multi-controller setup.
    """
    super(HierarchicalController, self).__init__(item, cluster)
    self.ctrl_id = controller_id
    self.hparams = hparams
    hp = self.hparams

    # Without an explicit group count, use 20 per device, capped at 256.
    group_count = hp.num_groups
    if group_count is None:
      group_count = min(256, 20 * self.num_devices)
    self.num_groups = group_count

    # creates self.op_embeddings and self.type_dict
    self.create_op_embeddings(verbose=False)
    # TODO(azalia) clean up embedding/group_embedding_size names
    output_feature_size = hp.max_num_outputs * hp.max_output_size
    self.group_emb_size = (
        2 * self.num_groups + len(self.type_dict) + output_feature_size)
    self.embedding_size = self.group_emb_size
    self.initializer = init_ops.glorot_uniform_initializer(seed=hp.seed)

    hidden = hp.hidden_size
    with variable_scope.variable_scope(
        hp.name,
        initializer=self.initializer,
        reuse=variable_scope.AUTO_REUSE):
      # define parameters of feedforward
      variable_scope.get_variable(
          "w_grouping_ff",
          [1 + output_feature_size + hp.adj_embed_dim,
           hp.grouping_hidden_size])
      variable_scope.get_variable(
          "w_grouping_softmax", [hp.grouping_hidden_size, self.num_groups])

      # Only the encoder weights depend on the LSTM direction setting; the
      # remaining variables are the same in both configurations.
      if hp.bi_lstm:
        bi_encoder_shape = [self.embedding_size + hidden // 2, 2 * hidden]
        variable_scope.get_variable("encoder_lstm_forward", bi_encoder_shape)
        variable_scope.get_variable("encoder_lstm_backward", bi_encoder_shape)
      else:
        variable_scope.get_variable(
            "encoder_lstm", [self.embedding_size + hidden, 4 * hidden])

      variable_scope.get_variable("device_embeddings",
                                  [self.num_devices, hidden])
      variable_scope.get_variable("decoder_lstm", [2 * hidden, 4 * hidden])
      variable_scope.get_variable("device_softmax",
                                  [2 * hidden, self.num_devices])
      variable_scope.get_variable("device_go_embedding", [1, hidden])
      for bias_name in ("encoder_forget_bias", "decoder_forget_bias"):
        variable_scope.get_variable(
            bias_name,
            shape=1,
            dtype=dtypes.float32,
            initializer=init_ops.constant_initializer(hp.forget_bias_init))
      for attn_name in ("attn_w_1", "attn_w_2"):
        variable_scope.get_variable(attn_name, [hidden, hidden])
      variable_scope.get_variable("attn_v", [hidden, 1])

    # All-zeros default input that can be overridden by feeding the
    # placeholder.
    input_dims = (hp.num_children, self.num_groups, self.group_emb_size)
    self.seq2seq_input_layer = array_ops.placeholder_with_default(
        array_ops.zeros(list(input_dims), dtypes.float32),
        shape=input_dims)
コード例 #56
0
ファイル: optimizers.py プロジェクト: xjump/tensorflow-cl
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  gradient_noise_scale=None,
                  gradient_multipliers=None,
                  clip_gradients=None,
                  learning_rate_decay_fn=None,
                  update_ops=None,
                  variables=None,
                  name=None,
                  summaries=None,
                  colocate_gradients_with_ops=False):
    """Build a training tensor that minimizes `loss` with `optimizer`.

    `optimizer` may be given in any of these forms:

    - string: a key of OPTIMIZER_CLS_NAMES, e.g.
      `optimize_loss(..., optimizer='Adam')`.
    - class: a subclass of `Optimizer` constructed as
      `optimizer(learning_rate=lr)`, e.g.
      `optimize_loss(..., optimizer=tf.train.AdagradOptimizer)`.
    - object: an instance of a subclass of `Optimizer`, e.g.
      `optimize_loss(..., optimizer=tf.train.AdagradOptimizer(0.5))`.
    - function: a callable returning an `Optimizer`. It receives the
      learning rate tensor when `learning_rate` is given, e.g.
      `optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.5)`,
      and is called without arguments when `learning_rate` is `None`, e.g.
      `optimizer=lambda: tf.train.MomentumOptimizer(0.5, momentum=0.5)`.

    Args:
      loss: Tensor, 0 dimensional.
      global_step: Tensor, step counter for each update.
      learning_rate: Python float, 0-d Tensor, or `None`. A float is stored
        in a non-trainable "learning_rate" variable.
      optimizer: string, class, instance or function as described above.
      gradient_noise_scale: float or `None`; when set, the gradients are
        passed through `_add_scaled_noise_to_gradients` with this scale.
      gradient_multipliers: dict or `None`; when set, the gradients are
        passed through `_multiply_gradients` with this mapping.
      clip_gradients: float or `None`; when set, the gradients are passed
        through `_clip_gradients_by_norm` with this value.
      learning_rate_decay_fn: function taking the learning rate and
        `global_step` and returning the decayed learning rate, for example
        `tf.train.exponential_decay`. Only applied when `learning_rate` is
        not `None`.
      update_ops: update `Operation`s to run with each step. If `None`, the
        UPDATE_OPS collection is used. They are attached as dependencies of
        `loss`.
      variables: list of variables to optimize, or `None` to use all
        trainable variables.
      name: name used for the enclosing variable scope (default
        "OptimizeLoss").
      summaries: collection of summary names to emit. This function looks
        for "loss", "learning_rate", "gradients" and "gradient_norm";
        `None` means ["loss", "learning_rate"]. The "learning_rate" summary
        is only written when a decay function is applied.
      colocate_gradients_with_ops: forwarded to `compute_gradients`.

    Returns:
      The loss tensor, wrapped so that evaluating it also applies the
      gradient update.

    Raises:
      ValueError: if `learning_rate` is neither a float nor a 0-d Tensor, if
        it is `None` while `optimizer` is a string or a class, or if
        `optimizer` is not one of the supported forms.
    """
    with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
        # Fall back to the graph-wide UPDATE_OPS collection if none given.
        if update_ops is None:
            update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
        # Attach the update ops to the loss so they run along with it.
        if update_ops:
            loss = control_flow_ops.with_dependencies(list(update_ops), loss)

        # Resolve the learning rate to a scalar tensor or variable.
        lr = None
        if learning_rate is not None:
            is_scalar_tensor = (isinstance(learning_rate, ops.Tensor)
                                and learning_rate.get_shape().ndims == 0)
            if is_scalar_tensor:
                lr = learning_rate
            elif isinstance(learning_rate, float):
                lr = vs.get_variable(
                    "learning_rate", [],
                    trainable=False,
                    initializer=init_ops.constant_initializer(learning_rate))
            else:
                raise ValueError(
                    "Learning rate should be 0d Tensor or float. "
                    "Got %s of type %s" %
                    (str(learning_rate), str(type(learning_rate))))

        if summaries is None:
            summaries = ["loss", "learning_rate"]

        # Optional decay; the learning-rate summary is tied to this branch.
        apply_decay = (learning_rate is not None
                       and learning_rate_decay_fn is not None)
        if apply_decay:
            lr = learning_rate_decay_fn(lr, global_step)
            if "learning_rate" in summaries:
                logging_ops.scalar_summary("learning_rate", lr)

        # Turn the `optimizer` argument into an Optimizer instance.
        if isinstance(optimizer, six.string_types):
            if lr is None:
                raise ValueError(
                    "Learning rate is None, but should be specified if "
                    "optimizer is string (%s)." % optimizer)
            if optimizer not in OPTIMIZER_CLS_NAMES:
                raise ValueError(
                    "Optimizer name should be one of [%s], you provided %s." %
                    (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
            opt_instance = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
        elif (isinstance(optimizer, type)
              and issubclass(optimizer, optimizer_.Optimizer)):
            if lr is None:
                raise ValueError(
                    "Learning rate is None, but should be specified if "
                    "optimizer is class (%s)." % optimizer)
            opt_instance = optimizer(learning_rate=lr)
        elif isinstance(optimizer, optimizer_.Optimizer):
            opt_instance = optimizer
        elif callable(optimizer):
            # Factory function: pass the learning rate only if one was given.
            if learning_rate is not None:
                opt_instance = optimizer(lr)
            else:
                opt_instance = optimizer()
            if not isinstance(opt_instance, optimizer_.Optimizer):
                raise ValueError(
                    "Unrecognized optimizer: function should return "
                    "subclass of Optimizer. Got %s." % str(opt_instance))
        else:
            raise ValueError(
                "Unrecognized optimizer: should be string, "
                "subclass of Optimizer, instance of "
                "subclass of Optimizer or function with one argument. "
                "Got %s." % str(optimizer))

        # Default to every trainable variable.
        if variables is None:
            variables = vars_.trainable_variables()

        grads_and_vars = opt_instance.compute_gradients(
            loss,
            variables,
            colocate_gradients_with_ops=colocate_gradients_with_ops)

        # Optional gradient post-processing: noise, scaling, then clipping.
        if gradient_noise_scale is not None:
            grads_and_vars = _add_scaled_noise_to_gradients(
                grads_and_vars, gradient_noise_scale)
        if gradient_multipliers is not None:
            grads_and_vars = _multiply_gradients(grads_and_vars,
                                                 gradient_multipliers)
        if clip_gradients is not None:
            grads_and_vars = _clip_gradients_by_norm(grads_and_vars,
                                                     clip_gradients)

        if "loss" in summaries:
            logging_ops.scalar_summary("loss", loss)

        # Per-variable histograms of gradients and gradient norms.
        for gradient, variable in grads_and_vars:
            # Sparse gradients carry their data in `.values`.
            if isinstance(gradient, ops.IndexedSlices):
                grad_values = gradient.values
            else:
                grad_values = gradient
            if grad_values is None:
                continue

            if "gradients" in summaries:
                logging_ops.histogram_summary(variable.name + "/gradients",
                                              grad_values)
            if "gradient_norm" in summaries:
                logging_ops.histogram_summary(
                    variable.name + "/gradient_norm",
                    clip_ops.global_norm([grad_values]))

        apply_op = opt_instance.apply_gradients(grads_and_vars,
                                                global_step=global_step,
                                                name="train")

        # Evaluating the returned tensor forces the update to run.
        return control_flow_ops.with_dependencies([apply_op], loss)
コード例 #57
0
            N.TimeDelayedConv(n_new_features=512, n_time_context=5),
            N.TimeDelayedConv(n_new_features=512, n_time_context=5),
            N.TimeDelayedConv(n_new_features=512, n_time_context=7),
            N.Dense(512),
            N.BatchNorm(),
            N.Dense(1500),
            N.BatchNorm(),
            N.StatsPool(axes=1, output_mode='concat'),
            N.Flatten(outdim=2),
            N.Dense(512, name="LatentOutput"),
            N.BatchNorm(),
            N.Dense(512),
            N.BatchNorm(),
            N.Dense(n_speakers,
                    activation=K.linear,
                    b_init=init_ops.constant_initializer(value=0))
        ],
                           debug=1)
# ====== create outputs ====== #
# `x_vec` and `X` are defined outside this excerpt; presumably `x_vec` is the
# layer stack built above and `X` its input -- confirm against the full script.
y_logit = x_vec(X)
# Softmax turns the logits into the probabilities used for accuracy below.
y_proba = tf.nn.softmax(y_logit)
# Take the first graph entry matching the Dense role under the
# 'LatentOutput' scope (the name given to one of the Dense layers above).
z = K.ComputationGraph(y_proba).get(roles=N.Dense,
                                    scope='LatentOutput',
                                    beginning_scope=False)[0]
print('Latent space:', ctext(z, 'cyan'))
# ====== create loss ====== #
# Cross entropy is computed from the logits, accuracy from the probabilities.
# `y` is defined outside this excerpt; it is passed as one-hot labels.
ce = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=y_logit)
acc = K.metrics.categorical_accuracy(y_true=y, y_pred=y_proba)
# ====== params and optimizing ====== #
updates = K.optimizers.Adam(lr=0.0001, name='XAdam').minimize(
    loss=ce,
コード例 #58
0
# Constant float32 mask built from `mask_tril` (defined outside this excerpt).
clockwork_mask = tf.constant(mask_tril, dtype=tf.float32, name="mask")
# Disabled alternative that builds the mask from `mask_triu` instead:
#clockwork_mask = tf.constant(mask_triu, dtype=tf.float32, name="mask")

# Define parameters. No initializer is passed for the input and hidden
# variables below.
with tf.variable_scope("input"):
    input_w = tf.get_variable("i_w", [n_input, n_hidden])
    input_b = tf.get_variable("i_b", [n_hidden])

with tf.variable_scope("hidden"):
    hidden_w = tf.get_variable("h_w", [n_hidden, n_hidden])
    hidden_b = tf.get_variable("h_b", [n_hidden])

# Additional bias vector of length n_hidden, constant-initialised to 0.0.
with tf.variable_scope("bias_all"):
    bias_all = tf.get_variable("b_all", [n_hidden],
                               initializer=init_ops.constant_initializer(0.0))

# Output projection weights and bias, kept in single-entry dicts and
# initialised from a random normal distribution.
output_w = {
    'out_w': tf.Variable(tf.random_normal([n_hidden, n_classes]))
    # Disabled alternative using get_variable:
    #'out_w': tf.get_variable("o_w",[n_hidden, n_classes])
}
output_b = {
    'out_b': tf.Variable(tf.random_normal([n_classes]))
    # Disabled alternative using get_variable:
    #'out_b': tf.get_variable("o_b",[n_classes])
}


# Construct network
#def RNN(x,state):
def RNN(x, output_w, output_b):
    x = tf.transpose(x, [1, 0, 2])
コード例 #59
0
def linear(args,
           output_size,
           bias,
           bias_initializer=None,
           kernel_initializer=None):
    """Project one or more 2D tensors through a single weight matrix.

    The inputs are concatenated along dimension 1 and multiplied by one
    weight variable, which is equivalent to sum_i(args[i] * W[i]).

      Args:
        args: a 2D Tensor or a list of 2D, batch x n, Tensors.
        output_size: int, number of columns of the weight matrix.
        bias: boolean, whether a bias vector is added to the product.
        bias_initializer: initializer for the bias; a constant 0.0
          initializer is used when this is None.
        kernel_initializer: initializer handed to the weight variable.
      Returns:
        A 2D Tensor with shape [batch x output_size].
      Raises:
        ValueError: if `args` is None or an empty sequence, if any argument
          is not 2D, or if the second dimension of any argument is unknown.
    """
    if args is None:
        raise ValueError("`args` must be specified")
    if nest.is_sequence(args):
        if not args:
            raise ValueError("`args` must be specified")
        inputs = args
    else:
        # A lone tensor is handled as a one-element list.
        inputs = [args]

    # Validate every input and accumulate the width of the weight matrix.
    shapes = [tensor.get_shape() for tensor in inputs]
    total_arg_size = 0
    for shape in shapes:
        if shape.ndims != 2:
            raise ValueError("linear is expecting 2D arguments: %s" % shapes)
        width = shape[1].value
        if width is None:
            raise ValueError(
                "linear expects shape[1] to be provided for shape %s, "
                "but saw %s" % (shape, shape[1]))
        total_arg_size += width

    # Variables take the dtype of the first input.
    dtype = inputs[0].dtype

    with vs.variable_scope(vs.get_variable_scope()) as outer_scope:
        kernel = vs.get_variable(_WEIGHTS_VARIABLE_NAME,
                                 [total_arg_size, output_size],
                                 dtype=dtype,
                                 initializer=kernel_initializer)
        # Concatenate only when there is more than one input.
        if len(inputs) == 1:
            merged = inputs[0]
        else:
            merged = array_ops.concat(inputs, 1)
        product = math_ops.matmul(merged, kernel)
        if not bias:
            return product

        with vs.variable_scope(outer_scope) as inner_scope:
            # The bias variable is created with the partitioner cleared.
            inner_scope.set_partitioner(None)
            if bias_initializer is None:
                bias_initializer = init_ops.constant_initializer(0.0,
                                                                 dtype=dtype)
            bias_term = vs.get_variable(_BIAS_VARIABLE_NAME, [output_size],
                                        dtype=dtype,
                                        initializer=bias_initializer)
        return nn_ops.bias_add(product, bias_term)
コード例 #60
0
    def build(self, input_shape):
        """Create the variables backing the Cudnn RNN.

        May be invoked explicitly before `__call__()` or implicitly through
        it. Once the layer is marked as built, later invocations return
        immediately without creating anything.

        Args:
          input_shape: network input tensor shape, a python list or a
            TensorShape object with 3 dimensions.

        Raises:
          ValueError: if input_shape does not have 3 dimensions or its last
            dimension is unknown.
        """
        if self.built:
            return

        input_shape = tensor_shape.TensorShape(input_shape)
        rank = input_shape.ndims
        if rank != 3:
            raise ValueError("Expecting input_shape with 3 dims, got %d" %
                             rank)
        feature_dim = input_shape[-1].value
        if feature_dim is None:
            raise ValueError("The last dimension of the inputs to `CudnnRNN` "
                             "should be defined. Found `None`.")
        self._input_size = feature_dim
        self.input_spec = base_layer.InputSpec(ndim=3,
                                               axes={-1: self._input_size})

        self._set_scope(None)

        # The base class `add_variable()` is bypassed on purpose: it calls
        # `tf.get_variable()` with a callable initializer, while here the
        # variable is initialized from a tensor. That difference is required
        # for forward-compatibility with Cudnn.
        with vs.variable_scope(self._scope,
                               reuse=self.built,
                               custom_getter=self._update_trainable_weights):
            dtype = self._plain_dtype

            # Fall back to default initializers when none were supplied.
            if self._kernel_initializer is None:
                self._kernel_initializer = init_ops.glorot_uniform_initializer(
                    seed=self._seed, dtype=dtype)
            if self._bias_initializer is None:
                self._bias_initializer = init_ops.constant_initializer(
                    0.0, dtype=dtype)

            # Build initial values in canonical form, then pack them into the
            # single opaque parameter tensor.
            initial_weights = [
                self._kernel_initializer(shape, dtype=dtype)
                for shape in self.canonical_weight_shapes
            ]
            initial_biases = [
                self._bias_initializer(shape, dtype=dtype)
                for shape in self.canonical_bias_shapes
            ]
            opaque_init = self._canonical_to_opaque(initial_weights,
                                                    initial_biases)

            if vs.get_variable_scope().partitioner is not None:
                logging.warn(
                    "Partitioner is not supported for Cudnn RNN layer variables, using "
                    "it will create forward-compatibility issues with future "
                    "CUDA/CuDNN generations.")
            # The opaque kernel variable is initialized from a tensor.
            self.kernel = vs.get_variable("opaque_kernel",
                                          dtype=dtype,
                                          initializer=opaque_init,
                                          validate_shape=False)

        # The saveable is created in the outer scope of the cudnn subgraph so
        # that an alternative subgraph made of platform-independent rnn cells
        # can load the checkpoints directly.
        if not self.built and vs.get_variable_scope().reuse is not True:
            self._create_saveable()
        self.built = True