Code example #1
 def testUnknownUnconnectedGradientsValueGiven(self):
   with ops.Graph().as_default():
     x = constant(1.0)
     y = constant(1.0)
     with self.assertRaisesRegexp(
         ValueError, "Unknown value for unconnected_gradients: 'nonsense'"):
       gradients.gradients([y], [x], unconnected_gradients="nonsense")
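For reference, a minimal sketch of the two accepted values, "none" and "zero", using the public API (assuming a TensorFlow version where `tf.compat.v1` and the `unconnected_gradients` argument are available):

import tensorflow.compat.v1 as tf

with tf.Graph().as_default():
  x = tf.constant(1.0, shape=[2, 2])
  y = tf.constant(3.0, shape=[3, 1])  # y does not depend on x
  # "none" (the default) keeps None for unconnected inputs ...
  g_none = tf.gradients([y], [x], unconnected_gradients="none")  # [None]
  # ... while "zero" substitutes a zeros tensor shaped like x.
  g_zero = tf.gradients([y], [x], unconnected_gradients="zero")
  with tf.Session() as sess:
    print(sess.run(g_zero))  # [array([[0., 0.], [0., 0.]], dtype=float32)]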
Code example #2
File: gradients_test.py  Project: didukhle/tensorflow
 def testRealOnly(self):
   x = constant_op.constant(7+3j, dtype=dtypes.complex64)
   y = math_ops.square(x)
   with self.assertRaisesRegexp(
       TypeError,
       r"Gradients of complex tensors must set grad_ys "
       r"\(y\.dtype = tf\.complex64\)"):
     gradients.gradients(y, x)
Code example #3
File: gradients_test.py  Project: didukhle/tensorflow
 def testPartialDerivatives(self):
   with self.test_session():
     x = constant_op.constant(1.)
     y = 2 * x
     z = x + y
     totalg = gradients.gradients(z, [x, y])
     self.assertEqual([3.0, 1.0], [g.eval() for g in totalg])
     partialg = gradients.gradients(z, [x, y], stop_gradients=[x, y])
     self.assertEqual([1.0, 1.0], [g.eval() for g in partialg])
Code example #4
File: math_ops_test.py  Project: yuikns/tensorflow
 def testFloorDivGrad(self):
     with self.test_session():
         a = variables.Variable(2.0)
         b = variables.Variable(4.0)
         with self.test_session() as sess:
             sess.run(variables.initialize_all_variables())
             c_grad = gradients.gradients(math_ops.div_deprecated(a, b), [a, b])
             self.assertAllEqual([x.eval() for x in c_grad], [0.25, -0.125])
             c_grad = gradients.gradients(math_ops.div(a, b), [a, b])
             self.assertAllEqual([x.eval() for x in c_grad], [0.25, -0.125])
             c_grad = gradients.gradients(math_ops.floordiv(a, b), [a, b])
             self.assertAllEqual([None if x is None else x.eval() for x in c_grad], [None, None])
Code example #5
 def testFloorDivGrad(self):
   with self.test_session():
     a = variables.Variable(2.)
     b = variables.Variable(4.)
     with self.test_session() as sess:
       sess.run(variables.global_variables_initializer())
       c_grad = gradients.gradients(math_ops.divide(a, b), [a, b])
       self.assertAllEqual([x.eval() for x in c_grad], [.25, -.125])
       c_grad = gradients.gradients(math_ops.div(a, b), [a, b])
       self.assertAllEqual([x.eval() for x in c_grad], [.25, -.125])
       c_grad = gradients.gradients(math_ops.floordiv(a, b), [a, b])
       self.assertAllEqual([None if x is None else x.eval()
                            for x in c_grad], [None, None])
Code example #6
File: gradients_test.py  Project: didukhle/tensorflow
 def testDependentYs(self):
   with self.test_session():
     x = constant_op.constant(3.0)
     y = math_ops.square(x)
     y1 = math_ops.square(y)
     y2 = math_ops.square(y1)
     g = gradients.gradients([y, y2], x)
     self.assertAllClose(17502.0, g[0].eval())
     g = gradients.gradients(y + y2, x)
     self.assertAllClose(17502.0, g[0].eval())
     z = array_ops.identity(y)
     z2 = array_ops.identity(y2)
     g = gradients.gradients([z, z2], x)
     self.assertAllClose(17502.0, g[0].eval())
Code example #7
 def test_jacobian_fixed_shape(self):
   x = random_ops.random_uniform([2, 2])
   y = math_ops.matmul(x, x, transpose_a=True)
   jacobian_pfor = gradients.jacobian(y, x, use_pfor=True)
   jacobian_while = gradients.jacobian(y, x, use_pfor=False)
   answer = ops.convert_to_tensor([[
       gradient_ops.gradients(y[0][0], x)[0],
       gradient_ops.gradients(y[0][1], x)[0]
   ], [
       gradient_ops.gradients(y[1][0], x)[0],
       gradient_ops.gradients(y[1][1], x)[0]
   ]])
   self.run_and_assert_equal(answer, jacobian_pfor)
   self.run_and_assert_equal(answer, jacobian_while)
Code example #8
 def testColocateGradientsWithAggregation(self):
   with ops.Graph().as_default() as g:
     with g.device("/gpu:1"):
       w = constant(1.0, shape=[1, 1])
     x = constant(1.0, shape=[1, 2])
     y = constant(1.0, shape=[1, 2])
     wx = math_ops.matmul(w, x)
     wy = math_ops.matmul(w, y)
     with g.device("/gpu:0"):
       z = wx + wy
     gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0]
     self.assertEquals("/gpu:1", gw1.device)
     gw2 = gradients.gradients(z, [w], colocate_gradients_with_ops=False)[0]
     self.assertEquals(None, gw2.device)
Code example #9
File: gradients_test.py  Project: didukhle/tensorflow
  def testCustomGradientErrors(self):

    @custom_gradient.custom_gradient
    def F(x):

      def Grad(_):
        raise RuntimeError("x")

      return x, Grad

    with ops.Graph().as_default():
      x = constant(1.0)
      y = F(x)
      with self.assertRaises(RuntimeError):
        gradients.gradients(y, x)
Code example #10
File: nn_test.py  Project: adam-erickson/tensorflow
 def testBatchNormGradImpl(self):
     x_shape = [7, 5, 4, 6]
     param_shape = [6]
     np.random.seed(1)  # Make it reproducible.
     x_val = np.random.random_sample(x_shape).astype(np.float32)
     m_val = np.random.random_sample(param_shape).astype(np.float32)
     v_val = np.random.random_sample(param_shape).astype(np.float32)
     beta_val = np.random.random_sample(param_shape).astype(np.float32)
     gamma_val = np.random.random_sample(param_shape).astype(np.float32)
     backprop_val = np.random.random_sample(x_shape).astype(np.float32)
     for use_gpu in [False, True]:
         with self.test_session(use_gpu=use_gpu) as sess:
             x = constant_op.constant(x_val, name="x")
             m = constant_op.constant(m_val, name="m")
             v = constant_op.constant(v_val, name="v")
             beta = constant_op.constant(beta_val, name="beta")
             gamma = constant_op.constant(gamma_val, name="gamma")
             backprop = constant_op.constant(backprop_val, name="backprop")
             epsilon = 0.001
             for scale_after_normalization in [True, False]:
                 dx, dm, dv, db, dg = gen_nn_ops._batch_norm_with_global_normalization_grad(
                     x, m, v, gamma, backprop, epsilon, scale_after_normalization
                 )
                 on = self._opsBatchNorm(x, m, v, beta, gamma, epsilon, scale_after_normalization)
                 odx, odm, odv, odb, odg = gradients.gradients([on], [x, m, v, beta, gamma], [backprop])
                 if scale_after_normalization:
                     all_grads = sess.run([dx, dm, dv, db, dg, odx, odm, odv, odb, odg])
                     to_check = ["dx", "dm", "dv", "db", "dg"]
                 else:
                     all_grads = sess.run([dx, dm, dv, db, odx, odm, odv, odb])
                     to_check = ["dx", "dm", "dv", "db"]
                 for i, n in enumerate(to_check):
                     print(n)
                     self.assertAllClose(all_grads[i + len(to_check)], all_grads[i], atol=0.000001)
Code example #11
File: optimizer_v2.py  Project: aritratony/tensorflow
  def get_gradients(self, loss, params):
    """Returns gradients of `loss` with respect to `params`.

    Arguments:
      loss: Loss tensor.
      params: List of variables.

    Returns:
      List of gradient tensors.

    Raises:
      ValueError: In case any gradient cannot be computed (e.g. if gradient
        function not implemented).
    """
    params = nest.flatten(params)
    with backend.get_graph().as_default():
      grads = gradients.gradients(loss, params)
    for grad, param in zip(grads, params):
      if grad is None:
        raise ValueError("Variable {} has `None` for gradient. "
                         "Please make sure that all of your ops have a "
                         "gradient defined (i.e. are differentiable). "
                         "Common ops without gradient: "
                         "K.argmax, K.round, K.eval.".format(param))
    if hasattr(self, "clipnorm"):
      grads = [clip_ops.clip_by_norm(g, self.clipnorm) for g in grads]
    if hasattr(self, "clipvalue"):
      grads = [
          clip_ops.clip_by_value(g, -self.clipvalue, self.clipvalue)
          for g in grads
      ]
    return grads
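The clipping in `get_gradients` maps directly onto the public clip ops. A minimal graph-mode sketch of the same behaviour (the `loss`, `params`, `clipnorm`, and `clipvalue` names here are illustrative placeholders, not part of the example above):

import tensorflow.compat.v1 as tf

def clipped_gradients(loss, params, clipnorm=None, clipvalue=None):
  """Sketch of the clipping performed by get_gradients() above."""
  grads = tf.gradients(loss, params)
  # Like the method above, this assumes no gradient is None.
  if clipnorm is not None:
    # Per-tensor norm clipping, as done with self.clipnorm above.
    grads = [tf.clip_by_norm(g, clipnorm) for g in grads]
  if clipvalue is not None:
    # Element-wise value clipping, as done with self.clipvalue above.
    grads = [tf.clip_by_value(g, -clipvalue, clipvalue) for g in grads]
  return grads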
Code example #12
  def testThatBackpropRuns(self):
    """Run optimization to ensure that gradients can be computed."""

    batch_size = 1
    image_height = 9
    image_width = 12
    image = variables.Variable(
        np.float32(
            np.random.uniform(size=[batch_size, image_height, image_width, 3])))
    control_point_locations = [[3., 3.]]
    control_point_locations = constant_op.constant(
        np.float32(np.expand_dims(control_point_locations, 0)))
    control_point_displacements = [[0.25, -0.5]]
    control_point_displacements = constant_op.constant(
        np.float32(np.expand_dims(control_point_displacements, 0)))
    warped_image, _ = sparse_image_warp.sparse_image_warp(
        image,
        control_point_locations,
        control_point_locations + control_point_displacements,
        num_boundary_points=3)

    loss = math_ops.reduce_mean(math_ops.abs(warped_image - image))
    optimizer = momentum.MomentumOptimizer(0.001, 0.9)
    grad = gradients.gradients(loss, [image])
    grad, _ = clip_ops.clip_by_global_norm(grad, 1.0)
    opt_func = optimizer.apply_gradients(zip(grad, [image]))
    init_op = variables.global_variables_initializer()

    with self.test_session() as sess:
      sess.run(init_op)
      for _ in range(5):
        sess.run([loss, opt_func])
Code example #13
def _compute_gradients(tensor, var_list):
  grads = gradients.gradients(tensor, var_list)
  # tf.gradients sometimes returns `None` when it should return 0.
  return [
      grad if grad is not None else array_ops.zeros_like(var)
      for var, grad in zip(var_list, grads)
  ]
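In TensorFlow versions that support it (roughly 1.13 and later), the same None-to-zeros substitution can be requested from `tf.gradients` directly; a short sketch with the public API, values chosen for illustration:

import tensorflow.compat.v1 as tf

with tf.Graph().as_default():
  x = tf.constant(1.0)
  y = tf.constant(2.0)  # unrelated to x
  # Instead of post-processing None into zeros_like(x), ask for zeros up front.
  grads = tf.gradients([y], [x], unconnected_gradients="zero")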
Code example #14
    def _get_train_ops(self, features, targets):
        """See base class."""
        global_step = contrib_variables.get_global_step()
        assert global_step
        logits = self._logits(features, is_training=True)
        if self._enable_centered_bias:
            centered_bias_step = [self._centered_bias_step(targets, features)]
        else:
            centered_bias_step = []
        with ops.control_dependencies(centered_bias_step):
            loss = self._loss(logits, targets, features)
        logging_ops.scalar_summary("loss", loss)

        linear_vars = self._get_linear_vars()
        dnn_vars = self._get_dnn_vars()
        grads = gradients.gradients(loss, dnn_vars + linear_vars)
        if self._gradient_clip_norm:
            grads, _ = clip_ops.clip_by_global_norm(grads, self._gradient_clip_norm)

        dnn_grads = grads[0 : len(dnn_vars)]
        linear_grads = grads[len(dnn_vars) :]

        train_ops = self._get_linear_training_ops(linear_grads, linear_vars) + self._get_dnn_training_ops(
            dnn_grads, dnn_vars
        )

        train_step = control_flow_ops.group(*train_ops, name="combined_training_op")
        with ops.control_dependencies([train_step]):
            with ops.get_default_graph().colocate_with(global_step):
                return state_ops.assign_add(global_step, 1).op, loss
Code example #15
File: backprop_test.py  Project: chdinh/tensorflow
  def testAggregateGradients(self):

    def fn(x):
      ind1 = tensor.Tensor(np.array([0, 1]))
      ind2 = tensor.Tensor(np.array([2, 3]))
      ind3 = tensor.Tensor(np.array([1, 3]))
      # A mixture of IndexedSlices and dense tensor to aggregate.
      g1 = embedding_ops.embedding_lookup(x, ind1)
      g2 = embedding_ops.embedding_lookup(x, ind2)
      g3 = embedding_ops.embedding_lookup(x, ind3)
      g4 = math_ops.reduce_sum(x * tensor.Tensor(2.0))
      return g1 * g2 * g3 * g4

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = tensor.Tensor(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]

    with context.graph_mode(), self.test_session():
      tf_var = array_ops.constant(var_np, dtypes.float32)
      tf_ind1 = array_ops.constant([0, 1])
      tf_ind2 = array_ops.constant([2, 3])
      tf_ind3 = array_ops.constant([1, 3])
      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
      tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2)
      tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3)
      tf_g4 = math_ops.reduce_sum(tf_var * 2.0, reduction_indices=(0, 1))
      tf_y = tf_g1 * tf_g2 * tf_g3 * tf_g4
      tf_grad = gradients.gradients(tf_y, [tf_var])[0]

      tf_dense_grad = math_ops.unsorted_segment_sum(
          tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0])

      self.assertAllClose(grad.numpy(), tf_dense_grad.eval())
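Gradients flowing through `embedding_lookup` come back as `tf.IndexedSlices`, which is why the test densifies them with `unsorted_segment_sum`. A hedged sketch of the same conversion using only the public API (the table and indices are illustrative):

import tensorflow.compat.v1 as tf

with tf.Graph().as_default():
  table = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
  looked_up = tf.nn.embedding_lookup(table, tf.constant([0, 2]))
  loss = tf.reduce_sum(looked_up)
  grad = tf.gradients(loss, table)[0]  # a tf.IndexedSlices, not a dense tensor
  # Scatter-add the sparse rows back into a dense gradient of table's shape.
  dense = tf.math.unsorted_segment_sum(grad.values, grad.indices,
                                       grad.dense_shape[0])
  with tf.Session() as sess:
    print(sess.run(dense))  # [[1. 1.], [0. 0.], [1. 1.]]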
Code example #16
  def test_zero_grad_tf_gradients(self):
    if context.executing_eagerly():
      self.skipTest("tf.gradients not supported in eager.")

    x = constant_op.constant([-1., 0., 1.])
    g = self.evaluate(gradients.gradients(math_ops.pow(x, 2), x)[0])
    self.assertAllClose([-2., 0., 2.], g)
Code example #17
File: nccl_ops_test.py  Project: becster/tensorflow
 def _Gradient(tensors, devices):
   inputs = [array_ops.placeholder(t.dtype, t.shape) for t in tensors]
   reduce_tensors = nccl_reduce(inputs, devices)
   losses = _DeviceTensors(tensors, [t.device for t in reduce_tensors])
   grads = gradients.gradients(
       reduce_tensors, inputs, losses, colocate_gradients_with_ops=True)
   return [g for g in grads if g is not None]
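The `losses` list above is passed as the third positional argument, `grad_ys`, which supplies the initial upstream gradients: the result is the weighted sum of the per-output gradients. A small sketch of that weighting (the values are illustrative):

import tensorflow.compat.v1 as tf

with tf.Graph().as_default():
  x = tf.constant(3.0)
  y1 = x * x      # dy1/dx = 6 at x = 3
  y2 = 2.0 * x    # dy2/dx = 2
  # With grad_ys, the gradient is 10*6 + 100*2 = 260 rather than 6 + 2 = 8.
  g = tf.gradients([y1, y2], [x],
                   grad_ys=[tf.constant(10.0), tf.constant(100.0)])
  with tf.Session() as sess:
    print(sess.run(g))  # [260.0]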
Code example #18
  def test_interpolation_gradient(self):
    """Make sure that backprop can run. Correctness of gradients is assumed.

    Here, we create a use a small 'training' set and a more densely-sampled
    set of query points, for which we know the true value in advance. The goal
    is to choose x locations for the training data such that interpolating using
    this training data yields the best reconstruction for the function
    values at the query points. The training data locations are optimized
    iteratively using gradient descent.
    """
    tp = _QuadraticPlusSinProblemND()
    (query_points, query_values, train_points,
     train_values) = tp.get_problem(optimizable=True)

    regularization = 0.001
    for interpolation_order in (1, 2, 3, 4):
      interpolator = interpolate_spline.interpolate_spline(
          train_points, train_values, query_points, interpolation_order,
          regularization)

      loss = math_ops.reduce_mean(math_ops.square(query_values - interpolator))

      optimizer = momentum.MomentumOptimizer(0.001, 0.9)
      grad = gradients.gradients(loss, [train_points])
      grad, _ = clip_ops.clip_by_global_norm(grad, 1.0)
      opt_func = optimizer.apply_gradients(zip(grad, [train_points]))
      init_op = variables.global_variables_initializer()

      with self.cached_session() as sess:
        sess.run(init_op)
        for _ in range(100):
          sess.run([loss, opt_func])
Code example #19
  def get_gradients(self, loss, params):
    """Returns gradients of `loss` with respect to `params`.

    Arguments:
      loss: Loss tensor.
      params: List of variables.

    Returns:
      List of gradient tensors.

    Raises:
      ValueError: In case any gradient cannot be computed (e.g. if gradient
        function not implemented).
    """
    loss = self._scale_loss(loss)
    grads = gradients.gradients(loss, params)
    if None in grads:
      raise ValueError("An operation has `None` for gradient. "
                       "Please make sure that all of your ops have a "
                       "gradient defined (i.e. are differentiable). "
                       "Common ops without gradient: "
                       "K.argmax, K.round, K.eval.")
    if hasattr(self, "clipnorm"):
      grads = [clip_ops.clip_by_norm(g, self.clipnorm) for g in grads]
    if hasattr(self, "clipvalue"):
      grads = [
          clip_ops.clip_by_value(g, -self.clipvalue, self.clipvalue)
          for g in grads
      ]
    return grads
Code example #20
File: jit_test.py  Project: 1000sprites/tensorflow
  def testPlaysNicelyWithDefunSeparateGradientScope(self):
    with self.test_session(graph=ops.Graph()) as sess:
      with jit.experimental_jit_scope(True):

        @function.Defun(
            compiled=True, noinline=True, separate_compiled_gradients=True)
        def mulop(x1, x2):
          return x1 * x2

        x = constant_op.constant(1.0)
        r = mulop(x, x)
        g_r = gradients.gradients(r, x, name="GA")[0]

      # Ensure the forward function is compiled.
      graph_def = r.graph.as_graph_def()
      func_attrs = graph_def.library.function[0].attr
      self.assertTrue(func_attrs["_XlaCompile"].b)
      self.assertEqual(b"jit_scope_0", func_attrs["_XlaScope"].s)

      # Ensure the gradient (SymbolicGradient) is compiled, with a different
      # _XlaScope from the function itself.
      grad_op = g_r.op.inputs[0].op
      self.assertTrue(grad_op.get_attr("_XlaCompile"))
      self.assertEqual(b"jit_scope_0_grad_GA",
                       grad_op.get_attr("_XlaScope"))

      # Ensure the ops run: grad(x1*x1) = 2*x1
      self.assertAllClose([1.0, 1.0, 2.0], sess.run([x, r, g_r]))
Code example #21
File: gradients_test.py  Project: didukhle/tensorflow
  def testColocateGradientsWithAggregation(self):
    with ops.Graph().as_default() as g:
      with g.device("/device:GPU:1"):
        w = constant(1.0, shape=[1, 1])
      x = constant(1.0, shape=[1, 2])
      y = constant(1.0, shape=[1, 2])
      wx = math_ops.matmul(w, x)
      wy = math_ops.matmul(w, y)
      with g.device("/device:GPU:0"):
        z = wx + wy

      gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0]
      self.assertEqual(gw1.op.colocation_groups(), wx.op.colocation_groups())

      gw2 = gradients.gradients(z, [w], colocate_gradients_with_ops=False)[0]
      self.assertTrue(wx.op.colocation_groups() != gw2.op.colocation_groups())
Code example #22
  def test_tensor_array_grad(self):
    inp = constant_op.constant(np.random.rand(3, 4, 2), dtype=dtypes.float32)
    ta = tensor_array_ops.TensorArray(dtypes.float32, size=3)
    ta = ta.unstack(inp)

    def loop_fn(i):

      def body(j, x):
        value = ta.gather([j])
        value = array_ops.gather(array_ops.reshape(value, [4, 2]), i)
        return j + 1, x + value

      _, out = control_flow_ops.while_loop(lambda j, _: j < 3, body,
                                           (0, array_ops.zeros([2])))
      out = math_ops.reduce_prod(out)
      return out, gradient_ops.gradients(out, inp)[0]

    pfor_out, pfor_out_grad = pfor_control_flow_ops.pfor(loop_fn, 4)
    # Note that tf.while_loop does not work in the setup above. So we manually
    # construct the equivalent computation of the above loops here.
    real_out = math_ops.reduce_sum(inp, axis=[0])
    real_out = math_ops.reduce_prod(real_out, axis=[1])
    # Note that gradients of real_out will accumulate the gradients across the
    # output value. Hence we do the same aggregation on pfor_out_grad.
    real_out_grad = gradient_ops.gradients(real_out, inp)[0]
    sum_pfor_out_grad = math_ops.reduce_sum(pfor_out_grad, axis=[0])

    with session.Session() as sess:
      v1, v2, v1_grad, v2_grad = sess.run(
          [pfor_out, real_out, sum_pfor_out_grad, real_out_grad])
      self.assertAllClose(v1, v2)
      self.assertAllClose(v1_grad, v2_grad)
Code example #23
File: optimizers.py  Project: tdozat/Optimization
 def approximate_hessian(self, grads_and_vars, name=None, gate_gradients=GATE_OP,
                         aggregation_method=None, colocate_gradients_with_ops=False):
   """
   I haven't tested this yet so I have no idea if it works, but even if it
   does it's probably super slow, and either way nothing else has been modified
   to deal with it.
   """
   
   gv = 0
   var_refs = []
   for g_t, x_tm1 in grads_and_vars:
     var_refs.append(x_tm1.ref())
     if g_t is None:
       continue
     with ops.name_scope('update_' + x_tm1.op.name), ops.device(x_tm1.device):
       if isinstance(g_t, ops.Tensor):
         gv += math_ops.reduce_sum(g_t * random_ops.random_normal(g_t.get_shape()))
       else:
         idxs, idxs_ = array_ops.unique(g_t.indices)
         g_t_ = math_ops.unsorted_segment_sum(g_t.values, idxs_, array_ops.size(idxs))
         gv += math_ops.reduce_sum(g_t_ * random_ops.random_normal(g_t_.get_shape()))
   hesses = gradients.gradients(gv, var_refs,
                                gate_gradients=(gate_gradients == Optimizer.GATE_OP),
                                aggregation_method=aggregation_method,
                                colocate_gradients_with_ops=colocate_gradients_with_ops)
   return zip([g_t for g_t, _ in grads_and_vars], [x_tm1 for _, x_tm1 in grads_and_vars], hesses)
Code example #24
 def loop_fn(i):
   image = array_ops.gather(images, i)
   label = array_ops.gather(labels, i)
   logits = array_ops.reshape(model(image, training=training), [-1])
   loss = losses.softmax_cross_entropy(
       logits=logits, onehot_labels=label, reduction=losses.Reduction.NONE)
   return gradient_ops.gradients(loss, variables.trainable_variables())
Code example #25
File: optimizers.py  Project: tdozat/Optimization
 def compute_gradients(self, loss, var_list=None, gate_gradients=GATE_OP,
                       aggregation_method=None, colocate_gradients_with_ops=False):
   """"""
   
   # Error checking
   if gate_gradients not in [Optimizer.GATE_NONE, Optimizer.GATE_OP,
                             Optimizer.GATE_GRAPH]:
     raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, " +
       "Optimizer.GATE_OP, Optimizer.GATE_GRAPH. Not %s" % gate_gradients)
   self._assert_valid_dtypes([loss])
   if var_list is None:
     var_list = variables.trainable_variables()
   for x_tm1 in var_list:
     if not isinstance(x_tm1, variables.Variable):
       raise TypeError("Argument is not a tf.Variable: %s" % x_tm1)
   if not var_list:
     raise ValueError("No variables to optimize")
   
   # The actual stuff
   var_refs = [x_tm1.ref() for x_tm1 in var_list]
   grads = gradients.gradients(loss, var_refs,
                               gate_gradients=(gate_gradients == Optimizer.GATE_OP),
                               aggregation_method=aggregation_method,
                               colocate_gradients_with_ops=colocate_gradients_with_ops)
   if gate_gradients == Optimizer.GATE_GRAPH:
     grads = control_flow_ops.tuple(grads)
   grads_and_vars = list(zip(grads, var_list))
   self._assert_valid_dtypes([x_tm1 for g_t, x_tm1 in grads_and_vars if g_t is not None])
   return grads_and_vars
Code example #26
File: backprop_test.py  Project: Wajih-O/tensorflow
  def testAggregateGradients(self):

    def fn(x):
      ind1 = constant_op.constant(np.array([0, 1]))
      ind2 = constant_op.constant(np.array([2, 3]))
      ind3 = constant_op.constant(np.array([1, 3]))
      # A mixture of IndexedSlices and dense tensor to aggregate.
      g1 = embedding_ops.embedding_lookup(x, ind1)
      g2 = embedding_ops.embedding_lookup(x, ind2)
      g3 = embedding_ops.embedding_lookup(x, ind3)
      g4 = math_ops.reduce_sum(x * constant_op.constant(2.0))
      return g1 * g2 * g3 * g4

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = constant_op.constant(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]
    grad = self.evaluate(ops.convert_to_tensor(grad))

    if not context.executing_eagerly():
      tf_var = array_ops.constant(var_np, dtypes.float32)
      tf_ind1 = array_ops.constant([0, 1])
      tf_ind2 = array_ops.constant([2, 3])
      tf_ind3 = array_ops.constant([1, 3])
      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
      tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2)
      tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3)
      tf_g4 = math_ops.reduce_sum(tf_var * 2.0, axis=(0, 1))
      tf_y = tf_g1 * tf_g2 * tf_g3 * tf_g4
      tf_grad = gradients.gradients(tf_y, [tf_var])[0]

      tf_dense_grad = math_ops.unsorted_segment_sum(
          tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0])

      self.assertAllClose(grad, self.evaluate(tf_dense_grad))
Code example #27
 def testUnconnectedGradientsNoneUnconnectedGradients(self):
   with ops.Graph().as_default():
     x = constant(1.0, shape=[2, 2])
     y = constant(3.0, shape=[3, 1])
     grad = gradients.gradients(
         [y], [x], unconnected_gradients="none")
   self.assertIsNone(grad[0])
Code example #28
File: optimizer.py  Project: Jackhuang945/tensorflow
  def compute_gradients(self, loss, var_list=None,
                        gate_gradients=GATE_OP,
                        aggregation_method=None,
                        colocate_gradients_with_ops=False,
                        grad_loss=None):
    """Compute gradients of `loss` for the variables in `var_list`.

    This is the first part of `minimize()`.  It returns a list
    of (gradient, variable) pairs where "gradient" is the gradient
    for "variable".  Note that "gradient" can be a `Tensor`, an
    `IndexedSlices`, or `None` if there is no gradient for the
    given variable.

    Args:
      loss: A Tensor containing the value to minimize.
      var_list: Optional list of `tf.Variable` to update to minimize
        `loss`.  Defaults to the list of variables collected in the graph
        under the key `GraphKey.TRAINABLE_VARIABLES`.
      gate_gradients: How to gate the computation of gradients.  Can be
        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
      aggregation_method: Specifies the method used to combine gradient terms.
        Valid values are defined in the class `AggregationMethod`.
      colocate_gradients_with_ops: If True, try colocating gradients with
        the corresponding op.
      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

    Returns:
      A list of (gradient, variable) pairs. Variable is always present, but
      gradient can be `None`.

    Raises:
      TypeError: If `var_list` contains anything else than `Variable` objects.
      ValueError: If some arguments are invalid.
    """
    if gate_gradients not in [Optimizer.GATE_NONE, Optimizer.GATE_OP,
                              Optimizer.GATE_GRAPH]:
      raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, "
                       "Optimizer.GATE_OP, Optimizer.GATE_GRAPH.  Not %s" %
                       gate_gradients)
    self._assert_valid_dtypes([loss])
    if grad_loss is not None:
      self._assert_valid_dtypes([grad_loss])
    if var_list is None:
      var_list = (
          variables.trainable_variables() +
          ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
    processors = [_get_processor(v) for v in var_list]
    if not var_list:
      raise ValueError("No variables to optimize.")
    var_refs = [p.target() for p in processors]
    grads = gradients.gradients(
        loss, var_refs, grad_ys=grad_loss,
        gate_gradients=(gate_gradients == Optimizer.GATE_OP),
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops)
    if gate_gradients == Optimizer.GATE_GRAPH:
      grads = control_flow_ops.tuple(grads)
    grads_and_vars = list(zip(grads, var_list))
    self._assert_valid_dtypes([v for g, v in grads_and_vars if g is not None])
    return grads_and_vars
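For context, this is how a caller typically pairs `compute_gradients` with `apply_gradients` in graph mode; a minimal sketch using the public `tf.compat.v1` API (the loss and variable are illustrative):

import tensorflow.compat.v1 as tf

with tf.Graph().as_default():
  w = tf.Variable(3.0)
  loss = tf.square(w - 1.0)
  opt = tf.train.GradientDescentOptimizer(learning_rate=0.1)
  grads_and_vars = opt.compute_gradients(loss)  # [(grad, w)]
  # Gradients can be inspected or clipped here before being applied.
  train_op = opt.apply_gradients(grads_and_vars)
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)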
Code example #29
  def test_gradients_exist(self):
    """Check that backprop can run.

    The correctness of the gradients is assumed, since the forward propagation
    is tested to be correct and we only use built-in tf ops.
    However, we perform a simple test to make sure that backprop can actually
    run. We treat the flows as a tf.Variable and optimize them to minimize
    the difference between the interpolated image and the input image.
    """

    batch_size, height, width, numchannels = [4, 5, 6, 7]
    image_shape = [batch_size, height, width, numchannels]
    image = random_ops.random_normal(image_shape)
    flow_shape = [batch_size, height, width, 2]
    init_flows = np.float32(np.random.normal(size=flow_shape) * 0.25)
    flows = variables.Variable(init_flows)

    interp = dense_image_warp.dense_image_warp(image, flows)
    loss = math_ops.reduce_mean(math_ops.square(interp - image))

    optimizer = adam.AdamOptimizer(1.0)
    grad = gradients.gradients(loss, [flows])
    opt_func = optimizer.apply_gradients(zip(grad, [flows]))
    init_op = variables.global_variables_initializer()

    with self.test_session() as sess:
      sess.run(init_op)
      for _ in range(10):
        sess.run(opt_func)
Code example #30
File: gradients_test.py  Project: didukhle/tensorflow
  def testCustomGradientWithVariables(self):

    @custom_gradient.custom_gradient
    def F(x):
      out = core_layers.dense(x, 3, use_bias=False)

      def Grad(out_grad, variables=None):  # pylint: disable=redefined-outer-name
        self.assertEqual(1, len(variables))
        grads = gradients.gradients(out, [x, variables[0]], grad_ys=out_grad)
        return grads[0], [array_ops.ones((4, 3))]

      return out, Grad

    with ops.Graph().as_default():
      x = array_ops.ones((2, 4))
      with variable_scope.variable_scope("f", use_resource=True) as vs:
        y = F(x)
        all_vars = vs.global_variables()
        assert len(all_vars) == 1
      grads = gradients.gradients(y, [x, all_vars[0]])
      for g in grads:
        self.assertTrue(g is not None)
      with session.Session() as sess:
        sess.run(variables.global_variables_initializer())
        dw = sess.run(math_ops.reduce_sum(grads[1]))
        self.assertEqual(12., dw)
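For reference, the basic shape of a custom-gradient function without captured variables, sketched with the public `tf.custom_gradient` decorator (this is essentially the well-known log1pexp example, not part of the test above):

import tensorflow.compat.v1 as tf

@tf.custom_gradient
def log1pexp(x):
  e = tf.exp(x)

  def grad(upstream):
    # Numerically stable gradient of log(1 + exp(x)).
    return upstream * (1.0 - 1.0 / (1.0 + e))

  return tf.math.log(1.0 + e), grad

with tf.Graph().as_default():
  x = tf.constant(100.0)
  y = log1pexp(x)
  g = tf.gradients(y, x)[0]
  with tf.Session() as sess:
    print(sess.run(g))  # 1.0, with no NaN from the overflowing exp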
Code example #31
  def compute_gradients(self,
                        loss,
                        var_list,
                        aggregation_method=None,
                        colocate_gradients_with_ops=False,
                        grad_loss=None,
                        stop_gradients=None):
    """Compute gradients of `loss` for the variables in `var_list`.

    This is the first part of `minimize()`.  It returns a list
    of (gradient, variable) pairs where "gradient" is the gradient
    for "variable".  Note that "gradient" can be a `Tensor`, an
    `IndexedSlices`, or `None` if there is no gradient for the
    given variable.

    Args:
      loss: A Tensor containing the value to minimize or a callable taking no
        arguments which returns the value to minimize. When eager execution is
        enabled it must be a callable.
      var_list: Optional list or tuple of `tf.Variable` to update to minimize
        `loss`.  Defaults to the list of variables collected in the graph under
        the key `GraphKeys.TRAINABLE_VARIABLES`.
      aggregation_method: Specifies the method used to combine gradient terms.
        Valid values are defined in the class `AggregationMethod`.
      colocate_gradients_with_ops: If True, try colocating gradients with the
        corresponding op.
      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
      stop_gradients: Optional. A Tensor or list of tensors not to differentiate
        through.

    Returns:
      A list of (gradient, variable) pairs. Variable is always present, but
      gradient can be `None`.

    Raises:
      TypeError: If `var_list` contains anything else than `Variable` objects.
      ValueError: If some arguments are invalid, or var_list is None.
      RuntimeError: If called with eager execution enabled and `loss` is
        not callable.

    @compatibility(eager)
    When eager execution is enabled, `aggregation_method`, and
    `colocate_gradients_with_ops` are ignored.
    @end_compatibility
    """
    var_list = nest.flatten(var_list)
    # TODO(josh11b): Test that we handle weight decay in a reasonable way.
    if callable(loss):
      with backprop.GradientTape() as tape:
        tape.watch(var_list)
        loss_value = loss()
      grads = tape.gradient(loss_value, var_list, grad_loss)
    else:
      if context.executing_eagerly():
        raise RuntimeError("`loss` passed to Optimizer.compute_gradients "
                           "should be a function when eager execution is "
                           "enabled.")
      self._assert_valid_dtypes([loss])
      if grad_loss is not None:
        self._assert_valid_dtypes([grad_loss])
      grads = gradients.gradients(
          loss,
          var_list,
          grad_ys=grad_loss,
          aggregation_method=aggregation_method,
          colocate_gradients_with_ops=colocate_gradients_with_ops,
          stop_gradients=stop_gradients)

    grads_and_vars = list(zip(grads, var_list))
    self._assert_valid_dtypes([
        v for g, v in grads_and_vars
        if g is not None and v.dtype != dtypes.resource
    ])

    return grads_and_vars
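The callable-loss branch above delegates to `tf.GradientTape`; a minimal eager-mode sketch of the same idea (the variable and loss function are illustrative):

import tensorflow as tf

w = tf.Variable([2.0, 3.0])

def loss_fn():
  return tf.reduce_sum(tf.square(w))

with tf.GradientTape() as tape:
  loss_value = loss_fn()
# Counterpart of grads = tape.gradient(loss_value, var_list, grad_loss) above,
# without an explicit grad_loss (output_gradients) argument.
grads = tape.gradient(loss_value, [w])
print(grads)  # [2 * w] == [[4.0, 6.0]]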
Code example #32
def _linear_classifier_model_fn(features, targets, mode, params):
    """Linear classifier model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires all feature columns are
        sparse and use the 'sum' combiner.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
    feature_columns = params["feature_columns"]
    n_classes = params["n_classes"]
    weight_column_name = params["weight_column_name"]
    optimizer = params["optimizer"]
    gradient_clip_norm = params.get("gradient_clip_norm", None)
    enable_centered_bias = params.get("enable_centered_bias", True)
    num_ps_replicas = params.get("num_ps_replicas", 0)
    joint_weights = params.get("joint_weights", False)

    if not isinstance(features, dict):
        features = {"": features}

    parent_scope = "linear"
    num_label_columns = 1 if n_classes == 2 else n_classes
    loss_fn = _softmax_cross_entropy_loss
    if n_classes == 2:
        loss_fn = _log_loss_with_two_classes

    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas, min_slice_size=64 << 20)
    with variable_scope.variable_op_scope(features.values(),
                                          parent_scope,
                                          partitioner=partitioner) as scope:
        if joint_weights:
            logits, _, _ = (layers.joint_weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=num_label_columns,
                weight_collections=[parent_scope],
                scope=scope))
        else:
            logits, _, _ = (layers.weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=num_label_columns,
                weight_collections=[parent_scope],
                scope=scope))

    if enable_centered_bias:
        logits = nn.bias_add(logits, _centered_bias(num_label_columns))

    loss = None
    if mode != estimator.ModeKeys.INFER:
        loss = loss_fn(logits, targets)
        if weight_column_name:
            weight_tensor = array_ops.reshape(math_ops.to_float(
                features[weight_column_name]),
                                              shape=(-1, ))
            loss = _weighted_loss(loss, weight_tensor)
        else:
            loss = math_ops.reduce_mean(loss, name="loss")
        logging_ops.scalar_summary("loss", loss)

    train_ops = []
    if mode == estimator.ModeKeys.TRAIN:
        global_step = contrib_variables.get_global_step()

        my_vars = ops.get_collection("linear")
        grads = gradients.gradients(loss, my_vars)
        if gradient_clip_norm:
            grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
        train_ops.append(
            optimizer.apply_gradients(zip(grads, my_vars),
                                      global_step=global_step))
        if enable_centered_bias:
            train_ops.append(
                _centered_bias_step(targets, loss_fn, num_label_columns))

    predictions = {}
    if n_classes == 2:
        predictions[_LOGISTIC] = math_ops.sigmoid(logits)
        logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)

    return predictions, loss, control_flow_ops.group(*train_ops)
Code example #33
 def loop_fn(i):
     out_i = array_ops.gather(out, i)
     return gradient_ops.gradients(out_i, x)[0]
Code example #34
 def testVariableAsGraphElementGradient(self):
     with ops.Graph().as_default() as graph:
         init = constant_op.constant(100.0)
         var = variables.Variable(init)
         gradient = gradients.gradients(graph.as_graph_element(var), var)
         self.assertIsNotNone(gradient)
Code example #35
File: linear.py  Project: xjump/tensorflow-cl
def _linear_classifier_model_fn(features, targets, mode, params):
    """Estimator's linear model_fn."""
    n_classes = params["n_classes"]
    weight_column_name = params["weight_column_name"]
    feature_columns = params["feature_columns"]
    optimizer = params["optimizer"]
    gradient_clip_norm = params.get("gradient_clip_norm", None)
    enable_centered_bias = params.get("enable_centered_bias", True)
    num_ps_replicas = params.get("num_ps_replicas", 0)
    joint_weights = params.get("joint_weights", False)

    if not isinstance(features, dict):
        features = {"": features}

    num_label_columns = 1 if n_classes == 2 else n_classes
    loss_fn = _softmax_cross_entropy_loss
    if n_classes == 2:
        loss_fn = _log_loss_with_two_classes

    feat_values = (features.values()
                   if isinstance(features, dict) else [features])
    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas, min_slice_size=64 << 20)
    with variable_scope.variable_op_scope(feat_values,
                                          "linear",
                                          partitioner=partitioner) as scope:
        if joint_weights:
            logits, _, _ = (layers.joint_weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=num_label_columns,
                weight_collections=["linear"],
                scope=scope))
        else:
            logits, _, _ = (layers.weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=num_label_columns,
                weight_collections=["linear"],
                scope=scope))

    if enable_centered_bias:
        logits = nn.bias_add(logits, _centered_bias(num_label_columns))

    loss = None
    if mode != estimator.ModeKeys.INFER:
        loss = loss_fn(logits, targets)
        if weight_column_name:
            weight_tensor = array_ops.reshape(math_ops.to_float(
                features[weight_column_name]),
                                              shape=(-1, ))
            loss = _weighted_loss(loss, weight_tensor)
        else:
            loss = math_ops.reduce_mean(loss, name="loss")
        logging_ops.scalar_summary("loss", loss)

    train_ops = []
    if mode == estimator.ModeKeys.TRAIN:
        global_step = contrib_variables.get_global_step()

        my_vars = ops.get_collection("linear")
        grads = gradients.gradients(loss, my_vars)
        if gradient_clip_norm:
            grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
        train_ops.append(
            optimizer.apply_gradients(zip(grads, my_vars),
                                      global_step=global_step))
        if enable_centered_bias:
            train_ops.append(
                _centered_bias_step(targets, loss_fn, num_label_columns))

    predictions = {}
    if n_classes == 2:
        predictions[_LOGISTIC] = math_ops.sigmoid(logits)
        logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)

    return predictions, loss, control_flow_ops.group(*train_ops)
Code example #36
 def model_fn(inps, init_state):
     state = init_state
     for inp in inps:
         _, state = cell(inp, state)
     output = nn.l2_loss(state.c)
     return gradient_ops.gradients(output, variables.trainable_variables())
Code example #37
    def _TestCompareFoldAndUnfolded(self, relu, relu_op_name, with_bypass,
                                    has_scaling, fused_batch_norm,
                                    freeze_batch_norm_delay):
        """Tests that running folded and unfolded BN returns the same results.

    Args:
      relu: Callable that returns an Operation, a factory method for the Relu*.
      relu_op_name: String, name of the Relu* operation.
      with_bypass: Bool, when true there is an extra connection added from
        inputs to just before Relu*.
      has_scaling: Bool, when true the batch norm has scaling.
      fused_batch_norm: Bool, when true the batch norm is fused.
      freeze_batch_norm_delay: None or the number of steps after which training
      switches to using frozen mean and variance
    """
        random_seed.set_random_seed(1234)
        unfolded_g = ops.Graph()
        with unfolded_g.as_default():
            batch_size, height, width = 5, 128, 128
            inputs = random_ops.random_uniform((batch_size, height, width, 3),
                                               dtype=dtypes.float32,
                                               seed=1234)
            out_depth = 3 if with_bypass else 32
            stride = 1 if with_bypass else 2
            activation_fn = None if with_bypass else relu
            scope = 'test/test2' if with_bypass else 'test'
            node = conv2d(inputs,
                          out_depth, [5, 5],
                          stride=stride,
                          padding='SAME',
                          weights_initializer=self._WeightInit(0.09),
                          activation_fn=activation_fn,
                          normalizer_fn=batch_norm,
                          normalizer_params=self._BatchNormParams(
                              scale=has_scaling, fused=fused_batch_norm),
                          scope=scope)
            if with_bypass:
                node = math_ops.add(inputs, node, name='test/Add')
            relu_node = relu(node, name='test/' + relu_op_name)
        folded_g = self._CopyGraph(unfolded_g)
        with folded_g.as_default():
            fold_batch_norms.FoldBatchNorms(
                folded_g,
                is_training=True,
                freeze_batch_norm_delay=freeze_batch_norm_delay)
        with session.Session(graph=unfolded_g) as sess:
            sess.run(variables.global_variables_initializer())
            grad_node = gradients.gradients(relu_node, inputs)
            results = sess.run([relu_node, grad_node])
            unfolded_forward, unfolded_backward = results[0], results[1]

        with session.Session(graph=folded_g) as sess:
            sess.run(variables.global_variables_initializer())
            relu_node = folded_g.get_tensor_by_name(relu_node.name)
            inputs = folded_g.get_tensor_by_name(inputs.name)
            grad_node = gradients.gradients(relu_node, inputs)
            results = sess.run([relu_node, grad_node])
            folded_forward, folded_backward = results[0], results[1]

        # Check that the folded and unfolded results match.
        self.assertAllClose(unfolded_forward, folded_forward, atol=1e-3)
        self.assertAllClose(unfolded_backward, folded_backward, atol=1e-3)
Code example #38
    def compute_gradients(self,
                          loss,
                          var_list=None,
                          gate_gradients=GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          grad_loss=None):
        """Compute gradients of `loss` for the variables in `var_list`.

    This is the first part of `minimize()`.  It returns a list
    of (gradient, variable) pairs where "gradient" is the gradient
    for "variable".  Note that "gradient" can be a `Tensor`, an
    `IndexedSlices`, or `None` if there is no gradient for the
    given variable.

    Args:
      loss: A Tensor containing the value to minimize or a callable taking
        no arguments which returns the value to minimize. When eager execution
        is enabled it must be a callable.
      var_list: Optional list or tuple of `tf.Variable` to update to minimize
        `loss`.  Defaults to the list of variables collected in the graph
        under the key `GraphKeys.TRAINABLE_VARIABLES`.
      gate_gradients: How to gate the computation of gradients.  Can be
        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
      aggregation_method: Specifies the method used to combine gradient terms.
        Valid values are defined in the class `AggregationMethod`.
      colocate_gradients_with_ops: If True, try colocating gradients with
        the corresponding op.
      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

    Returns:
      A list of (gradient, variable) pairs. Variable is always present, but
      gradient can be `None`.

    Raises:
      TypeError: If `var_list` contains anything else than `Variable` objects.
      ValueError: If some arguments are invalid.
      RuntimeError: If called with eager execution enabled and `loss` is
        not callable.

    @compatibility(eager)
    When eager execution is enabled, `gate_gradients`, `aggregation_method`,
    and `colocate_gradients_with_ops` are ignored.
    @end_compatibility
    """
        if callable(loss):
            with backprop.GradientTape() as tape:
                if var_list is not None:
                    tape.watch(var_list)
                loss_value = loss()
            if var_list is None:
                var_list = tape.watched_variables()
            grads = tape.gradient(loss_value, var_list, grad_loss)
            return list(zip(grads, var_list))
        if context.in_eager_mode():
            raise RuntimeError(
                "`loss` passed to Optimizer.compute_gradients should "
                "be a function when eager execution is enabled.")
        if gate_gradients not in [
                Optimizer.GATE_NONE, Optimizer.GATE_OP, Optimizer.GATE_GRAPH
        ]:
            raise ValueError(
                "gate_gradients must be one of: Optimizer.GATE_NONE, "
                "Optimizer.GATE_OP, Optimizer.GATE_GRAPH.  Not %s" %
                gate_gradients)
        self._assert_valid_dtypes([loss])
        if grad_loss is not None:
            self._assert_valid_dtypes([grad_loss])
        if var_list is None:
            var_list = (
                variables.trainable_variables() +
                ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
        else:
            var_list = nest.flatten(var_list)
        # pylint: disable=protected-access
        var_list += ops.get_collection(ops.GraphKeys._STREAMING_MODEL_PORTS)
        # pylint: enable=protected-access
        processors = [_get_processor(v) for v in var_list]
        if not var_list:
            raise ValueError("No variables to optimize.")
        var_refs = [p.target() for p in processors]
        grads = gradients.gradients(
            loss,
            var_refs,
            grad_ys=grad_loss,
            gate_gradients=(gate_gradients == Optimizer.GATE_OP),
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops)
        if gate_gradients == Optimizer.GATE_GRAPH:
            grads = control_flow_ops.tuple(grads)
        grads_and_vars = list(zip(grads, var_list))
        self._assert_valid_dtypes([
            v for g, v in grads_and_vars
            if g is not None and v.dtype != dtypes.resource
        ])
        return grads_and_vars
Code example #39
 def loop_fn(i):
     y_i = array_ops.gather(y, i)
     grad = gradient_ops.gradients(y_i, x)[0]
     return array_ops.gather(grad, i)
Code example #40
 def loop_fn(i):
     out_i = array_ops.gather(out, i, axis=1)
     return array_ops.reshape(gradient_ops.gradients(out_i, x)[0], [-1])
Code example #41
File: __init__.py  Project: ShipengWang/HyperAdam
 def _get_fx(self, f, i, x):
     if isinstance(f, list):
         return f[0], f[1]
     fx = f(i, x)
     grad = gradients.gradients(fx, x)[0]
     return fx, grad
Code example #42
    def compute_gradients(self,
                          loss,
                          var_list=None,
                          gate_gradients=GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          grad_loss=None):
        """Compute gradients of `loss` for the variables in `var_list`.

    This is the first part of `minimize()`.  It returns a list
    of (gradient, variable) pairs where "gradient" is the gradient
    for "variable".  Note that "gradient" can be a `Tensor`, an
    `IndexedSlices`, or `None` if there is no gradient for the
    given variable.

    Args:
      loss: A Tensor containing the value to minimize.
      var_list: Optional list of `tf.Variable` to update to minimize
        `loss`.  Defaults to the list of variables collected in the graph
        under the key `GraphKey.TRAINABLE_VARIABLES`.
      gate_gradients: How to gate the computation of gradients.  Can be
        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
      aggregation_method: Specifies the method used to combine gradient terms.
        Valid values are defined in the class `AggregationMethod`.
      colocate_gradients_with_ops: If True, try colocating gradients with
        the corresponding op.
      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

    Returns:
      A list of (gradient, variable) pairs. Variable is always present, but
      gradient can be `None`.

    Raises:
      TypeError: If `var_list` contains anything else than `Variable` objects.
      ValueError: If some arguments are invalid.
    """
        if gate_gradients not in [
                Optimizer.GATE_NONE, Optimizer.GATE_OP, Optimizer.GATE_GRAPH
        ]:
            raise ValueError(
                "gate_gradients must be one of: Optimizer.GATE_NONE, "
                "Optimizer.GATE_OP, Optimizer.GATE_GRAPH.  Not %s" %
                gate_gradients)
        self._assert_valid_dtypes([loss])
        if grad_loss is not None:
            self._assert_valid_dtypes([grad_loss])
        if var_list is None:
            var_list = (
                variables.trainable_variables() +
                ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
        processors = [_get_processor(v) for v in var_list]
        if not var_list:
            raise ValueError("No variables to optimize.")
        var_refs = [p.target() for p in processors]
        grads = gradients.gradients(
            loss,
            var_refs,
            grad_ys=grad_loss,
            gate_gradients=(gate_gradients == Optimizer.GATE_OP),
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops)
        if gate_gradients == Optimizer.GATE_GRAPH:
            grads = control_flow_ops.tuple(grads)
        grads_and_vars = list(zip(grads, var_list))
        self._assert_valid_dtypes(
            [v for g, v in grads_and_vars if g is not None])
        return grads_and_vars
Code example #43
def _compute_gradients(tensor, var_list):
  grads = gradients.gradients(tensor, var_list)
  # tf.gradients sometimes returns `None` when it should return 0.
  return [grad if grad is not None else array_ops.zeros_like(var)
          for var, grad in zip(var_list, grads)]
Code example #44
 def loop_fn(i):
   a = array_ops.gather(x, i)
   y = nn.bias_add(a, bias, data_format=data_format)
   loss = math_ops.reduce_sum(y * y)
   return y, gradient_ops.gradients(loss, bias)
Code example #45
 def model_fn(activation):
   for layer in layers:
     activation = layer(activation)
   activation = projection(activation)
   activation = nn.l2_loss(activation)
   return gradient_ops.gradients(activation, variables.trainable_variables())
Code example #46
 def testPreventGradient(self):
     with ops.Graph().as_default():
         inp = constant(1.0, shape=[100, 32], name="in")
         out = array_ops.prevent_gradient(inp)
         with self.assertRaisesRegexp(LookupError, "explicitly disabled"):
             _ = gradients.gradients(out, inp)
Code example #47
# g = tf.GradientTape().__enter__()
# model.variables
# model.trainable_weights[0]
# g.watch(model.variables)
# tf.GradientTape().__exit__()

# model.evaluate()
# model.layers[0].layers[6].updates
model.optimizer.get_gradients
# thing.updates

from tensorflow.python.ops import gradients

# Alternatively, try PyTorch and cross-validate against the TF results.
# There appears to be no direct per-output way here; GradientTape.jacobian
# does support it, but it does not work with tf.cond.
grads = [gradients.gradients(model2.output[i], model2.variables) for i in range(25)]
grads = [gradients.gradients(model(train_images[i:i + 1]), model.variables) for i in range(25)]

sess = tf.Session()
sess.run(grads, feed_dict={model2.input:train_images[:25]})

grads = gradients.gradients(
    tf.expand_dims(tf.tile(model(train_images[:25]), tf.constant([1, 25], tf.int32)), 0),
    model.variables, grad_ys=tf.eye(25))

grads

grads[0].shape

tf.app.flags.DEFINE_string('f', '', 'kernel')

model.build()
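The GradientTape.jacobian route mentioned in the comments above looks roughly like this, sketched on a toy model since `model2` and `train_images` are not defined here:

import tensorflow as tf

# Toy stand-in for the real model / inputs above.
model = tf.keras.Sequential([tf.keras.layers.Dense(25)])
inputs = tf.random.normal([1, 4])

with tf.GradientTape() as tape:
  outputs = model(inputs)  # shape [1, 25]
# One Jacobian entry per output element with respect to each variable,
# instead of 25 separate tf.gradients calls.
jacobians = tape.jacobian(outputs, model.trainable_variables)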
Code example #48
 def Grad(out_grad, variables=None):  # pylint: disable=redefined-outer-name
     self.assertEqual(1, len(variables))
     grads = gradients.gradients(out, [x, variables[0]],
                                 grad_ys=out_grad)
     return grads[0], [array_ops.ones((4, 3))]
Code example #49
 def testStopGradient(self):
     with ops.Graph().as_default():
         inp = constant(1.0, shape=[100, 32], name="in")
         out = array_ops.stop_gradient(inp)
         igrad = gradients.gradients(out, inp)[0]
     assert igrad is None
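A small sketch of `tf.stop_gradient` inside a larger expression (graph mode, values illustrative): the stopped branch contributes to the forward value but not to the gradient.

import tensorflow.compat.v1 as tf

with tf.Graph().as_default():
  x = tf.constant(2.0)
  y = tf.square(x) + tf.stop_gradient(tf.square(x))
  # d/dx of x**2 is 4.0 at x = 2; the stop_gradient term adds nothing.
  g = tf.gradients(y, x)[0]
  with tf.Session() as sess:
    print(sess.run(g))  # 4.0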
Code example #50
 def loop_fn(i):
   x1 = array_ops.gather(x, i)
   output = nn.max_pool(
       x1, ksize, strides=[1, 2, 2, 1], padding="VALID", data_format="NHWC")
   loss = nn.l2_loss(output)
   return output, gradient_ops.gradients(loss, x1)
Code example #51
 def loop_fn(i):
   logits_i = array_ops.gather(logits, i)
   labels_i = array_ops.gather(labels, i)
   loss = nn.softmax_cross_entropy_with_logits(
       labels=labels_i, logits=logits_i)
   return loss, gradient_ops.gradients(math_ops.reduce_sum(loss), logits_i)
Code example #52
 def testUnconnectedGradientsNoneUnconnectedGradients(self):
     with ops.Graph().as_default():
         x = constant(1.0, shape=[2, 2])
         y = constant(3.0, shape=[3, 1])
         grad = gradients.gradients([y], [x], unconnected_gradients="none")
     self.assertIsNone(grad[0])
Code example #53
 def _xlogy_gradients(self, x, y):
     xlogy_xgrad = self.evaluate(
         gradients.gradients(math_ops.xlogy(x, y), x)[0])
     xlogy_ygrad = self.evaluate(
         gradients.gradients(math_ops.xlogy(x, y), y)[0])
     return xlogy_xgrad, xlogy_ygrad
Code example #54
def _compute_gradients(tensor, var_list):
    grads = gradients.gradients(tensor, var_list)
    return [
        grad if grad is not None else array_ops.zeros_like(var)
        for var, grad in zip(var_list, grads)
    ]
Code example #55
 def loop_fn(i):
     y = array_ops.gather(output, i, axis=1)
     return gradient_ops.gradients(y, inp)[0]
Code example #56
 def _xdivy_gradients(self, x, y):
     xdivy_xgrad = self.evaluate(
         gradients.gradients(math_ops.xdivy(x, y), x)[0])
     xdivy_ygrad = self.evaluate(
         gradients.gradients(math_ops.xdivy(x, y), y)[0])
     return xdivy_xgrad, xdivy_ygrad
Code example #57
 def testVariableRefGradient(self):
     with ops.Graph().as_default():
         init = constant_op.constant(100.0)
         var = variables.Variable(init)
         gradient = gradients.gradients(var._ref(), var)
         self.assertIsNotNone(gradient)
Code example #58
 def loop_fn(i):
     y = array_ops.gather(output, i)
     return gradient_ops.gradients(y, flat_inputs)
Code example #59
 def _Gradients(ys, xs, **kwargs):
     dydxs = gradients.gradients(ys, xs, **kwargs)
     dydxs = [
         0. * x if dydx is None else dydx for x, dydx in zip(xs, dydxs)
     ]
     return dydxs
Code example #60
 def loop_fn(i):
   x1 = array_ops.gather(x, i)
   y = op(x1)
   loss = math_ops.reduce_sum(y * y)
   return op(x), y, gradient_ops.gradients(loss, x1)