Example #1
  def Test(self):
    if not use_static_shape_ or a_np_.dtype in (np.int32, np.int64, np.float16):
      self.skipTest("Skipping infeasible gradient test.")

    # Transpose and possibly conjugate a_np_ and b_np_ according to the
    # attributes such that tf.matmul(effective_a_np, effective_b_np, **kwargs)
    # results in a valid matrix multiplication and produces the same result as
    # np.matrix(a_np_) * np.matrix(b_np_)
    effective_a_np = _GetTransposedMatrices(a_np_, "a", kwargs_)
    effective_b_np = _GetTransposedMatrices(b_np_, "b", kwargs_)

    epsilon = np.finfo(a_np_.dtype).eps
    delta = epsilon**(1.0 / 3.0)
    tol = 20 * delta
    with self.session(), test_util.use_gpu():
      theoretical, numerical = gradient_checker_v2.compute_gradient(
          lambda x: math_ops.matmul(x, effective_b_np, **kwargs_),
          [effective_a_np],
          delta=delta)
      self.assertAllClose(theoretical, numerical, rtol=tol, atol=tol)

      theoretical, numerical = gradient_checker_v2.compute_gradient(
          lambda x: math_ops.matmul(effective_a_np, x, **kwargs_),
          [effective_b_np],
          delta=delta)
      self.assertAllClose(theoretical, numerical, rtol=tol, atol=tol)
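
The pattern above generalizes: choose the central-difference step as delta ≈ eps**(1/3) (the optimal order for central differences), compare theoretical and numerical Jacobians with a tolerance proportional to delta, and check each matmul argument separately by closing over the other. A minimal, self-contained sketch of that pattern using the public tf.test.compute_gradient wrapper (TF2 eager mode assumed; the shapes below are made up for illustration):

import numpy as np
import tensorflow as tf

a = np.random.rand(3, 4).astype(np.float64)
b = np.random.rand(4, 5).astype(np.float64)

# Optimal central-difference step is O(eps**(1/3)); the tolerance scales with it.
delta = np.finfo(a.dtype).eps ** (1.0 / 3.0)
tol = 20 * delta

# Check the gradient w.r.t. the first argument by closing over the second.
theoretical, numerical = tf.test.compute_gradient(
    lambda x: tf.matmul(x, b), [a], delta=delta)
np.testing.assert_allclose(theoretical[0], numerical[0], rtol=tol, atol=tol)

# And vice versa for the second argument.
theoretical, numerical = tf.test.compute_gradient(
    lambda y: tf.matmul(a, y), [b], delta=delta)
np.testing.assert_allclose(theoretical[0], numerical[0], rtol=tol, atol=tol)
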
 def testEmptySucceeds(self):
   def f(x):
     return array_ops.identity(x)
   x = constant_op.constant(np.random.random_sample((0, 3)),
                            dtype=dtypes.float32)
   for grad in gradient_checker.compute_gradient(f, [x]):
     self.assertEqual(grad[0].shape, (0, 0))
   error = gradient_checker.max_error(*gradient_checker.compute_gradient(
       f, [x]))
   self.assertEqual(error, 0)
 def testComplexConj(self):
   def f(x):
     return math_ops.conj(x)
   x = constant_op.constant(11 - 13j, dtype=dtypes.complex64)
   analytical, numerical = gradient_checker.compute_gradient(
       f, [x], delta=0.1)
   correct = np.array([[1, 0], [0, -1]])
   self.assertAllEqual(correct, analytical[0])
   self.assertAllClose(correct, numerical[0], rtol=2e-5)
   self.assertLess(
       gradient_checker.max_error(*gradient_checker.compute_gradient(
           f, [x], delta=0.1)), 2e-5)
 def testComplexMul(self):
   if not context.executing_eagerly():
     return
   c = constant_op.constant(5 + 7j, dtype=dtypes.complex64)
   def f(x):
     return c * x
   x = constant_op.constant(11 - 13j, dtype=dtypes.complex64)
   analytical, numerical = gradient_checker.compute_gradient(
       f, [x], delta=0.1)
   correct = np.array([[5, 7], [-7, 5]])
   self.assertAllEqual(correct, analytical[0])
   self.assertAllClose(correct, numerical[0], rtol=1e-4)
   self.assertLess(
       gradient_checker.max_error(*gradient_checker.compute_gradient(
           f, [x], delta=0.1)), 2e-4)
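
The expected matrices in the two complex-valued tests above follow from treating a complex scalar as the real pair (re, im), with the Jacobian laid out as [input component, output component], which is the layout these expectations imply: conj maps (re, im) to (re, -im), and multiplication by c = 5 + 7j acts as [[5, 7], [-7, 5]]. A NumPy-only sketch of that reasoning (the real_jacobian helper below is hypothetical, not part of the test suite):

import numpy as np

def real_jacobian(f, z, eps=1e-3):
  """Central-difference Jacobian of a complex->complex f in (re, im) coordinates."""
  jac = np.zeros((2, 2))
  for i, step in enumerate((eps, eps * 1j)):   # perturb the real part, then the imaginary part
    df = (f(z + step) - f(z - step)) / (2 * eps)
    jac[i] = [df.real, df.imag]                # row i: d(output)/d(input_i)
  return jac

print(real_jacobian(np.conj, 11 - 13j))                  # ~[[1, 0], [0, -1]]
print(real_jacobian(lambda z: (5 + 7j) * z, 11 - 13j))   # ~[[5, 7], [-7, 5]]
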
 def testComplexConj(self):
   def f(x):
     return math_ops.conj(x)
   x_shape = ()
   x_dtype = dtypes.complex64
   x = constant_op.constant(_random_complex(x_shape, x_dtype))
   analytical, numerical = gradient_checker.compute_gradient(
       f, [x])
   correct = np.array([[1, 0], [0, -1]])
   self.assertAllEqual(correct, analytical[0])
   self.assertAllClose(correct, numerical[0], rtol=2e-5)
   x = constant_op.constant(_random_complex(x_shape, x_dtype))
   self.assertLess(
       gradient_checker.max_error(*gradient_checker.compute_gradient(
           f, [x])), 2e-5)
 def testComplexMul(self):
   c = constant_op.constant(5 + 7j, dtype=dtypes.complex64)
   def f(x):
     return c * x
   x_shape = c.shape
   x_dtype = c.dtype
   x = constant_op.constant(_random_complex(x_shape, x_dtype))
   analytical, numerical = gradient_checker.compute_gradient(
       f, [x])
   correct = np.array([[5, 7], [-7, 5]])
   self.assertAllEqual(correct, analytical[0])
   self.assertAllClose(correct, numerical[0], rtol=1e-4)
   x = constant_op.constant(_random_complex(x_shape, x_dtype))
   self.assertLess(
       gradient_checker.max_error(*gradient_checker.compute_gradient(
           f, [x])), 3e-4)
 def testEmptyFails(self):
   @custom_gradient.custom_gradient
   def id_bad_grad(x):
     y = array_ops.identity(x)
     def grad_fn(dy):
       # dx = constant_op.constant(np.zeros((1, 4)), dtype=dtypes.float32)
       dx = array_ops.transpose(dy)
       return dx
     return y, grad_fn
   def f(x):
     return id_bad_grad(x)
   x = constant_op.constant(np.random.random_sample((0, 3)),
                            dtype=dtypes.float32)
   bad = r"Empty gradient has wrong shape: expected \(0, 3\), got \(3, 0\)"
   with self.assertRaisesRegex(ValueError, bad):
     gradient_checker.compute_gradient(f, [x])
  def _gradientTest(self, diags, rhs, dtype=dtypes.float64):

    def reference_matmul(diags, rhs):
      matrix = self._makeTridiagonalMatrix(diags[..., 0, :-1], diags[..., 1, :],
                                           diags[..., 2, 1:])
      return math_ops.matmul(matrix, rhs)

    diags = constant_op.constant(diags, dtype=dtype)
    rhs = constant_op.constant(rhs, dtype=dtype)
    with self.cached_session(use_gpu=True):
      grad_reference, _ = gradient_checker_v2.compute_gradient(
          reference_matmul, [diags, rhs])
      grad_theoretical, grad_numerical = gradient_checker_v2.compute_gradient(
          linalg_impl.tridiagonal_matmul, [diags, rhs])
    self.assertAllClose(grad_theoretical, grad_numerical)
    self.assertAllClose(grad_theoretical, grad_reference)
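
The reference_matmul helper above relies on self._makeTridiagonalMatrix, which is not shown in this listing; it assembles a dense matrix from the superdiagonal, main diagonal, and subdiagonal slices of diags, so the gradients of the specialized linalg_impl.tridiagonal_matmul op can be checked against those of a plain dense matmul. A hypothetical, non-batched NumPy equivalent, just to make the diags slicing above concrete:

import numpy as np

def make_tridiagonal(superdiag, maindiag, subdiag):
  """Dense matrix from the three diagonals (lengths n-1, n, n-1)."""
  matrix = np.diag(maindiag)
  matrix = matrix + np.diag(superdiag, k=1)   # entries just above the main diagonal
  matrix = matrix + np.diag(subdiag, k=-1)    # entries just below the main diagonal
  return matrix

print(make_tridiagonal([1., 2.], [3., 4., 5.], [6., 7.]))
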
Example #9
 def testGradientFloat64(self):
   with self.cached_session():
     x_val = [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]]
     x = np.asarray(x_val, dtype=np.float64, order="F")
     err = gradient_checker_v2.max_error(
         *gradient_checker_v2.compute_gradient(nn_ops.selu, [x]))
   print("selu (float64) gradient err = ", err)
   self.assertLess(err, 1e-6)
 def testAddSimple(self):
   size = (2, 3)
   x1 = constant_op.constant(2.0, shape=size, name="x1")
   x2 = constant_op.constant(3.0, shape=size, name="x2")
   error = gradient_checker.max_error(*gradient_checker.compute_gradient(
       lambda x1: math_ops.add(x1, x2), [x1]))
   tf_logging.info("x1 error = %f", error)
   assert error < 1e-4
 def testBroadcastingWithGradientChecker(self):
   for dtype in [dtypes.float32, dtypes.float64]:
     with self.cached_session():
       x1 = np.array([-1, 0, 1, 2, 3], dtype=dtype.as_numpy_dtype)
       x2 = np.array([2], dtype=dtype.as_numpy_dtype)
       err = gradient_checker_v2.max_error(
           *gradient_checker_v2.compute_gradient(
               lambda x: math_ops.nextafter(x, x2), [x1]))  # pylint: disable=cell-var-from-loop
       self.assertLess(err, 1e-3)
Example #12
 def testGradientFloat32(self):
   with self.cached_session():
     x = np.asarray(
         [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
         dtype=np.float32,
         order="F")
     err = gradient_checker_v2.max_error(
         *gradient_checker_v2.compute_gradient(nn_ops.leaky_relu, [x]))
   print("leaky_relu (float32) gradient err = ", err)
   self.assertLess(err, 1e-4)
Example #13
 def testGradientFloat64(self):
   with self.cached_session():
     x = np.asarray(
         [[-0.9, -0.7, -0.5, -0.3, -0.1], [6.1, 6.3, 6.5, 6.7, 6.9]],
         dtype=np.float64,
         order="F")
     err = gradient_checker_v2.max_error(
         *gradient_checker_v2.compute_gradient(nn_ops.relu6, [x]))
   print("relu6 (float64) gradient err = ", err)
   self.assertLess(err, 1e-10)
 def testAddCustomized(self):
   size = (2, 3)
   x1 = constant_op.constant(
       2.0, shape=size, dtype=dtypes.float64, name="x1")
   x2 = np.asarray(np.arange(6, dtype=np.float64).reshape(2, 3))
   # checking gradients for x2 using a special delta
   error = gradient_checker.max_error(*gradient_checker.compute_gradient(
       lambda x2: math_ops.add(x1, x2),
       [x2], delta=1e-2))
   tf_logging.info("x2 error = %f", error)
   assert error < 1e-10
 def testAddSimple(self):
   # if context.executing_eagerly():
   #   return
   np.random.seed(1)  # Fix seed to avoid flakiness
   size = (2, 3)
   x1 = constant_op.constant(2.0, shape=size, name="x1")
   x2 = constant_op.constant(3.0, shape=size, name="x2")
   error = gradient_checker.max_error(*gradient_checker.compute_gradient(
       lambda x1: math_ops.add(x1, x2), [x1]))
   tf_logging.info("x1 error = %f", error)
   assert error < 1e-4
  def testGradGrad(self):

    def f(x):
      with backprop.GradientTape() as tape:
        tape.watch(x)
        y = math_ops.square(x)
        z = math_ops.square(y)
      return tape.gradient(z, x)

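    # f(x) returns d/dx[(x**2)**2] = 4*x**3, so the gradient being checked
    # here is f'(x) = 12*x**2, which equals 48 at x = 2.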
    analytical, numerical = gradient_checker.compute_gradient(f, [2.0])
    self.assertAllEqual([[[48.]]], analytical)
    self.assertAllClose([[[48.]]], numerical, rtol=1e-4)
  def Test(self):
    np.random.seed(1)
    n = shape_[-1]
    batch_shape = shape_[:-2]
    np_dtype = dtype_.as_numpy_dtype

    def RandomInput():
      a = np.random.uniform(
          low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
      if dtype_.is_complex:
        a += 1j * np.random.uniform(
            low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
      a += np.conj(a.T)
      a = np.tile(a, batch_shape + (1, 1))
      return a

    # Optimal stepsize for central difference is O(epsilon^{1/3}).
    epsilon = np.finfo(np_dtype).eps
    delta = 0.1 * epsilon**(1.0 / 3.0)
    # Tolerance obtained by looking at actual differences using
    # np.linalg.norm(theoretical - numerical, np.inf) on a -mavx build,
    # after discarding one random input sample.
    _ = RandomInput()
    if dtype_ in (dtypes_lib.float32, dtypes_lib.complex64):
      tol = 1e-2
    else:
      tol = 1e-7
    with self.session(use_gpu=True):
      def Compute(x):
        e, v = linalg_ops.self_adjoint_eig(x)
        # (Complex) eigenvectors are only unique up to an arbitrary phase.
        # We normalize the vectors such that the first component has phase 0.
        top_rows = v[..., 0:1, :]
        if dtype_.is_complex:
          angle = -math_ops.angle(top_rows)
          phase = math_ops.complex(math_ops.cos(angle), math_ops.sin(angle))
        else:
          phase = math_ops.sign(top_rows)
        v *= phase
        return e, v

      if compute_v_:
        funcs = [lambda x: Compute(x)[0], lambda x: Compute(x)[1]]
      else:
        funcs = [linalg_ops.self_adjoint_eigvals]

      for f in funcs:
        theoretical, numerical = gradient_checker_v2.compute_gradient(
            f,
            [RandomInput()],
            delta=delta)
        self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)
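
The phase normalization inside Compute is what makes the eigenvector Jacobians comparable at all: each (complex) eigenvector is only defined up to a unit-modulus factor, so both branches rotate it until the first component has zero phase. A small NumPy-only illustration of the same normalization (illustrative only, not part of the test):

import numpy as np

a = np.random.rand(4, 4) + 1j * np.random.rand(4, 4)
a += a.conj().T                 # make the matrix Hermitian
_, v = np.linalg.eigh(a)        # columns of v are eigenvectors

# Rotate each eigenvector so its first component is real and non-negative,
# removing the arbitrary per-column phase.
phase = np.exp(-1j * np.angle(v[0:1, :]))
v_normalized = v * phase
assert np.allclose(v_normalized[0].imag, 0)
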
 def testGather(self):
   def f(params):
     index_values = [1, 3]
     indices = constant_op.constant(index_values, name="i")
     return array_ops.gather(params, indices, name="y")
   p_shape = (4, 2)
   p_size = 8
   params = constant_op.constant(
       np.arange(p_size).astype(np.float64), shape=p_shape, name="p")
   error = gradient_checker.max_error(*gradient_checker.compute_gradient(
       f, [params]))
   tf_logging.info("gather error = %f", error)
   assert error < 1e-4
 def testNestedGather(self):
   def f(params):
     index_values = [1, 3, 5, 6]
     indices = constant_op.constant(index_values, name="i")
     y = array_ops.gather(params, indices, name="y")
     index_values2 = [0, 2]
     indices2 = constant_op.constant(index_values2, name="i2")
     return array_ops.gather(y, indices2, name="y2")
   p_shape = (8, 2)
   p_size = 16
   params = constant_op.constant(
       np.arange(p_size).astype(np.float64), shape=p_shape, name="p")
   error = gradient_checker.max_error(*gradient_checker.compute_gradient(
       f, [params]))
   tf_logging.info("nested gather error = %f", error)
   assert error < 1e-4
  def testComplexAbsGradGrad(self):

    def f(x):
      real = math_ops.cos(x)
      imag = ops.convert_to_tensor(1.)
      return math_ops.abs(math_ops.complex(real, imag))

    def g(x):
      with backprop.GradientTape() as t:
        t.watch(x)
        y = f(x)
      return t.gradient(y, x)

    err = gradient_checker_v2.max_error(
        *gradient_checker_v2.compute_gradient(g, [ops.convert_to_tensor(2.0)]))
    self.assertLess(err, 1e-3)
 def _checkGrad(self, x_in, y_in, adjoint_a, adjoint_b):
   x_t_shape = x_in.shape[:-2] + (x_in.shape[-1], x_in.shape[-2])
   y_t_shape = y_in.shape[:-2] + (y_in.shape[-1], y_in.shape[-2])
   x = x_in if not adjoint_a else x_in.reshape(x_t_shape)
   y = y_in if not adjoint_b else y_in.reshape(y_t_shape)
   epsilon = np.finfo(x.dtype).eps
   delta = epsilon**(1.0 / 3.0)
   def Loss(x, y):
     z = math_ops.matmul(x, y, adjoint_a, adjoint_b)
     return math_ops.reduce_sum(z)
   with self.cached_session(use_gpu=True):
     ((x_jacob_t, y_jacob_t),
      (x_jacob_n, y_jacob_n)) = gradient_checker_v2.compute_gradient(
          Loss, [x, y], delta=delta)
     tol = 20 * delta
     self.assertAllClose(x_jacob_t, x_jacob_n, rtol=tol, atol=tol)
     self.assertAllClose(y_jacob_t, y_jacob_n, rtol=tol, atol=tol)
Example #22
  def testGradGradFloat64(self):
    with self.cached_session():

      def f(x):
        assert x.dtype == dtypes.float64
        with backprop.GradientTape() as tape:
          tape.watch(x)
          y = nn_ops.selu(x)
        return tape.gradient(y, x)

      x = np.asarray(
          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
          dtype=np.float64,
          order="F")
      err = gradient_checker_v2.max_error(
          *gradient_checker_v2.compute_gradient(f, [x]))
    print("selu (float64) gradient of gradient err = ", err)
    self.assertLess(err, 1e-6)
Example #23
  def testGradGradFloat32(self):
    with compat.forward_compatibility_horizon(2018, 11, 2):
      with self.cached_session():

        def f(x):
          assert x.dtype == dtypes.float32
          with backprop.GradientTape() as tape:
            tape.watch(x)
            y = nn_ops.leaky_relu(x)
          return tape.gradient(y, x)

        x = np.asarray(
            [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
            dtype=np.float32,
            order="F")
        err = gradient_checker_v2.max_error(
            *gradient_checker_v2.compute_gradient(f, [x]))
      print("leaky_relu (float32) gradient of gradient err = ", err)
      self.assertLess(err, 1e-4)
 def testNaNGradFails(self):
   @custom_gradient.custom_gradient
   def id_nan_grad(x):
     y = array_ops.identity(x)
     def grad_fn(dy):
       dx = np.nan * dy
       # dx = dy
       return dx
     return y, grad_fn
   def f(x):
     return id_nan_grad(x)
   x = constant_op.constant(np.random.random_sample((1, 1)),
                            dtype=dtypes.float32)
   error = gradient_checker.max_error(*gradient_checker.compute_gradient(
       f, [x]))
   # A typical test would assert error < max_err, so assert that this check
   # raises an AssertionError, since NaN is not < 1.0.
   with self.assertRaisesRegex(AssertionError, "False is not true"):
     self.assertTrue(error < 1.0)
    def _BuildAndTestMiniMNIST(self, param_index, tag):
        # Fix seed to avoid occasional flakiness
        np.random.seed(6)

        # Hyperparameters
        batch = 3
        inputs = 16
        features = 32
        classes = 10

        # Define the parameters
        inp_data = np.random.random_sample(inputs * batch)
        hidden_weight_data = np.random.randn(
            inputs * features) / np.sqrt(inputs)
        hidden_bias_data = np.random.random_sample(features)
        sm_weight_data = np.random.randn(
            features * classes) / np.sqrt(features)
        sm_bias_data = np.random.random_sample(classes)

        # special care for labels since they need to be normalized per batch
        label_data = np.random.random(batch * classes).reshape(
            (batch, classes))
        s = label_data.sum(axis=1)
        label_data /= s[:, None]

        # We treat the inputs as "parameters" here
        inp = constant_op.constant(inp_data.tolist(),
                                   shape=[batch, inputs],
                                   dtype=dtypes.float64,
                                   name="inp")
        hidden_weight = constant_op.constant(hidden_weight_data.tolist(),
                                             shape=[inputs, features],
                                             dtype=dtypes.float64,
                                             name="hidden_weight")
        hidden_bias = constant_op.constant(hidden_bias_data.tolist(),
                                           shape=[features],
                                           dtype=dtypes.float64,
                                           name="hidden_bias")
        softmax_weight = constant_op.constant(sm_weight_data.tolist(),
                                              shape=[features, classes],
                                              dtype=dtypes.float64,
                                              name="softmax_weight")
        softmax_bias = constant_op.constant(sm_bias_data.tolist(),
                                            shape=[classes],
                                            dtype=dtypes.float64,
                                            name="softmax_bias")

        # List all the parameters so that we can test them one at a time
        all_params = [
            inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias
        ]

        # Now, build the mini MNIST network
        def f(inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias):
            features = nn_ops.relu(nn_ops.xw_plus_b(inp, hidden_weight,
                                                    hidden_bias),
                                   name="features")
            logits = nn_ops.xw_plus_b(features,
                                      softmax_weight,
                                      softmax_bias,
                                      name="logits")
            labels = constant_op.constant(label_data.tolist(),
                                          shape=[batch, classes],
                                          dtype=dtypes.float64,
                                          name="labels")
            cost = nn_ops.softmax_cross_entropy_with_logits(labels=labels,
                                                            logits=logits,
                                                            name="cost")
            return cost

        def f_restricted(x):
            xs = all_params
            i = param_index
            # use x for the i-th parameter
            xs = xs[0:i] + [x] + xs[i + 1:]
            return f(*xs)

        # Test the gradients.
        err = gradient_checker.max_error(*gradient_checker.compute_gradient(
            f_restricted, [all_params[param_index]], delta=1e-5))

        tf_logging.info("Mini MNIST: %s gradient error = %g", tag, err)
        return err
Example #26
 def _testGrad(self, f, x):
     max_error = gradient_checker_v2.max_error(
         *gradient_checker_v2.compute_gradient(f, [x]))
     self.assertLess(max_error, 1e-4)
Example #27
 def _compute_error():
     return gradient_checker_v2.max_error(
         *gradient_checker_v2.compute_gradient(f, x=args, delta=delta))
Example #28
  def _test_backward_pass(self, batch_input_shape, axis, fp64_tol=1e-5,
                          fp32_tol=1e-5, fp16_tol=2e-2):
    """Tests the backwards pass of layer normalization.

    Args:
      batch_input_shape: The input shape that will be used to test, including
        the batch dimension.
      axis: A list of axes to normalize. Will be passed to the `axis` argument
        of LayerNormalization.
      fp64_tol: The relative and absolute tolerance for float64.
      fp32_tol: The relative and absolute tolerance for float32.
      fp16_tol: The relative and absolute tolerance for float16.
    """
    param_shape = [batch_input_shape[i] for i in axis]
    param_elems = 1
    for dim in param_shape:
      param_elems *= dim
    beta = np.arange(param_elems, dtype='float64').reshape(param_shape)
    gamma = np.arange(1, param_elems + 1, dtype='float64').reshape(param_shape)
    x = np.random.normal(size=batch_input_shape)

    for epsilon in 1e-12, 1e-3:
      # Float64 must come first in this list, as we use the float64 numerical
      # gradients to compare to the float32 and float16 symbolic gradients as
      # well. Computing float32/float16 numerical gradients is too numerically
      # unstable.
      for dtype in 'float64', 'float32', 'float16':
        norm = normalization.LayerNormalization(
            axis=axis, dtype=dtype, batch_input_shape=batch_input_shape,
            epsilon=epsilon, beta_initializer=keras.initializers.constant(beta),
            gamma_initializer=keras.initializers.constant(gamma))
        norm.build(x.shape)

        # pylint: disable=cell-var-from-loop
        def forward_fn(x, beta, gamma):
          # We must monkey-patch the attributes of `norm` with the function
          # arguments, so that the gradient checker will properly compute their
          # gradients. The gradient checker computes gradients with respect to
          # the input arguments of `f`.
          with test.mock.patch.object(norm, 'beta', beta):
            with test.mock.patch.object(norm, 'gamma', gamma):
              return norm(x)
        # pylint: enable=cell-var-from-loop
        results = gradient_checker_v2.compute_gradient(
            forward_fn, [keras.backend.cast(x, dtype), norm.beta, norm.gamma])
        ([x_grad_t, beta_grad_t, gamma_grad_t],
         [x_grad_n, beta_grad_n, gamma_grad_n]) = results

        if dtype == 'float64':
          # We use the float64 numeric gradients as the reference, to compare
          # against the symbolic gradients for all dtypes.
          x_grad_ref = x_grad_n
          beta_grad_ref = beta_grad_n
          gamma_grad_ref = gamma_grad_n
          tol = fp64_tol
        elif dtype == 'float32':
          tol = fp32_tol
        else:
          assert dtype == 'float16'
          tol = fp16_tol

        # We use absolute tolerances in addition to relative tolerances, because
        # some of the values are very close to zero.
        self.assertAllClose(x_grad_t, x_grad_ref, rtol=tol, atol=tol)
        self.assertAllClose(beta_grad_t, beta_grad_ref, rtol=tol, atol=tol)
        self.assertAllClose(gamma_grad_t, gamma_grad_ref, rtol=tol, atol=tol)
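
The monkey-patching in forward_fn is the key trick in this test: gradient_checker_v2 only differentiates with respect to the explicit arguments of the function it is given, so the layer's own variables are temporarily swapped for the checker-supplied tensors. A stripped-down sketch of the same idea with a hypothetical Affine object standing in for the Keras layer (tf.test.compute_gradient is the public wrapper around gradient_checker_v2.compute_gradient):

import numpy as np
import tensorflow as tf
from unittest import mock

class Affine:
  """Tiny stand-in for a layer that owns its variables."""

  def __init__(self):
    self.w = tf.Variable(np.random.normal(size=(4, 2)))
    self.b = tf.Variable(np.zeros(2))

  def __call__(self, x):
    return tf.matmul(x, self.w) + self.b

layer = Affine()
x = tf.constant(np.random.normal(size=(3, 4)))

def forward(x, w, b):
  # Temporarily expose the owned variables as function arguments so the
  # checker can perturb them.
  with mock.patch.object(layer, 'w', w):
    with mock.patch.object(layer, 'b', b):
      return layer(x)

theoretical, numerical = tf.test.compute_gradient(forward, [x, layer.w, layer.b])
for t, n in zip(theoretical, numerical):
  np.testing.assert_allclose(t, n, rtol=1e-5, atol=1e-5)
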
Example #29
 def test_spence_gradient(self):
   inputs = [np.random.uniform(1., 50., size=int(1e2))]
   analytical, numerical = gradient_checker_v2.compute_gradient(
       special_math_ops.spence, inputs)
   self.assertLess(gradient_checker_v2.max_error(analytical, numerical), 1e-4)
  def _BuildAndTestMiniMNIST(self, param_index, tag):
    # Fix seed to avoid occasional flakiness
    np.random.seed(6)

    # Hyperparameters
    batch = 3
    inputs = 16
    features = 32
    classes = 10

    # Define the parameters
    inp_data = np.random.random_sample(inputs * batch)
    hidden_weight_data = np.random.randn(inputs * features) / np.sqrt(inputs)
    hidden_bias_data = np.random.random_sample(features)
    sm_weight_data = np.random.randn(features * classes) / np.sqrt(features)
    sm_bias_data = np.random.random_sample(classes)

    # special care for labels since they need to be normalized per batch
    label_data = np.random.random(batch * classes).reshape((batch, classes))
    s = label_data.sum(axis=1)
    label_data /= s[:, None]

    # We treat the inputs as "parameters" here
    inp = constant_op.constant(
        inp_data.tolist(),
        shape=[batch, inputs],
        dtype=dtypes.float64,
        name="inp")
    hidden_weight = constant_op.constant(
        hidden_weight_data.tolist(),
        shape=[inputs, features],
        dtype=dtypes.float64,
        name="hidden_weight")
    hidden_bias = constant_op.constant(
        hidden_bias_data.tolist(),
        shape=[features],
        dtype=dtypes.float64,
        name="hidden_bias")
    softmax_weight = constant_op.constant(
        sm_weight_data.tolist(),
        shape=[features, classes],
        dtype=dtypes.float64,
        name="softmax_weight")
    softmax_bias = constant_op.constant(
        sm_bias_data.tolist(),
        shape=[classes],
        dtype=dtypes.float64,
        name="softmax_bias")

    # List all the parameters so that we can test them one at a time
    all_params = [
        inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias
    ]

    # Now, build the mini MNIST network
    def f(inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias):
      features = nn_ops.relu(
          nn_ops.xw_plus_b(inp, hidden_weight, hidden_bias), name="features")
      logits = nn_ops.xw_plus_b(
          features, softmax_weight, softmax_bias, name="logits")
      labels = constant_op.constant(
          label_data.tolist(),
          shape=[batch, classes],
          dtype=dtypes.float64,
          name="labels")
      cost = nn_ops.softmax_cross_entropy_with_logits(
          labels=labels, logits=logits, name="cost")
      return cost

    def f_restricted(x):
      xs = all_params
      i = param_index
      # use x for the i-th parameter
      xs = xs[0:i]+[x]+xs[i+1:]
      return f(*xs)
    # Test the gradients.
    err = gradient_checker.max_error(*gradient_checker.compute_gradient(
        f_restricted, [all_params[param_index]], delta=1e-5))

    tf_logging.info("Mini MNIST: %s gradient error = %g", tag, err)
    return err
Example #31
 def test_spence_gradient_at_one(self):
   analytical, _ = gradient_checker_v2.compute_gradient(
       special_math_ops.spence, [1.])
   self.assertAllClose([[[-1.]]], analytical)
Example #32
  def testGradRandomBoxes(self):
    """Test that the gradient is correct for randomly generated boxes.

    The mapping is piecewise differentiable with respect to the box coordinates.
    The points where the function is not differentiable are those which are
    mapped to image pixels, i.e., the normalized y coordinates in
    np.linspace(0, 1, image_height) and normalized x coordinates in
    np.linspace(0, 1, image_width). Make sure that the box coordinates are
    sufficiently far away from those rectangular grid centers that are points of
    discontinuity, so that the finite difference Jacobian is close to the
    computed one.
    """
    np.random.seed(1)  # Make it reproducible.
    delta = 1e-3
    radius = 2 * delta
    low, high = -0.5, 1.5  # Also covers the case of extrapolation.

    image_height = 4
    for image_width in range(1, 3):
      for crop_height in range(1, 3):
        for crop_width in range(2, 4):
          for depth in range(1, 3):
            for num_boxes in range(1, 3):

              batch = num_boxes
              image_shape = [batch, image_height, image_width, depth]
              crop_size = [crop_height, crop_width]

              image = np.arange(0, batch * image_height * image_width *
                                depth).reshape(image_shape).astype(np.float32)
              boxes = []
              for _ in range(num_boxes):
                # pylint: disable=unbalanced-tuple-unpacking
                y1, y2 = self._randomUniformAvoidAnchors(
                    low, high, np.linspace(0, 1, image_height), radius, 2)
                x1, x2 = self._randomUniformAvoidAnchors(
                    low, high, np.linspace(0, 1, image_width), radius, 2)
                # pylint: enable=unbalanced-tuple-unpacking
                boxes.append([y1, x1, y2, x2])

              boxes = np.array(boxes, dtype=np.float32)
              box_ind = np.arange(batch, dtype=np.int32)

              image_tensor = constant_op.constant(image, shape=image_shape)
              boxes_tensor = constant_op.constant(boxes, shape=[num_boxes, 4])
              box_ind_tensor = constant_op.constant(box_ind, shape=[num_boxes])

              def crop_resize(image_tensor, boxes_tensor):
                # pylint: disable=cell-var-from-loop
                return image_ops.crop_and_resize(
                    image_tensor, boxes_tensor, box_ind_tensor,
                    constant_op.constant(crop_size, shape=[2]))

              with test_util.device(use_gpu=True):
                with self.cached_session():
                  # pylint: disable=cell-var-from-loop
                  if (config.is_op_determinism_enabled() and
                      test_util.is_gpu_available()):
                    with self.assertRaises(errors_impl.UnimplementedError):
                      gradient_checker_v2.compute_gradient(
                          lambda x: crop_resize(x, boxes_tensor),
                          [image_tensor])
                    with self.assertRaises(errors_impl.UnimplementedError):
                      gradient_checker_v2.compute_gradient(
                          lambda x: crop_resize(image_tensor, x),
                          [boxes_tensor])
                  else:
                    err1 = gradient_checker_v2.max_error(
                        *gradient_checker_v2.compute_gradient(
                            lambda x: crop_resize(x, boxes_tensor),
                            [image_tensor]))
                    err2 = gradient_checker_v2.max_error(
                        *gradient_checker_v2.compute_gradient(
                            lambda x: crop_resize(image_tensor, x),
                            [boxes_tensor]))
                    err = max(err1, err2)
                    self.assertLess(err, 2e-3)
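
self._randomUniformAvoidAnchors is not shown in this listing; per the docstring above it draws box coordinates that stay at least radius away from the pixel-center anchors in np.linspace(0, 1, ...), where crop_and_resize is not differentiable. A hypothetical NumPy-only equivalent using rejection sampling:

import numpy as np

def random_uniform_avoid_anchors(low, high, anchors, radius, num_samples):
  """Uniform samples in [low, high) kept at least `radius` from every anchor."""
  samples = []
  while len(samples) < num_samples:
    value = np.random.uniform(low, high)
    if np.all(np.abs(anchors - value) >= radius):
      samples.append(value)
  return samples

y1, y2 = random_uniform_avoid_anchors(-0.5, 1.5, np.linspace(0, 1, 4), 2e-3, 2)
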
Example #33
    def testBatchDims(self,
                      params,
                      indices,
                      batch_dims,
                      expected=None,
                      axis=None):
        result = array_ops.gather(params,
                                  indices,
                                  axis=axis,
                                  batch_dims=batch_dims)
        self.assertAllEqual(expected, result)

        # Test gradients
        f64_params = math_ops.cast(params, dtypes.float64)

        def gather(params):
            return array_ops.gather(params,
                                    indices,
                                    axis=axis,
                                    batch_dims=batch_dims)

        theoretical, numerical = gradient_checker_v2.compute_gradient(
            gather, [f64_params])
        self.assertAllClose(theoretical, numerical)

        # Test gradients when input shapes are unknown
        @def_function.function(input_signature=[
            tensor_spec.TensorSpec(shape=None, dtype=dtypes.float64),
            tensor_spec.TensorSpec(shape=None, dtype=dtypes.int32)
        ])
        def gather_unknown_shapes(params, indices):
            return array_ops.gather(params,
                                    indices,
                                    axis=axis,
                                    batch_dims=batch_dims)

        if batch_dims is None or batch_dims >= 0:
            theoretical, numerical = gradient_checker_v2.compute_gradient(
                lambda p: gather_unknown_shapes(p, indices), [f64_params])
            self.assertAllClose(theoretical, numerical)
        else:
            with self.assertRaisesRegex(
                    ValueError,
                    "Currently, it is unsupported to take the gradient of tf.gather"
            ):
                gradient_checker_v2.compute_gradient(
                    lambda p: gather_unknown_shapes(p, indices), [f64_params])

        # Test the gradients shape.
        with backprop.GradientTape() as tape:
            zeros = array_ops.zeros_like(params, dtype=dtypes.float32)
            tape.watch(zeros)
            values = zeros * 2 + zeros
            result = array_ops.gather(values,
                                      indices,
                                      axis=axis,
                                      batch_dims=batch_dims)
        gradients = tape.gradient(result, zeros)

        self.assertAllEqual(array_ops.shape(params),
                            array_ops.shape(gradients))

        # Run the same test for strings.
        params = _to_str_elements(params)
        expected = _to_str_elements(expected)
        result = array_ops.gather(params,
                                  indices,
                                  axis=axis,
                                  batch_dims=batch_dims)

        self.assertAllEqual(expected, result)