def testGradientMatchesSegmentSum(self):
    # Strategy: compute the gradient for UnsortedSegmentSum and SegmentSum
    # and compare the outputs, which should be identical.
    # NB: for this test to work, the indices must be valid for SegmentSum: they
    # must be sorted and contiguous, and num_segments must equal
    # max(indices) + 1.
    indices = [0, 0, 1, 1, 1, 2, 3, 4, 5]
    n = len(indices)
    num_cols = 2
    shape = [n, num_cols]
    num_segments = max(indices) + 1
    for dtype in self.differentiable_dtypes:
      with self.cached_session(use_gpu=True):
        tf_x, np_x = self._input(shape, dtype=dtype)
        # Results from UnsortedSegmentSum
        unsorted_s = math_ops.unsorted_segment_sum(
            data=tf_x, segment_ids=indices, num_segments=num_segments)
        unsorted_jacob_t, unsorted_jacob_n = (
            gradient_checker.compute_gradient(tf_x, shape, unsorted_s,
                                              [num_segments, num_cols],
                                              x_init_value=np_x, delta=1))

        # Results from SegmentSum
        sorted_s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
        sorted_jacob_t, sorted_jacob_n = gradient_checker.compute_gradient(
            tf_x,
            shape,
            sorted_s, [num_segments, num_cols],
            x_init_value=np_x,
            delta=1)
      self.assertAllClose(unsorted_jacob_t, sorted_jacob_t)
      self.assertAllClose(unsorted_jacob_n, sorted_jacob_n)
  def _compute_gradient_error_float16(self, x, x32, x_shape, y, y32, y_shape):
    """Computes the gradient error for float16 inputs and/or outputs.

    This returns the same value as gradient_checker.compute_gradient_error. The
    difference is that gradient_checker.compute_gradient_error does not compute
    the numerical gradients in a numerically stable way for float16 tensors. To
    work around this, this function requires float32 versions of x and y: the
    numerical gradients are computed from the float32 tensors and compared with
    the symbolically computed float16 gradients.

    Args:
      x: The input tensor.
      x32: A float32 version of x.
      x_shape: The shape of x.
      y: The output tensor.
      y32: A float32 version of y. Must be calculated based on x32, not x.
      y_shape: The shape of y.

    Returns:
      The maximum error between the two Jacobians, as in
      gradient_checker.compute_gradient_error.
    """
    x_init_val = np.random.random_sample(x_shape).astype(np.float16)
    x32_init_val = x_init_val.astype(np.float32)

    # TODO(reedwm): Do not perform the unnecessary computations in
    # compute_gradient, since they double the computation time of this function.
    theoretical_grad, _ = gradient_checker.compute_gradient(
        x, x_shape, y, y_shape, delta=1e-3, x_init_value=x_init_val)
    _, numerical_grad = gradient_checker.compute_gradient(
        x32, x_shape, y32, y_shape, delta=1e-3, x_init_value=x32_init_val)
    return np.fabs(theoretical_grad - numerical_grad).max()
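  # A minimal usage sketch (hypothetical; not part of the original tests). The
  # float16 input and its float32 twin must be built from the same values, and
  # y32 must be computed from x32 rather than x, e.g.:
  #
  #   x_np = np.random.random_sample((3, 4)).astype(np.float16)
  #   x = constant_op.constant(x_np)
  #   x32 = constant_op.constant(x_np.astype(np.float32))
  #   y, y32 = math_ops.exp(x), math_ops.exp(x32)
  #   err = self._compute_gradient_error_float16(x, x32, (3, 4), y, y32, (3, 4))
  #   self.assertLess(err, 1e-3)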
 def _compareGradientY(self,
                       x,
                       y,
                       np_func,
                       tf_func,
                       numeric_gradient_type=None):
   z = np_func(x, y)
   zs = list(z.shape)
   with self.cached_session():
     inx = ops.convert_to_tensor(x)
     iny = ops.convert_to_tensor(y)
     if x.dtype in (np.float32, np.float64):
       out = 1.1 * tf_func(inx, iny)
     else:
       out = tf_func(inx, iny)
     ys = list(np.shape(y))
     jacob_t, jacob_n = gradient_checker.compute_gradient(
         iny, ys, out, zs, x_init_value=y)
     if numeric_gradient_type is not None:
       xf = x.astype(numeric_gradient_type)
       yf = y.astype(numeric_gradient_type)
       inxf = ops.convert_to_tensor(xf)
       inyf = ops.convert_to_tensor(yf)
       outf = tf_func(inxf, inyf)
       _, jacob_n = gradient_checker.compute_gradient(
           inyf, ys, outf, zs, x_init_value=yf)
       jacob_n = jacob_n.astype(x.dtype)
   tol = self._GRAD_TOL[dtypes_lib.as_dtype(x.dtype)]
   self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol)
  def _compareGpu(self, x, p, conjugate=False):
    if p is None:
      rank = x.ndim
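      # With p unspecified, use the reversed permutation [rank-1, ..., 1, 0],
      # which matches the default behavior of np.transpose and
      # array_ops.transpose.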
      perm = (rank - 1) - np.arange(rank)
    else:
      perm = p
    np_ans = self._np_transpose(x, perm)
    if conjugate:
      np_ans = np.conj(np_ans)
    with self.test_session(use_gpu=True):
      inx = ops.convert_to_tensor(x)
      y = array_ops.transpose(inx, p, conjugate=conjugate)
      tf_ans = y.eval()

      self.assertAllEqual(np_ans, tf_ans)
      self.assertShapeEqual(np_ans, y)

      jacob_t = None
      # Gradient check on GPU.
      xs = list(np.shape(x))
      ys = list(np.shape(tf_ans))
      if x.dtype == np.float32:
        jacob_t, jacob_n = gradient_checker.compute_gradient(inx, xs, y, ys, x,
                                                             1e-2)
        self.assertAllClose(jacob_t, jacob_n, 1e-3, 1e-3)
      elif x.dtype == np.float64:
        jacob_t, jacob_n = gradient_checker.compute_gradient(inx, xs, y, ys, x,
                                                             1e-2)
        self.assertAllClose(jacob_t, jacob_n, 1e-6, 1e-6)

      return tf_ans, jacob_t
  def _compareCpu(self, x, p, conjugate=False):
    np_ans = self._np_transpose(x, p)
    if conjugate:
      np_ans = np.conj(np_ans)
    with self.test_session(use_gpu=False):
      inx = ops.convert_to_tensor(x)
      y = array_ops.transpose(inx, p, conjugate=conjugate)
      tf_ans = y.eval()
      self.assertShapeEqual(np_ans, y)
      self.assertAllEqual(np_ans, tf_ans)

      jacob_t = None
      # Gradient check on CPU.
      xs = list(np.shape(x))
      ys = list(np.shape(tf_ans))
      if x.dtype in [np.float32, np.complex64]:
        jacob_t, jacob_n = gradient_checker.compute_gradient(inx, xs, y, ys, x,
                                                             1e-2)
        self.assertAllClose(jacob_t, jacob_n, 1e-3, 1e-3)
      elif x.dtype in [np.float64, np.complex128]:
        jacob_t, jacob_n = gradient_checker.compute_gradient(inx, xs, y, ys, x,
                                                             1e-2)
        self.assertAllClose(jacob_t, jacob_n, 1e-6, 1e-6)

      return tf_ans, jacob_t
 def testEmptyFails(self):
   with ops.Graph().as_default() as g:
     with self.session(graph=g):
       x = array_ops.placeholder(dtypes.float32)
       with g.gradient_override_map({"Identity": "BadGrad"}):
         y = array_ops.identity(x)
       bad = r"Empty gradient has wrong shape: expected \(0, 3\), got \(3, 0\)"
       with self.assertRaisesRegexp(ValueError, bad):
         gradient_checker.compute_gradient(x, (0, 3), y, (0, 3))
       with self.assertRaisesRegexp(ValueError, bad):
         gradient_checker.compute_gradient_error(x, (0, 3), y, (0, 3))
  def Test(self):
    with self.session(use_gpu=True):
      np.random.seed(1)
      a_np = np.random.uniform(
          low=-1.0, high=1.0,
          size=np.prod(shape_)).reshape(shape_).astype(dtype_)
      a = constant_op.constant(a_np)
      if functor_.__name__ == 'matrix_square_root':
        # Square the input matrix to ensure that its matrix square root exists
        a = math_ops.matmul(a, a)
        a_np = a.eval()
      b = functor_(a, **kwargs_)

      # Optimal stepsize for central difference is O(epsilon^{1/3}).
      epsilon = np.finfo(dtype_).eps
      delta = epsilon**(1.0 / 3.0)
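      # For float32, eps ~= 1.19e-7 so delta ~= 4.9e-3; for float64,
      # eps ~= 2.22e-16 so delta ~= 6.1e-6.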
      # tolerance obtained by looking at actual differences using
      # np.linalg.norm(theoretical-numerical, np.inf) on -mavx build
      tol = 1e-6 if dtype_ == np.float64 else 0.05

      theoretical, numerical = gradient_checker.compute_gradient(
          a,
          a.get_shape().as_list(),
          b,
          b.get_shape().as_list(),
          x_init_value=a_np,
          delta=delta)
      self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)
  def testGradientWithEmptySegmentsAtEnd(self):
    shape = [10, 4]

    num_segments = 5
    segment_indices = [0, 1, 2, 2]
    num_indices = len(segment_indices)
    for tf_op in [
        math_ops.sparse_segment_sum_with_num_segments,
        math_ops.sparse_segment_mean_with_num_segments,
    ]:
      with self.cached_session():
        tf_indices, _, tf_x, np_x = self._sparse_input(
            shape, num_indices, dtype=dtypes_lib.float64)
        s = tf_op(
            data=tf_x,
            indices=tf_indices,
            segment_ids=segment_indices,
            num_segments=num_segments)
        jacob_t, jacob_n = gradient_checker.compute_gradient(
            tf_x,
            shape,
            s, [5, 4],
            x_init_value=np_x.astype(np.double),
            delta=1)
      self.assertAllClose(jacob_t, jacob_n)
    def test_high_dim_filter_grad(self):
        x_shape = [5, 10, 10]

        # Test inputs: unaries and RGB values
        unary_np = np.random.randn(*x_shape).astype(np.float32)
        rgb_np = np.random.randint(low=0, high=256, size=x_shape).astype(np.float32)

        with self.test_session():
            unary_tf = constant_op.constant(unary_np)
            rgb_tf = constant_op.constant(rgb_np)
            y_tf = custom_module.high_dim_filter(unary_tf, rgb_tf,
                                                 bilateral=True,
                                                 theta_alpha=1000.,
                                                 theta_beta=1000.,
                                                 theta_gamma=1000.)

            out = gradient_checker.compute_gradient([unary_tf, rgb_tf], [x_shape, x_shape],
                                                    y_tf, x_shape)

            # We only need to compare gradients w.r.t. unaries
            computed = out[0][0].flatten()
            estimated = out[0][1].flatten()

            mask = (computed != 0)
            computed = computed[mask]
            estimated = estimated[mask]
            difference = computed - estimated

            measure1 = np.mean(difference) / np.mean(computed)
            measure2 = np.max(difference) / np.max(computed)
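            # measure1 compares the mean difference against the mean computed
            # gradient; measure2 compares the max difference against the max
            # computed gradient, both restricted to the non-zero entries.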

            print('Gradient check: measure1 = {:.6f}, measure2 = {:.6f}'.format(measure1, measure2))
            self.assertLess(measure1, 1e-3, 'Errors found in the gradient computation.')
            self.assertLess(measure2, 2e-2, 'Errors found in the gradient computation.')
            print('Gradient check: success!')
  def _compareMulGradient(self, data):
    # data is a float matrix of shape [n, 4].  data[:, 0], data[:, 1],
    # data[:, 2], data[:, 3] are real parts of x, imaginary parts of
    # x, real parts of y and imaginary parts of y.
    with self.cached_session():
      inp = ops.convert_to_tensor(data)
      xr, xi, yr, yi = array_ops.split(value=inp, num_or_size_splits=4, axis=1)

      def vec(x):  # Reshape to a vector
        return array_ops.reshape(x, [-1])

      xr, xi, yr, yi = vec(xr), vec(xi), vec(yr), vec(yi)

      def cplx(r, i):  # Combine to a complex vector
        return math_ops.complex(r, i)

      x, y = cplx(xr, xi), cplx(yr, yi)
      # z is x times y in complex plane.
      z = x * y
      # Defines the loss function as the sum of all coefficients of z.
      loss = math_ops.reduce_sum(math_ops.real(z) + math_ops.imag(z))
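      # With z = (xr + i*xi) * (yr + i*yi), Re(z) = xr*yr - xi*yi and
      # Im(z) = xr*yi + xi*yr, so d(loss)/dxr = yr + yi, d(loss)/dxi = yr - yi,
      # and symmetrically for yr and yi; the Jacobian check below verifies
      # this against finite differences.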
      epsilon = 0.005
      jacob_t, jacob_n = gradient_checker.compute_gradient(
          inp, list(data.shape), loss, [1], x_init_value=data, delta=epsilon)
    self.assertAllClose(jacob_t, jacob_n, rtol=epsilon, atol=epsilon)
 def Test(self):
   np.random.seed(42)
   a = np.random.uniform(low=-1.0, high=1.0, size=shape_).astype(dtype_)
   if dtype_ in [np.complex64, np.complex128]:
     a += 1j * np.random.uniform(
         low=-1.0, high=1.0, size=shape_).astype(dtype_)
   # Optimal stepsize for central difference is O(epsilon^{1/3}).
   epsilon = np.finfo(dtype_).eps
   delta = 0.1 * epsilon**(1.0 / 3.0)
   if dtype_ in [np.float32, np.complex64]:
     tol = 3e-2
   else:
     tol = 1e-6
   with self.session(use_gpu=True):
     tf_a = constant_op.constant(a)
     tf_b = linalg_ops.qr(tf_a, full_matrices=full_matrices_)
     for b in tf_b:
       x_init = np.random.uniform(
           low=-1.0, high=1.0, size=shape_).astype(dtype_)
       if dtype_ in [np.complex64, np.complex128]:
         x_init += 1j * np.random.uniform(
             low=-1.0, high=1.0, size=shape_).astype(dtype_)
       theoretical, numerical = gradient_checker.compute_gradient(
           tf_a,
           tf_a.get_shape().as_list(),
           b,
           b.get_shape().as_list(),
           x_init_value=x_init,
           delta=delta)
       self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)
  def Test(self):
    # TODO(rmlarsen): Debug illegal address bug on CUDA and re-enable
    # GPU test for matrix_solve.
    use_gpu = functor_ != linalg_ops.matrix_solve

    with self.test_session(use_gpu=use_gpu):
      np.random.seed(1)
      a_np = np.random.uniform(
          low=-1.0, high=1.0,
          size=np.prod(shape_)).reshape(shape_).astype(dtype_)
      a = constant_op.constant(a_np)

      b_np = np.random.uniform(
          low=-1.0, high=1.0,
          size=np.prod(shape_)).reshape(shape_).astype(dtype_)
      b = constant_op.constant(b_np)
      c = functor_(a, b, **kwargs_)

      # Optimal stepsize for central difference is O(epsilon^{1/3}).
      epsilon = np.finfo(dtype_).eps
      delta = epsilon**(1.0 / 3.0)
      # tolerance obtained by looking at actual differences using
      # np.linalg.norm(theoretical-numerical, np.inf) on -mavx build
      tol = 1e-6 if dtype_ == np.float64 else float32_tol_fudge * 0.04
      # The gradients for a and b may be of very different magnitudes,
      # so to not get spurious failures we test them separately.
      for factor, factor_init in [a, a_np], [b, b_np]:
        theoretical, numerical = gradient_checker.compute_gradient(
            factor,
            factor.get_shape().as_list(),
            c,
            c.get_shape().as_list(),
            x_init_value=factor_init,
            delta=delta)
        self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)
  def Test(self):
    if not use_static_shape_ or a_np_.dtype in (np.int32, np.float16):
      self.skipTest("Skipping infeasible gradient test.")

    # Transpose and possibly conjugate a_np_ and b_np_ according to the
    # attributes such that tf.matmul(effective_a_np, effective_b_np, **kwargs)
    # results in a valid matrix multiplication and produces the same result as
    # np.matrix(a_np_) * np.matrix(b_np_)
    effective_a_np = _GetTransposedMatrices(a_np_, "a", kwargs_)
    effective_b_np = _GetTransposedMatrices(b_np_, "b", kwargs_)

    epsilon = np.finfo(a_np_.dtype).eps
    delta = epsilon**(1.0 / 3.0)
    tol = 20 * delta
    with self.test_session(use_gpu=True):
      a = constant_op.constant(effective_a_np)
      b = constant_op.constant(effective_b_np)
      res = math_ops.matmul(a, b, **kwargs_)
      for x, x_init in [a, effective_a_np], [b, effective_b_np]:
        theoretical, numerical = gradient_checker.compute_gradient(
            x,
            x_init.shape,
            res, [a_np_.shape[0], b_np_.shape[1]],
            x_init_value=x_init,
            delta=delta)
        self.assertAllClose(theoretical, numerical, rtol=tol, atol=tol)
 def Test(self):
   np.random.seed(1)
   n = shape_[-1]
   batch_shape = shape_[:-2]
   a = np.random.uniform(
       low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(dtype_)
   a += np.conj(a.T)
   a = np.tile(a, batch_shape + (1, 1))
   # Optimal stepsize for central difference is O(epsilon^{1/3}).
   epsilon = np.finfo(dtype_).eps
   delta = 0.1 * epsilon**(1.0 / 3.0)
   # tolerance obtained by looking at actual differences using
   # np.linalg.norm(theoretical-numerical, np.inf) on -mavx build
   if dtype_ == np.float32:
     tol = 1e-2
   else:
     tol = 1e-7
   with self.test_session():
     tf_a = constant_op.constant(a)
     tf_e, tf_v = linalg_ops.self_adjoint_eig(tf_a)
     for b in tf_e, tf_v:
       x_init = np.random.uniform(
           low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(dtype_)
       x_init += np.conj(x_init.T)
       x_init = np.tile(x_init, batch_shape + (1, 1))
       theoretical, numerical = gradient_checker.compute_gradient(
           tf_a,
           tf_a.get_shape().as_list(),
           b,
           b.get_shape().as_list(),
           x_init_value=x_init,
           delta=delta)
       self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)
  def _compareCpu(self, x, np_func, tf_func, grad_rtol=None, grad_atol=None):
    if grad_rtol is None:
      grad_rtol = _default_tolerance(x.dtype)
    if grad_atol is None:
      grad_atol = _default_tolerance(x.dtype)
    np_ans = np_func(x)
    with self.test_session(use_gpu=False):
      inx = ops.convert_to_tensor(x)
      if x.dtype in (np.float32, np.float64,
                     dtypes_lib.bfloat16.as_numpy_dtype):
        y = 1.1 * tf_func(inx)
        np_ans *= 1.1
      else:
        y = tf_func(inx)
      tf_cpu = y.eval()
      self.assertShapeEqual(np_ans, y)
      if x.dtype == np.float16:
        self.assertAllClose(np_ans, tf_cpu, rtol=1e-3, atol=1e-3)
      elif x.dtype == dtypes_lib.bfloat16.as_numpy_dtype:
        self.assertAllClose(np_ans, tf_cpu, rtol=1e-2, atol=1e-2)
      else:
        self.assertAllClose(np_ans, tf_cpu)

      if x.dtype in (np.complex64, np.complex128) and tf_func == math_ops.sign:
        return  # Return early

      if x.dtype == np.float16:
        s = list(np.shape(x))
        jacob_t, _ = gradient_checker.compute_gradient(
            inx, s, y, s, x_init_value=x)
        xf = x.astype(np.float64)  # np.float is a deprecated alias for float64.
        inxf = ops.convert_to_tensor(xf)
        yf = tf_func(inxf)
        _, jacob_n = gradient_checker.compute_gradient(
            inxf, s, yf, s, x_init_value=xf, delta=1e-2)
        jacob_n = jacob_n.astype(np.float16)
        self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol)
      elif x.dtype in (np.float32, np.complex64):
        s = list(np.shape(x))
        jacob_t, jacob_n = gradient_checker.compute_gradient(
            inx, s, y, s, x_init_value=x, delta=1e-3)
        self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol)
      elif x.dtype in (np.float64, np.complex128):
        s = list(np.shape(x))
        jacob_t, jacob_n = gradient_checker.compute_gradient(
            inx, s, y, s, x_init_value=x, delta=1e-5)
        self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol)
 def _compareGradient(self, shape, axis, exclusive, reverse):
   x = np.arange(1, 9).reshape(shape).astype(np.float64)
   with self.cached_session(use_gpu=True):
     t = ops.convert_to_tensor(x)
     result = math_ops.cumprod(t, axis, exclusive, reverse)
     jacob_t, jacob_n = gradient_checker.compute_gradient(
         t, shape, result, shape, x_init_value=x, delta=1)
   self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
 def testEmptySucceeds(self):
   with self.cached_session():
     x = array_ops.placeholder(dtypes.float32)
     y = array_ops.identity(x)
     for grad in gradient_checker.compute_gradient(x, (0, 3), y, (0, 3)):
       self.assertEqual(grad.shape, (0, 0))
     error = gradient_checker.compute_gradient_error(x, (0, 3), y, (0, 3))
     self.assertEqual(error, 0)
 def testGradient4(self):
   s = [2, 3, 4, 2]
   x = np.arange(1.0, 49.0).reshape(s).astype(np.float64)
   with self.test_session():
     t = ops.convert_to_tensor(x)
     su = math_ops.reduce_max(t)
     jacob_t, jacob_n = gradient_checker.compute_gradient(
         t, s, su, [1], x_init_value=x, delta=1)
   self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
 def _compareBroadcastGradient(self, x):
   x_ = ops.convert_to_tensor(x)
   epsilon = 1e-3
   with self.cached_session():
     for args in [(x_, 0.), (0., x_)]:
       z = math_ops.reduce_sum(math_ops.abs(math_ops.complex(*args)))
       jacob_t, jacob_n = gradient_checker.compute_gradient(
           x_, list(x.shape), z, [1], x_init_value=x, delta=epsilon)
       self.assertAllClose(jacob_t, jacob_n, rtol=epsilon, atol=epsilon)
  def _test_grad_accuracy(self, dtype, grid_spec, error_spec):
    raw_grid = _make_grid(dtype, grid_spec)
    grid = ops.convert_to_tensor(raw_grid)
    with self.cached_session():
      fn = sm.log_ndtr if self._use_log else sm.ndtr

      # If there are N points in the grid,
      # grad_eval.shape = (N, N), with grad_eval[i, j] the partial derivative of
      # the ith output point w.r.t. the jth grid point.  We only expect the
      # diagonal to be nonzero.
      # TODO(b/31131137): Replace tf.compat.v1.test.compute_gradient with our
      # own custom gradient evaluation to ensure we correctly handle small
      # function delta.
      grad_eval, _ = gradient_checker.compute_gradient(grid, grid_spec.shape,
                                                       fn(grid),
                                                       grid_spec.shape)
      grad_eval = np.diag(grad_eval)

      # Check for NaN separately in order to get informative failures.
      self.assert_all_false(np.isnan(grad_eval))
      self.assert_all_true(grad_eval > 0.)
      # isfinite checks for NaN and Inf.
      self.assert_all_true(np.isfinite(grad_eval))

      # Do the same checks but explicitly compute the gradient.
      # (We did this because we're not sure if we trust
      # tf.test.compute_gradient.)
      grad_eval = gradients_impl.gradients(fn(grid), grid)[0].eval()
      self.assert_all_false(np.isnan(grad_eval))
      if self._use_log:
        g = np.reshape(grad_eval, [-1])
        half = np.ceil(len(g) / 2)
        self.assert_all_true(g[:int(half)] > 0.)
        self.assert_all_true(g[int(half):] >= 0.)
      else:
        # The ndtr gradient will only be non-zero in the range [-14, 14] for
        # float32 and [-38, 38] for float64.
        self.assert_all_true(grad_eval >= 0.)
      # isfinite checks for NaN and Inf.
      self.assert_all_true(np.isfinite(grad_eval))

      # Versus scipy.
      if not (special and stats):
        return

      expected = stats.norm.pdf(raw_grid)
      if self._use_log:
        expected /= special.ndtr(raw_grid)
        expected[np.isnan(expected)] = 0.
      # Scipy prematurely goes to zero at some places that we don't.  So don't
      # include these in the comparison.
      self.assertAllClose(
          expected.astype(np.float64)[expected > 0],
          grad_eval.astype(np.float64)[expected > 0],
          rtol=error_spec.rtol,
          atol=error_spec.atol)
 def _compareGradient(self, x, reduction_axes, rtol=1e-8, atol=1e-8):
   if reduction_axes is not None and np.shape(reduction_axes) == (1,):
     # Test scalar reduction_axes argument
     self._compareGradient(x, reduction_axes[0], rtol=rtol, atol=atol)
   with self.test_session(use_gpu=True):
     t = ops.convert_to_tensor(x)
     su = self._tf_reduce(t, reduction_axes, False)
     jacob_t, jacob_n = gradient_checker.compute_gradient(
         t, x.shape, su, su.get_shape().as_list(), x_init_value=x, delta=1)
   self.assertAllClose(jacob_t, jacob_n, rtol=rtol, atol=atol)
 def _testGradient(self, x, a, mode):
     with self.test_session(use_gpu=True):
         inx = ops.convert_to_tensor(x)
         xs = list(x.shape)
         ina = ops.convert_to_tensor(a)
         y = array_ops.pad(inx, ina, mode=mode)
          # The expected shape of y is the input shape plus the total padding
          # added along each dimension:
         ys = list(np.array(x.shape) + np.sum(np.array(a), axis=1))
         jacob_t, jacob_n = gradient_checker.compute_gradient(inx, xs, y, ys, x_init_value=x)
     self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)
 def _testGradient(self, np_input, shift, axis):
   with self.test_session():
     inx = constant_op.constant(np_input.tolist())
     xs = list(np_input.shape)
     y = manip_ops.roll(inx, shift, axis)
     # y is expected to have the same shape as the input.
     ys = xs
     jacob_t, jacob_n = gradient_checker.compute_gradient(
         inx, xs, y, ys, x_init_value=np_input)
     self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)
 def testComplexConj(self):
     with self.test_session():
         size = ()
         x = constant_op.constant(11 - 13j, dtype=dtypes.complex64)
         y = math_ops.conj(x)
         analytical, numerical = gradient_checker.compute_gradient(x, size, y, size)
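          # Viewing a complex scalar as the pair (real, imag), conj maps
          # (a, b) -> (a, -b), so the expected 2x2 Jacobian is
          # [[1, 0], [0, -1]].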
         correct = np.array([[1, 0], [0, -1]])
         self.assertAllEqual(correct, analytical)
         self.assertAllClose(correct, numerical, rtol=3e-6)
         self.assertLess(gradient_checker.compute_gradient_error(x, size, y, size), 2e-5)
 def Test(self):
   np.random.seed(1)
   n = shape_[-1]
   batch_shape = shape_[:-2]
   np_dtype = dtype_.as_numpy_dtype
   a = np.random.uniform(
       low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
   if dtype_.is_complex:
     a += 1j * np.random.uniform(
         low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
   a += np.conj(a.T)
   a = np.tile(a, batch_shape + (1, 1))
   # Optimal stepsize for central difference is O(epsilon^{1/3}).
   epsilon = np.finfo(np_dtype).eps
   delta = 0.1 * epsilon**(1.0 / 3.0)
   # tolerance obtained by looking at actual differences using
   # np.linalg.norm(theoretical-numerical, np.inf) on -mavx build
   if dtype_ in (dtypes_lib.float32, dtypes_lib.complex64):
     tol = 1e-2
   else:
     tol = 1e-7
   with self.session(use_gpu=True):
     tf_a = constant_op.constant(a)
     if compute_v_:
       tf_e, tf_v = linalg_ops.self_adjoint_eig(tf_a)
        # (Complex) eigenvectors are only unique up to an arbitrary phase.
        # We normalize the vectors such that the first component has phase 0.
       top_rows = tf_v[..., 0:1, :]
       if tf_a.dtype.is_complex:
         angle = -math_ops.angle(top_rows)
         phase = math_ops.complex(math_ops.cos(angle), math_ops.sin(angle))
       else:
         phase = math_ops.sign(top_rows)
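        # Multiplying by the conjugate phase (or sign, in the real case) of
        # the first row makes the first component of every eigenvector real
        # and non-negative, so the gradient check below is well defined.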
       tf_v *= phase
       outputs = [tf_e, tf_v]
     else:
       tf_e = linalg_ops.self_adjoint_eigvals(tf_a)
       outputs = [tf_e]
     for b in outputs:
       x_init = np.random.uniform(
           low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
       if dtype_.is_complex:
         x_init += 1j * np.random.uniform(
             low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
       x_init += np.conj(x_init.T)
       x_init = np.tile(x_init, batch_shape + (1, 1))
       theoretical, numerical = gradient_checker.compute_gradient(
           tf_a,
           tf_a.get_shape().as_list(),
           b,
           b.get_shape().as_list(),
           x_init_value=x_init,
           delta=delta)
       self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)
 def _compareGradient(self, shape, sum_shape, reduction_axes):
   if reduction_axes is not None and np.shape(reduction_axes) == (1,):
     # Test scalar reduction_axes argument
     self._compareGradient(shape, sum_shape, reduction_axes[0])
   x = np.arange(1.0, 49.0).reshape(shape).astype(np.float64)
   with self.test_session():
     t = ops.convert_to_tensor(x)
     su = math_ops.reduce_sum(t, reduction_axes)
     jacob_t, jacob_n = gradient_checker.compute_gradient(
         t, shape, su, sum_shape, x_init_value=x, delta=1)
   self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
 def testComplexMul(self):
     with self.test_session():
         size = ()
         c = constant_op.constant(5 + 7j, dtype=dtypes.complex64)
         x = constant_op.constant(11 - 13j, dtype=dtypes.complex64)
         y = c * x
         analytical, numerical = gradient_checker.compute_gradient(x, size, y, size)
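          # y = (5 + 7j) * x maps (a, b) -> (5a - 7b, 7a + 5b); with rows
          # indexed by the components of x and columns by the components of y,
          # the expected Jacobian is [[5, 7], [-7, 5]].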
         correct = np.array([[5, 7], [-7, 5]])
         self.assertAllEqual(correct, analytical)
         self.assertAllClose(correct, numerical, rtol=1e-4)
         self.assertLess(gradient_checker.compute_gradient_error(x, size, y, size), 2e-4)
  def testCompareGpuVsCpu(self):
    in_shape = [1, 4, 6, 3]
    out_shape = [1, 8, 16, 3]

    for nptype in self.TYPES:
      x = np.arange(0, np.prod(in_shape)).reshape(in_shape).astype(nptype)
      for align_corners in [True, False]:
        with self.test_session(use_gpu=False):
          input_tensor = constant_op.constant(x, shape=in_shape)
          resize_out = image_ops.resize_nearest_neighbor(
              input_tensor, out_shape[1:3], align_corners=align_corners)
          grad_cpu = gradient_checker.compute_gradient(
              input_tensor, in_shape, resize_out, out_shape, x_init_value=x)

        with self.test_session(use_gpu=True):
          input_tensor = constant_op.constant(x, shape=in_shape)
          resize_out = image_ops.resize_nearest_neighbor(
              input_tensor, out_shape[1:3], align_corners=align_corners)
          grad_gpu = gradient_checker.compute_gradient(
              input_tensor, in_shape, resize_out, out_shape, x_init_value=x)
        self.assertAllClose(grad_cpu, grad_gpu, rtol=1e-5, atol=1e-5)
  def testGradientRandomValues(self):
    with self.cached_session():
      us = [2, 3]
      u = array_ops.reshape(
          [0.854, -0.616, 0.767, 0.725, -0.927, 0.159], shape=us)
      v = array_ops.reshape(
          [-0.522, 0.755, 0.407, -0.652, 0.241, 0.247], shape=us)
      s = math_ops.cross(u, v)
      jacob_u, jacob_v = gradient_checker.compute_gradient([u, v], [us, us], s,
                                                           us)

    self.assertAllClose(jacob_u[0], jacob_u[1], rtol=1e-3, atol=1e-3)
    self.assertAllClose(jacob_v[0], jacob_v[1], rtol=1e-3, atol=1e-3)
  def _compareGradient(self, x):
    with self.test_session():
      t = ops.convert_to_tensor(x)

      su = math_ops.reduce_prod(t, [])
      jacob_t, jacob_n = gradient_checker.compute_gradient(
          t, x.shape, su, [2, 3, 4, 2], x_init_value=x, delta=1)
      self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)

      su = math_ops.reduce_prod(t, [1, 2])
      jacob_t, jacob_n = gradient_checker.compute_gradient(
          t, x.shape, su, [2, 2], x_init_value=x, delta=1)
      self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)

      su = math_ops.reduce_prod(t, [0, 1, 2, 3])
      jacob_t, jacob_n = gradient_checker.compute_gradient(
          t, x.shape, su, [1], x_init_value=x, delta=1)
      self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)

      su = math_ops.reduce_prod(t, 0)
      jacob_t, jacob_n = gradient_checker.compute_gradient(
          t, x.shape, su, [3, 4, 2], x_init_value=x, delta=1)
      self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
    def _compareCpu(self, x, np_func, tf_func, grad_rtol=None, grad_atol=None):
        if grad_rtol is None:
            grad_rtol = _default_tolerance(x.dtype)
        if grad_atol is None:
            grad_atol = _default_tolerance(x.dtype)
        np_ans = np_func(x)
        with self.cached_session(use_gpu=False):
            inx = ops.convert_to_tensor(x)
            if x.dtype in (np.float32, np.float64,
                           dtypes_lib.bfloat16.as_numpy_dtype):
                y = 1.1 * tf_func(inx)
                np_ans *= 1.1
            else:
                y = tf_func(inx)
            tf_cpu = self.evaluate(y)
            self.assertShapeEqual(np_ans, y)
            if x.dtype == np.float16:
                self.assertAllClose(np_ans, tf_cpu, rtol=1e-3, atol=1e-3)
            elif x.dtype == dtypes_lib.bfloat16.as_numpy_dtype:
                self.assertAllClose(np_ans, tf_cpu, rtol=1e-2, atol=1e-2)
            else:
                self.assertAllClose(np_ans, tf_cpu)

            if x.dtype in (np.complex64,
                           np.complex128) and tf_func == math_ops.sign:
                return  # Return early

            if x.dtype == np.float16:
                s = list(np.shape(x))
                jacob_t, _ = gradient_checker.compute_gradient(inx,
                                                               s,
                                                               y,
                                                               s,
                                                               x_init_value=x)
                xf = x.astype(np.float64)  # np.float is a deprecated alias for float64.
                inxf = ops.convert_to_tensor(xf)
                yf = tf_func(inxf)
                _, jacob_n = gradient_checker.compute_gradient(inxf,
                                                               s,
                                                               yf,
                                                               s,
                                                               x_init_value=xf,
                                                               delta=1e-2)
                jacob_n = jacob_n.astype(np.float16)
                self.assertAllClose(jacob_t,
                                    jacob_n,
                                    rtol=grad_rtol,
                                    atol=grad_atol)
            elif x.dtype in (np.float32, np.complex64):
                s = list(np.shape(x))
                jacob_t, jacob_n = gradient_checker.compute_gradient(
                    inx, s, y, s, x_init_value=x, delta=1e-3)
                self.assertAllClose(jacob_t,
                                    jacob_n,
                                    rtol=grad_rtol,
                                    atol=grad_atol)
            elif x.dtype in (np.float64, np.complex128):
                s = list(np.shape(x))
                jacob_t, jacob_n = gradient_checker.compute_gradient(
                    inx, s, y, s, x_init_value=x, delta=1e-5)
                self.assertAllClose(jacob_t,
                                    jacob_n,
                                    rtol=grad_rtol,
                                    atol=grad_atol)
    def _testLargeBatchSparseMatrixMatMulGrad(
        self,
        datatype,
        transpose_a,
        transpose_b,
        adjoint_a,
        adjoint_b,
        transpose_output,
        conjugate_output,
        batched_inputs,
    ):
        if batched_inputs:
            a_shape = (3, 5, 11)
            b_shape = (3, 11, 13)
            transpose = lambda x: np.transpose(x, (0, 2, 1))
        else:
            a_shape = (5, 11)
            b_shape = (11, 13)
            transpose = np.transpose

        sparsify = lambda m: m * (m > 0)
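        # sparsify zeroes out the non-positive entries of a random matrix,
        # giving a_mats_val an explicit sparsity pattern for
        # dense_to_csr_sparse_matrix below.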
        a_mats_val = sparsify(
            np.random.randn(*a_shape) +
            1.j * np.random.randn(*a_shape)).astype(datatype)
        if transpose_a or adjoint_a:
            a_mats_val = transpose(a_mats_val)
        if adjoint_a:
            a_mats_val = np.conj(a_mats_val)
        b_mats_val = (np.random.randn(*b_shape) +
                      1.j * np.random.randn(*b_shape)).astype(datatype)
        if transpose_b or adjoint_b:
            b_mats_val = transpose(b_mats_val)
        if adjoint_b:
            b_mats_val = np.conj(b_mats_val)
        with self.test_session():
            a_mats = ops.convert_to_tensor(a_mats_val, dtype=datatype)
            b_mats = ops.convert_to_tensor(b_mats_val, dtype=datatype)
            locs = array_ops.where(abs(a_mats_val) > 0)
            a_sm = sparse_csr_matrix_ops.dense_to_csr_sparse_matrix(
                a_mats, locs)
            c_mats = sparse_csr_matrix_ops.sparse_matrix_mat_mul(
                a_sm,
                b_mats,
                transpose_a=transpose_a,
                transpose_b=transpose_b,
                adjoint_a=adjoint_a,
                adjoint_b=adjoint_b,
                transpose_output=transpose_output,
                conjugate_output=conjugate_output)
            for [ten, val, nn] in [[a_mats, a_mats_val, "a"],
                                   [b_mats, b_mats_val, "b"]]:
                tf_logging.info("Testing gradients for %s" % nn)
                theoretical, numerical = gradient_checker.compute_gradient(
                    ten,
                    ten.get_shape().as_list(),
                    c_mats,
                    c_mats.get_shape().as_list(),
                    x_init_value=val,
                    delta=1e-3)
                self.assertAllClose(theoretical,
                                    numerical,
                                    atol=1e-3,
                                    rtol=1e-3)
    def _testGradient(self, np_input, bias, dtype, data_format, use_gpu):
        with self.test_session(use_gpu=use_gpu):
            if data_format == "NCHW":
                np_input = self._NHWCToNCHW(np_input)
            input_tensor = constant_op.constant(np_input,
                                                shape=np_input.shape,
                                                dtype=dtype)
            bias_tensor = constant_op.constant(bias,
                                               shape=bias.shape,
                                               dtype=dtype)
            output_tensor = nn_ops.bias_add(input_tensor,
                                            bias_tensor,
                                            data_format=data_format)
            tensor_jacob_t, tensor_jacob_n = gradient_checker.compute_gradient(
                input_tensor, np_input.shape, output_tensor, np_input.shape)
            bias_jacob_t, bias_jacob_n = gradient_checker.compute_gradient(
                bias_tensor, bias.shape, output_tensor, np_input.shape)

            # Test gradient of BiasAddGrad
            bias_add_grad = gradients_impl.gradients(
                nn_ops.l2_loss(output_tensor), bias_tensor)[0]
            grad_jacob_t, grad_jacob_n = gradient_checker.compute_gradient(
                output_tensor, np_input.shape, bias_add_grad, bias.shape)

            if dtype == np.float16:
                # Compare fp16 theoretical gradients to fp32 numerical gradients,
                # since fp16 numerical gradients are too imprecise unless great
                # care is taken with choosing the inputs and the delta. This is
                # a weaker check (in particular, it does not test the op itself,
                # only its gradient), but it's much better than nothing.
                input_tensor = constant_op.constant(np_input,
                                                    shape=np_input.shape,
                                                    dtype=np.float32)
                bias_tensor = constant_op.constant(bias,
                                                   shape=bias.shape,
                                                   dtype=np.float32)
                output_tensor = nn_ops.bias_add(input_tensor,
                                                bias_tensor,
                                                data_format=data_format)
                _, tensor_jacob_n = gradient_checker.compute_gradient(
                    input_tensor, np_input.shape, output_tensor,
                    np_input.shape)
                _, bias_jacob_n = gradient_checker.compute_gradient(
                    bias_tensor, bias.shape, output_tensor, np_input.shape)

                bias_add_grad = gradients_impl.gradients(
                    nn_ops.l2_loss(output_tensor), bias_tensor)[0]
                _, grad_jacob_n = gradient_checker.compute_gradient(
                    output_tensor, np_input.shape, bias_add_grad, bias.shape)

            threshold = 2e-3
            if dtype == dtypes.float64:
                threshold = 1e-10
            self.assertAllClose(tensor_jacob_t, tensor_jacob_n, threshold,
                                threshold)
            # TODO(annarev): Re-add assertion for float16, float32 dtypes and NCHW
            # once we figure out why this check started failing with cuda mavx.
            if dtype == dtypes.float64 or data_format != "NCHW":
                self.assertAllClose(bias_jacob_t, bias_jacob_n, threshold,
                                    threshold)
                self.assertAllClose(grad_jacob_t, grad_jacob_n, threshold,
                                    threshold)
    def _ConstructAndTestGradientForConfig(self, batch, input_shape,
                                           filter_shape, in_depth, out_depth,
                                           stride, padding, test_input,
                                           data_format, use_gpu):

        input_planes, input_rows, input_cols = input_shape
        filter_planes, filter_rows, filter_cols = filter_shape

        input_shape = [batch, input_planes, input_rows, input_cols, in_depth]
        filter_shape = [
            filter_planes, filter_rows, filter_cols, in_depth, out_depth
        ]

        # collections.Iterable was removed in Python 3.10; the strides used in
        # these tests are either an int or a list/tuple.
        if isinstance(stride, (list, tuple)):
            strides = [1] + list(stride) + [1]
        else:
            strides = [1, stride, stride, stride, 1]

        if padding == "VALID":
            output_planes = int(
                math.ceil((input_planes - filter_planes + 1.0) / strides[1]))
            output_rows = int(
                math.ceil((input_rows - filter_rows + 1.0) / strides[2]))
            output_cols = int(
                math.ceil((input_cols - filter_cols + 1.0) / strides[3]))
        else:
            output_planes = int(math.ceil(float(input_planes) / strides[1]))
            output_rows = int(math.ceil(float(input_rows) / strides[2]))
            output_cols = int(math.ceil(float(input_cols) / strides[3]))
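        # e.g. with VALID padding, input_planes=4, filter_planes=2 and
        # strides[1]=1 give output_planes = ceil((4 - 2 + 1) / 1) = 3.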
        output_shape = [
            batch, output_planes, output_rows, output_cols, out_depth
        ]
        input_size = 1
        for x in input_shape:
            input_size *= x
        filter_size = 1
        for x in filter_shape:
            filter_size *= x
        input_data = [x * 1.0 / input_size for x in range(0, input_size)]
        filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]

        for data_type in self._DtypesToTest(use_gpu=use_gpu):
            # TODO(mjanusz): Modify gradient_checker to also provide max relative
            # error and synchronize the tolerance levels between the tests for forward
            # and backward computations.
            if data_type == dtypes.float64:
                tolerance = 1e-8
            elif data_type == dtypes.float32:
                tolerance = 5e-3
            elif data_type == dtypes.float16:
                tolerance = 1e-3

            with self.test_session(use_gpu=use_gpu):
                orig_input_tensor = constant_op.constant(input_data,
                                                         shape=input_shape,
                                                         dtype=data_type,
                                                         name="input")
                filter_tensor = constant_op.constant(filter_data,
                                                     shape=filter_shape,
                                                     dtype=data_type,
                                                     name="filter")

                if data_format == "NCDHW":
                    input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
                    new_strides = test_util.NHWCToNCHW(strides)
                else:
                    input_tensor = orig_input_tensor
                    new_strides = strides

                conv = nn_ops.conv3d(input_tensor,
                                     filter_tensor,
                                     new_strides,
                                     padding,
                                     data_format=data_format,
                                     name="conv")

                if data_format == "NCDHW":
                    conv = test_util.NCHWToNHWC(conv)

                self.assertEqual(conv.shape,
                                 tensor_shape.TensorShape(output_shape))

                if test_input:
                    jacob_t, jacob_n = gradient_checker.compute_gradient(
                        orig_input_tensor, input_shape, conv, output_shape)
                else:
                    jacob_t, jacob_n = gradient_checker.compute_gradient(
                        filter_tensor, filter_shape, conv, output_shape)

                if data_type != dtypes.float16:
                    reference_jacob_t = jacob_t
                    err = np.fabs(jacob_t - jacob_n).max()
                else:
                    # Compare fp16 theoretical gradients to fp32 theoretical gradients,
                    # since fp16 numerical gradients are too imprecise.
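                    # This relies on a non-float16 dtype having been visited
                    # earlier in the self._DtypesToTest loop, so that
                    # reference_jacob_t is already defined.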
                    err = np.fabs(jacob_t - reference_jacob_t).max()

            print("conv3d gradient error = ", err)
            self.assertLess(err, tolerance)