def testGradientFloat64(self):
  with self.cached_session():
    x_val = [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]]
    x = np.asarray(x_val, dtype=np.float64, order="F")
    err = gradient_checker_v2.max_error(
        *gradient_checker_v2.compute_gradient(nn_ops.selu, [x]))
    print("selu (float64) gradient err = ", err)
    self.assertLess(err, 1e-6)
def testAddSimple(self):
  size = (2, 3)
  x1 = constant_op.constant(2.0, shape=size, name="x1")
  x2 = constant_op.constant(3.0, shape=size, name="x2")
  error = gradient_checker.max_error(*gradient_checker.compute_gradient(
      lambda x1: math_ops.add(x1, x2), [x1]))
  tf_logging.info("x1 error = %f", error)
  assert error < 1e-4
def testBroadcastingWithGradientChecker(self):
  for dtype in [dtypes.float32, dtypes.float64]:
    with self.cached_session():
      x1 = np.array([-1, 0, 1, 2, 3], dtype=dtype.as_numpy_dtype)
      x2 = np.array([2], dtype=dtype.as_numpy_dtype)
      err = gradient_checker_v2.max_error(
          *gradient_checker_v2.compute_gradient(
              lambda x: math_ops.nextafter(x, x2), [x1]))  # pylint: disable=cell-var-from-loop
      self.assertLess(err, 1e-3)
def testEmptySucceeds(self):

  def f(x):
    return array_ops.identity(x)

  x = constant_op.constant(
      np.random.random_sample((0, 3)), dtype=dtypes.float32)
  for grad in gradient_checker.compute_gradient(f, [x]):
    self.assertEqual(grad[0].shape, (0, 0))
  error = gradient_checker.max_error(*gradient_checker.compute_gradient(
      f, [x]))
  self.assertEqual(error, 0)
def testGradientFloat32(self):
  with self.cached_session():
    x = np.asarray(
        [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
        dtype=np.float32,
        order="F")
    err = gradient_checker_v2.max_error(
        *gradient_checker_v2.compute_gradient(nn_ops.leaky_relu, [x]))
    print("leaky_relu (float32) gradient err = ", err)
    self.assertLess(err, 1e-4)
def testGradientFloat64(self):
  with self.cached_session():
    x = np.asarray(
        [[-0.9, -0.7, -0.5, -0.3, -0.1], [6.1, 6.3, 6.5, 6.7, 6.9]],
        dtype=np.float64,
        order="F")
    err = gradient_checker_v2.max_error(
        *gradient_checker_v2.compute_gradient(nn_ops.relu6, [x]))
    print("relu6 (float64) gradient err = ", err)
    self.assertLess(err, 1e-10)
def testAddCustomized(self):
  size = (2, 3)
  x1 = constant_op.constant(
      2.0, shape=size, dtype=dtypes.float64, name="x1")
  x2 = np.asarray(np.arange(6, dtype=np.float64).reshape(2, 3))
  # Check gradients for x2 using a special (larger) finite-difference delta.
  error = gradient_checker.max_error(*gradient_checker.compute_gradient(
      lambda x2: math_ops.add(x1, x2), [x2], delta=1e-2))
  tf_logging.info("x2 error = %f", error)
  assert error < 1e-10
def testAddSimple(self):
  # if context.executing_eagerly():
  #   return
  np.random.seed(1)  # Fix seed to avoid flakiness
  size = (2, 3)
  x1 = constant_op.constant(2.0, shape=size, name="x1")
  x2 = constant_op.constant(3.0, shape=size, name="x2")
  error = gradient_checker.max_error(*gradient_checker.compute_gradient(
      lambda x1: math_ops.add(x1, x2), [x1]))
  tf_logging.info("x1 error = %f", error)
  assert error < 1e-4
def testComplexConj(self):

  def f(x):
    return math_ops.conj(x)

  x = constant_op.constant(11 - 13j, dtype=dtypes.complex64)
  analytical, numerical = gradient_checker.compute_gradient(
      f, [x], delta=0.1)
  correct = np.array([[1, 0], [0, -1]])
  self.assertAllEqual(correct, analytical[0])
  self.assertAllClose(correct, numerical[0], rtol=2e-5)
  self.assertLess(
      gradient_checker.max_error(*gradient_checker.compute_gradient(
          f, [x], delta=0.1)), 2e-5)
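To see where the expected matrix [[1, 0], [0, -1]] comes from, here is a small NumPy-only sketch (not part of the test): the gradient checker treats a complex scalar z = x + iy as the real pair (x, y), and conj maps (x, y) to (x, -y), so finite-differencing that map reproduces the matrix asserted above.

import numpy as np


def conj_as_real_pair(v):
  # View a complex scalar as the pair (re, im); conj flips the imaginary part.
  return np.array([v[0], -v[1]])


eps = 1e-6
base = np.array([11.0, -13.0])
jac = np.stack([
    (conj_as_real_pair(base + np.array([eps, 0.0])) - conj_as_real_pair(base)) / eps,
    (conj_as_real_pair(base + np.array([0.0, eps])) - conj_as_real_pair(base)) / eps,
])
# jac is approximately [[1, 0], [0, -1]], the `correct` matrix above.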
def testGather(self):

  def f(params):
    index_values = [1, 3]
    indices = constant_op.constant(index_values, name="i")
    return array_ops.gather(params, indices, name="y")

  p_shape = (4, 2)
  p_size = 8
  params = constant_op.constant(
      np.arange(p_size).astype(np.float64), shape=p_shape, name="p")
  error = gradient_checker.max_error(*gradient_checker.compute_gradient(
      f, [params]))
  tf_logging.info("gather error = %f", error)
  assert error < 1e-4
def testComplexConj(self):

  def f(x):
    return math_ops.conj(x)

  x_shape = ()
  x_dtype = dtypes.complex64
  x = constant_op.constant(_random_complex(x_shape, x_dtype))
  analytical, numerical = gradient_checker.compute_gradient(f, [x])
  correct = np.array([[1, 0], [0, -1]])
  self.assertAllEqual(correct, analytical[0])
  self.assertAllClose(correct, numerical[0], rtol=2e-5)
  x = constant_op.constant(_random_complex(x_shape, x_dtype))
  self.assertLess(
      gradient_checker.max_error(*gradient_checker.compute_gradient(f, [x])),
      2e-5)
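_random_complex is used here and in the testComplexMul variant below but is not defined in this section. A minimal sketch of what such a helper might look like, assuming it simply draws independent real and imaginary parts and that numpy is imported as np as in the surrounding tests:

def _random_complex(shape, dtype):
  # Hypothetical helper: draw real and imaginary parts uniformly in [-1, 1)
  # and return a NumPy array of the requested complex dtype.
  real = 2 * np.random.random_sample(shape) - 1
  imag = 2 * np.random.random_sample(shape) - 1
  return (real + 1j * imag).astype(dtype.as_numpy_dtype)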
def testComplexMul(self):
  if not context.executing_eagerly():
    return
  c = constant_op.constant(5 + 7j, dtype=dtypes.complex64)

  def f(x):
    return c * x

  x = constant_op.constant(11 - 13j, dtype=dtypes.complex64)
  analytical, numerical = gradient_checker.compute_gradient(
      f, [x], delta=0.1)
  correct = np.array([[5, 7], [-7, 5]])
  self.assertAllEqual(correct, analytical[0])
  self.assertAllClose(correct, numerical[0], rtol=1e-4)
  self.assertLess(
      gradient_checker.max_error(*gradient_checker.compute_gradient(
          f, [x], delta=0.1)), 2e-4)
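For reference, multiplication by c = a + bi sends the real pair (x, y) to (a*x - b*y, b*x + a*y); with a = 5 and b = 7 the partials are d(re)/dx = 5, d(im)/dx = 7, d(re)/dy = -7, d(im)/dy = 5, which reproduces the `correct` matrix above when rows index input components and columns index output components. A small NumPy-only check (not part of the test):

import numpy as np


def mul_by_c_as_real_pair(v, a=5.0, b=7.0):
  # (x, y) represents x + i*y; multiplying by a + i*b gives
  # (a*x - b*y) + i*(b*x + a*y).
  x, y = v
  return np.array([a * x - b * y, b * x + a * y])


eps = 1e-6
base = np.array([11.0, -13.0])
rows = [(mul_by_c_as_real_pair(base + d) - mul_by_c_as_real_pair(base)) / eps
        for d in (np.array([eps, 0.0]), np.array([0.0, eps]))]
# np.stack(rows) is approximately [[5, 7], [-7, 5]].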
def testComplexAbsGradGrad(self):

  def f(x):
    real = math_ops.cos(x)
    imag = ops.convert_to_tensor(1.)
    return math_ops.abs(math_ops.complex(real, imag))

  def g(x):
    with backprop.GradientTape() as t:
      t.watch(x)
      y = f(x)
    return t.gradient(y, x)

  err = gradient_checker_v2.max_error(
      *gradient_checker_v2.compute_gradient(g, [ops.convert_to_tensor(2.0)]))
  self.assertLess(err, 1e-3)
def testComplexMul(self):
  c = constant_op.constant(5 + 7j, dtype=dtypes.complex64)

  def f(x):
    return c * x

  x_shape = c.shape
  x_dtype = c.dtype
  x = constant_op.constant(_random_complex(x_shape, x_dtype))
  analytical, numerical = gradient_checker.compute_gradient(f, [x])
  correct = np.array([[5, 7], [-7, 5]])
  self.assertAllEqual(correct, analytical[0])
  self.assertAllClose(correct, numerical[0], rtol=1e-4)
  x = constant_op.constant(_random_complex(x_shape, x_dtype))
  self.assertLess(
      gradient_checker.max_error(*gradient_checker.compute_gradient(f, [x])),
      3e-4)
def testNestedGather(self):

  def f(params):
    index_values = [1, 3, 5, 6]
    indices = constant_op.constant(index_values, name="i")
    y = array_ops.gather(params, indices, name="y")
    index_values2 = [0, 2]
    indices2 = constant_op.constant(index_values2, name="i2")
    return array_ops.gather(y, indices2, name="y2")

  p_shape = (8, 2)
  p_size = 16
  params = constant_op.constant(
      np.arange(p_size).astype(np.float64), shape=p_shape, name="p")
  error = gradient_checker.max_error(*gradient_checker.compute_gradient(
      f, [params]))
  tf_logging.info("nested gather error = %f", error)
  assert error < 1e-4
def testGradGradFloat64(self):
  with self.cached_session():

    def f(x):
      assert x.dtype == dtypes.float64
      with backprop.GradientTape() as tape:
        tape.watch(x)
        y = nn_ops.selu(x)
      return tape.gradient(y, x)

    x = np.asarray(
        [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
        dtype=np.float64,
        order="F")
    err = gradient_checker_v2.max_error(
        *gradient_checker_v2.compute_gradient(f, [x]))
    print("selu (float64) gradient of gradient err = ", err)
    self.assertLess(err, 1e-6)
def testGradGradFloat32(self):
  with compat.forward_compatibility_horizon(2018, 11, 2):
    with self.cached_session():

      def f(x):
        assert x.dtype == dtypes.float32
        with backprop.GradientTape() as tape:
          tape.watch(x)
          y = nn_ops.leaky_relu(x)
        return tape.gradient(y, x)

      x = np.asarray(
          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
          dtype=np.float32,
          order="F")
      err = gradient_checker_v2.max_error(
          *gradient_checker_v2.compute_gradient(f, [x]))
      print("leaky_relu (float32) gradient of gradient err = ", err)
      self.assertLess(err, 1e-4)
def testNaNGradFails(self):

  @custom_gradient.custom_gradient
  def id_nan_grad(x):
    y = array_ops.identity(x)

    def grad_fn(dy):
      dx = np.nan * dy  # dx = dy
      return dx

    return y, grad_fn

  def f(x):
    return id_nan_grad(x)

  x = constant_op.constant(
      np.random.random_sample((1, 1)), dtype=dtypes.float32)
  error = gradient_checker.max_error(*gradient_checker.compute_gradient(
      f, [x]))
  # A typical test would assert error < max_err, so assert that this test
  # would raise AssertionError, since NaN is not < 1.0.
  with self.assertRaisesRegex(AssertionError, "False is not true"):
    self.assertTrue(error < 1.0)
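For contrast, a custom gradient that passes identity's true gradient through (the `dx = dy` hinted at by the inline comment above) would make the checker report an error near zero. A minimal sketch, reusing the same custom_gradient and array_ops modules as the test; the name id_good_grad is illustrative only:

@custom_gradient.custom_gradient
def id_good_grad(x):
  y = array_ops.identity(x)

  def grad_fn(dy):
    # Identity's true gradient: pass the upstream gradient through unchanged.
    return dy

  return y, grad_fn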
def testGradRandomBoxes(self):
  """Test that the gradient is correct for randomly generated boxes.

  The mapping is piecewise differentiable with respect to the box
  coordinates. The points where the function is not differentiable are
  those which are mapped to image pixels, i.e., the normalized y
  coordinates in np.linspace(0, 1, image_height) and the normalized x
  coordinates in np.linspace(0, 1, image_width). Make sure that the box
  coordinates are sufficiently far away from those rectangular grid
  centers that are points of discontinuity, so that the finite-difference
  Jacobian is close to the computed one.
  """
  np.random.seed(1)  # Make it reproducible.
  delta = 1e-3
  radius = 2 * delta
  low, high = -0.5, 1.5  # Also covers the case of extrapolation.

  image_height = 4
  for image_width in range(1, 3):
    for crop_height in range(1, 3):
      for crop_width in range(2, 4):
        for depth in range(1, 3):
          for num_boxes in range(1, 3):

            batch = num_boxes
            image_shape = [batch, image_height, image_width, depth]
            crop_size = [crop_height, crop_width]

            image = np.arange(0, batch * image_height * image_width *
                              depth).reshape(image_shape).astype(np.float32)
            boxes = []
            for _ in range(num_boxes):
              # pylint: disable=unbalanced-tuple-unpacking
              y1, y2 = self._randomUniformAvoidAnchors(
                  low, high, np.linspace(0, 1, image_height), radius, 2)
              x1, x2 = self._randomUniformAvoidAnchors(
                  low, high, np.linspace(0, 1, image_width), radius, 2)
              # pylint: enable=unbalanced-tuple-unpacking
              boxes.append([y1, x1, y2, x2])

            boxes = np.array(boxes, dtype=np.float32)
            box_ind = np.arange(batch, dtype=np.int32)

            image_tensor = constant_op.constant(image, shape=image_shape)
            boxes_tensor = constant_op.constant(boxes, shape=[num_boxes, 4])
            box_ind_tensor = constant_op.constant(box_ind, shape=[num_boxes])

            def crop_resize(image_tensor, boxes_tensor):
              # pylint: disable=cell-var-from-loop
              return image_ops.crop_and_resize(
                  image_tensor, boxes_tensor, box_ind_tensor,
                  constant_op.constant(crop_size, shape=[2]))

            with test_util.device(use_gpu=True):
              with self.cached_session():
                # pylint: disable=cell-var-from-loop
                err1 = gradient_checker_v2.max_error(
                    *gradient_checker_v2.compute_gradient(
                        lambda x: crop_resize(x, boxes_tensor),
                        [image_tensor]))
                err2 = gradient_checker_v2.max_error(
                    *gradient_checker_v2.compute_gradient(
                        lambda x: crop_resize(image_tensor, x),
                        [boxes_tensor]))
                err = max(err1, err2)
                self.assertLess(err, 2e-3)
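_randomUniformAvoidAnchors is called above but not shown in this section. A minimal sketch of what such a helper might do, under the assumption that it rejection-samples values in [low, high) that stay at least `radius` away from every anchor (the grid points where crop_and_resize is not differentiable), with numpy imported as np as elsewhere:

def _randomUniformAvoidAnchors(self, low, high, anchors, radius, num_samples):
  # Hypothetical helper: draw `num_samples` values uniformly from [low, high)
  # while keeping each one at least `radius` away from every anchor.
  samples = []
  while len(samples) < num_samples:
    sample = np.random.uniform(low, high)
    if np.all(np.abs(np.asarray(anchors) - sample) > radius):
      samples.append(sample)
  return samples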
def _testGrad(self, f, x):
  max_error = gradient_checker_v2.max_error(
      *gradient_checker_v2.compute_gradient(f, [x]))
  self.assertLess(max_error, 1e-4)
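A hypothetical call site for this helper, assuming constant_op, dtypes, and math_ops are imported as in the other tests; math_ops.square is only an example op, not the op the original test exercises:

def testSquareGrad(self):
  # Check a single unary op at one input point via the shared helper.
  x = constant_op.constant([1.0, 2.0, 3.0], dtype=dtypes.float32)
  self._testGrad(math_ops.square, x)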
def _BuildAndTestMiniMNIST(self, param_index, tag):
  # Fix seed to avoid occasional flakiness
  np.random.seed(6)

  # Hyperparameters
  batch = 3
  inputs = 16
  features = 32
  classes = 10

  # Define the parameters
  inp_data = np.random.random_sample(inputs * batch)
  hidden_weight_data = np.random.randn(inputs * features) / np.sqrt(inputs)
  hidden_bias_data = np.random.random_sample(features)
  sm_weight_data = np.random.randn(features * classes) / np.sqrt(features)
  sm_bias_data = np.random.random_sample(classes)

  # Special care for labels since they need to be normalized per batch
  label_data = np.random.random(batch * classes).reshape((batch, classes))
  s = label_data.sum(axis=1)
  label_data /= s[:, None]

  # We treat the inputs as "parameters" here
  inp = constant_op.constant(
      inp_data.tolist(),
      shape=[batch, inputs],
      dtype=dtypes.float64,
      name="inp")
  hidden_weight = constant_op.constant(
      hidden_weight_data.tolist(),
      shape=[inputs, features],
      dtype=dtypes.float64,
      name="hidden_weight")
  hidden_bias = constant_op.constant(
      hidden_bias_data.tolist(),
      shape=[features],
      dtype=dtypes.float64,
      name="hidden_bias")
  softmax_weight = constant_op.constant(
      sm_weight_data.tolist(),
      shape=[features, classes],
      dtype=dtypes.float64,
      name="softmax_weight")
  softmax_bias = constant_op.constant(
      sm_bias_data.tolist(),
      shape=[classes],
      dtype=dtypes.float64,
      name="softmax_bias")

  # List all the parameters so that we can test them one at a time
  all_params = [inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias]

  # Build the mini MNIST network
  def f(inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias):
    features = nn_ops.relu(
        nn_ops.xw_plus_b(inp, hidden_weight, hidden_bias), name="features")
    logits = nn_ops.xw_plus_b(
        features, softmax_weight, softmax_bias, name="logits")
    labels = constant_op.constant(
        label_data.tolist(),
        shape=[batch, classes],
        dtype=dtypes.float64,
        name="labels")
    cost = nn_ops.softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name="cost")
    return cost

  def f_restricted(x):
    xs = all_params
    i = param_index
    # use x for the i-th parameter
    xs = xs[0:i] + [x] + xs[i + 1:]
    return f(*xs)

  # Test the gradients.
  err = gradient_checker.max_error(*gradient_checker.compute_gradient(
      f_restricted, [all_params[param_index]], delta=1e-5))

  tf_logging.info("Mini MNIST: %s gradient error = %g", tag, err)
  return err
def _compute_error():
  return gradient_checker_v2.max_error(
      *gradient_checker_v2.compute_gradient(f, x=args, delta=delta))
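_compute_error closes over `f`, `args`, and `delta` from its enclosing scope. A hypothetical enclosing test method showing how those free variables might be bound before the closure is called; math_ops.square, the input values, and the tolerance are illustrative only:

def testSquareGradError(self):
  # Hypothetical enclosing scope for the closure above.
  f = math_ops.square
  args = [constant_op.constant([1.0, 2.0, 3.0])]
  delta = 1e-3

  def _compute_error():
    return gradient_checker_v2.max_error(
        *gradient_checker_v2.compute_gradient(f, x=args, delta=delta))

  self.assertLess(_compute_error(), 1e-4)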
def test_fresnel_sin_gradient(self):
  inputs = [np.random.uniform(1., 50., size=int(1e2))]
  analytical, numerical = gradient_checker_v2.compute_gradient(
      special_math_ops.fresnel_sin, inputs)
  self.assertLess(gradient_checker_v2.max_error(analytical, numerical), 5e-3)
def test_dawsn_gradient(self):
  inputs = [np.random.uniform(-50., 50., size=int(1e2))]
  analytical, numerical = gradient_checker_v2.compute_gradient(
      special_math_ops.dawsn, inputs)
  self.assertLess(gradient_checker_v2.max_error(analytical, numerical), 1e-4)