def testGradients(self):
  np.random.seed(1618)
  sp_shapes = [(10, 10, 10), (5, 5), (1618,), (3, 3, 7)]
  dense_shapes = [(10, 10, 1), (5, 5), (1,), (1, 7)]
  with self.test_session(use_gpu=False):
    for dtype in [np.float32, np.float64]:
      for sp_shape, dense_shape in zip(sp_shapes, dense_shapes):
        sp_vals_np = np.random.rand(*sp_shape).astype(dtype) + 1
        dense_vals_np = np.random.rand(*dense_shape).astype(dtype) + 1
        sp_t, nnz = _sparsify(sp_vals_np, thresh=1.5)
        dense_t = constant_op.constant(dense_vals_np)
        cmul = sp_t * dense_t
        err = gradient_checker.compute_gradient_error(
            [sp_t.values, dense_t], [(nnz,), dense_shape], cmul.values,
            (nnz,))
        self.assertLess(err, 1e-4)
        cdiv = sp_t / dense_t
        err = gradient_checker.compute_gradient_error(sp_t.values, (nnz,),
                                                      cdiv.values, (nnz,))
        self.assertLess(err, 1e-4)
        err = gradient_checker.compute_gradient_error(
            dense_t, dense_shape, cdiv.values, (nnz,),
            x_init_value=dense_vals_np)
        self.assertLess(err, 2e-4)
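# This test (and the sparse_reduce_sum gradient test below) relies on a
# module-level _sparsify helper defined elsewhere in the file. A minimal
# sketch of such a helper, assuming it zeroes out values below a threshold
# and returns the resulting SparseTensor together with its nonzero count:
def _sparsify(x, thresh=0.5, index_dtype=np.int64):
  # Hypothetical sketch; the real helper may differ in detail.
  x[x < thresh] = 0  # Zero out entries below the threshold.
  non_zero = np.nonzero(x)
  x_indices = np.vstack(non_zero).astype(index_dtype).T
  x_values = x[non_zero]
  x_shape = x.shape
  return sparse_tensor.SparseTensor(
      indices=x_indices, values=x_values, dense_shape=x_shape), len(x_values)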
def testFillFloat(self):
  with self.test_session(use_gpu=False) as sess:
    values = constant_op.constant(
        [0.0, 10.0, 13.0, 14.0, 32.0, 33.0], dtype=dtypes.float64)
    default_value = constant_op.constant(-1.0, dtype=dtypes.float64)
    sp_input = sparse_tensor.SparseTensorValue(
        indices=np.array([[0, 0], [1, 0], [1, 3], [1, 4], [3, 2], [3, 3]]),
        values=values,
        dense_shape=np.array([5, 6]))
    sp_output, empty_row_indicator = (sparse_ops.sparse_fill_empty_rows(
        sp_input, default_value))
    output, empty_row_indicator_out = sess.run(
        [sp_output, empty_row_indicator])
    self.assertAllEqual(
        output.indices,
        [[0, 0], [1, 0], [1, 3], [1, 4], [2, 0], [3, 2], [3, 3], [4, 0]])
    self.assertAllClose(output.values, [0, 10, 13, 14, -1, 32, 33, -1])
    self.assertAllEqual(output.dense_shape, [5, 6])
    self.assertAllEqual(empty_row_indicator_out,
                        np.array([0, 0, 1, 0, 1]).astype(np.bool))
    values_grad_err = gradient_checker.compute_gradient_error(
        values, values.shape.as_list(), sp_output.values, [8], delta=1e-8)
    self.assertGreater(values_grad_err, 0)
    self.assertLess(values_grad_err, 1e-8)
    default_value_grad_err = gradient_checker.compute_gradient_error(
        default_value, default_value.shape.as_list(), sp_output.values, [8],
        delta=1e-8)
    self.assertGreater(default_value_grad_err, 0)
    self.assertLess(default_value_grad_err, 1e-8)
def _test_gradient(self, x_shape, scale_shape, use_gpu=True,
                   data_format='NHWC'):
  np.random.seed(1)
  x_val = np.random.random_sample(x_shape).astype(np.float32)
  scale_val = np.random.random_sample(scale_shape).astype(np.float32)
  offset_val = np.random.random_sample(scale_shape).astype(np.float32)
  with self.test_session(use_gpu=use_gpu):
    x = constant_op.constant(x_val, name='x')
    scale = constant_op.constant(scale_val, name='scale')
    offset = constant_op.constant(offset_val, name='offset')
    y, _, _ = nn_impl.fused_batch_norm(
        x, scale, offset, data_format=data_format)
    err_x = gradient_checker.compute_gradient_error(x, x_shape, y, x_shape)
    err_scale = gradient_checker.compute_gradient_error(
        scale, scale_shape, y, x_shape)
    err_offset = gradient_checker.compute_gradient_error(
        offset, scale_shape, y, x_shape)
    err_tolerance = 1e-3
    self.assertLess(err_x, err_tolerance)
    self.assertLess(err_scale, err_tolerance)
    self.assertLess(err_offset, err_tolerance)
def testGradient(self):
  if np.__version__ == "1.13.0":
    self.skipTest("numpy 1.13.0 bug")
  np.random.seed(8161)
  test_dims = [(11, 1, 5, 7, 1), (2, 2)]
  with self.test_session(use_gpu=False):
    for dims in test_dims:
      sp_t, nnz = _sparsify(np.random.randn(*dims))
      # reduce random axes from 1D to N-D
      for d in range(1, len(dims) + 1):
        axes = np.random.choice(len(dims), size=d, replace=False).tolist()
        reduced = sparse_ops.sparse_reduce_sum(sp_t, axes)
        err = gradient_checker.compute_gradient_error(
            sp_t.values, (nnz,), reduced, reduced.eval().shape)
        self.assertLess(err, 1e-3)
      # Tests for negative axes.
      reduced = sparse_ops.sparse_reduce_sum(sp_t, -1)
      err = gradient_checker.compute_gradient_error(
          sp_t.values, (nnz,), reduced, reduced.eval().shape)
      self.assertLess(err, 1e-3)
def _testGradients(self, tr_a, tr_b, sp_a, sp_b, a_dtype, b_dtype, delta,
                   name):
  with self.test_session():
    a = constant_op.constant(
        RandMatrix(3, 2, tr_a, round_bfloat=True), dtype=dtypes.float32)
    b = constant_op.constant(
        RandMatrix(2, 4, tr_b, round_bfloat=True), dtype=dtypes.float32)
    tf_a = math_ops.cast(a, a_dtype) if a_dtype != dtypes.float32 else a
    tf_b = math_ops.cast(b, b_dtype) if b_dtype != dtypes.float32 else b
    m = math_ops.matmul(
        tf_a,
        tf_b,
        name=name,
        transpose_a=tr_a,
        transpose_b=tr_b,
        a_is_sparse=sp_a,
        b_is_sparse=sp_b)
    err = (gradient_checker.compute_gradient_error(
        a, [2, 3] if tr_a else [3, 2],
        m, [3, 4],
        x_init_value=a.eval(),
        delta=delta) +
           gradient_checker.compute_gradient_error(
               b, [4, 2] if tr_b else [2, 4],
               m, [3, 4],
               x_init_value=b.eval(),
               delta=delta))
    self.assertLessEqual(err, delta / 2.)
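# RandMatrix is another helper defined elsewhere in the test file. A hedged
# sketch, assuming it returns a rows x cols (or transposed) random float32
# matrix and optionally truncates values to bfloat16 precision so the sparse
# matmul comparison stays exact; the real helper may differ:
def RandMatrix(rows, cols, tr, round_bfloat=False):
  if tr:
    rows, cols = cols, rows
  matrix = np.random.uniform(
      low=-256.0, high=256.0, size=rows * cols).reshape([rows, cols])
  matrix = matrix.astype(np.float32)
  if round_bfloat:
    # Drop the low 16 mantissa bits so each value is exactly representable
    # in bfloat16 (truncation, not rounding).
    matrix = (matrix.view(np.uint32) & np.uint32(0xFFFF0000)).view(np.float32)
  return matrix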
def doOutputTest(self, input_shape, moments_axes, tol=1e-4,
                 check_gradients=False):
  for mu in [0.0, 1.0, 1e3]:
    for sigma in [1.0, 0.1]:
      for keep_dims in [True, False]:
        input_values = np.random.rand(*input_shape) * sigma + mu
        expected_mean = np.mean(
            input_values, axis=moments_axes, keepdims=keep_dims)
        expected_var = np.var(
            input_values, axis=moments_axes, keepdims=keep_dims)
        with ops.Graph().as_default() as g:
          with self.test_session(graph=g) as sess:
            inputs = constant_op.constant(
                input_values, shape=input_shape, dtype=dtypes.float32)
            mean, variance = nn_impl.moments(
                inputs, moments_axes, keep_dims=keep_dims)
            if check_gradients:
              err = gradient_checker.compute_gradient_error(
                  inputs, input_shape, mean, mean.shape.as_list())
              self.assertLess(err, 1e-3)
              err = gradient_checker.compute_gradient_error(
                  inputs, input_shape, variance, variance.shape.as_list())
              self.assertLess(err, 1e-3)
            # Evaluate.
            [mean, variance] = sess.run([mean, variance])
            # Make sure that there are no NaNs
            self.assertFalse(np.isnan(mean).any())
            self.assertFalse(np.isnan(variance).any())
            self.assertAllClose(mean, expected_mean, rtol=tol, atol=tol)
            self.assertAllClose(variance, expected_var, rtol=tol, atol=tol)
def testGradient(self):
  np.random.seed(1)  # Make it reproducible.
  x_shape = [5, 10]
  x_np = np.random.randn(*x_shape).astype(np.float32)
  alpha_np = np.float32(np.random.rand(1, x_shape[1]) + 0.01)
  clip_np = np.float32(np.random.rand(x_shape[0], 1) * 5.)
  with self.test_session(use_gpu=True):
    x_tf = constant_op.constant(x_np)
    alpha_tf = constant_op.constant(alpha_np)
    clip_tf = constant_op.constant(clip_np)
    y_tf = scaled_softplus(x_tf, alpha_tf)
    z_tf = scaled_softplus(x_tf, alpha_tf, clip_tf * 0.1)
    err = gradient_checker.compute_gradient_error(
        [x_tf, alpha_tf], [x_shape, alpha_np.shape],
        y_tf, x_shape, [x_np, alpha_np],
        delta=0.002)
    err_clip = gradient_checker.compute_gradient_error(
        [x_tf, alpha_tf, clip_tf],
        [x_shape, alpha_np.shape, clip_np.shape],
        z_tf, x_shape, [x_np, alpha_np, clip_np],
        delta=0.002)
    eps = 2e-4
    self.assertLess(err, eps)
    self.assertLess(err_clip, eps)
def _test_gradient(self, x_shape, x_dtype, scale_shape, scale_dtype,
                   use_gpu=True, data_format='NHWC', is_training=True):
  np.random.seed(1)
  x_val = np.random.random_sample(x_shape).astype(x_dtype)
  scale_val = np.random.random_sample(scale_shape).astype(scale_dtype)
  offset_val = np.random.random_sample(scale_shape).astype(scale_dtype)
  with self.test_session(use_gpu=use_gpu):
    x = constant_op.constant(x_val, name='x')
    scale = constant_op.constant(scale_val, name='scale')
    offset = constant_op.constant(offset_val, name='offset')
    if is_training:
      pop_mean = None
      pop_var = None
    else:
      pop_mean = np.random.random_sample(scale_shape).astype(scale_dtype)
      pop_var = np.random.random_sample(scale_shape).astype(scale_dtype)
    y, _, _ = nn_impl.fused_batch_norm(
        x,
        scale,
        offset,
        mean=pop_mean,
        variance=pop_var,
        data_format=data_format,
        is_training=is_training)
    if x_dtype != np.float16:
      err_x = gradient_checker.compute_gradient_error(x, x_shape, y, x_shape)
      err_scale = gradient_checker.compute_gradient_error(
          scale, scale_shape, y, x_shape)
      err_offset = gradient_checker.compute_gradient_error(
          offset, scale_shape, y, x_shape)
    else:
      x32 = constant_op.constant(x_val, name='x32', dtype=dtypes.float32)
      y32, _, _ = nn_impl.fused_batch_norm(
          x32,
          scale,
          offset,
          mean=pop_mean,
          variance=pop_var,
          data_format=data_format,
          is_training=is_training)
      err_x = self._compute_gradient_error_float16(x, x32, x_shape, y, y32,
                                                   x_shape)
      err_scale = self._compute_gradient_error_float16(
          scale, scale, scale_shape, y, y32, x_shape)
      err_offset = self._compute_gradient_error_float16(
          offset, offset, scale_shape, y, y32, x_shape)
    x_err_tolerance = 2e-3 if x_dtype == np.float16 else 1e-3
    scale_err_tolerance = 1e-3
    self.assertLess(err_x, x_err_tolerance)
    self.assertLess(err_scale, scale_err_tolerance)
    self.assertLess(err_offset, scale_err_tolerance)
def testEmptyFails(self):
  with ops.Graph().as_default() as g:
    with self.session(graph=g):
      x = array_ops.placeholder(dtypes.float32)
      with g.gradient_override_map({"Identity": "BadGrad"}):
        y = array_ops.identity(x)
      bad = r"Empty gradient has wrong shape: expected \(0, 3\), got \(3, 0\)"
      with self.assertRaisesRegexp(ValueError, bad):
        gradient_checker.compute_gradient(x, (0, 3), y, (0, 3))
      with self.assertRaisesRegexp(ValueError, bad):
        gradient_checker.compute_gradient_error(x, (0, 3), y, (0, 3))
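# The "BadGrad" override used above must be registered somewhere in the test
# module. A hedged sketch of such a registration, assuming it simply returns
# a transposed (and therefore wrongly shaped) gradient; the real module's
# version may differ:
@ops.RegisterGradient("BadGrad")
def _bad_grad(unused_op, grad):
  """A gradient that deliberately returns the wrong shape."""
  return array_ops.transpose(grad)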
def testClipByValueGradient(self):
  inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32)
  outputs_1 = clip_ops.clip_by_value(inputs, 0.5, 3.5)
  min_val = constant_op.constant([0.5, 0.5, 0.5, 0.5], dtype=dtypes.float32)
  max_val = constant_op.constant([3.5, 3.5, 3.5, 3.5], dtype=dtypes.float32)
  outputs_2 = clip_ops.clip_by_value(inputs, min_val, max_val)
  with self.test_session():
    error_1 = gradient_checker.compute_gradient_error(inputs, [4], outputs_1,
                                                      [4])
    self.assertLess(error_1, 1e-4)
    error_2 = gradient_checker.compute_gradient_error(inputs, [4], outputs_2,
                                                      [4])
    self.assertLess(error_2, 1e-4)
def testGradientsAxis0(self):
  np.random.seed(7)
  for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
    data = np.random.randn(*shape)
    shapes = [shape[1:]] * shape[0]
    with self.test_session(use_gpu=True):
      # TODO(irving): Remove list() once we handle maps correctly
      xs = list(map(constant_op.constant, data))
      c = array_ops.pack(xs)
      err = gradient_checker.compute_gradient_error(xs, shapes, c, shape)
      self.assertLess(err, 1e-6)
      c = array_ops.stack(xs)
      err = gradient_checker.compute_gradient_error(xs, shapes, c, shape)
      self.assertLess(err, 1e-6)
def testSegmentMaxGradient(self):
  data = constant_op.constant([1.0, 2.0, 3.0], dtype=dtypes.float32)
  segment_ids = constant_op.constant([0, 0, 1], dtype=dtypes.int64)
  segment_max = math_ops.segment_max(data, segment_ids)
  with self.test_session():
    error = gradient_checker.compute_gradient_error(data, [3], segment_max,
                                                    [2])
    self.assertLess(error, 1e-4)
def testGrad(self):
  shapes = ((3, 4, 4), (3, 3, 4), (3, 4, 3), (7, 4, 8, 8))
  with self.test_session(use_gpu=self._use_gpu):
    for shape in shapes:
      x = constant_op.constant(
          np.random.rand(*shape), dtype=dtypes_lib.float32)
      diag_shape = shape[:-2] + (min(shape[-2:]),)
      x_diag = constant_op.constant(
          np.random.rand(*diag_shape), dtype=dtypes_lib.float32)
      y = array_ops.matrix_set_diag(x, x_diag)
      error_x = gradient_checker.compute_gradient_error(
          x, x.get_shape().as_list(), y, y.get_shape().as_list())
      self.assertLess(error_x, 1e-4)
      error_x_diag = gradient_checker.compute_gradient_error(
          x_diag, x_diag.get_shape().as_list(), y, y.get_shape().as_list())
      self.assertLess(error_x_diag, 1e-4)
def _testGlobalGradient(self, from_y="mean"):
  with self.cached_session():
    x_shape = [3, 5, 4, 2]
    x_val = np.random.random_sample(x_shape).astype(np.float64)
    x = constant_op.constant(x_val)
    x.set_shape(x_shape)
    axes = [0, 1, 2]
    y_shape = [2]  # Depth of x
    inputs_to_compute_gradients_for = [x]
    out_mean, out_var = self._unweighted_moments(
        x, axes, extra_out_grads=inputs_to_compute_gradients_for)
    if from_y == "mean":
      y = out_mean
    elif from_y == "var":
      y = out_var
    for (i, v) in enumerate(inputs_to_compute_gradients_for):
      err = gradient_checker.compute_gradient_error(
          v, v.get_shape().as_list(), y, y_shape)
      print("Moments %s gradient err vs input %d = %g" % (from_y, i, err))
      self.assertLess(err, 1e-11)
def testLargePoolingRatioThroughGradientError(self):
  input_shape = (1, 17, 23, 1)
  input_data = self._GenerateRandomInputTensor(input_shape)
  pooling_ratio = (1, math.sqrt(13), math.sqrt(7), 1)
  output_shape = [int(a / b) for a, b in zip(input_shape, pooling_ratio)]
  overlapping = True
  pseudo_random = False
  with self.cached_session() as _:
    input_tensor = constant_op.constant(input_data, shape=input_shape)
    output_tensor, unused_a, unused_b = nn_ops.fractional_avg_pool(
        input_tensor,
        pooling_ratio,
        pseudo_random=pseudo_random,
        overlapping=overlapping,
        deterministic=True,
        seed=self._SEED,
        seed2=self._SEED2)
    # error_margin and delta setting is similar to avg_pool_grad.
    error_margin = 1e-4
    gradient_error = gradient_checker.compute_gradient_error(
        input_tensor,
        input_shape,
        output_tensor,
        output_shape,
        x_init_value=input_data.reshape(input_shape),
        delta=1e-2)
    self.assertLess(gradient_error, error_margin)
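# The fractional-pool tests in this section call _GenerateRandomInputTensor
# and _GenerateUniqueRandomInputTensor helpers defined on their test classes.
# Hedged sketches of what such helpers might look like, assuming self._PRNG
# is a seeded numpy RandomState; the real helpers may differ:
def _GenerateRandomInputTensor(self, shape):
  # Arbitrary non-negative values, scaled so pooled averages sit well away
  # from zero.
  return self._PRNG.random_sample(shape).astype(np.float32) * 1000


def _GenerateUniqueRandomInputTensor(self, shape):
  # A permutation of 1..N guarantees unique values, which keeps the max-pool
  # argmax (and hence its gradient) unambiguous.
  num_elements = 1
  for size in shape:
    num_elements *= size
  x = self._PRNG.permutation(num_elements) + 1
  return x.reshape(shape).astype(np.float32)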
def testDifferentTensorShapesThroughGradientError(self):
  pseudo_random = True
  overlapping = True
  pooling_ratio = [1, math.sqrt(3), math.sqrt(2), 1]
  for num_batches in [1, 2]:
    for num_rows in [5, 13]:
      for num_cols in [5, 11]:
        for num_channels in [1, 3]:
          input_shape = (num_batches, num_rows, num_cols, num_channels)
          input_data = self._GenerateRandomInputTensor(input_shape)
          with self.cached_session() as _:
            input_tensor = constant_op.constant(input_data, shape=input_shape)
            output_tensor, unused_a, unused_b = nn_ops.fractional_avg_pool(
                input_tensor,
                pooling_ratio,
                pseudo_random=pseudo_random,
                overlapping=overlapping,
                deterministic=True,
                seed=self._SEED,
                seed2=self._SEED2)
            output_data = output_tensor.eval()
            output_shape = output_data.shape
            # error_margin and delta setting is similar to avg_pool_grad.
            error_margin = 1e-4
            gradient_error = gradient_checker.compute_gradient_error(
                input_tensor,
                input_shape,
                output_tensor,
                output_shape,
                x_init_value=input_data.reshape(input_shape),
                delta=1e-2)
            self.assertLess(gradient_error, error_margin)
def testAllInputOptionsThroughGradientError(self):
  input_shape = (1, 7, 13, 1)
  input_data = self._GenerateRandomInputTensor(input_shape)
  pooling_ratio = [1, math.sqrt(2), math.sqrt(3), 1]
  for pseudo_random in True, False:
    for overlapping in True, False:
      with self.cached_session() as _:
        input_tensor = constant_op.constant(input_data, shape=input_shape)
        output_tensor, unused_a, unused_b = nn_ops.fractional_avg_pool(
            input_tensor,
            pooling_ratio,
            pseudo_random=pseudo_random,
            overlapping=overlapping,
            deterministic=True,
            seed=self._SEED,
            seed2=self._SEED2)
        output_data = output_tensor.eval()
        output_shape = output_data.shape
        # error_margin and delta setting is similar to avg_pool_grad.
        error_margin = 1e-4
        gradient_error = gradient_checker.compute_gradient_error(
            input_tensor,
            input_shape,
            output_tensor,
            output_shape,
            x_init_value=input_data.reshape(input_shape),
            delta=1e-2)
        self.assertLess(gradient_error, error_margin)
def testLargePoolingRatioThroughGradientError(self):
  input_shape = (1, 17, 23, 1)
  input_data = self._GenerateUniqueRandomInputTensor(input_shape)
  # Add some randomness to make input_data not so 'integer'
  input_data += self._PRNG.random_sample(input_shape)
  pooling_ratio = (1, math.sqrt(13), math.sqrt(7), 1)
  output_shape = [int(a / b) for a, b in zip(input_shape, pooling_ratio)]
  overlapping = True
  pseudo_random = False
  with self.cached_session() as _:
    input_tensor = constant_op.constant(input_data, shape=input_shape)
    output_tensor, unused_a, unused_b = nn_ops.fractional_max_pool_v2(
        input_tensor,
        pooling_ratio,
        pseudo_random=pseudo_random,
        overlapping=overlapping,
        seed=self._SEED)
    # error_margin and delta setting is similar to max_pool_grad.
    error_margin = 1e-3
    gradient_error = gradient_checker.compute_gradient_error(
        input_tensor,
        input_shape,
        output_tensor,
        output_shape,
        x_init_value=input_data.reshape(input_shape),
        delta=1e-2)
    self.assertLess(gradient_error, error_margin)
def testGradient(self):
  with self.test_session():
    for padding in ["SAME", "VALID"]:
      for stride in [1, 2]:
        np.random.seed(1)
        in_shape = [2, 4, 3, 3, 2]
        in_val = constant_op.constant(
            2 * np.random.random_sample(in_shape) - 1, dtype=dtypes.float32)
        filter_shape = [3, 3, 3, 2, 3]
        strides = [1, stride, stride, stride, 1]
        # Make a convolution op with the current settings, just to easily get
        # the shape of the output.
        conv_out = nn_ops.conv3d(in_val, array_ops.zeros(filter_shape),
                                 strides, padding)
        out_backprop_shape = conv_out.get_shape().as_list()
        out_backprop_val = constant_op.constant(
            2 * np.random.random_sample(out_backprop_shape) - 1,
            dtype=dtypes.float32)
        output = nn_ops.conv3d_backprop_filter_v2(in_val, filter_shape,
                                                  out_backprop_val, strides,
                                                  padding)
        err = gradient_checker.compute_gradient_error(
            [in_val, out_backprop_val], [in_shape, out_backprop_shape],
            output, filter_shape)
        print("conv3d_backprop_filter gradient err = %g " % err)
        err_tolerance = 1e-3
        self.assertLess(err, err_tolerance)
def _RunAndVerifyGradients(self, dtype):
  with self.test_session(use_gpu=True):
    # random shape
    shape = np.random.randint(1, 5, size=4)
    # Make depth at least 2 to make it meaningful
    shape[3] += 1
    # random depth_radius, bias, alpha, beta. cuDNN requires depth_radius to
    # be in [1, 7].
    lrn_depth_radius = np.random.randint(1, min(8, shape[3]))
    bias = 1.0 + np.random.rand()
    alpha = 1.0 * np.random.rand()
    # cuDNN requires beta >= 0.01.
    beta = 0.01 + 1.0 * np.random.rand()
    if dtype == dtypes.float32:
      inp_array = np.random.rand(*shape).astype(np.float32)
    else:
      inp_array = np.random.rand(*shape).astype(np.float16)
    inp = constant_op.constant(
        list(inp_array.ravel(order="C")), shape=shape, dtype=dtype)
    lrn_op = nn.local_response_normalization(
        inp,
        name="lrn",
        depth_radius=lrn_depth_radius,
        bias=bias,
        alpha=alpha,
        beta=beta)
    err = gradient_checker.compute_gradient_error(inp, shape, lrn_op, shape)
    print("LRN Gradient error for bias ", bias, "alpha ", alpha, " beta ",
          beta, " is ", err)
    if dtype == dtypes.float32:
      self.assertLess(err, 1e-4)
    else:
      self.assertLess(err, 1.0)
def testGradientsEmbeddingLookupSparse(self):
  vocab_size = 12
  batch_size = 4
  param_shape = [2, 3]
  sp_ids, sp_weights, _, _, _ = (self._RandomIdsAndWeights(
      batch_size, vocab_size))
  for num_shards, combiner, dtype, ignore_weights in itertools.product(
      [1, 3], ["sum", "mean", "sqrtn"], [dtypes.float32, dtypes.float64],
      [True, False]):
    with self.test_session():
      x, params, _ = _EmbeddingParams(
          num_shards, vocab_size, shape=param_shape, dtype=dtype)
      y = embedding_ops.embedding_lookup_sparse(
          x,
          sp_ids,
          None if ignore_weights else sp_weights,
          combiner=combiner)
      x_name = [_PName(i) for i in range(num_shards)]
      x_init_value = [params[x_n + ":0"] for x_n in x_name]
      x_shape = [i.shape for i in x_init_value]
      y_shape = [batch_size] + list(params[_PName(0) + ":0"].shape[1:])
      err = gradient_checker.compute_gradient_error(
          x, x_shape, y, y_shape, x_init_value=x_init_value)
      self.assertLess(err, 1e-5 if dtype == dtypes.float64 else 2e-3)
def testGradient(self):
  x_shape = [5, 10]
  x_np = np.random.randn(*x_shape).astype(np.float64)
  z_np = np.random.randint(0, 5, size=x_shape).astype(np.float64)
  with self.test_session():
    x_tf = constant_op.constant(x_np)
    y_tf = nn_impl.log_poisson_loss(z_np, x_tf, compute_full_loss=False)
    y_tf_stirling = nn_impl.log_poisson_loss(
        z_np, x_tf, compute_full_loss=True)
    err = gradient_checker.compute_gradient_error(x_tf, x_shape, y_tf,
                                                  x_shape)
    err_stirling = gradient_checker.compute_gradient_error(
        x_tf, x_shape, y_tf_stirling, x_shape)
    eps = 1e-6
    self.assertLess(err, eps)
    self.assertLess(err_stirling, eps)
def testGradientDilatedConv(self):
  if test.is_gpu_available(cuda_only=True):
    with self.test_session(use_gpu=True):
      for padding in ["SAME", "VALID"]:
        for stride in [1, 2]:
          np.random.seed(1)
          in_shape = [5, 8, 6, 4]
          in_val = constant_op.constant(
              2 * np.random.random_sample(in_shape) - 1,
              dtype=dtypes.float32)
          filter_shape = [3, 3, 4, 6]
          # Make a convolution op with the current settings,
          # just to easily get the shape of the output.
          conv_out = nn_ops.conv2d(
              in_val,
              array_ops.zeros(filter_shape),
              dilations=[1, 2, 2, 1],
              strides=[1, stride, stride, 1],
              padding=padding)
          out_backprop_shape = conv_out.get_shape().as_list()
          out_backprop_val = constant_op.constant(
              2 * np.random.random_sample(out_backprop_shape) - 1,
              dtype=dtypes.float32)
          output = nn_ops.conv2d_backprop_filter(
              in_val,
              filter_shape,
              out_backprop_val,
              dilations=[1, 2, 2, 1],
              strides=[1, stride, stride, 1],
              padding=padding)
          err = gradient_checker.compute_gradient_error(
              [in_val, out_backprop_val], [in_shape, out_backprop_shape],
              output, filter_shape)
          print("conv2d_backprop_filter gradient err = %g " % err)
          err_tolerance = 2e-3
          self.assertLess(err, err_tolerance)
def testGradient(self):
  with self.test_session() as sess:
    l = constant_op.constant(
        [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.5],
        shape=[3, 4],
        dtype=dtypes.float64,
        name="l")
    f = constant_op.constant(
        [0.1, 0.2, 0.3, 0.4, 0.1, 0.4, 0.9, 1.6, 0.1, 0.8, 2.7, 6.4],
        shape=[3, 4],
        dtype=dtypes.float64,
        name="f")
    x = nn_ops.softmax_cross_entropy_with_logits(
        labels=l, logits=f, name="xent")
    err = gradient_checker.compute_gradient_error(f, [3, 4], x, [3])
    # Check that no extra computation is performed. When only the first
    # derivative is requested, the second derivative must not be computed,
    # so the graph should contain no `BatchMatMul` op.
    op_names = [
        op.op_def.name for op in sess.graph.get_operations() if op.op_def
    ]
    self.assertNotIn("BatchMatMul", op_names)
    print("cross entropy gradient err = ", err)
    self.assertLess(err, 5e-8)
def testDifferentTensorShapesThroughGradientError(self):
  pseudo_random = True
  overlapping = True
  pooling_ratio = [1, math.sqrt(3), math.sqrt(2), 1]
  for num_batches in [1, 2]:
    for num_rows in [5, 13]:
      for num_cols in [5, 11]:
        for num_channels in [1, 3]:
          input_shape = (num_batches, num_rows, num_cols, num_channels)
          input_data = self._GenerateUniqueRandomInputTensor(input_shape)
          # Add some randomness to make input_data not so 'integer'
          input_data += self._PRNG.random_sample(input_shape)
          with self.cached_session() as _:
            input_tensor = constant_op.constant(input_data, shape=input_shape)
            output_tensor, unused_a, unused_b = nn_ops.fractional_max_pool_v2(
                input_tensor,
                pooling_ratio,
                pseudo_random=pseudo_random,
                overlapping=overlapping,
                seed=self._SEED)
            output_data = self.evaluate(output_tensor)
            output_shape = output_data.shape
            # error_margin and delta setting is similar to max_pool_grad.
            error_margin = 1e-3
            gradient_error = gradient_checker.compute_gradient_error(
                input_tensor,
                input_shape,
                output_tensor,
                output_shape,
                x_init_value=input_data.reshape(input_shape),
                delta=1e-2)
            self.assertLess(gradient_error, error_margin)
def testGradientsAxis0(self):
  for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
    data = np.random.randn(*shape)
    shapes = [shape[1:]] * shape[0]
    for i in xrange(shape[0]):
      with self.test_session(use_gpu=True):
        x = constant_op.constant(data)
        cs = array_ops.unpack(x, num=shape[0])
        err = gradient_checker.compute_gradient_error(x, shape, cs[i],
                                                      shapes[i])
        self.assertLess(err, 1e-6)
        cs = array_ops.unstack(x, num=shape[0])
        err = gradient_checker.compute_gradient_error(x, shape, cs[i],
                                                      shapes[i])
        self.assertLess(err, 1e-6)
def testSecondGradient(self):
  with self.test_session() as sess:
    l = constant_op.constant(
        [
            0.0, 0.0, 1.0 / 3, 0.0, 1.0 / 3, 0.0, 0.0, 0.0, 0.0, 0.5 / 3, 0.0,
            0.5 / 3
        ],
        shape=[12],
        dtype=dtypes.float64,
        name="l")
    f = constant_op.constant(
        [0.1, 0.2, 0.3, 0.4, 0.1, 0.4, 0.9, 1.6, 0.1, 0.8, 2.7, 6.4],
        shape=[12],
        dtype=dtypes.float64,
        name="f")
    x = nn_ops.softmax_cross_entropy_with_logits(
        labels=l, logits=f, name="xent")
    loss = math_ops.reduce_sum(x)
    gradients = gradients_impl.gradients(loss, [f])[0]
    err = gradient_checker.compute_gradient_error(f, [12], gradients, [12])
    # Check that the second derivative is calculated: the xentropy gradient
    # implementation introduces a `BatchMatMul` op into the graph when the
    # second derivative is computed.
    op_names = [
        op.op_def.name for op in sess.graph.get_operations() if op.op_def
    ]
    self.assertIn("BatchMatMul", op_names)
    print("cross entropy hessian err = ", err)
    self.assertLess(err, 5e-8)
def run_test(self, x, y):
  with self.test_session():
    error = gradient_checker.compute_gradient_error(
        x, x.get_shape().as_list(), y, y.get_shape().as_list())
    self.assertLess(error, 1e-3)
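# run_test above is a small reusable checker. A hedged example of how a test
# method might call it; the math_ops.square op here is just an illustration,
# not taken from the original file:
def testSquareGradient(self):
  x = constant_op.constant([1.0, 2.0, 3.0], dtype=dtypes.float32)
  y = math_ops.square(x)
  self.run_test(x, y)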
def _test_grad_different_shape(self, input_shape, output_shape):
  with self.cached_session():
    test_image_shape = input_shape
    test_image = np.random.randn(*test_image_shape)
    test_image_tensor = constant_op.constant(
        test_image, shape=test_image_shape)
    test_transform = image_ops.angles_to_projective_transforms(
        np.pi / 2, 4, 4)
    if len(output_shape) == 2:
      resize_shape = output_shape
    elif len(output_shape) == 3:
      resize_shape = output_shape[0:2]
    elif len(output_shape) == 4:
      resize_shape = output_shape[1:3]
    output = image_ops.transform(
        images=test_image_tensor,
        transforms=test_transform,
        output_shape=resize_shape)
    left_err = gradient_checker.compute_gradient_error(
        test_image_tensor,
        test_image_shape,
        output,
        output_shape,
        x_init_value=test_image)
    self.assertLess(left_err, 1e-10)
def testAllInputOptionsThroughGradientError(self):
  input_shape = (1, 7, 13, 1)
  input_data = self._GenerateUniqueRandomInputTensor(input_shape)
  # Add some randomness to make input_data not so 'integer'
  input_data += self._PRNG.random_sample(input_shape)
  pooling_ratio = [1, math.sqrt(2), math.sqrt(3), 1]
  for pseudo_random in True, False:
    for overlapping in True, False:
      with self.cached_session() as _:
        input_tensor = constant_op.constant(input_data, shape=input_shape)
        output_tensor, unused_a, unused_b = nn_ops.fractional_max_pool_v2(
            input_tensor,
            pooling_ratio,
            pseudo_random=pseudo_random,
            overlapping=overlapping,
            seed=self._SEED)
        output_data = self.evaluate(output_tensor)
        output_shape = output_data.shape
        # error_margin and delta setting is similar to max_pool_grad.
        error_margin = 1e-3
        gradient_error = gradient_checker.compute_gradient_error(
            input_tensor,
            input_shape,
            output_tensor,
            output_shape,
            x_init_value=input_data.reshape(input_shape),
            delta=1e-2)
        self.assertLess(gradient_error, error_margin)
def testSequenceToSequenceGradient(self):
  with self.test_session():
    size = (17, 1, 15)
    output_size = (17, 1, 8)
    inputs = constant_op.constant(_rand(*size))
    outputs = lstm1d.ndlstm_base(inputs, 8, dynamic=False)
    variables.global_variables_initializer().run()
    gradients = gradients_impl.gradients(outputs, inputs)
    if 1:  # pylint: disable=using-constant-test
      gradients = gradients_impl.gradients(outputs, inputs)[0].eval()
      self.assertEqual(gradients.shape, size)
    else:
      # TODO(tmb) tf.test.compute_gradient error is currently broken id:911 gh:912
      # with dynamic_rnn. Enable this test case eventually.
      err = gradient_checker.compute_gradient_error(
          inputs, size, outputs, output_size, delta=1e-4)
      self.assert_(not np.isnan(err))
      self.assert_(err < 0.1)
def testGradGradGrad(self):
  with self.cached_session():
    x = constant_op.constant(
        [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
        shape=[2, 5],
        name="x")
    y = nn_ops.softplus(x, name="softplus")
    (grad,) = gradients_impl.gradients(y, x)
    (grad_grad,) = gradients_impl.gradients(grad, x)
    x_init = np.asarray(
        [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
        dtype=np.float32,
        order="F")
    err = gradient_checker.compute_gradient_error(
        x, [2, 5], grad_grad, [2, 5], x_init_value=x_init)
    print("softplus (float) third-order gradient err = ", err)
    self.assertLess(err, 5e-5)
def testGradient(self):
  x_shape = [2, 3, 4, 3, 2]
  f_shape = [3, 3, 3, 2, 2]
  y_shape = [2, 6, 8, 6, 2]
  strides = [1, 2, 2, 2, 1]
  np.random.seed(1)  # Make it reproducible.
  x_val = np.random.random_sample(x_shape).astype(np.float64)
  f_val = np.random.random_sample(f_shape).astype(np.float64)
  with self.test_session(), self.test_scope():
    x = constant_op.constant(x_val, name="x", dtype=dtypes.float32)
    f = constant_op.constant(f_val, name="f", dtype=dtypes.float32)
    output = nn_ops.conv3d_transpose(
        x, f, y_shape, strides=strides, padding="SAME")
    err = gradient_checker.compute_gradient_error(
        [x, f], [x_shape, f_shape], output, y_shape)
    print("conv3d_transpose gradient err = %g " % err)
    err_tolerance = 0.0005
    self.assertLess(err, err_tolerance)
def _testGrad(self, shape, dtype=None, max_error=None, bias=None, sigma=None):
  np.random.seed(7)
  if dtype in (dtypes.complex64, dtypes.complex128):
    value = math_ops.complex(
        self._biasedRandN(shape, bias=bias, sigma=sigma),
        self._biasedRandN(shape, bias=bias, sigma=sigma))
  else:
    value = ops.convert_to_tensor(
        self._biasedRandN(shape, bias=bias), dtype=dtype)
  with self.test_session(use_gpu=True):
    output = math_ops.abs(value)
    error = gradient_checker.compute_gradient_error(
        value, shape, output, output.get_shape().as_list())
    self.assertLess(error, max_error)
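# _biasedRandN is another class helper not shown here. A hedged sketch,
# assuming it draws normal samples, scales them by sigma, and pushes them
# `bias` away from zero so the non-differentiable point of abs at 0 is
# avoided; the real helper may differ:
def _biasedRandN(self, shape, bias=0.1, sigma=1.0):
  """Returns normal samples shifted at least `bias` away from zero."""
  value = np.random.randn(*shape) * sigma
  return value + np.sign(value) * bias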
def _test_grad(self, shape_to_test):
  with self.cached_session():
    test_image_shape = shape_to_test
    test_image = np.random.randn(*test_image_shape)
    test_image_tensor = constant_op.constant(
        test_image, shape=test_image_shape)
    test_transform = image_ops.angles_to_projective_transforms(
        np.pi / 2, 4, 4)
    output_shape = test_image_shape
    output = image_ops.transform(test_image_tensor, test_transform)
    left_err = gradient_checker.compute_gradient_error(
        test_image_tensor,
        test_image_shape,
        output,
        output_shape,
        x_init_value=test_image)
    self.assertLess(left_err, 1e-10)
def testGradFromResizeToSmallerInBothDims(self):
  in_shape = [1, 4, 6, 1]
  out_shape = [1, 2, 3, 1]
  x = np.arange(0, 24).reshape(in_shape).astype(np.float32)
  for align_corners in [True, False]:
    input_tensor = constant_op.constant(x, shape=in_shape)
    resize_out = image_ops.resize_bicubic(
        input_tensor, out_shape[1:3], align_corners=align_corners)
    with self.cached_session():
      err = gradient_checker.compute_gradient_error(
          input_tensor, in_shape, resize_out, out_shape, x_init_value=x)
      self.assertLess(err, 1e-3)
def testGradientLabelWithV2(self):
  with self.test_session():
    l = constant_op.constant(
        [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.5],
        shape=[3, 4],
        dtype=dtypes.float64,
        name="l")
    f = constant_op.constant(
        [0.1, 0.2, 0.3, 0.4, 0.1, 0.4, 0.9, 1.6, 0.1, 0.8, 2.7, 6.4],
        shape=[3, 4],
        dtype=dtypes.float64,
        name="f")
    x = nn_ops.softmax_cross_entropy_with_logits_v2(
        labels=l, logits=f, name="xent")
    err = gradient_checker.compute_gradient_error(l, [3, 4], x, [3])
    self.assertLess(err, 5e-8)
def testGradGradFloat64(self):
  with self.test_session():
    x = constant_op.constant(
        [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
        shape=[2, 5],
        dtype=dtypes.float64,
        name="x")
    y = nn_ops.relu(x, name="relu")
    z = gradients_impl.gradients(y, x)
    x_init = np.asarray(
        [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
        dtype=np.float64,
        order="F")
    err = gradient_checker.compute_gradient_error(
        x, [2, 5], z[0], [2, 5], x_init_value=x_init)
    print("relu (float64) gradient of gradient err = ", err)
    self.assertLess(err, 1e-10)
def testGradientsEmbeddingLookupWithComputedParams(self):
  vocab_size = 9
  num_ids = 5
  id_vals = list(np.random.randint(vocab_size, size=num_ids))
  tf_logging.vlog(1, id_vals)
  for num_shards in [1, 3]:
    with self.cached_session():
      ids = constant_op.constant(id_vals, dtype=dtypes.int32)
      x, params, _ = _EmbeddingParams(num_shards, vocab_size, shape=[2])
      # This will force a conversion from IndexedSlices to Tensor.
      x_squared = [math_ops.square(elem) for elem in x]
      y = embedding_ops.embedding_lookup(x_squared, ids)
      y_shape = [num_ids] + list(params[_PName(0) + ":0"].shape[1:])
      x_name = [_PName(i) for i in range(num_shards)]
      x_init_value = [params[x_n + ":0"] for x_n in x_name]
      x_shape = [i.shape for i in x_init_value]
      err = gradient_checker.compute_gradient_error(
          x, x_shape, y, y_shape, x_init_value=x_init_value)
      self.assertLess(err, 1e-3)
def testGradientsEmbeddingLookup(self):
  vocab_size = 9
  num_ids = 10
  id_vals = list(np.random.randint(vocab_size, size=num_ids))
  tf_logging.vlog(1, id_vals)
  for ids_shape in [(10,), (2, 5)]:
    for num_shards in [1, 3]:
      with self.cached_session():
        ids = constant_op.constant(
            id_vals, shape=ids_shape, dtype=dtypes.int32)
        x, params, _ = _EmbeddingParams(num_shards, vocab_size, shape=[2])
        y = embedding_ops.embedding_lookup(x, ids)
        y_shape = ids_shape + tuple(params[_PName(0) + ":0"].shape[1:])
        x_name = [_PName(i) for i in range(num_shards)]
        x_init_value = [params[x_n + ":0"] for x_n in x_name]
        x_shape = [i.shape for i in x_init_value]
        err = gradient_checker.compute_gradient_error(
            x, x_shape, y, y_shape, x_init_value=x_init_value)
        self.assertLess(err, 1e-4)
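# The embedding tests above rely on _EmbeddingParams and _PName helpers
# defined elsewhere in their file. A much-simplified, hypothetical sketch
# (the real helper shards a variable-sized vocabulary and supports more
# options, and may build variables rather than constants):
def _PName(param_id):
  return "p" + str(param_id)


def _EmbeddingParams(num_shards, vocab_size, shape=None,
                     dtype=dtypes.float32):
  shape = shape or [10]
  p = []           # Shard tensors, looked up by embedding_lookup.
  params = {}      # Maps "<shard name>:0" to the shard's initial numpy value.
  feed_dict = {}   # Unused in this sketch.
  for i in range(num_shards):
    # Round-robin sharding: earlier shards get the remainder rows.
    shard_rows = vocab_size // num_shards + (
        1 if i < vocab_size % num_shards else 0)
    param_name = _PName(i)
    init = np.random.rand(*([shard_rows] + shape)).astype(
        dtype.as_numpy_dtype)
    shard = constant_op.constant(init, name=param_name, dtype=dtype)
    p.append(shard)
    params[param_name + ":0"] = init
  return p, params, feed_dict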
def _test_gradient_against_estimate(self, dtype, random, use_gpu):
  """Check sparsemax-loss Rop against estimated-loss Rop."""
  z = random.uniform(low=-3, high=3, size=(test_obs, 10)).astype(dtype)
  q = np.zeros((test_obs, 10)).astype(dtype)
  q[np.arange(0, test_obs), np.random.randint(0, 10, size=test_obs)] = 1
  logits = array_ops.placeholder(dtype, name='z')
  sparsemax_op = sparsemax(logits)
  loss_op = sparsemax_loss(logits, sparsemax_op, q)
  with self.test_session(use_gpu=use_gpu):
    err = gradient_checker.compute_gradient_error(
        logits, z.shape, loss_op, (test_obs,), x_init_value=z, delta=1e-9)
    self.assertLess(err, 1e-4)
def testIndexedSlicesConcatDim1Grad_UnknownInputDim(self):
  x_shapes = [[20, 7, 3], [20, 3, 3], [20, 1, 3]]
  output_shape = [4, 11, 3]
  with self.test_session():
    x_1 = array_ops.placeholder(dtypes.float64)
    x_2 = array_ops.placeholder(dtypes.float64)
    x_3 = array_ops.placeholder(dtypes.float64)
    xs = [x_1, x_2, x_3]
    x_concat = array_ops.concat(xs, 1)
    output = array_ops.gather(x_concat, [1, 2, 0, 5])
    params = {
        x_1: np.random.random_sample(x_shapes[0]).astype(np.float64),
        x_2: np.random.random_sample(x_shapes[1]).astype(np.float64),
        x_3: np.random.random_sample(x_shapes[2]).astype(np.float64)
    }
    err = gradient_checker.compute_gradient_error(
        xs, x_shapes, output, output_shape, extra_feed_dict=params)
    self.assertLess(err, 1e-11)
def testGradientDilatedConv(self):
  if test.is_gpu_available(cuda_only=True):
    with self.session():
      for padding in [
          "SAME", "VALID", [(0, 0), (3, 5), (2, 1), (0, 0)],
          [(0, 0), (5, 2), (5, 1), (0, 0)]
      ]:
        for stride in [1, 2]:
          np.random.seed(1)
          in_shape = [5, 8, 6, 4]
          in_val = constant_op.constant(
              2 * np.random.random_sample(in_shape) - 1,
              dtype=dtypes.float32)
          filter_shape = [3, 3, 4, 6]
          # Make a convolution op with the current settings,
          # just to easily get the shape of the output.
          conv_out = nn_ops.conv2d(
              in_val,
              array_ops.zeros(filter_shape),
              dilations=[1, 2, 2, 1],
              strides=[1, stride, stride, 1],
              padding=padding)
          out_backprop_shape = conv_out.get_shape().as_list()
          out_backprop_val = constant_op.constant(
              2 * np.random.random_sample(out_backprop_shape) - 1,
              dtype=dtypes.float32)
          output = nn_ops.conv2d_backprop_filter(
              in_val,
              filter_shape,
              out_backprop_val,
              dilations=[1, 2, 2, 1],
              strides=[1, stride, stride, 1],
              padding=padding)
          err = gradient_checker.compute_gradient_error(
              [in_val, out_backprop_val], [in_shape, out_backprop_shape],
              output, filter_shape)
          print("conv2d_backprop_filter gradient err = %g " % err)
          err_tolerance = 1e-2
          self.assertLess(err, err_tolerance)
def _impl_test_batch_major_fo_pool_grad(self, FT, tolerance):
  shape = (3, 5, 2)
  np_args = [
      np.random.random(size=shape).astype(FT),
      np.random.uniform(0, 1, size=shape).astype(FT),
      np.random.random(size=(shape[0], shape[-1])).astype(FT)
  ]
  with tf.Session() as S:
    tf_args = [
        constant_op.constant(arg, shape=arg.shape, dtype=FT)
        for arg in np_args
    ]
    y = tf.reduce_sum(batch_major_fo_pool_unsliced(*tf_args))
    for d in ["cpu"] + self.gpu_devs:
      with tf.device(d):
        err = gradient_checker.compute_gradient_error(
            tf_args, [arg.shape for arg in np_args], y, [],
            x_init_value=np_args)
        self.assertLess(err, tolerance)
def testNestedGather(self):
  np.random.seed(5)  # Fix seed to avoid flakiness.
  with self.cached_session():
    p_shape = (8, 2)
    p_size = 16
    index_values = [1, 3, 5, 6]
    index_values2 = [0, 2]
    y2_shape = [2, 2]
    params = constant_op.constant(
        np.arange(p_size).astype(np.float64), shape=p_shape, name="p")
    indices = constant_op.constant(index_values, name="i")
    y = array_ops.gather(params, indices, name="y")
    indices2 = constant_op.constant(index_values2, name="i2")
    y2 = array_ops.gather(y, indices2, name="y2")
    error = gradient_checker.compute_gradient_error(params, p_shape, y2,
                                                    y2_shape)
    tf_logging.info("nested gather error = %f", error)
    self.assertLess(error, 1e-4)
def testGradient(self, in_shape, ksizes, strides):
  # Set graph seed for determinism.
  random_seed = 42
  random_seed_lib.set_random_seed(random_seed)
  with self.cached_session():
    np.random.seed(random_seed)
    in_val = constant_op.constant(
        np.random.random(in_shape), dtype=dtypes.float32)
    for padding in ['VALID', 'SAME']:
      out_val = array_ops.extract_volume_patches(in_val, ksizes, strides,
                                                 padding)
      out_shape = out_val.get_shape().as_list()
      err = gradient_checker.compute_gradient_error(in_val, in_shape,
                                                    out_val, out_shape)
      print('extract_volume_patches gradient err: %.4e' % err)
      self.assertLess(err, 1e-4)
def _testGradient(self, x_value_list, x_shape, lattice_sizes, y_shape,
                  is_hypercube):
  """Compute the numerical gradients, and check the error."""
  for x_value in x_value_list:
    with self.test_session(use_gpu=False):
      x = array_ops.placeholder(
          dtype=dtypes.float32, shape=x_shape, name="x")
      x_init_value = np.asarray(x_value, dtype=np.float32)
      if is_hypercube:
        y = lattice_ops.hypercube_interpolation(
            x, lattice_sizes=lattice_sizes)
      else:
        y = lattice_ops.simplex_interpolation(x, lattice_sizes=lattice_sizes)
      error = gradient_checker.compute_gradient_error(
          x, x_shape, y, y_shape, x_init_value=x_init_value)
      tf_logging.info("x_init_value = %s" % x_init_value)
      tf_logging.info("x error = %f", error)
      self.assertTrue(error < 1e-4)
def _testBatchNormGradient(self, param_index, tag, scale_after_normalization,
                           shift_after_normalization, version,
                           err_tolerance=1e-11):
  x_shape = [3, 5, 4, 5]
  param_shape = [5]
  np.random.seed(1)  # Make it reproducible.
  x_val = np.random.random_sample(x_shape).astype(np.float64)
  m_val = np.random.random_sample(param_shape).astype(np.float64)
  v_val = np.random.random_sample(param_shape).astype(np.float64)
  beta_val = np.random.random_sample(param_shape).astype(np.float64)
  gamma_val = np.random.random_sample(param_shape).astype(np.float64)
  with self.test_session():
    x = constant_op.constant(x_val, name="x")
    m = constant_op.constant(m_val, name="m")
    v = constant_op.constant(v_val, name="v")
    beta = constant_op.constant(beta_val, name="beta")
    gamma = constant_op.constant(gamma_val, name="gamma")
    epsilon = 0.001
    if version == 1:
      output = self._tfBatchNormV1(x, m, v, beta, gamma, epsilon,
                                   scale_after_normalization)
    elif version == 2:
      output = self._tfBatchNormV2(x, m, v, beta, gamma, epsilon,
                                   scale_after_normalization,
                                   shift_after_normalization)
    else:
      print("Invalid version", version)
      raise ValueError()
    all_params = [x, m, v, beta, gamma]
    all_shapes = [x_shape, param_shape, param_shape, param_shape, param_shape]
    err = gradient_checker.compute_gradient_error(all_params[param_index],
                                                  all_shapes[param_index],
                                                  output, x_shape)
    print("Batch normalization v%d %s gradient %s scale and %s shift err = " %
          (version, tag, "with" if scale_after_normalization else "without",
           "with" if shift_after_normalization else "without"), err)
    self.assertLess(err, err_tolerance)
def _VerifyInput1(self, transpose_a, transpose_b):
  shape_x = [3, 2]
  shape_y = [2, 4]
  if transpose_a:
    shape_x = list(reversed(shape_x))
  if transpose_b:
    shape_y = list(reversed(shape_y))
  with self.test_session(use_gpu=False):
    x = constant_op.constant(
        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
        shape=shape_x,
        dtype=dtypes.float64,
        name="x")
    y = constant_op.constant(
        [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7],
        shape=shape_y,
        dtype=dtypes.float64,
        name="y")
    m = math_ops.matmul(x, y, transpose_a, transpose_b, name="matmul")
    err = gradient_checker.compute_gradient_error(y, shape_y, m, [3, 4])
    print("matmul input1 gradient err = ", err)
    self.assertLess(err, 1e-10)
def testGradient(self):
  # Set graph seed for determinism.
  random_seed = 42
  random_seed_lib.set_random_seed(random_seed)
  with self.cached_session():
    for test_case in self._TEST_CASES:
      np.random.seed(random_seed)
      in_shape = test_case['in_shape']
      in_val = constant_op.constant(
          np.random.random(in_shape), dtype=dtypes.float32)
      for padding in ['VALID', 'SAME']:
        out_val = array_ops.extract_image_patches(in_val,
                                                  test_case['ksizes'],
                                                  test_case['strides'],
                                                  test_case['rates'],
                                                  padding)
        out_shape = out_val.get_shape().as_list()
        err = gradient_checker.compute_gradient_error(in_val, in_shape,
                                                      out_val, out_shape)
        self.assertLess(err, 1e-4)
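# self._TEST_CASES above is assumed to be a class-level list of dicts with
# 'in_shape', 'ksizes', 'strides', and 'rates' keys. A hedged, illustrative
# example of what such a table might contain; the real values may differ:
_TEST_CASES = [
    {
        'in_shape': [2, 5, 5, 3],
        'ksizes': [1, 1, 1, 1],
        'strides': [1, 2, 3, 1],
        'rates': [1, 1, 1, 1],
    },
    {
        'in_shape': [2, 7, 7, 3],
        'ksizes': [1, 3, 3, 1],
        'strides': [1, 1, 1, 1],
        'rates': [1, 2, 2, 1],
    },
]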
def testGradient(self):
  with self.cached_session():
    for padding in [
        "SAME", "VALID", [(0, 0), (1, 2), (3, 4), (0, 0)],
        [(0, 0), (0, 3), (4, 2), (0, 0)]
    ]:
      for stride in [1, 2]:
        np.random.seed(1)
        in_shape = [5, 8, 6, 4]
        in_val = constant_op.constant(
            2 * np.random.random_sample(in_shape) - 1, dtype=dtypes.float32)
        filter_shape = [3, 3, 4, 6]
        # Make a convolution op with the current settings, just to easily get
        # the shape of the output.
        conv_out = nn_ops.conv2d(
            in_val,
            array_ops.zeros(filter_shape),
            strides=[1, stride, stride, 1],
            padding=padding)
        out_backprop_shape = conv_out.get_shape().as_list()
        out_backprop_val = constant_op.constant(
            2 * np.random.random_sample(out_backprop_shape) - 1,
            dtype=dtypes.float32)
        output = nn_ops.conv2d_backprop_filter(
            in_val,
            filter_shape,
            out_backprop_val,
            strides=[1, stride, stride, 1],
            padding=padding)
        err = gradient_checker.compute_gradient_error(
            [in_val, out_backprop_val], [in_shape, out_backprop_shape],
            output, filter_shape)
        print("conv2d_backprop_filter gradient err = %g " % err)
        err_tolerance = 3e-2 if test.is_gpu_available() else 2e-3
        self.assertLess(
            err,
            err_tolerance,
            msg="padding={0},stride={1},".format(str(padding), stride))
def testGradient(self):
  with self.session(use_gpu=True) as sess:
    l = constant_op.constant([3, 0, 1], name="l")
    f = constant_op.constant(
        [0.1, 0.2, 0.3, 0.4, 0.1, 0.4, 0.9, 1.6, 0.1, 0.8, 2.7, 6.4],
        shape=[3, 4],
        dtype=dtypes.float64,
        name="f")
    x = nn_ops.sparse_softmax_cross_entropy_with_logits(
        labels=l, logits=f, name="xent")
    err = gradient_checker.compute_gradient_error(f, [3, 4], x, [3])
    # Check that no extra computation is performed. When only the first
    # derivative is requested, the second derivative must not be computed,
    # so the graph should contain no `BatchMatMul` op.
    op_names = [
        op.op_def.name for op in sess.graph.get_operations() if op.op_def
    ]
    self.assertNotIn("BatchMatMul", op_names)
    self.assertNotIn("BatchMatMulV2", op_names)
    self.assertLess(err, 5e-8)
def testGradient(self):
  with self.session():
    # Input: [batch, height, width, input_depth]
    x_shape = [2, 5, 6, 2]
    # Filter: [kernel_height, kernel_width, input_depth, output_depth]
    f_shape = [3, 3, 2, 2]
    # Output: [batch, height, width, output_depth]
    y_shape = [2, 5, 6, 2]
    np.random.seed(1)  # Make it reproducible.
    x_val = np.random.random_sample(x_shape).astype(np.float32)
    f_val = np.random.random_sample(f_shape).astype(np.float32)
    x = constant_op.constant(x_val, name="x", dtype=dtypes.float32)
    f = constant_op.constant(f_val, name="f", dtype=dtypes.float32)
    for rate in range(1, 4):
      output = nn_ops.atrous_conv2d(x, f, rate=rate, padding="SAME")
      err = gradient_checker.compute_gradient_error(
          [x, f], [x_shape, f_shape], output, y_shape)
      print("atrous_conv2d gradient err = %g " % err)
      err_tolerance = 4e-3 if test_util.is_xla_enabled() else 1e-3
      self.assertLess(err, err_tolerance)
def _testGradients(self, adjoint_a, adjoint_b, name, values_dtype,
                   indices_dtype):
  n, k, m = np.random.randint(1, 10, size=3)
  sp_t, nnz = self._randomTensor(
      [n, k],
      values_dtype,
      adjoint=adjoint_a,
      sparse=True,
      indices_dtype=indices_dtype)
  dense_t = self._randomTensor([k, m], values_dtype, adjoint=adjoint_b)
  matmul = sparse_ops.sparse_tensor_dense_matmul(
      sp_t, dense_t, adjoint_a=adjoint_a, adjoint_b=adjoint_b, name=name)
  with self.cached_session():
    dense_t_shape = [m, k] if adjoint_b else [k, m]
    sp_t_val_shape = [nnz]
    err = gradient_checker.compute_gradient_error(
        [dense_t, sp_t.values], [dense_t_shape, sp_t_val_shape], matmul,
        [n, m])
    print("%s gradient err = %s" % (name, err))
    self.assertLess(err, 1e-3)
def testAddCustomized(self):
  np.random.seed(3)  # Fix seed to avoid flakiness.
  with self.cached_session():
    # A test case for the Add operation.
    size = (2, 3)
    x1 = constant_op.constant(
        2.0, shape=size, dtype=dtypes.float64, name="x1")
    x2 = constant_op.constant(
        3.0, shape=size, dtype=dtypes.float64, name="x2")
    y = math_ops.add(x1, x2, name="y")
    # Check gradients for x2 using a special init_value and delta.
    x_init_value = np.asarray(np.arange(6, dtype=np.float64).reshape(2, 3))
    error = gradient_checker.compute_gradient_error(
        x2, size, y, size, x_init_value=x_init_value, delta=1e-2)
    tf_logging.info("x2 error = %f", error)
    self.assertLess(error, 1e-10)
def test_gradient(device):
  if device == "gpu" and visible_gpu():
    pytest.xfail("no gpu is visible")
  with NumpySeed(100):
    with tf.device('/{}:0'.format(device)):
      sprites, n_sprites, scales, offsets, backgrounds = get_data(
          random_alpha=True, squash=0.99)
      sprites_tf = constant_op.constant(sprites)
      n_sprites_tf = constant_op.constant(n_sprites)
      scales_tf = constant_op.constant(scales)
      offsets_tf = constant_op.constant(offsets)
      backgrounds_tf = constant_op.constant(backgrounds)
      images = render_sprites.render_sprites(sprites_tf, n_sprites_tf,
                                             scales_tf, offsets_tf,
                                             backgrounds_tf)
      sess = get_session()
      with sess.as_default():
        with tf.device(device):
          err = gradient_checker.compute_gradient_error(
              [sprites_tf, scales_tf, offsets_tf, backgrounds_tf],
              [sprites.shape, scales.shape, offsets.shape, backgrounds.shape],
              images,
              backgrounds.shape,
              [sprites, scales, offsets, backgrounds],
              delta=0.002)
  print("Jacobian error: {}".format(err))
  threshold = 2e-4
  assert err < threshold, (
      "Jacobian error ({}) exceeded threshold ({})".format(err, threshold))
def testSecondGradient(self):
  with self.cached_session() as sess:
    l = constant_op.constant(
        [
            0.0, 0.0, 1.0 / 3, 0.0, 1.0 / 3, 0.0, 0.0, 0.0, 0.0, 0.5 / 3, 0.0,
            0.5 / 3
        ],
        shape=[12],
        dtype=dtypes.float64,
        name="l")
    f = constant_op.constant(
        [0.1, 0.2, 0.3, 0.4, 0.1, 0.4, 0.9, 1.6, 0.1, 0.8, 2.7, 6.4],
        shape=[12],
        dtype=dtypes.float64,
        name="f")
    x = nn_ops.softmax_cross_entropy_with_logits(
        labels=l, logits=f, name="xent")
    loss = math_ops.reduce_sum(x)
    gradients = gradients_impl.gradients(loss, [f])[0]
    err = gradient_checker.compute_gradient_error(f, [12], gradients, [12])
    # Check that the second derivative is calculated: the xentropy gradient
    # implementation introduces a `BatchMatMul`/`BatchMatMulV2` op into the
    # graph when the second derivative is computed.
    op_names = [
        op.op_def.name for op in sess.graph.get_operations() if op.op_def
    ]
    if compat.forward_compatible(2019, 4, 25):
      self.assertIn("BatchMatMulV2", op_names)
    else:
      self.assertIn("BatchMatMul", op_names)
    print("cross entropy hessian err = ", err)
    self.assertLess(err, 5e-8)
def testDifferentTensorShapesThroughGradientError(self):
  pseudo_random = True
  overlapping = True
  pooling_ratio = [1, math.sqrt(3), math.sqrt(2), 1]
  for num_batches in [1, 2]:
    for num_rows in [5, 13]:
      for num_cols in [5, 11]:
        for num_channels in [1, 3]:
          input_shape = (num_batches, num_rows, num_cols, num_channels)
          input_data = self._GenerateUniqueRandomInputTensor(input_shape)
          # Add some randomness to make input_data not so 'integer'
          input_data += self._PRNG.random_sample(input_shape)
          with self.test_session() as _:
            input_tensor = constant_op.constant(input_data, shape=input_shape)
            output_tensor, unused_a, unused_b = nn_ops.fractional_max_pool(
                input_tensor,
                pooling_ratio,
                pseudo_random=pseudo_random,
                overlapping=overlapping,
                deterministic=True,
                seed=self._SEED,
                seed2=self._SEED2)
            output_data = output_tensor.eval()
            output_shape = output_data.shape
            # error_margin and delta setting is similar to max_pool_grad.
            error_margin = 1e-3
            gradient_error = gradient_checker.compute_gradient_error(
                input_tensor,
                input_shape,
                output_tensor,
                output_shape,
                x_init_value=input_data.reshape(input_shape),
                delta=1e-2)
            self.assertLess(gradient_error, error_margin)
def _test_grad_different_shape(self, input_shape, output_shape):
  with self.cached_session():
    test_image_shape = input_shape
    test_image = np.random.randn(*test_image_shape)
    test_image_tensor = tf.constant(test_image, shape=test_image_shape)
    test_transform = transform_ops.angles_to_projective_transforms(
        np.pi / 2, 4, 4)
    if len(output_shape) == 2:
      resize_shape = output_shape
    elif len(output_shape) == 3:
      resize_shape = output_shape[0:2]
    elif len(output_shape) == 4:
      resize_shape = output_shape[1:3]
    output = transform_ops.transform(
        images=test_image_tensor,
        transforms=test_transform,
        output_shape=resize_shape)
    left_err = gradient_checker.compute_gradient_error(
        test_image_tensor,
        test_image_shape,
        output,
        output_shape,
        x_init_value=test_image)
    self.assertLess(left_err, 1e-10)