def _testLearning(self, use_gradient_checker, data_format):
  channel = 3
  x_shape = [2, 2, 6, channel]
  scale_shape = [channel]
  x_val = np.random.random_sample(x_shape).astype(np.float32)
  scale_val = np.random.random_sample(scale_shape).astype(np.float32)
  offset_val = np.random.random_sample(scale_shape).astype(np.float32)
  mean_val = np.random.random_sample(scale_shape).astype(np.float32)
  var_val = np.random.random_sample(scale_shape).astype(np.float32)
  epsilon = 0.001
  data_format_src = "NHWC"
  y_ref, mean_ref, var_ref = self._reference_training(
      x_val, scale_val, offset_val, epsilon, data_format_src)

  with self.cached_session() as sess, self.test_scope():
    # To avoid constant folding
    x_val_converted = test_utils.ConvertBetweenDataFormats(
        x_val, data_format_src, data_format)
    y_ref_converted = test_utils.ConvertBetweenDataFormats(
        y_ref, data_format_src, data_format)

    t_val = array_ops.placeholder(
        np.float32, shape=x_val_converted.shape, name="x")
    scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
    offset = array_ops.placeholder(
        np.float32, shape=scale_shape, name="offset")
    y, mean, var = nn.fused_batch_norm(
        t_val,
        scale,
        offset,
        mean=None,
        variance=None,
        epsilon=epsilon,
        data_format=data_format,
        is_training=True)
    # Check gradient.
    if use_gradient_checker:
      err = gradient_checker.compute_gradient_error(
          t_val,
          x_val_converted.shape,
          y,
          x_val_converted.shape,
          extra_feed_dict={
              t_val: x_val_converted,
              scale: scale_val,
              offset: offset_val
          })
      self.assertLess(err, 1e-3)

    y_val, mean_val, var_val = sess.run([y, mean, var], {
        t_val: x_val_converted,
        scale: scale_val,
        offset: offset_val
    })
    self.assertAllClose(mean_val, mean_ref, atol=1e-3)
    self.assertAllClose(y_val, y_ref_converted, atol=1e-3)
    self.assertAllClose(var_val, var_ref, atol=1e-3)
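
# A minimal numpy sketch of what a training-mode reference such as
# `_reference_training` (called above; its body is not shown in this file)
# might compute for NHWC inputs: per-batch statistics over N, H, W followed by
# the affine transform. The name `_reference_training_sketch` and the exact
# return convention are assumptions for illustration, not the test's actual
# helper. These sketches reuse this file's `import numpy as np`.
def _reference_training_sketch(x, scale, offset, epsilon):
  mean = x.mean(axis=(0, 1, 2))  # batch mean, per channel
  var = x.var(axis=(0, 1, 2))  # biased batch variance, per channel
  y = scale * (x - mean) / np.sqrt(var + epsilon) + offset
  return y, mean, var
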
def _VerifyValues(self,
                  input_sizes=None,
                  filter_sizes=None,
                  strides=None,
                  dilations=None,
                  padding=None,
                  data_format_src="NHWC",
                  data_format_dst="NHWC",
                  expected=None):
  """Tests that tf.nn.conv2d produces the expected value.

  Args:
    input_sizes: Input tensor dimensions in [batch, input_rows, input_cols,
      input_depth].
    filter_sizes: Filter tensor dimensions in [kernel_rows, kernel_cols,
      input_depth, output_depth].
    strides: Strides.
    dilations: RHS dilations.
    padding: Padding type.
    data_format_src: Data format of the input.
    data_format_dst: Data format the input is converted to and in which the
      verification runs.
    expected: Expected output.
  """
  total_size_1 = np.prod(input_sizes)
  total_size_2 = np.prod(filter_sizes)
  x1 = np.arange(1, total_size_1 + 1, dtype=np.float32).reshape(input_sizes)
  x2 = np.arange(1, total_size_2 + 1, dtype=np.float32).reshape(filter_sizes)
  strides = [1] + strides + [1]
  if dilations is None:
    dilations = [1, 1]
  dilations = [1] + dilations + [1]

  # Convert between data formats.
  expected = test_utils.ConvertBetweenDataFormats(expected, data_format_src,
                                                  data_format_dst)
  x1 = test_utils.ConvertBetweenDataFormats(x1, data_format_src,
                                            data_format_dst)
  input_sizes = test_utils.PermuteDimsBetweenDataFormats(
      input_sizes, data_format_src, data_format_dst)
  strides = test_utils.PermuteDimsBetweenDataFormats(strides, data_format_src,
                                                     data_format_dst)
  dilations = test_utils.PermuteDimsBetweenDataFormats(
      dilations, data_format_src, data_format_dst)

  with self.test_session() as sess:
    t1 = array_ops.placeholder(dtypes.float32, shape=input_sizes)
    t2 = array_ops.placeholder(dtypes.float32, shape=filter_sizes)
    with self.test_scope():
      out = nn_ops.conv2d(
          t1,
          t2,
          strides=strides,
          padding=padding,
          data_format=data_format_dst,
          dilations=dilations)

    value = sess.run(out, {t1: x1, t2: x2})
    self.assertAllClose(expected, value, 1e-3)
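
# `test_utils.ConvertBetweenDataFormats` and `PermuteDimsBetweenDataFormats`
# are not defined in this file. As a hedged illustration of what the
# conversions above do, the NHWC -> NCHW case is just the transpose
# (0, 3, 1, 2) applied to tensors, and the same permutation applied to
# dimension lists such as strides; e.g. strides [1, 2, 2, 1] in NHWC become
# [1, 1, 2, 2] in NCHW. The sketch covers only this one direction.
def _nhwc_to_nchw_sketch(tensor):
  return np.transpose(tensor, (0, 3, 1, 2))


def _permute_dims_nhwc_to_nchw_sketch(dims):
  return [dims[0], dims[3], dims[1], dims[2]]
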
def testGradientInference(self, data_format):
  # TODO(b/64270657): Use gradient_checker here in addition to comparing with
  # this reference implementation.
  channel = 3
  x_shape = [2, 2, 6, channel]
  scale_shape = [channel]
  grad_val = np.random.random_sample(x_shape).astype(np.float32)
  x_val = np.random.random_sample(x_shape).astype(np.float32)
  scale_val = np.random.random_sample(scale_shape).astype(np.float32)
  mean_val = np.random.random_sample(scale_shape).astype(np.float32)
  var_val = np.random.random_sample(scale_shape).astype(np.float32)
  data_format_src = "NHWC"

  # Only the op under test runs inside test_scope(); the reference op below
  # must build outside it, so the session itself is not wrapped in test_scope.
  with self.session() as sess:
    grad_val_converted = test_utils.ConvertBetweenDataFormats(
        grad_val, data_format_src, data_format)
    x_val_converted = test_utils.ConvertBetweenDataFormats(
        x_val, data_format_src, data_format)

    grad = array_ops.placeholder(
        np.float32, shape=x_val_converted.shape, name="grad")
    x = array_ops.placeholder(
        np.float32, shape=x_val_converted.shape, name="x")
    mean = array_ops.placeholder(np.float32, shape=scale_shape, name="mean")
    var = array_ops.placeholder(np.float32, shape=scale_shape, name="var")
    scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
    with self.test_scope():
      out = gen_nn_ops.fused_batch_norm_grad(
          grad,
          x,
          scale,
          mean,
          var,
          data_format=data_format,
          is_training=False)
      grad_x, grad_scale, grad_offset, _, _ = out

    ref_x, ref_scale, ref_offset, _, _ = gen_nn_ops.fused_batch_norm_grad(
        grad,
        x,
        scale,
        mean,
        var,
        data_format=data_format,
        is_training=False)

    grad_x_val, grad_scale_val, grad_offset_val = sess.run(
        [grad_x, grad_scale, grad_offset], {
            grad: grad_val_converted,
            x: x_val_converted,
            mean: mean_val,
            var: var_val,
            scale: scale_val
        })
    grad_x_ref, grad_scale_ref, grad_offset_ref = sess.run(
        [ref_x, ref_scale, ref_offset], {
            grad: grad_val_converted,
            x: x_val_converted,
            mean: mean_val,
            var: var_val,
            scale: scale_val
        })
    self.assertAllClose(grad_x_val, grad_x_ref, atol=1e-2)
    self.assertAllClose(grad_scale_val, grad_scale_ref, atol=1e-2)
    self.assertAllClose(grad_offset_val, grad_offset_ref, atol=1e-3)
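
# With is_training=False the mean and variance fed to FusedBatchNormGrad are
# constants, so the gradients have a simple closed form. A hedged numpy sketch
# for NHWC inputs (illustration only; epsilon is a free parameter here, while
# the graph op above relies on its attribute's default since none is passed):
def _inference_grad_sketch(grad, x, scale, mean, var, epsilon):
  inv_std = 1.0 / np.sqrt(var + epsilon)
  grad_x = grad * scale * inv_std
  grad_scale = np.sum(grad * (x - mean) * inv_std, axis=(0, 1, 2))
  grad_offset = np.sum(grad, axis=(0, 1, 2))
  return grad_x, grad_scale, grad_offset
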
def testInference(self, data_format):
  channel = 3
  x_shape = [2, 2, 6, channel]
  scale_shape = [channel]
  x_val = np.random.random_sample(x_shape).astype(np.float32)
  scale_val = np.random.random_sample(scale_shape).astype(np.float32)
  offset_val = np.random.random_sample(scale_shape).astype(np.float32)
  epsilon = 0.001
  exponential_avg_factor = 1.0
  data_format_src = "NHWC"
  y_ref, mean_ref, var_ref, _ = self._reference_training(
      x_val, scale_val, offset_val, None, None, epsilon,
      exponential_avg_factor, data_format_src)

  with self.session() as sess, self.test_scope():
    # To avoid constant folding
    x_val_converted = test_utils.ConvertBetweenDataFormats(
        x_val, data_format_src, data_format)
    y_ref_converted = test_utils.ConvertBetweenDataFormats(
        y_ref, data_format_src, data_format)

    t_val = array_ops.placeholder(
        np.float32, shape=x_val_converted.shape, name="x")
    scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
    offset = array_ops.placeholder(
        np.float32, shape=scale_shape, name="offset")
    y, mean, variance = nn.fused_batch_norm(
        t_val,
        scale,
        offset,
        mean=mean_ref,
        variance=var_ref,
        epsilon=epsilon,
        data_format=data_format,
        is_training=False)

    y_val, _, _ = sess.run([y, mean, variance], {
        t_val: x_val_converted,
        scale: scale_val,
        offset: offset_val
    })
    self.assertAllClose(y_val, y_ref_converted, atol=1e-3)
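
# In inference mode, fused_batch_norm normalizes with the *supplied* running
# statistics (mean_ref and var_ref above) instead of computing batch
# statistics; that is the only difference from the training-mode sketch
# earlier. A one-line hedged numpy equivalent for NHWC inputs:
def _inference_forward_sketch(x, scale, offset, mean, var, epsilon):
  return scale * (x - mean) / np.sqrt(var + epsilon) + offset
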
def _VerifyValues(self,
                  input_sizes=None,
                  filter_sizes=None,
                  out_backprop_sizes=None,
                  strides=None,
                  dilations=None,
                  padding=None,
                  data_format_src="NHWC",
                  data_format_dst="NHWC",
                  expected=None):
  """Tests that gen_nn_ops.conv2d_backprop_filter produces the right output.

  Args:
    input_sizes: Input tensor dimensions in [batch, input_rows, input_cols,
      input_depth].
    filter_sizes: Filter tensor dimensions in [kernel_rows, kernel_cols,
      input_depth, output_depth].
    out_backprop_sizes: Output gradients tensor dimensions.
    strides: Strides.
    dilations: Dilations.
    padding: Padding type.
    data_format_src: Data format of the input.
    data_format_dst: Data format the input is converted to and in which the
      verification runs.
    expected: Expected output.
  """
  total_size_1 = np.prod(input_sizes)
  total_size_2 = np.prod(out_backprop_sizes)
  x1 = np.arange(1, total_size_1 + 1, dtype=np.float32).reshape(input_sizes)
  x2 = np.arange(
      1, total_size_2 + 1, dtype=np.float32).reshape(out_backprop_sizes)
  strides = [1] + strides + [1]
  if dilations is not None:
    dilations = [1] + dilations + [1]

  expected = np.reshape(expected, filter_sizes)

  # Convert between data formats.
  x1 = test_utils.ConvertBetweenDataFormats(x1, data_format_src,
                                            data_format_dst)
  x2 = test_utils.ConvertBetweenDataFormats(x2, data_format_src,
                                            data_format_dst)
  input_sizes = test_utils.PermuteDimsBetweenDataFormats(
      input_sizes, data_format_src, data_format_dst)
  out_backprop_sizes = test_utils.PermuteDimsBetweenDataFormats(
      out_backprop_sizes, data_format_src, data_format_dst)
  strides = test_utils.PermuteDimsBetweenDataFormats(
      strides, data_format_src, data_format_dst)
  if dilations is not None:
    dilations = test_utils.PermuteDimsBetweenDataFormats(
        dilations, data_format_src, data_format_dst)

  with self.cached_session() as sess:
    t1 = array_ops.placeholder(dtypes.float32, shape=input_sizes)
    t2 = array_ops.placeholder(dtypes.float32, shape=out_backprop_sizes)
    with self.test_scope():
      tensor = gen_nn_ops.conv2d_backprop_filter(
          input=t1,
          filter_sizes=filter_sizes,
          out_backprop=t2,
          strides=strides,
          dilations=dilations,
          padding=padding,
          data_format=data_format_dst)

    value = sess.run(tensor, {t1: x1, t2: x2})
    self.assertAllEqual(filter_sizes, value.shape)
    self.assertAllClose(expected, value, 1e-3)
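
# conv2d_backprop_filter computes the gradient of conv2d with respect to its
# filter. For the special case of a 1x1 filter, stride 1, VALID padding, and
# NHWC layout, that gradient reduces to a single contraction over the batch
# and spatial axes; a hedged numpy sketch of that case only (not the op's
# general kernel):
def _filter_grad_1x1_sketch(x, out_backprop):
  # x: [N, H, W, Cin], out_backprop: [N, H, W, Cout] -> [1, 1, Cin, Cout]
  grad = np.einsum("nhwi,nhwo->io", x, out_backprop)
  return grad.reshape(1, 1, grad.shape[0], grad.shape[1])
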
def testGradientTraining(self, data_format):
  # TODO(b/64270657): Use gradient_checker here in addition to comparing with
  # this reference implementation.
  channel = 3
  x_shape = [2, 2, 6, channel]
  scale_shape = [channel]
  grad_val = np.random.random_sample(x_shape).astype(np.float32)
  x_val = np.random.random_sample(x_shape).astype(np.float32)
  scale_val = np.random.random_sample(scale_shape).astype(np.float32)
  mean_val = np.random.random_sample(scale_shape).astype(np.float32)
  var_val = np.random.random_sample(scale_shape).astype(np.float32)
  epsilon = 0.001

  # The TensorFlow FusedBatchNormGrad training operation takes two inputs
  # with implementation-defined values. In theory the only correct values for
  # these inputs are the corresponding reserve_space_{1|2} outputs from the
  # FusedBatchNorm training operation. However, in practice, we rely on the
  # first one being the mean on {C|G}PU, and the second one being the
  # variance on CPU and inverse(sqrt(variance + epsilon)) on GPU (we test
  # this assumption separately).
  reserve_space_1_val = mean_val
  if self.device == "XLA_GPU":
    reserve_space_2_val = np.reciprocal(np.sqrt(var_val + epsilon))
  else:
    reserve_space_2_val = var_val

  data_format_src = "NHWC"
  grad_x_ref, grad_scale_ref, grad_offset_ref = self._reference_grad(
      x_val, grad_val, scale_val, mean_val, var_val, epsilon, data_format_src)

  with self.cached_session() as sess, self.test_scope():
    grad_val_converted = test_utils.ConvertBetweenDataFormats(
        grad_val, data_format_src, data_format)
    x_val_converted = test_utils.ConvertBetweenDataFormats(
        x_val, data_format_src, data_format)
    grad_x_ref_converted = test_utils.ConvertBetweenDataFormats(
        grad_x_ref, data_format_src, data_format)

    grad = array_ops.placeholder(
        np.float32, shape=x_val_converted.shape, name="grad")
    x = array_ops.placeholder(
        np.float32, shape=x_val_converted.shape, name="x")
    reserve_space_1 = array_ops.placeholder(
        np.float32, shape=scale_shape, name="reserve_space_1")
    reserve_space_2 = array_ops.placeholder(
        np.float32, shape=scale_shape, name="reserve_space_2")
    scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
    grad_x, grad_scale, grad_offset, _, _ = gen_nn_ops.fused_batch_norm_grad(
        grad,
        x,
        scale,
        reserve_space_1,
        reserve_space_2,
        data_format=data_format,
        is_training=True)

    grad_x_val, grad_scale_val, grad_offset_val = sess.run(
        [grad_x, grad_scale, grad_offset], {
            grad: grad_val_converted,
            x: x_val_converted,
            reserve_space_1: reserve_space_1_val,
            reserve_space_2: reserve_space_2_val,
            scale: scale_val
        })

    self.assertAllClose(grad_x_val, grad_x_ref_converted, atol=1e-2)
    self.assertAllClose(grad_scale_val, grad_scale_ref, atol=1e-2)
    self.assertAllClose(grad_offset_val, grad_offset_ref, atol=1e-3)
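
# A hedged numpy sketch of the training-mode gradients a reference like
# `_reference_grad` (called above; its body is not shown in this file) might
# implement for NHWC inputs. x_hat is the normalized input and all reductions
# run over the N, H, W axes. The helper name and signature are assumptions.
def _training_grad_sketch(x, grad, scale, mean, var, epsilon):
  inv_std = 1.0 / np.sqrt(var + epsilon)
  x_hat = (x - mean) * inv_std
  grad_offset = np.sum(grad, axis=(0, 1, 2))
  grad_scale = np.sum(grad * x_hat, axis=(0, 1, 2))
  # Backprop through the batch statistics as well as the normalization.
  grad_x = scale * inv_std * (
      grad - np.mean(grad, axis=(0, 1, 2)) -
      x_hat * np.mean(grad * x_hat, axis=(0, 1, 2)))
  return grad_x, grad_scale, grad_offset
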
def _testLearning(self, use_gradient_checker, data_format,
                  exponential_avg_factor):
  channel = 3
  x_shape = [2, 2, 6, channel]
  scale_shape = [channel]
  x_val = np.random.random_sample(x_shape).astype(np.float32)
  scale_val = np.random.random_sample(scale_shape).astype(np.float32)
  offset_val = np.random.random_sample(scale_shape).astype(np.float32)
  mean_val = np.random.random_sample(scale_shape).astype(np.float32)
  var_val_corr = np.random.random_sample(scale_shape).astype(np.float32)
  epsilon = 0.001
  data_format_src = "NHWC"
  # When in training mode, fused_batchnorm applies an implicit Bessel's
  # correction. So we have to use the corrected variance here, as well.
  y_ref, mean_ref, _, var_ref_corr = self._reference_training(
      x_val, scale_val, offset_val, mean_val, var_val_corr, epsilon,
      exponential_avg_factor, data_format_src)

  with self.session() as sess, self.test_scope():
    # To avoid constant folding
    x_val_converted = test_utils.ConvertBetweenDataFormats(
        x_val, data_format_src, data_format)
    y_ref_converted = test_utils.ConvertBetweenDataFormats(
        y_ref, data_format_src, data_format)

    t_val = array_ops.placeholder(
        np.float32, shape=x_val_converted.shape, name="x")
    scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
    offset = array_ops.placeholder(
        np.float32, shape=scale_shape, name="offset")
    if exponential_avg_factor == 1.0:
      old_mean = None
      old_var = None
    else:
      old_mean = array_ops.placeholder(
          np.float32, shape=scale_shape, name="old_mean")
      old_var = array_ops.placeholder(
          np.float32, shape=scale_shape, name="old_var")
    y, mean, var = nn.fused_batch_norm(
        t_val,
        scale,
        offset,
        mean=old_mean,
        variance=old_var,
        epsilon=epsilon,
        exponential_avg_factor=exponential_avg_factor,
        data_format=data_format,
        is_training=True)
    if exponential_avg_factor == 1.0:
      feed_dict = {
          t_val: x_val_converted,
          scale: scale_val,
          offset: offset_val,
      }
    else:
      feed_dict = {
          t_val: x_val_converted,
          scale: scale_val,
          offset: offset_val,
          old_mean: mean_val,
          old_var: var_val_corr
      }
    # Check gradient.
    if use_gradient_checker:
      err = gradient_checker.compute_gradient_error(
          t_val,
          x_val_converted.shape,
          y,
          x_val_converted.shape,
          extra_feed_dict=feed_dict)
      self.assertLess(err, 1e-3)

    y_tf, mean_tf, var_tf = sess.run([y, mean, var], feed_dict)
    self.assertAllClose(y_tf, y_ref_converted, atol=1e-3)
    self.assertAllClose(mean_tf, mean_ref, atol=1e-3)
    self.assertAllClose(var_tf, var_ref_corr, atol=1e-3)
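
# A hedged numpy sketch of the moving-statistics update implied by the
# Bessel's-correction comment in _testLearning above: the batch variance is
# rescaled by n / (n - 1) before being blended into the running value with
# exponential_avg_factor. When the factor is 1.0 the old statistics drop out
# entirely, which is why the test feeds no old mean/variance in that case.
# Names here are illustrative, not the op's internals.
def _moving_stats_update_sketch(batch_mean, batch_var, old_mean, old_var,
                                sample_count, exponential_avg_factor):
  f = exponential_avg_factor
  var_corrected = batch_var * sample_count / (sample_count - 1.0)
  new_mean = (1.0 - f) * old_mean + f * batch_mean
  new_var = (1.0 - f) * old_var + f * var_corrected
  return new_mean, new_var
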
def testGradientTraining(self, data_format):
  # TODO(b/64270657): Use gradient_checker here in addition to comparing with
  # this reference implementation.
  channel = 3
  x_shape = [2, 2, 6, channel]
  scale_shape = [channel]
  grad_val = np.random.random_sample(x_shape).astype(np.float32)
  x_val = np.random.random_sample(x_shape).astype(np.float32)
  scale_val = np.random.random_sample(scale_shape).astype(np.float32)
  mean_val = np.random.random_sample(scale_shape).astype(np.float32)
  var_val = np.random.random_sample(scale_shape).astype(np.float32)
  epsilon = 0.001
  data_format_src = "NHWC"
  grad_x_ref, grad_scale_ref, grad_offset_ref = self._reference_grad(
      x_val, grad_val, scale_val, mean_val, var_val, epsilon, data_format_src)

  # TODO(b/110530713): Support data format HWCN on GPU.
  if self.device == "XLA_GPU" and data_format == "HWCN":
    self.skipTest("GPU does not support data format HWCN.")

  with self.test_session() as sess, self.test_scope():
    grad_val_converted = test_utils.ConvertBetweenDataFormats(
        grad_val, data_format_src, data_format)
    x_val_converted = test_utils.ConvertBetweenDataFormats(
        x_val, data_format_src, data_format)
    grad_x_ref_converted = test_utils.ConvertBetweenDataFormats(
        grad_x_ref, data_format_src, data_format)

    grad = array_ops.placeholder(
        np.float32, shape=x_val_converted.shape, name="grad")
    x = array_ops.placeholder(
        np.float32, shape=x_val_converted.shape, name="x")
    mean = array_ops.placeholder(np.float32, shape=scale_shape, name="mean")
    var = array_ops.placeholder(np.float32, shape=scale_shape, name="var")
    scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
    grad_x, grad_scale, grad_offset, _, _ = gen_nn_ops.fused_batch_norm_grad(
        grad,
        x,
        scale,
        mean,
        var,
        data_format=data_format,
        is_training=True)

    grad_x_val, grad_scale_val, grad_offset_val = sess.run(
        [grad_x, grad_scale, grad_offset], {
            grad: grad_val_converted,
            x: x_val_converted,
            mean: mean_val,
            var: var_val,
            scale: scale_val
        })

    self.assertAllClose(grad_x_val, grad_x_ref_converted, atol=1e-2)
    self.assertAllClose(grad_scale_val, grad_scale_ref, atol=1e-2)
    self.assertAllClose(grad_offset_val, grad_offset_ref, atol=1e-3)