def testFoldFusedBatchNorms(self):
    """Fold FusedBatchNorm into the preceding convolution and compare outputs.

    For every (data format, device, convolution function) combination a small
    conv -> fused batch norm graph is built and evaluated, then passed through
    `optimize_for_inference_lib.fold_batch_norms`, re-imported under the
    "optimized" scope and evaluated again. The two results must agree and the
    optimized graph must not contain a "FusedBatchNorm" node.
    """
    configurations = [
        ("NHWC", False, nn_ops.conv2d),
        ("NCHW", True, nn_ops.conv2d),
        ("NHWC", False, nn_ops.depthwise_conv2d_native),
        ("NCHW", True, nn_ops.depthwise_conv2d_native),
    ]

    def float_constant(values, shape):
        # All graph inputs in this test are float32 constants.
        return constant_op.constant(
            np.array(values), shape=shape, dtype=dtypes.float32)

    # NOTE(review): every iteration reuses the op names "conv_op" and
    # "output" (and the import scope "optimized"); if all iterations build
    # into one shared default graph, the fetches below may resolve to the
    # first iteration's tensors -- confirm.
    for data_format, use_gpu, conv2d_func in configurations:
        with self.cached_session(use_gpu=use_gpu) as sess:
            if data_format == "NHWC":
                input_shape = [1, 1, 6, 2]
            else:
                input_shape = [1, 2, 1, 6]
            input_op = float_constant(
                [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6], input_shape)

            # A regular convolution maps 2 -> 2 channels; the depthwise
            # kernel uses a channel multiplier of 1.
            if conv2d_func == nn_ops.conv2d:
                weights_op = float_constant(
                    [1, 2, 3, 4, 0.1, 0.2, 0.3, 0.4], [1, 2, 2, 2])
            else:
                weights_op = float_constant([1, 2, 0.3, 0.4], [1, 2, 2, 1])

            conv_op = conv2d_func(
                input_op,
                weights_op,
                [1, 1, 1, 1],
                padding="SAME",
                data_format=data_format,
                name="conv_op")

            mean_op = float_constant([10, 20], [2])
            variance_op = float_constant([0.25, 0.5], [2])
            beta_op = float_constant([0.1, 0.6], [2])
            gamma_op = float_constant([1.0, 2.0], [2])

            # The test pins the graph's producer version to 9 before
            # creating the fused batch norm node.
            ops.get_default_graph().graph_def_versions.producer = 9
            # pylint: disable=protected-access
            gen_nn_ops._fused_batch_norm(
                conv_op,
                gamma_op,
                beta_op,
                mean_op,
                variance_op,
                0.00001,
                is_training=False,
                data_format=data_format,
                name="output")
            # pylint: enable=protected-access

            original_graph_def = sess.graph_def
            original_result = sess.run(["output:0"])

            optimized_graph_def = optimize_for_inference_lib.fold_batch_norms(
                original_graph_def)
            _ = importer.import_graph_def(
                optimized_graph_def, input_map={}, name="optimized")
            optimized_result = sess.run(["optimized/output:0"])

            self.assertAllClose(
                original_result, optimized_result, rtol=1e-04, atol=1e-06)

        # Folding must have eliminated every fused batch norm node.
        for node in optimized_graph_def.node:
            self.assertNotEqual("FusedBatchNorm", node.op)
def testFoldFusedBatchNorms(self):
    """Fold a FusedBatchNorm into its preceding conv2d and compare outputs.

    Builds a conv2d -> fused batch norm graph, evaluates it, runs
    `optimize_for_inference_lib.fold_batch_norms` on the graph definition,
    imports the result under the "optimized" scope and evaluates that too.
    Both results must agree and no "FusedBatchNorm" node may remain.
    """

    def float_constant(values, shape):
        # All graph inputs in this test are float32 constants.
        return constant_op.constant(
            np.array(values), shape=shape, dtype=dtypes.float32)

    with self.test_session() as sess:
        input_op = float_constant(
            [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6], [1, 1, 6, 2])
        weights_op = float_constant(
            [1, 2, 3, 4, 0.1, 0.2, 0.3, 0.4], [1, 2, 2, 2])
        conv_op = nn_ops.conv2d(
            input_op, weights_op, [1, 1, 1, 1], padding="SAME",
            name="conv_op")

        mean_op = float_constant([10, 20], [2])
        variance_op = float_constant([0.25, 0.5], [2])
        beta_op = float_constant([0.1, 0.6], [2])
        gamma_op = float_constant([1.0, 2.0], [2])

        # The test pins the graph's producer version to 9 before creating
        # the fused batch norm node.
        ops.get_default_graph().graph_def_versions.producer = 9
        # pylint: disable=protected-access
        gen_nn_ops._fused_batch_norm(
            conv_op,
            gamma_op,
            beta_op,
            mean_op,
            variance_op,
            0.00001,
            is_training=False,
            name="output")
        # pylint: enable=protected-access

        original_graph_def = sess.graph_def
        original_result = sess.run(["output:0"])

    optimized_graph_def = optimize_for_inference_lib.fold_batch_norms(
        original_graph_def)

    with self.test_session() as sess:
        _ = importer.import_graph_def(
            optimized_graph_def, input_map={}, name="optimized")
        optimized_result = sess.run(["optimized/output:0"])

    self.assertAllClose(
        original_result, optimized_result, rtol=1e-04, atol=1e-06)

    # Folding must have eliminated every fused batch norm node.
    for node in optimized_graph_def.node:
        self.assertNotEqual("FusedBatchNorm", node.op)
def testFoldFusedBatchNorms(self):
    """Fold FusedBatchNorm into conv2d for NHWC and NCHW and compare outputs.

    For each (data format, device) pair a conv2d -> fused batch norm graph is
    built and evaluated, then passed through
    `optimize_for_inference_lib.fold_batch_norms`, re-imported under the
    "optimized" scope and evaluated again. The results must agree and no
    "FusedBatchNorm" node may remain in the optimized graph.
    """

    def float_constant(values, shape):
        # All graph inputs in this test are float32 constants.
        return constant_op.constant(
            np.array(values), shape=shape, dtype=dtypes.float32)

    # NOTE(review): both iterations reuse the op names "conv_op" and "output"
    # (and the import scope "optimized"); if they build into one shared
    # default graph, the second iteration's fetches may resolve to the first
    # iteration's tensors -- confirm.
    for data_format, use_gpu in (("NHWC", False), ("NCHW", True)):
        with self.test_session(use_gpu=use_gpu) as sess:
            if data_format == "NHWC":
                input_shape = [1, 1, 6, 2]
            else:
                input_shape = [1, 2, 1, 6]
            input_op = float_constant(
                [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6], input_shape)
            weights_op = float_constant(
                [1, 2, 3, 4, 0.1, 0.2, 0.3, 0.4], [1, 2, 2, 2])
            conv_op = nn_ops.conv2d(
                input_op,
                weights_op,
                [1, 1, 1, 1],
                padding="SAME",
                data_format=data_format,
                name="conv_op")

            mean_op = float_constant([10, 20], [2])
            variance_op = float_constant([0.25, 0.5], [2])
            beta_op = float_constant([0.1, 0.6], [2])
            gamma_op = float_constant([1.0, 2.0], [2])

            # The test pins the graph's producer version to 9 before
            # creating the fused batch norm node.
            ops.get_default_graph().graph_def_versions.producer = 9
            # pylint: disable=protected-access
            gen_nn_ops._fused_batch_norm(
                conv_op,
                gamma_op,
                beta_op,
                mean_op,
                variance_op,
                0.00001,
                is_training=False,
                data_format=data_format,
                name="output")
            # pylint: enable=protected-access

            original_graph_def = sess.graph_def
            original_result = sess.run(["output:0"])

        optimized_graph_def = optimize_for_inference_lib.fold_batch_norms(
            original_graph_def)

        with self.test_session(use_gpu=use_gpu) as sess:
            _ = importer.import_graph_def(
                optimized_graph_def, input_map={}, name="optimized")
            optimized_result = sess.run(["optimized/output:0"])

            self.assertAllClose(
                original_result, optimized_result, rtol=1e-04, atol=1e-06)

        # Folding must have eliminated every fused batch norm node.
        for node in optimized_graph_def.node:
            self.assertNotEqual("FusedBatchNorm", node.op)
def fused_batch_norm(
    x,
    scale,
    offset,  # pylint: disable=invalid-name
    mean=None,
    variance=None,
    epsilon=0.001,
    data_format="NHWC",
    is_training=True,
    name=None):
    r"""Batch normalization.

    As described in http://arxiv.org/abs/1502.03167.

    Args:
      x: Input `Tensor` of 4 dimensions.
      scale: A `Tensor` of 1 dimension for scaling.
      offset: A `Tensor` of 1 dimension for bias.
      mean: A `Tensor` of 1 dimension for population mean used for inference.
      variance: A `Tensor` of 1 dimension for population variance
                used for inference.
      epsilon: A small float number added to the variance of x. Values of
               1e-5 or less are increased by 1e-12 before being passed to
               the kernel.
      data_format: The data format for x. Either "NHWC" (default) or "NCHW".
      is_training: A bool value to specify if the operation is used for
                   training or inference.
      name: A name for this operation (optional).

    Returns:
      y: A 4D Tensor for the normalized, scaled, offsetted x.
      batch_mean: A 1D Tensor for the mean of x.
      batch_var: A 1D Tensor for the variance of x.

    Raises:
      ValueError: If mean or variance is not None when is_training is True.
    """
    # Validate the argument combination first so that an invalid call fails
    # before any tensor conversion is attempted.
    if is_training and (mean is not None or variance is not None):
        raise ValueError("Both 'mean' and 'variance' must be None "
                         "if is_training is True.")

    x = ops.convert_to_tensor(x, name="input")
    scale = ops.convert_to_tensor(scale, name="scale")
    offset = ops.convert_to_tensor(offset, name="offset")

    # The underlying op always takes mean/variance inputs; empty constants
    # stand in for the ones the caller did not supply.
    if mean is None:
        mean = constant_op.constant([])
    if variance is None:
        variance = constant_op.constant([])

    # Add 1e-12 to epsilon when epsilon <= 1e-5 to prevent CUDNN exception.
    epsilon = epsilon if epsilon > 1e-5 else epsilon + 1e-12

    # pylint: disable=protected-access
    y, batch_mean, batch_var, _, _ = gen_nn_ops._fused_batch_norm(
        x,
        scale,
        offset,
        mean,
        variance,
        epsilon=epsilon,
        data_format=data_format,
        is_training=is_training,
        name=name)
    # pylint: enable=protected-access
    return y, batch_mean, batch_var
def fused_batch_norm(
    x,
    scale,
    offset,  # pylint: disable=invalid-name
    mean=None,
    variance=None,
    epsilon=0.001,
    data_format="NHWC",
    is_training=True,
    name=None):
    r"""Batch normalization.

    As described in http://arxiv.org/abs/1502.03167.

    Args:
      x: Input `Tensor` of 4 dimensions.
      scale: A `Tensor` of 1 dimension for scaling.
      offset: A `Tensor` of 1 dimension for bias.
      mean: A `Tensor` of 1 dimension for population mean used for inference.
      variance: A `Tensor` of 1 dimension for population variance
                used for inference.
      epsilon: A small float number added to the variance of x. Values of
               1e-5 or less are increased by 1e-12 before being passed to
               the kernel.
      data_format: The data format for x. Either "NHWC" (default) or "NCHW".
      is_training: A bool value to specify if the operation is used for
                   training or inference.
      name: A name for this operation (optional).

    Returns:
      y: A 4D Tensor for the normalized, scaled, offsetted x.
      batch_mean: A 1D Tensor for the mean of x.
      batch_var: A 1D Tensor for the variance of x.

    Raises:
      ValueError: If mean or variance is not None when is_training is True.
    """
    # Validate the argument combination first so that an invalid call fails
    # before any tensor conversion is attempted.
    if is_training and (mean is not None or variance is not None):
        raise ValueError("Both 'mean' and 'variance' must be None "
                         "if is_training is True.")

    x = ops.convert_to_tensor(x, name="input")
    scale = ops.convert_to_tensor(scale, name="scale")
    offset = ops.convert_to_tensor(offset, name="offset")

    # The underlying op always takes mean/variance inputs; empty constants
    # stand in for the ones the caller did not supply.
    if mean is None:
        mean = constant_op.constant([])
    if variance is None:
        variance = constant_op.constant([])

    # Add 1e-12 to epsilon when epsilon <= 1e-5 to prevent CUDNN exception.
    epsilon = epsilon if epsilon > 1e-5 else epsilon + 1e-12

    # pylint: disable=protected-access
    y, batch_mean, batch_var, _, _ = gen_nn_ops._fused_batch_norm(
        x,
        scale,
        offset,
        mean,
        variance,
        epsilon=epsilon,
        data_format=data_format,
        is_training=is_training,
        name=name)
    # pylint: enable=protected-access
    return y, batch_mean, batch_var
def testExpectedNaNOpOutputs(self):
    """Verify that an op with a benign NaN output can be called and evaluated.

    With the check-numerics callback enabled, fused batch norm is run on an
    input that has zero elements; the returned batch mean and batch variance
    are asserted to be NaN.
    """
    check_numerics_callback.enable_check_numerics()

    # Zero-sized leading dimension, i.e. an input tensor with no elements.
    empty_input = constant_op.constant(
        1, dtype=dtypes.float32, shape=[0, 1, 1, 1])
    unit_scale = constant_op.constant([1], dtype=dtypes.float32)
    unit_offset = constant_op.constant([1], dtype=dtypes.float32)

    # With an empty input the op is expected to yield NaN statistics without
    # tripping the check_numerics callback enabled above.
    # pylint: disable=protected-access
    fused_outputs = gen_nn_ops._fused_batch_norm(
        x=empty_input,
        scale=unit_scale,
        offset=unit_offset,
        mean=[],
        variance=[])
    # pylint: enable=protected-access

    _, batch_mean, batch_variance, _, _ = self.evaluate(fused_outputs)
    for statistic in (batch_mean, batch_variance):
        self.assertTrue(np.isnan(statistic.squeeze()))
def test_fused_batch_norm():
    """Compare DaCe's fused batch norm (forward and gradient) with TensorFlow.

    The same randomly generated inputs are run through a DaCe `TFSession` and
    a regular `tf.Session`; the norm of the difference of each output pair
    must stay below a fixed tolerance.

    Raises:
        AssertionError: If any forward or gradient output differs by more
            than its tolerance. The individual norms are printed first.
    """
    import tensorflow as tf
    from tensorflow.python.ops import gen_nn_ops
    from dace.frontend.tensorflow import TFSession

    num_channels = 3
    size = [8, 224, 224, num_channels]
    epsilon = 0.1

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    inp = tf.placeholder(tf.float32, size)
    scale = tf.placeholder(tf.float32, [num_channels])
    offset = tf.placeholder(tf.float32, [num_channels])

    # Empty mean/variance inputs are passed because is_training=True.
    y, mean, var, _, var_sqrt = gen_nn_ops._fused_batch_norm(
        inp, scale, offset, [], [], epsilon=epsilon, is_training=True)
    outputs = [y, mean, var]

    test_in = np.random.uniform(size=size).astype(np.float32)
    test_scale = np.random.uniform(size=[num_channels]).astype(np.float32)
    test_offset = np.random.uniform(size=[num_channels]).astype(np.float32)

    sess_tf = tf.Session(config=config)
    sess_dace = TFSession()

    def diff_norm(lhs, rhs):
        # Norm of the element-wise difference, evaluated in the TF session.
        return tf.linalg.norm(lhs - rhs).eval(session=sess_tf)

    def require_close(label, norms, limits, extra_diagnostic=None):
        # Print the diagnostics the test always reported, then fail loudly
        # instead of swallowing the mismatch.
        if all(norm < limit for norm, limit in zip(norms, limits)):
            return
        print(label)
        for norm in norms:
            print(norm)
        if extra_diagnostic is not None:
            print(extra_diagnostic())
        raise AssertionError(
            "%s: difference norms %s exceed tolerances %s"
            % (label, list(norms), list(limits)))

    try:
        forward_feed = {
            inp: test_in,
            scale: test_scale,
            offset: test_offset,
        }
        outputs_dace = sess_dace.run(outputs, feed_dict=forward_feed)
        outputs_tf = sess_tf.run(outputs, feed_dict=forward_feed)

        forward_norms = [
            diff_norm(expected, actual)
            for expected, actual in zip(outputs_tf, outputs_dace)
        ]
        require_close("FBN test failed", forward_norms, (1e-1, 1e-4, 1e-4))

        ################# FBN GRADIENT TEST ###############################
        outputGrad = tf.placeholder(tf.float32, size)
        test_outputgrad = np.random.uniform(size=size).astype(np.float32)
        gradient_feed = {
            inp: test_in,
            outputGrad: test_outputgrad,
            scale: test_scale,
            offset: test_offset,
        }

        # DaCe: gradient built from the fifth output of the forward op.
        x_grad, gamma_grad, beta_grad, _, _ = gen_nn_ops.fused_batch_norm_grad(
            outputGrad,
            inp,
            scale,
            outputs[1],
            var_sqrt,
            epsilon=epsilon,
            is_training=True)
        gradients = [x_grad, gamma_grad, beta_grad]
        outputs_dace = sess_dace.run(gradients, feed_dict=gradient_feed)

        # TF: on CUDA builds the op is given 1/sqrt(var + epsilon), otherwise
        # the plain batch variance.
        x_grad, gamma_grad, beta_grad, _, _ = gen_nn_ops.fused_batch_norm_grad(
            outputGrad,
            inp,
            scale,
            outputs[1],
            tf.math.rsqrt(outputs[2] + float(epsilon))
            if tf.test.is_built_with_cuda() else outputs[2],
            epsilon=epsilon,
            is_training=True,
        )
        gradients = [x_grad, gamma_grad, beta_grad]
        outputs_tf = sess_tf.run(gradients, feed_dict=gradient_feed)

        gradient_norms = [
            diff_norm(expected, actual)
            for expected, actual in zip(outputs_tf, outputs_dace)
        ]
        require_close(
            "FBN Gradient test failed",
            gradient_norms,
            (1e-1, 10, 10),
            # Extra reference: TF's beta gradient against the output
            # gradient summed over every axis except the channel axis.
            extra_diagnostic=lambda: diff_norm(
                outputs_tf[2], np.sum(test_outputgrad, axis=(0, 1, 2))),
        )
    finally:
        sess_tf.close()
if __name__ == '__main__': num_channels = 3 size = [8, 224, 224, num_channels] config = tf.ConfigProto() config.gpu_options.allow_growth = True inp = tf.placeholder(tf.float32, size) scale = tf.placeholder(tf.float32, [num_channels]) offset = tf.placeholder(tf.float32, [num_channels]) populationMean = tf.placeholder(tf.float32, [num_channels]) populationVariance = tf.placeholder(tf.float32, [num_channels]) y, mean, var, _, var_sqrt = gen_nn_ops._fused_batch_norm(inp, scale, offset, [], [], epsilon=0.1, is_training=True) outputs = [y, mean, var] test_in = np.random.uniform(size=size).astype(np.float32) test_scale = np.random.uniform(size=[num_channels]).astype(np.float32) test_offset = np.random.uniform(size=[num_channels]).astype(np.float32) sess_tf = tf.Session(config=config) sess_dace = TFSession() outputs_dace = sess_dace.run( outputs, feed_dict={ inp: test_in, scale: test_scale,