def testFoldFusedBatchNorms(self):
    """Verify fold_batch_norms folds FusedBatchNorm without changing outputs.

    For every (data_format, use_gpu, convolution function) combination a
    small conv -> fused batch norm graph over constant inputs is built and
    evaluated. The graph is then passed through
    `optimize_for_inference_lib.fold_batch_norms`, re-imported under the
    "optimized" prefix and evaluated again. Both results must agree within
    tolerance and no node with op "FusedBatchNorm" may remain.
    """

    def _float_const(values, shape):
      # float32 constant of the given shape built from a flat value list.
      return constant_op.constant(
          np.array(values), shape=shape, dtype=dtypes.float32)

    configurations = [
        ("NHWC", False, nn_ops.conv2d),
        ("NCHW", True, nn_ops.conv2d),
        ("NHWC", False, nn_ops.depthwise_conv2d_native),
        ("NCHW", True, nn_ops.depthwise_conv2d_native),
    ]
    for data_format, use_gpu, conv2d_func in configurations:
      with self.cached_session(use_gpu=use_gpu) as sess:
        # The same twelve values are laid out according to the data format.
        input_shape = [1, 1, 6, 2] if data_format == "NHWC" else [1, 2, 1, 6]
        input_op = _float_const(
            [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6], input_shape)
        # Regular and depthwise convolutions take differently shaped filters.
        if conv2d_func == nn_ops.conv2d:
          weights_op = _float_const(
              [1, 2, 3, 4, 0.1, 0.2, 0.3, 0.4], [1, 2, 2, 2])
        else:
          weights_op = _float_const([1, 2, 0.3, 0.4], [1, 2, 2, 1])
        conv_op = conv2d_func(
            input_op,
            weights_op, [1, 1, 1, 1],
            padding="SAME",
            data_format=data_format,
            name="conv_op")
        # Per-channel batch norm parameters (two channels).
        mean_op = _float_const([10, 20], [2])
        variance_op = _float_const([0.25, 0.5], [2])
        beta_op = _float_const([0.1, 0.6], [2])
        gamma_op = _float_const([1.0, 2.0], [2])
        # NOTE(review): the producer version is forced to 9 before the fused
        # op is created; the reason is not visible here -- TODO confirm.
        ops.get_default_graph().graph_def_versions.producer = 9
        gen_nn_ops._fused_batch_norm(
            conv_op,
            gamma_op,
            beta_op,
            mean_op,
            variance_op,
            0.00001,
            is_training=False,
            data_format=data_format,
            name="output")
        original_graph_def = sess.graph_def
        original_result = sess.run(["output:0"])

      optimized_graph_def = optimize_for_inference_lib.fold_batch_norms(
          original_graph_def)

      # NOTE(review): `sess` is used after its `with` block has exited; this
      # presumably relies on the cached session staying usable -- verify.
      _ = importer.import_graph_def(
          optimized_graph_def, input_map={}, name="optimized")
      optimized_result = sess.run(["optimized/output:0"])

      self.assertAllClose(
          original_result, optimized_result, rtol=1e-04, atol=1e-06)

      for optimized_node in optimized_graph_def.node:
        self.assertNotEqual("FusedBatchNorm", optimized_node.op)
Beispiel #2
0
    def testFoldFusedBatchNorms(self):
        """Verify fold_batch_norms folds FusedBatchNorm without changing outputs.

        A conv2d -> fused batch norm graph over constant inputs is built and
        evaluated, then rewritten with
        `optimize_for_inference_lib.fold_batch_norms`. The rewritten graph is
        imported under the "optimized" prefix in a second session and
        evaluated. Both results must agree within tolerance and no node with
        op "FusedBatchNorm" may remain.
        """

        def _float_const(values, shape):
            # float32 constant of the given shape built from a flat value list.
            return constant_op.constant(np.array(values),
                                        shape=shape,
                                        dtype=dtypes.float32)

        with self.test_session() as sess:
            input_op = _float_const(
                [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6], [1, 1, 6, 2])
            weights_op = _float_const(
                [1, 2, 3, 4, 0.1, 0.2, 0.3, 0.4], [1, 2, 2, 2])
            conv_op = nn_ops.conv2d(input_op,
                                    weights_op, [1, 1, 1, 1],
                                    padding="SAME",
                                    name="conv_op")
            # Per-channel batch norm parameters (two channels).
            mean_op = _float_const([10, 20], [2])
            variance_op = _float_const([0.25, 0.5], [2])
            beta_op = _float_const([0.1, 0.6], [2])
            gamma_op = _float_const([1.0, 2.0], [2])
            # NOTE(review): the producer version is forced to 9 before the
            # fused op is created; the reason is not visible here -- TODO
            # confirm.
            ops.get_default_graph().graph_def_versions.producer = 9
            gen_nn_ops._fused_batch_norm(conv_op,
                                         gamma_op,
                                         beta_op,
                                         mean_op,
                                         variance_op,
                                         0.00001,
                                         is_training=False,
                                         name="output")
            original_graph_def = sess.graph_def
            original_result = sess.run(["output:0"])

        optimized_graph_def = optimize_for_inference_lib.fold_batch_norms(
            original_graph_def)

        # Evaluate the rewritten graph in a second session.
        with self.test_session() as sess:
            _ = importer.import_graph_def(optimized_graph_def,
                                          input_map={},
                                          name="optimized")
            optimized_result = sess.run(["optimized/output:0"])

        self.assertAllClose(original_result,
                            optimized_result,
                            rtol=1e-04,
                            atol=1e-06)

        for optimized_node in optimized_graph_def.node:
            self.assertNotEqual("FusedBatchNorm", optimized_node.op)
  def testFoldFusedBatchNorms(self):
    """Verify fold_batch_norms folds FusedBatchNorm without changing outputs.

    For each (data_format, use_gpu) pair a conv2d -> fused batch norm graph
    over constant inputs is built and evaluated, then rewritten with
    `optimize_for_inference_lib.fold_batch_norms`. The rewritten graph is
    imported under the "optimized" prefix and evaluated. Both results must
    agree within tolerance and no node with op "FusedBatchNorm" may remain.
    """

    def _float_const(values, shape):
      # float32 constant of the given shape built from a flat value list.
      return constant_op.constant(
          np.array(values), shape=shape, dtype=dtypes.float32)

    configurations = [("NHWC", False), ("NCHW", True)]
    for data_format, use_gpu in configurations:
      with self.test_session(use_gpu=use_gpu) as sess:
        # The same twelve values are laid out according to the data format.
        input_shape = [1, 1, 6, 2] if data_format == "NHWC" else [1, 2, 1, 6]
        input_op = _float_const(
            [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6], input_shape)
        weights_op = _float_const(
            [1, 2, 3, 4, 0.1, 0.2, 0.3, 0.4], [1, 2, 2, 2])
        conv_op = nn_ops.conv2d(
            input_op,
            weights_op, [1, 1, 1, 1],
            padding="SAME",
            data_format=data_format,
            name="conv_op")
        # Per-channel batch norm parameters (two channels).
        mean_op = _float_const([10, 20], [2])
        variance_op = _float_const([0.25, 0.5], [2])
        beta_op = _float_const([0.1, 0.6], [2])
        gamma_op = _float_const([1.0, 2.0], [2])
        # NOTE(review): the producer version is forced to 9 before the fused
        # op is created; the reason is not visible here -- TODO confirm.
        ops.get_default_graph().graph_def_versions.producer = 9
        gen_nn_ops._fused_batch_norm(
            conv_op,
            gamma_op,
            beta_op,
            mean_op,
            variance_op,
            0.00001,
            is_training=False,
            data_format=data_format,
            name="output")
        original_graph_def = sess.graph_def
        original_result = sess.run(["output:0"])

      optimized_graph_def = optimize_for_inference_lib.fold_batch_norms(
          original_graph_def)

      # Evaluate the rewritten graph in a second session.
      with self.test_session(use_gpu=use_gpu) as sess:
        _ = importer.import_graph_def(
            optimized_graph_def, input_map={}, name="optimized")
        optimized_result = sess.run(["optimized/output:0"])

      self.assertAllClose(
          original_result, optimized_result, rtol=1e-04, atol=1e-06)

      for optimized_node in optimized_graph_def.node:
        self.assertNotEqual("FusedBatchNorm", optimized_node.op)
Beispiel #4
0
def fused_batch_norm(
    x,
    scale,
    offset,  # pylint: disable=invalid-name
    mean=None,
    variance=None,
    epsilon=0.001,
    data_format="NHWC",
    is_training=True,
    name=None):
  r"""Applies batch normalization through the fused kernel.

  See http://arxiv.org/abs/1502.03167 for the algorithm.

  Args:
    x: 4-dimensional input `Tensor`.
    scale: 1-dimensional `Tensor` holding the scaling factors.
    offset: 1-dimensional `Tensor` holding the bias.
    mean: 1-dimensional `Tensor` with the population mean, used for
      inference. Must be None while training.
    variance: 1-dimensional `Tensor` with the population variance, used for
      inference. Must be None while training.
    epsilon: Small float added to the variance of x.
    data_format: Layout of x, either "NHWC" (default) or "NCHW".
    is_training: Bool selecting training or inference behaviour.
    name: Optional name for the operation.

  Returns:
    A tuple `(y, batch_mean, batch_var)`: the normalized, scaled and offset
    4D tensor, followed by the 1D mean and 1D variance of x.

  Raises:
    ValueError: If `mean` or `variance` is given while `is_training` is True.
  """
  x = ops.convert_to_tensor(x, name="input")
  scale = ops.convert_to_tensor(scale, name="scale")
  offset = ops.convert_to_tensor(offset, name="offset")

  statistics_supplied = (mean is not None) or (variance is not None)
  if is_training and statistics_supplied:
    raise ValueError("Both 'mean' and 'variance' must be None "
                     "if is_training is True.")

  # Missing statistics are passed to the kernel as empty constants.
  if mean is None:
    mean = constant_op.constant([])
  if variance is None:
    variance = constant_op.constant([])

  # Nudge epsilon by 1e-12 when it is <= 1e-5 to prevent a CUDNN exception.
  if not epsilon > 1e-5:
    epsilon = epsilon + 1e-12

  # pylint: disable=protected-access
  fused_outputs = gen_nn_ops._fused_batch_norm(
      x,
      scale,
      offset,
      mean,
      variance,
      epsilon=epsilon,
      data_format=data_format,
      is_training=is_training,
      name=name)
  # The op yields five outputs; only the first three are returned.
  y, batch_mean, batch_var, _, _ = fused_outputs
  return y, batch_mean, batch_var
Beispiel #5
0
def fused_batch_norm(
    x,
    scale,
    offset,  # pylint: disable=invalid-name
    mean=None,
    variance=None,
    epsilon=0.001,
    data_format="NHWC",
    is_training=True,
    name=None):
  r"""Fused batch normalization (http://arxiv.org/abs/1502.03167).

  Args:
    x: Input `Tensor` with 4 dimensions.
    scale: `Tensor` with 1 dimension used for scaling.
    offset: `Tensor` with 1 dimension used as bias.
    mean: `Tensor` with 1 dimension, the population mean for inference;
      leave as None when training.
    variance: `Tensor` with 1 dimension, the population variance for
      inference; leave as None when training.
    epsilon: Small float that is added to the variance of x.
    data_format: Data format of x: "NHWC" (default) or "NCHW".
    is_training: Whether the op runs in training (True) or inference mode.
    name: Name for the operation (optional).

  Returns:
    y: 4D Tensor, x after normalization, scaling and offsetting.
    batch_mean: 1D Tensor, the mean of x.
    batch_var: 1D Tensor, the variance of x.

  Raises:
    ValueError: When is_training is True and mean or variance is not None.
  """
  x = ops.convert_to_tensor(x, name="input")
  scale = ops.convert_to_tensor(scale, name="scale")
  offset = ops.convert_to_tensor(offset, name="offset")

  if is_training and not (mean is None and variance is None):
    raise ValueError("Both 'mean' and 'variance' must be None "
                     "if is_training is True.")

  # Absent statistics are represented by empty constants.
  mean = constant_op.constant([]) if mean is None else mean
  variance = constant_op.constant([]) if variance is None else variance

  # An epsilon of 1e-5 or less gets 1e-12 added to prevent a CUDNN exception.
  safe_epsilon = epsilon if epsilon > 1e-5 else epsilon + 1e-12

  # pylint: disable=protected-access
  results = gen_nn_ops._fused_batch_norm(
      x,
      scale,
      offset,
      mean,
      variance,
      epsilon=safe_epsilon,
      data_format=data_format,
      is_training=is_training,
      name=name)
  # Five outputs come back; the trailing two are dropped.
  y, batch_mean, batch_var, _, _ = results
  return y, batch_mean, batch_var
  def testExpectedNaNOpOutputs(self):
    """Ops with benign NaN outputs must run with check_numerics enabled.

    Runs the fused batch norm op on an empty input while the check_numerics
    callback is active and asserts that the returned batch mean and batch
    variance are NaN.
    """
    check_numerics_callback.enable_check_numerics()

    # Input with a zero-sized leading dimension, i.e. no elements at all.
    empty_input = constant_op.constant(
        1, dtype=dtypes.float32, shape=[0, 1, 1, 1])
    unit_scale = constant_op.constant([1], dtype=dtypes.float32)
    unit_offset = constant_op.constant([1], dtype=dtypes.float32)

    # With an empty input the op is expected to produce NaN statistics; the
    # call itself must not trigger the check_numerics callback.
    fused_outputs = gen_nn_ops._fused_batch_norm(
        x=empty_input,
        scale=unit_scale,
        offset=unit_offset,
        mean=[],
        variance=[])
    _, batch_mean, batch_variance, _, _ = self.evaluate(fused_outputs)

    for statistic in (batch_mean, batch_variance):
      self.assertTrue(np.isnan(statistic.squeeze()))
Beispiel #7
0
def test_fused_batch_norm():
    """Compare DaCe's TFSession with TensorFlow on fused batch norm ops.

    A training-mode fused batch norm over an 8x224x224x3 float32 placeholder
    is evaluated with both a regular ``tf.Session`` and DaCe's ``TFSession``
    using the same random inputs, and the outputs ``y``, ``mean`` and ``var``
    are compared. The same comparison is then made for the three gradients
    produced by ``fused_batch_norm_grad``.

    Unlike a bare ``except:`` around the assertions, a mismatch here prints
    the measured difference norms and then fails the test.

    Raises:
        AssertionError: If any forward or gradient output differs from the
            TensorFlow result by at least its tolerance.
    """
    import tensorflow as tf
    from tensorflow.python.ops import gen_nn_ops
    from dace.frontend.tensorflow import TFSession

    epsilon = 0.1
    num_channels = 3
    size = [8, 224, 224, num_channels]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    inp = tf.placeholder(tf.float32, size)
    scale = tf.placeholder(tf.float32, [num_channels])
    offset = tf.placeholder(tf.float32, [num_channels])
    # These two placeholders are never fed or read below; they are kept so
    # the set of nodes added to the graph stays the same as before.
    populationMean = tf.placeholder(tf.float32, [num_channels])
    populationVariance = tf.placeholder(tf.float32, [num_channels])
    y, mean, var, _, var_sqrt = gen_nn_ops._fused_batch_norm(inp,
                                                             scale,
                                                             offset, [], [],
                                                             epsilon=epsilon,
                                                             is_training=True)
    outputs = [y, mean, var]
    test_in = np.random.uniform(size=size).astype(np.float32)
    test_scale = np.random.uniform(size=[num_channels]).astype(np.float32)
    test_offset = np.random.uniform(size=[num_channels]).astype(np.float32)

    sess_tf = tf.Session(config=config)
    try:
        sess_dace = TFSession()

        def diff_norm(lhs, rhs):
            # Norm of the difference of two arrays, evaluated by TensorFlow.
            return tf.linalg.norm(lhs - rhs).eval(session=sess_tf)

        def check_close(failure_label, pairs, tolerances, extra_pairs=()):
            # Fail (after printing every norm) unless each pair differs by
            # less than its tolerance. `extra_pairs` are diagnostics only.
            norms = [diff_norm(ref, got) for ref, got in pairs]
            if all(norm < tol for norm, tol in zip(norms, tolerances)):
                return
            print(failure_label)
            for norm in norms:
                print(norm)
            for ref, got in extra_pairs:
                print(diff_norm(ref, got))
            raise AssertionError(
                "%s: difference norms %s not below tolerances %s" %
                (failure_label, norms, list(tolerances)))

        ################# FBN FORWARD TEST ################################
        forward_feed = {
            inp: test_in,
            scale: test_scale,
            offset: test_offset,
        }
        # Each session receives its own copy of the feed dictionary.
        outputs_dace = sess_dace.run(outputs, feed_dict=dict(forward_feed))
        outputs_tf = sess_tf.run(outputs, feed_dict=dict(forward_feed))
        check_close("FBN test failed",
                    list(zip(outputs_tf, outputs_dace)),
                    tolerances=(1e-1, 1e-4, 1e-4))

        ################# FBN GRADIENT TEST ###############################
        outputGrad = tf.placeholder(tf.float32, size)
        grad_feed = dict(forward_feed)
        grad_feed[outputGrad] = test_outputgrad = np.random.uniform(
            size=size).astype(np.float32)

        # DaCe: the fifth forward output is passed as the last saved input.
        x_grad, gamma_grad, beta_grad, _, _ = gen_nn_ops.fused_batch_norm_grad(
            outputGrad,
            inp,
            scale,
            outputs[1],
            var_sqrt,
            epsilon=epsilon,
            is_training=True)
        gradients = [x_grad, gamma_grad, beta_grad]
        outputs_dace = sess_dace.run(gradients, feed_dict=dict(grad_feed))

        # TF: CUDA builds get rsqrt(var + epsilon), other builds the plain
        # variance. NOTE(review): presumably this matches what each kernel
        # expects as its saved statistic -- TODO confirm.
        if tf.test.is_built_with_cuda():
            saved_statistic = tf.math.rsqrt(outputs[2] + float(epsilon))
        else:
            saved_statistic = outputs[2]
        x_grad, gamma_grad, beta_grad, _, _ = gen_nn_ops.fused_batch_norm_grad(
            outputGrad,
            inp,
            scale,
            outputs[1],
            saved_statistic,
            epsilon=epsilon,
            is_training=True,
        )
        gradients = [x_grad, gamma_grad, beta_grad]
        outputs_tf = sess_tf.run(gradients, feed_dict=dict(grad_feed))

        # On failure, also report how far TF's third gradient is from the
        # per-channel sum of the incoming gradient.
        check_close("FBN Gradient test failed",
                    list(zip(outputs_tf, outputs_dace)),
                    tolerances=(1e-1, 10, 10),
                    extra_pairs=[(outputs_tf[2],
                                  np.sum(test_outputgrad, axis=(0, 1, 2)))])
    finally:
        # Release the TensorFlow session even when a check fails.
        sess_tf.close()
Beispiel #8
0
if __name__ == '__main__':
    num_channels = 3
    size = [8, 224, 224, num_channels]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    inp = tf.placeholder(tf.float32, size)
    scale = tf.placeholder(tf.float32, [num_channels])
    offset = tf.placeholder(tf.float32, [num_channels])
    populationMean = tf.placeholder(tf.float32, [num_channels])
    populationVariance = tf.placeholder(tf.float32, [num_channels])
    y, mean, var, _, var_sqrt = gen_nn_ops._fused_batch_norm(inp,
                                                             scale,
                                                             offset, [], [],
                                                             epsilon=0.1,
                                                             is_training=True)
    outputs = [y, mean, var]
    test_in = np.random.uniform(size=size).astype(np.float32)
    test_scale = np.random.uniform(size=[num_channels]).astype(np.float32)
    test_offset = np.random.uniform(size=[num_channels]).astype(np.float32)

    sess_tf = tf.Session(config=config)
    sess_dace = TFSession()

    outputs_dace = sess_dace.run(
        outputs,
        feed_dict={
            inp: test_in,
            scale: test_scale,