Esempio n. 1
0
    def testAtrousDepthwiseConv2DForward(self):
        """Checks dilated depthwise conv against conv with an upsampled filter.

        For every combination of input width, kernel size, dilation rate and
        padding mode, `depthwise_conv2d` called with `rate=[rate, rate]` must
        agree (rtol/atol 1e-3) with `depthwise_conv2d` applied to the filter
        returned by `_upsample_filters` for the same rate.
        """
        unit_strides = [1, 1, 1, 1]
        with self.session():
            # Input layout: [batch, height, width, input_depth].
            rows = 9
            for cols in (9, 10):  # Cover an odd and an even width.
                input_shape = [2, rows, cols, 2]
                inputs = np.arange(np.prod(input_shape), dtype=np.float32)
                inputs = inputs.reshape(input_shape)

                # Filter layout:
                # [kernel_height, kernel_width, input_depth, output_depth].
                for k_rows in range(1, 4):
                    for k_cols in range(1, 4):
                        kernel_shape = [k_rows, k_cols, 2, 2]
                        kernel = np.arange(np.prod(kernel_shape),
                                           dtype=np.float32)
                        kernel = kernel.reshape(kernel_shape)

                        for dilation in range(1, 4):
                            dilated_kernel = _upsample_filters(
                                kernel, dilation)

                            for mode in ("SAME", "VALID"):
                                atrous_out = nn_impl.depthwise_conv2d(
                                    inputs,
                                    kernel,
                                    unit_strides,
                                    mode,
                                    rate=[dilation, dilation])
                                reference_out = nn_impl.depthwise_conv2d(
                                    inputs, dilated_kernel, unit_strides,
                                    mode)
                                self.assertAllClose(atrous_out,
                                                    reference_out,
                                                    rtol=1e-3,
                                                    atol=1e-3)
Esempio n. 2
0
 def testDepthwiseConv2dGradWRTFilter(self):
   """Builds a SAME-padded, stride-1 depthwise conv and hands it to run_test.

   The input is a float32 constant of shape [1, 4, 4, 3] built from the value
   0.5; the filter is a [2, 2, 3, 2] placeholder. `run_test` receives the
   filter placeholder together with the convolution output.
   """
   input_const = constant_op.constant(
       [0.5], dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input')
   filter_ph = array_ops.placeholder(
       name='filter', shape=[2, 2, 3, 2], dtype=dtypes.float32)
   conv_out = nn_impl.depthwise_conv2d(
       input_const, filter_ph, strides=[1, 1, 1, 1], padding='SAME')
   self.run_test(filter_ph, conv_out)
 def testDepthwiseConv2DWithUnknownShape(self):
   """Runs an NCHW depthwise conv on a placeholder created without a shape.

   Relates to GitHub issue 22110. Returns without asserting anything when
   `test.is_gpu_available()` is false.
   """
   if not test.is_gpu_available():
     return
   unit_shape = [1, 1, 1, 1]
   with self.session(use_gpu=True):
     # The placeholder is created without a shape argument.
     unknown_input = array_ops.placeholder(dtypes.float32)
     kernel = np.ones(unit_shape, np.float32)
     conv = nn_impl.depthwise_conv2d(
         unknown_input,
         kernel,
         [1, 1, 1, 1],
         "VALID",
         rate=[2, 1],
         data_format="NCHW")
     expected = np.ones(unit_shape, np.float32)
     actual = conv.eval(
         feed_dict={unknown_input: np.ones(unit_shape, np.float32)})
     self.assertAllEqual(expected, actual)
Esempio n. 4
0
 def testDepthwiseConv2dGradWRTFilter(self):
     """Builds a SAME-padded, stride-1 depthwise conv and passes it to run_test.

     The input is a float32 constant of shape [1, 4, 4, 3] built from the
     value 0.5; the filter is a [2, 2, 3, 2] placeholder. `run_test` receives
     the filter placeholder together with the convolution output.
     """
     input_const = constant_op.constant(
         [0.5], dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input')
     filter_ph = array_ops.placeholder(
         name='filter', shape=[2, 2, 3, 2], dtype=dtypes.float32)
     conv_out = nn_impl.depthwise_conv2d(
         input_const, filter_ph, strides=[1, 1, 1, 1], padding='SAME')
     self.run_test(filter_ph, conv_out)
Esempio n. 5
0
 def testDepthwiseConv2DWithUnknownShape(self):
   """Runs an NCHW depthwise conv on a placeholder created without a shape.

   Relates to GitHub issue 22110. Returns without asserting anything when
   `test.is_gpu_available()` is false.
   """
   if not test.is_gpu_available():
     return
   unit_shape = [1, 1, 1, 1]
   with self.session(use_gpu=True):
     # The placeholder is created without a shape argument.
     unknown_input = array_ops.placeholder(dtypes.float32)
     kernel = np.ones(unit_shape, np.float32)
     conv = nn_impl.depthwise_conv2d(
         unknown_input,
         kernel,
         [1, 1, 1, 1],
         "VALID",
         rate=[2, 1],
         data_format="NCHW")
     expected = np.ones(unit_shape, np.float32)
     actual = conv.eval(
         feed_dict={unknown_input: np.ones(unit_shape, np.float32)})
     self.assertAllEqual(expected, actual)
Esempio n. 6
0
  def _VerifyValues(self, input_size, filter_size, stride, padding):
    """Compares depthwise_conv2d under `test_scope()` with the CPU device.

    Random float32 input and filter values are fed to the same convolution
    built twice: once inside `self.test_scope()` and once inside
    `ops.device(self.CPU_DEVICE)`. The two results must be close.

    Args:
      input_size: Shape of the random input and of its placeholder.
      filter_size: Shape of the random filter and of its placeholder.
      stride: Value used for the two middle entries of the strides list.
      padding: Padding argument forwarded to depthwise_conv2d.
    """
    image = np.random.rand(*input_size).astype(np.float32)
    kernel = np.random.rand(*filter_size).astype(np.float32)
    conv_strides = [1, stride, stride, 1]

    def _run_conv(enter_scope):
      # Builds and evaluates the convolution inside a test session, under
      # whichever context manager `enter_scope()` returns.
      with self.test_session():
        with enter_scope():
          image_ph = array_ops.placeholder(dtypes.float32, shape=input_size)
          kernel_ph = array_ops.placeholder(dtypes.float32, shape=filter_size)
          conv = nn_impl.depthwise_conv2d(image_ph, kernel_ph, conv_strides,
                                          padding)
          return conv.eval(feed_dict={image_ph: image, kernel_ph: kernel})

    xla_out = _run_conv(self.test_scope)
    cpu_out = _run_conv(lambda: ops.device(self.CPU_DEVICE))

    self.assertAllClose(xla_out, cpu_out)
  def testAtrousDepthwiseConv2DForward(self):
    """Checks dilated depthwise conv against conv with an upsampled filter.

    For every combination of input width, kernel size, dilation rate and
    padding mode, `depthwise_conv2d` called with `rate=[rate, rate]` must
    agree (rtol/atol 1e-3) with `depthwise_conv2d` applied to the filter
    returned by `_upsample_filters` for the same rate.
    """
    unit_strides = [1, 1, 1, 1]
    with self.session(use_gpu=True):
      # Input layout: [batch, height, width, input_depth].
      rows = 9
      for cols in (9, 10):  # Cover an odd and an even width.
        input_shape = [2, rows, cols, 2]
        inputs = np.arange(np.prod(input_shape), dtype=np.float32)
        inputs = inputs.reshape(input_shape)

        # Filter layout:
        # [kernel_height, kernel_width, input_depth, output_depth].
        for k_rows in range(1, 4):
          for k_cols in range(1, 4):
            kernel_shape = [k_rows, k_cols, 2, 2]
            kernel = np.arange(np.prod(kernel_shape), dtype=np.float32)
            kernel = kernel.reshape(kernel_shape)

            for dilation in range(1, 4):
              dilated_kernel = _upsample_filters(kernel, dilation)

              for mode in ("SAME", "VALID"):
                atrous_out = nn_impl.depthwise_conv2d(
                    inputs,
                    kernel,
                    unit_strides,
                    mode,
                    rate=[dilation, dilation])
                reference_out = nn_impl.depthwise_conv2d(
                    inputs, dilated_kernel, unit_strides, mode)
                atrous_value = atrous_out.eval()
                reference_value = self.evaluate(reference_out)
                self.assertAllClose(
                    atrous_value, reference_value, rtol=1e-3, atol=1e-3)
Esempio n. 8
0
File: conv.py Progetto: wujinke/MDNT
    def call(self, inputs):
        """Runs the depthwise convolution, folds the grouped channels and
        applies the optional bias and activation.

        Args:
            inputs: Input tensor. When `self.rank == 1` an extra dimension is
                inserted at `self.exp_dim_pos` before the 2D op and squeezed
                out again afterwards.

        Returns:
            The resulting tensor, passed through `self.activation` when one
            is set.
        """
        if self.rank == 1:
            inputs = array_ops.expand_dims(inputs, axis=self.exp_dim_pos)
        outputs = nn_impl.depthwise_conv2d(input=inputs,
                                           filter=self.kernel,
                                           strides=self._strides,
                                           padding=self.op_padding.upper(),
                                           rate=self.dilation_rate,
                                           data_format=self._data_format)
        # Split the channel axis into
        # (lgroups * lfilters, group_input_dim) and sum over group_input_dim.
        conv_shape = outputs.get_shape().as_list()
        if self.data_format == 'channels_first':
            get_oshape = [
                -1, self.lgroups * self.lfilters, self.group_input_dim,
                *conv_shape[2:]
            ]
            outputs = array_ops.reshape(outputs, get_oshape)
            # Reduce over the group_input_dim axis (axis 2), mirroring the
            # channels_last branch, so that lgroups * lfilters channels remain
            # -- the channel count the bias below is shaped for. Reducing
            # axis 1 would instead collapse the lgroups * lfilters axis.
            outputs = math_ops.reduce_sum(outputs, axis=2, keepdims=False)
        else:
            get_oshape = [
                -1, *conv_shape[1:-1], self.lgroups * self.lfilters,
                self.group_input_dim
            ]
            outputs = array_ops.reshape(outputs, get_oshape)
            outputs = math_ops.reduce_sum(outputs, axis=-1, keepdims=False)
        if self.rank == 1:
            outputs = array_ops.squeeze(outputs, axis=self.exp_dim_pos)

        if self.use_bias:
            if self.data_format == 'channels_first':
                if self.rank == 1:
                    # nn.bias_add does not accept a 1D input tensor.
                    bias = array_ops.reshape(
                        self.bias, (1, self.lfilters * self.lgroups, 1))
                    outputs += bias
                elif self.rank == 2:
                    outputs = nn.bias_add(outputs,
                                          self.bias,
                                          data_format='NCHW')
            else:
                outputs = nn.bias_add(outputs, self.bias, data_format='NHWC')

        if self.activation is not None:
            return self.activation(outputs)
        return outputs
Esempio n. 9
0
  def testConv2dBackpropFilterGrad(self):
    """Passes both gradients of a depthwise conv through run_test.

    The input is a [1, 4, 4, 3] placeholder and the filter a [2, 2, 3, 2]
    constant built from the value 0.5. The gradient with respect to the input
    is checked first, then the gradient with respect to the filter.
    """
    input_ph = array_ops.placeholder(
        name='input', shape=[1, 4, 4, 3], dtype=dtypes.float32)
    filter_const = constant_op.constant(
        [0.5], dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter')
    conv_out = nn_impl.depthwise_conv2d(
        input_ph, filter_const, strides=[1, 1, 1, 1], padding='SAME')

    input_grads = gradients_impl.gradients(conv_out, input_ph)
    self.run_test(filter_const, input_grads[0])

    filter_grads = gradients_impl.gradients(conv_out, filter_const)
    self.run_test(input_ph, filter_grads[0])
Esempio n. 10
0
    def testDepthwiseConv2dBackpropFilterGrad(self):
        """Passes both gradients of a depthwise conv through run_test.

        The input is a [1, 4, 4, 3] placeholder and the filter a [2, 2, 3, 2]
        constant built from the value 0.5. The gradient with respect to the
        input is checked first, then the gradient with respect to the filter.
        """
        input_ph = array_ops.placeholder(
            name='input', shape=[1, 4, 4, 3], dtype=dtypes.float32)
        filter_const = constant_op.constant(
            [0.5], dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter')
        conv_out = nn_impl.depthwise_conv2d(
            input_ph, filter_const, strides=[1, 1, 1, 1], padding='SAME')

        input_grads = gradients_impl.gradients(conv_out, input_ph)
        self.run_test(filter_const, input_grads[0])

        filter_grads = gradients_impl.gradients(conv_out, filter_const)
        self.run_test(input_ph, filter_grads[0])
  def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding,
                    use_gpu):
    """Verifies the output values of the convolution function.

    Evaluates `nn_ops.depthwise_conv2d_native` and `nn_impl.depthwise_conv2d`
    on the same constants and asserts that values and shapes agree.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [filter_rows, filter_cols, input_depth, depth_multiplier].
      stride: Stride.
      padding: Padding type.
      use_gpu: Whether to use GPU.
    """
    total_size_1 = 1
    total_size_2 = 1
    for s in tensor_in_sizes:
      total_size_1 *= s
    for s in filter_in_sizes:
      total_size_2 *= s
    # Initializes the input and filter tensors with incrementing numbers
    # starting from 1.
    x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
    x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
    with self.test_session(use_gpu=use_gpu) as sess:
      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
      t1.set_shape(tensor_in_sizes)
      t2 = constant_op.constant(x2, shape=filter_in_sizes)
      conv_native = nn_ops.depthwise_conv2d_native(
          t1, t2, strides=[1, stride, stride, 1], padding=padding)

      conv_gold = nn_impl.depthwise_conv2d(
          t1, t2, strides=[1, stride, stride, 1], padding=padding)
      native_result = sess.run(conv_native)
      gold_result = sess.run(conv_gold)

    # Report the largest absolute deviation; the signed maximum would hide
    # cases where the native result is below the reference.
    print("diff matrix:",
          np.amax(np.absolute(np.ravel(native_result) - np.ravel(gold_result))))
    self.assertArrayNear(np.ravel(native_result), np.ravel(gold_result), 1e-5)
    self.assertShapeEqual(native_result, conv_native)
    self.assertShapeEqual(native_result, conv_gold)
    def _VerifyValues(self,
                      tensor_in_sizes,
                      filter_in_sizes,
                      stride,
                      padding,
                      data_type,
                      use_gpu,
                      grouped_conv=False,
                      data_format="NHWC"):
        """Checks depthwise_conv2d_native against nn_impl.depthwise_conv2d.

        Args:
          tensor_in_sizes: Input tensor dimensions in
            [batch, input_rows, input_cols, input_depth].
          filter_in_sizes: Filter tensor dimensions in
            [filter_rows, filter_cols, input_depth, depth_multiplier].
          stride: Stride.
          padding: Padding type.
          data_type: The data type to use.
          use_gpu: Whether to use GPU.
          grouped_conv: Whether to use cuDNN 7's grouped convolution.
          data_format: The data_format of the input. "NHWC" or "NCHW".
        """
        num_input_elems = 1
        for dim in tensor_in_sizes:
            num_input_elems *= dim
        num_filter_elems = 1
        for dim in filter_in_sizes:
            num_filter_elems *= dim
        # Values are i / size for i = 1 .. size.
        input_values = [
            i * 1.0 / num_input_elems for i in range(1, num_input_elems + 1)
        ]
        filter_values = [
            i * 1.0 / num_filter_elems
            for i in range(1, num_filter_elems + 1)
        ]
        is_nchw = data_format == "NCHW"
        if grouped_conv:
            label_map = {"DepthwiseConv2dNative": "cudnn_grouped_convolution"}
        else:
            label_map = {}

        ops.reset_default_graph()
        graph = ops.get_default_graph()
        with self.session(graph=graph, use_gpu=use_gpu) as sess:
            tolerance_by_dtype = {
                dtypes.float16: 4e-2,
                dtypes.float32: 1e-8,
                dtypes.float64: 1e-13,
            }
            tolerance = tolerance_by_dtype[data_type]

            input_nhwc = constant_op.constant(input_values,
                                              shape=tensor_in_sizes,
                                              dtype=data_type)
            input_nhwc.set_shape(tensor_in_sizes)
            filter_tensor = constant_op.constant(filter_values,
                                                 shape=filter_in_sizes,
                                                 dtype=data_type)

            if is_nchw:
                # Transpose the NHWC input to NCHW,
                # e.g. [4, 5, 5, 48] becomes [4, 48, 5, 5].
                native_input = array_ops.transpose(input_nhwc, [0, 3, 1, 2])
                native_strides = [1, 1, stride, stride]
            else:
                native_input = input_nhwc
                native_strides = [1, stride, stride, 1]

            with sess.graph._kernel_label_map(label_map):
                conv_native = nn_ops.depthwise_conv2d_native(
                    native_input,
                    filter_tensor,
                    strides=native_strides,
                    data_format=data_format,
                    padding=padding)

            if is_nchw:
                # Bring the native output back from NCHW to NHWC.
                conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

            try:
                native_result = sess.run(conv_native)
            except errors.InvalidArgumentError as e:
                # Grouped convolution kernel is only registered for cuDNN 7.
                # Silently return when we are running on an earlier version
                # or without GPU.
                kernel_missing = e.message.startswith(
                    "No OpKernel was registered to support Op "
                    "'DepthwiseConv2dNative'")
                if not kernel_missing:
                    raise e
                tf_logging.warn("Skipping grouped convolution test")
                return

            conv_interface = nn_impl.depthwise_conv2d(
                input_nhwc,
                filter_tensor,
                strides=[1, stride, stride, 1],
                padding=padding)
            interface_result = sess.run(conv_interface)

        max_diff = np.amax(np.absolute(native_result - interface_result))
        tf_logging.info(
            "data_type: %r, use_gpu: %r, grouped_conv: %r, max diff = %f",
            data_type, use_gpu, grouped_conv, max_diff)
        self.assertArrayNear(np.ravel(native_result),
                             np.ravel(interface_result), tolerance)
        self.assertShapeEqual(native_result, conv_native)
        self.assertShapeEqual(native_result, conv_interface)
Esempio n. 13
0
    def _VerifyValues(self,
                      tensor_in_sizes,
                      filter_in_sizes,
                      stride,
                      padding,
                      use_gpu,
                      data_format="NHWC"):
        """Checks depthwise_conv2d_native against nn_impl.depthwise_conv2d.

        Args:
          tensor_in_sizes: Input tensor dimensions in
            [batch, input_rows, input_cols, input_depth].
          filter_in_sizes: Filter tensor dimensions in
            [filter_rows, filter_cols, input_depth, depth_multiplier].
          stride: Stride.
          padding: Padding type.
          use_gpu: Whether to use GPU.
          data_format: The data_format of the input.  "NHWC" or "NCHW".
        """
        num_input_elems = 1
        for dim in tensor_in_sizes:
            num_input_elems *= dim
        num_filter_elems = 1
        for dim in filter_in_sizes:
            num_filter_elems *= dim
        # Values are 1.0, 2.0, ..., size.
        input_values = [i * 1.0 for i in range(1, num_input_elems + 1)]
        filter_values = [i * 1.0 for i in range(1, num_filter_elems + 1)]
        is_nchw = data_format == "NCHW"

        with self.test_session(use_gpu=use_gpu) as sess:
            # NOTE(review): only the constants are created under the "neon"
            # kernel label map; the convolution op below is built outside of
            # it -- confirm that this is intended.
            neon_labels = {"DepthwiseConv2dNative": "neon"}
            with sess.graph._kernel_label_map(neon_labels):
                input_nhwc = constant_op.constant(input_values,
                                                  shape=tensor_in_sizes)
                input_nhwc.set_shape(tensor_in_sizes)
                filter_tensor = constant_op.constant(filter_values,
                                                     shape=filter_in_sizes)

            if is_nchw:
                # Transpose the NHWC input to NCHW,
                # e.g. [4, 5, 5, 48] becomes [4, 48, 5, 5].
                native_input = array_ops.transpose(input_nhwc, [0, 3, 1, 2])
                native_strides = [1, 1, stride, stride]
            else:
                native_input = input_nhwc
                native_strides = [1, stride, stride, 1]

            conv_native = nn_ops.depthwise_conv2d_native(
                native_input,
                filter_tensor,
                strides=native_strides,
                data_format=data_format,
                padding=padding)

            if is_nchw:
                # Bring the native output back from NCHW to NHWC.
                conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

            conv_interface = nn_impl.depthwise_conv2d(
                input_nhwc,
                filter_tensor,
                strides=[1, stride, stride, 1],
                padding=padding)

            native_result = sess.run(conv_native)
            interface_result = sess.run(conv_interface)

        max_diff = np.amax(np.absolute(native_result - interface_result))
        print("depthwise conv_2d: ", tensor_in_sizes, "*", filter_in_sizes,
              ", stride:", stride, ", padding: ", padding, ", max diff: ",
              max_diff)
        self.assertAllClose(np.ravel(native_result),
                            np.ravel(interface_result), 1e-5)
        self.assertShapeEqual(native_result, conv_native)
        self.assertShapeEqual(native_result, conv_interface)
Esempio n. 14
0
  def _ConstructAndTestGradient(self,
                                input_shape,
                                filter_shape,
                                output_shape,
                                stride,
                                padding,
                                data_type,
                                test_input,
                                use_gpu,
                                grouped_conv=False,
                                data_format="NHWC",
                                dilations=None):
    """Checks the gradient error of depthwise_conv2d against a tolerance.

    Args:
      input_shape: Input shape, given in NHWC order.
      filter_shape: Filter shape.
      output_shape: Expected output shape, given in NHWC order.
      stride: Stride used for both spatial dimensions.
      padding: Padding argument forwarded to depthwise_conv2d.
      data_type: Element type; selects the error tolerance.
      test_input: If true the gradient is checked with respect to the input,
        otherwise with respect to the filter.
      use_gpu: Forwarded to `self.session`.
      grouped_conv: If true the op is built under the
        "cudnn_grouped_convolution" kernel labels.
      data_format: "NHWC" or "NCHW".
      dilations: Forwarded to depthwise_conv2d.
    """
    num_input_elems = 1
    for dim in input_shape:
      num_input_elems *= dim
    num_filter_elems = 1
    for dim in filter_shape:
      num_filter_elems *= dim
    # Values are i / size for i = 0 .. size - 1.
    input_np = np.array(
        [i * 1.0 / num_input_elems for i in range(0, num_input_elems)])
    input_np = input_np.reshape(input_shape)
    filter_np = np.array(
        [i * 1.0 / num_filter_elems for i in range(0, num_filter_elems)])
    filter_np = filter_np.reshape(filter_shape)

    if grouped_conv:
      label_map = {
          "DepthwiseConv2dNative": "cudnn_grouped_convolution",
          "DepthwiseConv2dNativeBackpropInput": "cudnn_grouped_convolution",
          "DepthwiseConv2dNativeBackpropFilter": "cudnn_grouped_convolution",
      }
    else:
      label_map = {}

    ops.reset_default_graph()
    graph = ops.get_default_graph()
    with self.session(graph=graph, use_gpu=use_gpu) as sess:
      tolerance_by_dtype = {
          dtypes.float16: 4e-0,
          dtypes.float32: 8e-4,
          dtypes.float64: 1e-12,
      }
      tolerance = tolerance_by_dtype[data_type]

      input_tensor = constant_op.constant(
          input_np, shape=input_shape, dtype=data_type, name="input")
      filter_tensor = constant_op.constant(
          filter_np, shape=filter_shape, dtype=data_type, name="filter")

      if data_format == "NCHW":
        # Transpose the NHWC input to NCHW,
        # e.g. [4, 5, 5, 48] becomes [4, 48, 5, 5], and permute the
        # expected shapes the same way.
        native_input = array_ops.transpose(input_tensor, [0, 3, 1, 2])
        nchw_order = (0, 3, 1, 2)
        input_shape = [input_shape[axis] for axis in nchw_order]
        output_shape = [output_shape[axis] for axis in nchw_order]
        strides = [1, 1, stride, stride]
      else:
        native_input = input_tensor
        strides = [1, stride, stride, 1]

      with sess.graph._kernel_label_map(label_map):
        depthwise_conv2d = nn_impl.depthwise_conv2d(
            native_input,
            filter_tensor,
            strides,
            padding,
            data_format=data_format,
            dilations=dilations,
            name="depthwise_conv2d")

      self.assertEqual(output_shape, depthwise_conv2d.get_shape())

      # Pick the tensor (and its shape) to differentiate with respect to.
      if test_input:
        wrt_tensor, wrt_shape = native_input, input_shape
      else:
        wrt_tensor, wrt_shape = filter_tensor, filter_shape

      try:
        err = gradient_checker.compute_gradient_error(
            wrt_tensor, wrt_shape, depthwise_conv2d, output_shape)
      except errors.InvalidArgumentError as e:
        # Grouped convolution kernel is only registered for cuDNN 7. Silently
        # return when we are running on an earlier version or without GPU.
        if grouped_conv:
          if e.message.startswith(
              "No OpKernel was registered to support Op "
              "'DepthwiseConv2dNative'"):
            tf_logging.warn("Skipping grouped convolution test")
            return
        raise e

      tf_logging.info(
          "data_type: %r, use_gpu: %r, grouped_conv: %r, error = %f", data_type,
          use_gpu, grouped_conv, err)
      self.assertLess(err, tolerance)
Esempio n. 15
0
  def _VerifyValues(self,
                    tensor_in_sizes,
                    filter_in_sizes,
                    stride,
                    padding,
                    data_type,
                    use_gpu,
                    grouped_conv=False,
                    data_format="NHWC",
                    dilations=None):
    """Checks both depthwise conv entry points against a NumPy reference.

    The reference comes from `_DepthwiseConv2dNumpy`. The native op is only
    built and checked when `dilations` is None.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [filter_rows, filter_cols, input_depth, depth_multiplier].
      stride: Stride.
      padding: Padding type.
      data_type: The data type to use.
      use_gpu: Whether to use GPU.
      grouped_conv: Whether to use cuDNN 7's grouped convolution.
      data_format: The data_format of the input. "NHWC" or "NCHW".
      dilations: A list of 2 elements, representing the dilations.
    """
    num_input_elems = 1
    for dim in tensor_in_sizes:
      num_input_elems *= dim
    num_filter_elems = 1
    for dim in filter_in_sizes:
      num_filter_elems *= dim
    # Values are i / size for i = 1 .. size.
    input_np = np.array(
        [i * 1.0 / num_input_elems for i in range(1, num_input_elems + 1)])
    input_np = input_np.reshape(tensor_in_sizes)
    filter_np = np.array(
        [i * 1.0 / num_filter_elems for i in range(1, num_filter_elems + 1)])
    filter_np = filter_np.reshape(filter_in_sizes)

    # The reference result is always computed in NHWC.
    nhwc_strides = [1, stride, stride, 1]
    np_result = _DepthwiseConv2dNumpy(input_np, filter_np, nhwc_strides,
                                      padding, "NHWC", dilations)

    is_nchw = data_format == "NCHW"
    check_native = dilations is None
    if grouped_conv:
      label_map = {"DepthwiseConv2dNative": "cudnn_grouped_convolution"}
    else:
      label_map = {}

    ops.reset_default_graph()
    graph = ops.get_default_graph()
    with self.session(graph=graph, use_gpu=use_gpu) as sess:
      tolerance_by_dtype = {
          dtypes.float16: 4e-2,
          dtypes.float32: 1e-5,
          dtypes.float64: 1e-12,
      }
      tolerance = tolerance_by_dtype[data_type]

      input_tensor = constant_op.constant(
          input_np, shape=tensor_in_sizes, dtype=data_type)
      filter_tensor = constant_op.constant(
          filter_np, shape=filter_in_sizes, dtype=data_type)

      if is_nchw:
        # Transpose the NHWC input to NCHW,
        # e.g. [4, 5, 5, 48] becomes [4, 48, 5, 5].
        input_tensor = array_ops.transpose(input_tensor, [0, 3, 1, 2])
        op_strides = [1, 1, stride, stride]
      else:
        op_strides = nhwc_strides

      # depthwise_conv2d_native does not support dilations except on TPUs.
      if check_native:
        with sess.graph._kernel_label_map(label_map):
          conv_native = nn_ops.depthwise_conv2d_native(
              input_tensor,
              filter_tensor,
              strides=op_strides,
              data_format=data_format,
              padding=padding)

        if is_nchw:
          # Bring the native output back from NCHW to NHWC.
          conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

        try:
          # NumPy array produced by depthwise_conv2d_native.
          native_result = self.evaluate(conv_native)
        except errors.InvalidArgumentError as e:
          # Grouped convolution kernel is only registered for cuDNN 7. Silently
          # return when we are running on an earlier version or without GPU.
          kernel_missing = e.message.startswith(
              "No OpKernel was registered to support Op "
              "'DepthwiseConv2dNative'")
          if not kernel_missing:
            raise e
          tf_logging.warn("Skipping grouped convolution test")
          return

      conv_interface = nn_impl.depthwise_conv2d(
          input_tensor,
          filter_tensor,
          strides=op_strides,
          padding=padding,
          data_format=data_format,
          dilations=dilations)
      if is_nchw:
        # Bring the interface output back from NCHW to NHWC.
        conv_interface = array_ops.transpose(conv_interface, [0, 2, 3, 1])

      # NumPy array produced by depthwise_conv2d.
      interface_result = self.evaluate(conv_interface)

    if check_native:
      self.assertAllClose(native_result, np_result, atol=tolerance, rtol=0.)
    self.assertAllClose(interface_result, np_result, atol=tolerance, rtol=0.)
  def _VerifyValues(self,
                    tensor_in_sizes,
                    filter_in_sizes,
                    stride,
                    padding,
                    data_type,
                    use_gpu,
                    grouped_conv=False,
                    data_format="NHWC"):
    """Checks depthwise_conv2d_native against nn_impl.depthwise_conv2d.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [filter_rows, filter_cols, input_depth, depth_multiplier].
      stride: Stride.
      padding: Padding type.
      data_type: The data type to use.
      use_gpu: Whether to use GPU.
      grouped_conv: Whether to use cuDNN 7's grouped convolution.
      data_format: The data_format of the input. "NHWC" or "NCHW".
    """
    num_input_elems = 1
    for dim in tensor_in_sizes:
      num_input_elems *= dim
    num_filter_elems = 1
    for dim in filter_in_sizes:
      num_filter_elems *= dim
    # Values are i / size for i = 1 .. size.
    input_values = [
        i * 1.0 / num_input_elems for i in range(1, num_input_elems + 1)
    ]
    filter_values = [
        i * 1.0 / num_filter_elems for i in range(1, num_filter_elems + 1)
    ]
    is_nchw = data_format == "NCHW"
    if grouped_conv:
      label_map = {"DepthwiseConv2dNative": "cudnn_grouped_convolution"}
    else:
      label_map = {}

    ops.reset_default_graph()
    graph = ops.get_default_graph()
    with self.session(graph=graph, use_gpu=use_gpu) as sess:
      tolerance_by_dtype = {
          dtypes.float16: 4e-2,
          dtypes.float32: 1e-5,
          dtypes.float64: 1e-12,
      }
      tolerance = tolerance_by_dtype[data_type]

      input_nhwc = constant_op.constant(
          input_values, shape=tensor_in_sizes, dtype=data_type)
      input_nhwc.set_shape(tensor_in_sizes)
      filter_tensor = constant_op.constant(
          filter_values, shape=filter_in_sizes, dtype=data_type)

      if is_nchw:
        # Transpose the NHWC input to NCHW,
        # e.g. [4, 5, 5, 48] becomes [4, 48, 5, 5].
        native_input = array_ops.transpose(input_nhwc, [0, 3, 1, 2])
        native_strides = [1, 1, stride, stride]
      else:
        native_input = input_nhwc
        native_strides = [1, stride, stride, 1]

      with sess.graph._kernel_label_map(label_map):
        conv_native = nn_ops.depthwise_conv2d_native(
            native_input,
            filter_tensor,
            strides=native_strides,
            data_format=data_format,
            padding=padding)

      if is_nchw:
        # Bring the native output back from NCHW to NHWC.
        conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

      try:
        native_result = sess.run(conv_native)
      except errors.InvalidArgumentError as e:
        # Grouped convolution kernel is only registered for cuDNN 7. Silently
        # return when we are running on an earlier version or without GPU.
        kernel_missing = e.message.startswith(
            "No OpKernel was registered to support Op "
            "'DepthwiseConv2dNative'")
        if not kernel_missing:
          raise e
        tf_logging.warn("Skipping grouped convolution test")
        return

      conv_interface = nn_impl.depthwise_conv2d(
          input_nhwc,
          filter_tensor,
          strides=[1, stride, stride, 1],
          padding=padding)
      interface_result = sess.run(conv_interface)

    max_diff = np.amax(np.absolute(native_result - interface_result))
    tf_logging.info(
        "data_type: %r, use_gpu: %r, grouped_conv: %r, max diff = %f",
        data_type, use_gpu, grouped_conv, max_diff)
    self.assertArrayNear(
        np.ravel(native_result), np.ravel(interface_result), tolerance)
    self.assertShapeEqual(native_result, conv_native)
    self.assertShapeEqual(native_result, conv_interface)
  def _VerifyValues(self,
                    tensor_in_sizes,
                    filter_in_sizes,
                    stride,
                    padding,
                    use_gpu,
                    data_format="NHWC"):
    """Checks depthwise_conv2d_native against nn_impl.depthwise_conv2d.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [filter_rows, filter_cols, input_depth, depth_multiplier].
      stride: Stride.
      padding: Padding type.
      use_gpu: Whether to use GPU.
      data_format: The data_format of the input.  "NHWC" or "NCHW".
    """
    num_input_elems = 1
    for dim in tensor_in_sizes:
      num_input_elems *= dim
    num_filter_elems = 1
    for dim in filter_in_sizes:
      num_filter_elems *= dim
    # Values are 1.0, 2.0, ..., size.
    input_values = [i * 1.0 for i in range(1, num_input_elems + 1)]
    filter_values = [i * 1.0 for i in range(1, num_filter_elems + 1)]
    is_nchw = data_format == "NCHW"

    with self.test_session(use_gpu=use_gpu) as sess:
      # NOTE(review): only the constants are created under the "neon" kernel
      # label map; the convolution op below is built outside of it -- confirm
      # that this is intended.
      neon_labels = {"DepthwiseConv2dNative": "neon"}
      with sess.graph._kernel_label_map(neon_labels):
        input_nhwc = constant_op.constant(input_values, shape=tensor_in_sizes)
        input_nhwc.set_shape(tensor_in_sizes)
        filter_tensor = constant_op.constant(
            filter_values, shape=filter_in_sizes)

      if is_nchw:
        # Transpose the NHWC input to NCHW,
        # e.g. [4, 5, 5, 48] becomes [4, 48, 5, 5].
        native_input = array_ops.transpose(input_nhwc, [0, 3, 1, 2])
        native_strides = [1, 1, stride, stride]
      else:
        native_input = input_nhwc
        native_strides = [1, stride, stride, 1]

      conv_native = nn_ops.depthwise_conv2d_native(
          native_input,
          filter_tensor,
          strides=native_strides,
          data_format=data_format,
          padding=padding)

      if is_nchw:
        # Bring the native output back from NCHW to NHWC.
        conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

      conv_interface = nn_impl.depthwise_conv2d(
          input_nhwc,
          filter_tensor,
          strides=[1, stride, stride, 1],
          padding=padding)

      native_result = sess.run(conv_native)
      interface_result = sess.run(conv_interface)

    max_diff = np.amax(np.absolute(native_result - interface_result))
    print("depthwise conv_2d: ", tensor_in_sizes, "*", filter_in_sizes,
          ", stride:", stride, ", padding: ", padding, ", max diff: ",
          max_diff)
    self.assertAllClose(
        np.ravel(native_result), np.ravel(interface_result), 1e-5)
    self.assertShapeEqual(native_result, conv_native)
    self.assertShapeEqual(native_result, conv_interface)
    def _VerifyValuesWithDilation(self,
                                  tensor_in_sizes,
                                  filter_in_sizes,
                                  stride,
                                  dilation,
                                  padding,
                                  data_type,
                                  data_format="NHWC"):
        """Compares a dilated depthwise conv under test_scope with a CPU one.

        The same `nn_impl.depthwise_conv2d` computation is built twice: once
        inside `self.test_scope()` using `data_format`, and once pinned to
        the CPU device using the default layout. Both graphs are fed identical
        inputs and their flattened outputs must agree within a relative
        tolerance that depends on `data_type`.

        Args:
          tensor_in_sizes: Input tensor dimensions in [batch, input_rows,
            input_cols, input_depth].
          filter_in_sizes: Filter tensor dimensions in [filter_rows,
            filter_cols, input_depth, depth_multiplier].
          stride: Stride, applied to both spatial dimensions.
          dilation: Dilation, applied to both spatial dimensions.
          padding: Padding type.
          data_type: The data type to use; np.float32 or np.float64.
          data_format: The data_format of the input. "NHWC" or "NCHW".
        """
        input_count = int(np.prod(tensor_in_sizes))
        filter_count = int(np.prod(filter_in_sizes))
        # Both tensors are filled with the sequence 1, 2, 3, ...
        input_values = np.arange(
            1, input_count + 1,
            dtype=np.float64).astype(data_type).reshape(tensor_in_sizes)
        filter_values = np.arange(
            1, filter_count + 1,
            dtype=np.float64).astype(data_type).reshape(filter_in_sizes)

        is_nchw = data_format == "NCHW"
        nhwc_strides = [1, stride, stride, 1]
        rates = [dilation, dilation]

        with self.session() as sess:
            is_float32 = data_type == np.float32
            if not is_float32:
                self.assertEqual(data_type, np.float64)
            # TODO(b/64210055): Tolerance for TPU is high.
            tolerance = 1e-2 if is_float32 else 1e-8

            input_ph = array_ops.placeholder(shape=tensor_in_sizes,
                                             dtype=data_type)
            filter_ph = array_ops.placeholder(shape=filter_in_sizes,
                                              dtype=data_type)

            # The placeholder is always NHWC; convert it when the op under
            # test is asked for NCHW, e.g. [4, 5, 5, 48] -> [4, 48, 5, 5].
            if is_nchw:
                device_input = array_ops.transpose(input_ph, [0, 3, 1, 2])
                device_strides = [1, 1, stride, stride]
            else:
                device_input = input_ph
                device_strides = nhwc_strides

            with self.test_scope():
                conv_native = nn_impl.depthwise_conv2d(
                    device_input,
                    filter_ph,
                    strides=device_strides,
                    rate=rates,
                    data_format=data_format,
                    padding=padding)

            if is_nchw:
                # Bring the result back to NHWC so both outputs line up.
                conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

            with ops.device("CPU"):
                # CPU only support NHWC format
                conv_interface = nn_impl.depthwise_conv2d(
                    input_ph,
                    filter_ph,
                    strides=nhwc_strides,
                    rate=rates,
                    padding=padding)

            feeds = {input_ph: input_values, filter_ph: filter_values}
            native_result = sess.run(conv_native, feeds)
            interface_result = sess.run(conv_interface, feeds)

        max_diff = np.amax(np.absolute(native_result - interface_result))
        print("data_type:", data_type, "max diff = ", max_diff)
        self.assertAllClose(np.ravel(native_result),
                            np.ravel(interface_result),
                            rtol=tolerance)
  def _VerifyValues(self,
                    tensor_in_sizes,
                    filter_in_sizes,
                    stride,
                    padding,
                    data_type,
                    use_gpu,
                    data_format="NHWC"):
    """Checks depthwise_conv2d_native against nn_impl.depthwise_conv2d.

    Builds `nn_ops.depthwise_conv2d_native` with the requested `data_format`
    and `nn_impl.depthwise_conv2d` with the default layout on the same
    constant inputs, then asserts that the flattened results are near each
    other and that both graph tensors have the shape of the native result.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [filter_rows, filter_cols, input_depth, depth_multiplier].
      stride: Stride, applied to both spatial dimensions.
      padding: Padding type.
      data_type: The data type to use; float16, float32 or float64.
      use_gpu: Whether to use GPU.
      data_format: The data_format of the input. "NHWC" or "NCHW".
    """
    input_count = int(np.prod(tensor_in_sizes))
    filter_count = int(np.prod(filter_in_sizes))
    # Both tensors are filled with the sequence 1.0, 2.0, 3.0, ...
    input_values = [float(i) for i in range(1, input_count + 1)]
    filter_values = [float(i) for i in range(1, filter_count + 1)]

    is_nchw = data_format == "NCHW"
    nhwc_strides = [1, stride, stride, 1]

    with self.test_session(use_gpu=use_gpu) as sess:
      if data_type in (dtypes.float16, dtypes.float32):
        tolerance = 1e-5
      else:
        self.assertEqual(data_type, dtypes.float64)
        tolerance = 1e-8

      input_t = constant_op.constant(
          input_values, shape=tensor_in_sizes, dtype=data_type)
      input_t.set_shape(tensor_in_sizes)
      filter_t = constant_op.constant(
          filter_values, shape=filter_in_sizes, dtype=data_type)

      # The constant is always NHWC; convert it when the native op is asked
      # for NCHW, e.g. [4, 5, 5, 48] -> [4, 48, 5, 5].
      if is_nchw:
        native_input = array_ops.transpose(input_t, [0, 3, 1, 2])
        native_strides = [1, 1, stride, stride]
      else:
        native_input = input_t
        native_strides = nhwc_strides

      conv_native = nn_ops.depthwise_conv2d_native(
          native_input,
          filter_t,
          strides=native_strides,
          data_format=data_format,
          padding=padding)

      if is_nchw:
        # Bring the result back to NHWC so both outputs line up.
        conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

      conv_interface = nn_impl.depthwise_conv2d(
          input_t, filter_t, strides=nhwc_strides, padding=padding)

      native_result = sess.run(conv_native)
      interface_result = sess.run(conv_interface)

    max_diff = np.amax(np.absolute(native_result - interface_result))
    print("data_type:", data_type, "use_gpu:", use_gpu, "max diff = ",
          max_diff)
    self.assertArrayNear(
        np.ravel(native_result), np.ravel(interface_result), tolerance)
    self.assertShapeEqual(native_result, conv_native)
    self.assertShapeEqual(native_result, conv_interface)