Example No. 1
 def test_average_pool(self):
   # TODO: fix this test. It is disabled because the hand-computed reference
   # below ignores the pads attribute (and pads=[1, 1] is incomplete for a
   # 2-D kernel, which expects four values).
   return
   device = "CUDA"
   if not supports_device(device):
     raise unittest.SkipTest(
         "Backend doesn't support device {}".format(device))
   shape = [1, 1, 40, 40]
   node_def = helper.make_node(
       "AveragePool", ["X"], ["Y"],
       kernel_shape=[1, 2],
       pads=[1, 1],
       strides=[1, 1])
   x = self._get_rnd(shape)
   output = run_node(node_def, [x], device=device)
   test_output = np.zeros(shape)
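   # Reference: a width-2 moving average along the last axis; the window
   # shrinks to 1 at the right edge. The pads attribute is not modeled here.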
   for i1 in range(0, shape[0]):
     for i2 in range(0, shape[1]):
       for j1 in range(0, shape[2]):
         for j2 in range(0, shape[3]):
           test_output[i1][i2][j1][j2] = 0
           count = 0
           for k in range(j2, min(j2 + 2, shape[3])):
             test_output[i1][i2][j1][j2] += x[i1][i2][j1][k]
             count += 1
           test_output[i1][i2][j1][j2] /= count
   np.testing.assert_almost_equal(output["Y"], test_output)
Example No. 2
 def test_conv_transpose(self):
   # TODO: fix this test. It is disabled because the hand-computed reference
   # below does not model the pads attribute, so its expected shape and
   # values disagree with the ConvTranspose specification for pads=[1, 1].
   return
   device = "CUDA"
   if not supports_device(device):
     raise unittest.SkipTest(
         "Backend doesn't support device {}".format(device))
   node_def = helper.make_node(
       "ConvTranspose", ["X", "weights"], ["Y"], pads=[1, 1])
   x_shape = [1, 5, 4]
   x = self._get_rnd(x_shape)
   weight_shape = [5, 3, 2]
   weights = self._get_rnd(weight_shape)
   output = run_node(node_def, [x, weights], device=device)
   out_shape = [x_shape[0], weight_shape[1], x_shape[2]]
   test_output = np.zeros(out_shape)
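    # Reference: for each output position h, sum x[h + d] * w[d] over the
    # kernel width d and all input channels; pads are not modeled.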
   for b in range(0, x_shape[0]):
     for m in range(0, weight_shape[1]):
       for h in range(0, x_shape[2]):
         v = 0
         for c in range(0, x_shape[1]):
           for k in range(h, min(h + weight_shape[2], x_shape[2])):
             v += x[b][c][k] * weights[c][m][k - h]
         test_output[b][m][h] = v
   np.testing.assert_almost_equal(output["Y"], test_output, decimal=5)
Example No. 3
    def c_first_cuda_only(cls, tf_func, inputs, attrs):
        """ Handle operators whose channel-first layout is only supported on
        CUDA. When running on CPU, transposes are added before and after the
        call.

        :param tf_func: Callable TensorFlow function.
        :param inputs: Input tensors.
        :param attrs: Attributes.
        :return: Tensor.
        """
        support_cuda = supports_device("CUDA")
        if not support_cuda:
            return cls._tuck_transpose(tf_func, inputs, attrs)
        return cls._run_tf_func(tf_func, inputs, attrs)
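
cls._tuck_transpose is not shown in this example. A minimal sketch of the idea, assuming 4-D NCHW inputs and a tf_func that only supports NHWC on CPU (the wrapper name and exact signature here are assumptions, not the actual implementation):

import tensorflow as tf

def tuck_transpose_sketch(tf_func, inputs, attrs):
    # NCHW -> NHWC so the op can run in the layout CPU kernels support.
    x = tf.transpose(inputs[0], perm=[0, 2, 3, 1])
    y = tf_func(x, **attrs)
    # NHWC -> NCHW to hand back the layout the caller expects.
    return tf.transpose(y, perm=[0, 3, 1, 2])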
Example No. 4
  def test_conv(self):
    device = "CUDA"
    if not supports_device(device):
      raise unittest.SkipTest(
          "Backend doesn't support device {}".format(device))

    N, C, H, W = 4, 3, 5, 5
    x_shape = [N, C, H, W]
    K, kH, kW = 6, 3, 3
    weight_shape = [K, C, kH, kW]
    node_def = helper.make_node(
        "Conv", ["X", "weights"], ["Y"],
        pads=[1, 1, 1, 1],
        kernel_shape=[kH, kW])

    x = self._get_rnd(x_shape)
    weights = self._get_rnd(weight_shape)
    output = run_node(node_def, [x, weights], device=device)

    out_shape = [N, K, H, W]
    test_output = np.zeros(out_shape)
    for n in range(N):
      for c in range(C):
        for h in range(H):
          for w in range(W):
            for k in range(K):
              for kh in range(kH):
                for kw in range(kW):
                  h_idx = h - kH // 2 + kh
                  w_idx = w - kW // 2 + kw
                  if 0 <= h_idx < H and 0 <= w_idx < W:
                    test_output[n][k][h][w] += (
                        x[n][c][h_idx][w_idx] * weights[k][c][kh][kw])

    np.testing.assert_almost_equal(output["Y"], test_output, decimal=5)
Example No. 5
  def conv(cls, node, input_dict, transpose=False):
    """ Convolution method for both conv and transposed conv.
    For transposed conv,
      the pads attr is not applied to the input; instead it declares how much
      of the output is cropped away. Here, output means the result of the
      transposed conv after output_padding (if set) has already been applied.
      So, schematically:
        output = conv_transpose_output + output_padding - pads
      And the conv_transpose_output shape is:
        conv_transpose_output_shape[i] = strides[i] * (input_shape[i] - 1) + kernel_shape[i]
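      For example (a worked instance of the formulas above): with strides =
      [2], input_shape = [5], kernel_shape = [3], pads = [1, 1], and no
      output_padding, conv_transpose_output_shape = 2 * (5 - 1) + 3 = 11, so
      the final output size is 11 - (1 + 1) = 9.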
    """
    x = input_dict[node.inputs[0]]
    x_rank = len(x.get_shape())
    x_shape = x.get_shape().as_list()
    spatial_size = x_rank - 2

    support_cuda = supports_device("CUDA")
    storage_format, compute_format = get_data_format(x_rank)
    compute_c_idx = compute_format.find("C")
    spatial_format = "".join([d for d in compute_format if d not in ["N", "C"]])

    in_weights = input_dict[node.inputs[1]]
    weights_rank = len(in_weights.get_shape())
    # Move the spatial dims to the front: Conv weights go from
    # (M x C x KH x KW) to (KH x KW x C x M); ConvTranspose weights go from
    # (C x M x KH x KW) to (KH x KW x M x C). The permutation is identical
    # in both cases.
    perm = list(range(2, weights_rank)) + [1, 0]

    if "kernel_shape" in node.attrs.keys():
      kernel_shape = node.attrs["kernel_shape"]
      assert in_weights.get_shape().as_list()[2:] == kernel_shape, (
          "kernel_shape "
          "attr of convolution does not match the actual weight "
          "passed to this operation, attr {}, actual {}").format(
              kernel_shape,
              in_weights.get_shape().as_list())

    weights = tf.transpose(in_weights, perm)
    dilations = node.attrs.get("dilations", [1] * spatial_size)
    strides = node.attrs.get("strides", [1] * spatial_size)

    pads = node.attrs.get("pads", [0, 0] * spatial_size)

    if not transpose:
      x = PadMixin.get_padding_as_op(x, pads)

    group = node.attrs.get("group", 1)

    weight_groups = tf.split(weights, num_or_size_splits=group, axis=-1)

    if support_cuda:
      xs = tf.split(x, num_or_size_splits=group, axis=1)
    else:
      x = tf.transpose(
          x, perm=get_perm_from_formats(storage_format, compute_format))
      xs = tf.split(x, num_or_size_splits=group, axis=-1)

    if transpose:
      if dilations != [1] * spatial_size:
        raise RuntimeError("Cannot set non-1 dilation for conv transpose.")
      convolved = []
      for (x, weight) in zip(xs, weight_groups):
        x_spatial_shape = [
            x_shape[storage_format.find(d)] for d in spatial_format
        ]
        weights_shape = weights.get_shape().as_list()

        # calculate output shape
        output_shape = node.attrs.get("output_shape", None)
        conv_output_shape = [x_shape[storage_format.find("N")]]
        if output_shape is None:
          conv_output_shape += [
              strides[i] * (x_spatial_shape[i] - 1) + weights_shape[i]
              for i in list(range(spatial_size))
          ]
        else:
          conv_output_shape += [
              s + pads[i] + pads[spatial_size + i]
              for i, s in enumerate(output_shape[-2:])
          ]
        conv_output_shape.insert(compute_c_idx, weights_shape[-2])

        # make strides to match input rank
        strides_full = [1] + strides
        strides_full.insert(compute_c_idx, 1)

        # get corresponding function in tf
        if spatial_size == 1:
          conv_func = tf.contrib.nn.conv1d_transpose
          strides_full = strides[0]
        elif spatial_size == 2:
          conv_func = tf.nn.conv2d_transpose
        elif spatial_size == 3:
          conv_func = tf.nn.conv3d_transpose
        else:
          raise NotImplementedError(
              "Transposed convolution for {}d is not implemented in Tensorflow".
              format(spatial_size))

        # use raw input x to do transposed conv
        conv_rs = conv_func(
            x,
            weight,
            conv_output_shape,
            strides_full,
            padding="VALID",
            data_format=compute_format)

        # pad output first by output_padding attr
        if "output_padding" in node.attrs and output_shape is None:
          output_padding = [[0, 0]
                           ] + [[0, p] for p in node.attrs["output_padding"]]
          output_padding.insert(compute_c_idx, [0, 0])
          conv_rs = tf.pad(conv_rs, output_padding)

        # remove pads set in pads attr
        conv_rs_shape = conv_rs.get_shape().as_list()
        begin = [0] + pads[:spatial_size]
        begin.insert(compute_c_idx, 0)
        size = [
            s if d in ["N", "C"] else s - pads[spatial_format.find(d)] -
            pads[spatial_format.find(d) + spatial_size]
            for d, s in zip(compute_format, conv_rs_shape)
        ]
        conv_rs = tf.slice(conv_rs, begin=begin, size=size)

        convolved.append(conv_rs)
    else:
      if group != weights.shape[-1]:
        convolved = [
            tf.nn.convolution(
                x,
                weight,
                "VALID",
                strides=strides,
                dilation_rate=dilations,
                data_format=compute_format)
            for (x, weight) in zip(xs, weight_groups)
        ]
      else:
        # Convert to a depthwise convolution when the number of groups
        # equals the number of channels.
        convolved = [
            tf.nn.depthwise_conv2d(
                x,
                # [filter_height, filter_width, in_channels, multiplier (=1)]
                tf.transpose(weights, [0, 1, 3, 2]),
                # strides must be a length-4 list matching the input rank
                strides=_get_sequence(strides, 2, channel_index=3,
                                      name="strides"),
                padding="VALID",
                # NOTE: `rate` is the dilation argument in TF 1.x; TF 2.x
                # renamed it to `dilations`.
                rate=dilations,
                data_format=compute_format,
            )
        ]

    if len(node.inputs) == 2:
      if support_cuda:
        output = tf.concat(convolved, axis=1)
      else:
        output = tf.concat(convolved, axis=-1)
        output = tf.transpose(
            output, perm=get_perm_from_formats(compute_format, storage_format))
    else:
      bias = input_dict[node.inputs[2]]
      bias = cls.explicit_broadcast([x, bias], compute_c_idx)

      if support_cuda:
        output = tf.concat(convolved, axis=1)
        output = tf.add(output, bias)
      else:
        output = tf.concat(convolved, axis=-1)
        output = tf.add(output, bias)
        output = tf.transpose(
            output, perm=get_perm_from_formats(compute_format, storage_format))

    return [output]
Example No. 6
    def conv(cls, node, input_dict, transpose=False):
        """ Convolution method for both conv and transposed conv.
    For transposed conv,
      the pads attr is not applied to the input; instead it declares how much
      of the output is cropped away. Here, output means the result of the
      transposed conv after output_padding (if set) has already been applied.
      So, schematically:
        output = conv_transpose_output + output_padding - pads
      And the conv_transpose_output shape is:
        conv_transpose_output_shape[i] = strides[i] * (input_shape[i] - 1) + kernel_shape[i]
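      For example (a worked instance of the formulas above): with strides =
      [2], input_shape = [5], kernel_shape = [3], pads = [1, 1], and no
      output_padding, conv_transpose_output_shape = 2 * (5 - 1) + 3 = 11, so
      the final output size is 11 - (1 + 1) = 9.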
    """
        x = input_dict[node.inputs[0]]
        x_rank = len(x.get_shape())
        x_shape = x.get_shape().as_list()
        spatial_size = x_rank - 2

        support_cuda = supports_device("CUDA")
        storage_format, compute_format = get_data_format(x_rank)
        compute_c_idx = compute_format.find("C")
        spatial_format = "".join(
            [d for d in compute_format if d not in ["N", "C"]])

        in_weights = input_dict[node.inputs[1]]
        weights_rank = len(in_weights.get_shape())
        # Move the spatial dims to the front: Conv weights go from
        # (M x C x KH x KW) to (KH x KW x C x M); ConvTranspose weights go
        # from (C x M x KH x KW) to (KH x KW x M x C). The permutation is
        # identical in both cases.
        perm = list(range(2, weights_rank)) + [1, 0]

        if "kernel_shape" in node.attrs.keys():
            kernel_shape = node.attrs["kernel_shape"]
            assert in_weights.get_shape().as_list()[2:] == kernel_shape, (
                "kernel_shape "
                "attr of convolution does not match the actual weight "
                "passed to this operation, attr {}, actual {}").format(
                    kernel_shape,
                    in_weights.get_shape().as_list())

        weights = tf.transpose(in_weights, perm)
        dilations = node.attrs.get("dilations", [1] * spatial_size)
        strides = node.attrs.get("strides", [1] * spatial_size)

        pads = node.attrs.get("pads", [0, 0] * spatial_size)

        # First handle the case where auto_pad is absent or NOTSET
        if "auto_pad" not in node.attrs or node.attrs["auto_pad"] == "NOTSET":
            if not transpose:
                if pads != [0, 0] * spatial_size:
                    x = PadMixin.get_padding_as_op(x, pads)
                pad_mode = "VALID"
            else:
                pad_mode = "NOTSET"
        # Otherwise use auto_pad to set up pad_mode
        elif node.attrs["auto_pad"] == "SAME_UPPER":
            pad_mode = "SAME"
        elif node.attrs["auto_pad"] == "VALID":
            pad_mode = "VALID"
        elif node.attrs["auto_pad"] == "SAME_LOWER":
            pad_mode = PAD_TF_INCOMPATIBLE
        else:
            raise ValueError("Invalid auto_pad attribute: {}".format(
                node.attrs["auto_pad"]))

        # Currently auto_pad = SAME_LOWER is not supported
        if pad_mode is PAD_TF_INCOMPATIBLE:
            if transpose:
                exception.OP_UNSUPPORTED_EXCEPT(
                    "ConvTranspose with auto_pad `SAME_LOWER`", "Tensorflow")
            else:
                exception.OP_UNSUPPORTED_EXCEPT(
                    "Conv with auto_pad `SAME_LOWER`", "Tensorflow")

        group = node.attrs.get("group", 1)

        weight_groups = tf.split(weights, num_or_size_splits=group, axis=-1)

        if support_cuda:
            xs = tf.split(x, num_or_size_splits=group, axis=1)
        else:
            x = tf.transpose(x,
                             perm=get_perm_from_formats(
                                 storage_format, compute_format))
            xs = tf.split(x, num_or_size_splits=group, axis=-1)

        if transpose:
            if dilations != [1] * spatial_size:
                raise RuntimeError(
                    "Cannot set non-1 dilation for conv transpose.")
            convolved = []
            for (x, weight) in zip(xs, weight_groups):
                x_spatial_shape = [
                    x_shape[storage_format.find(d)] for d in spatial_format
                ]
                weights_shape = weights.get_shape().as_list()
                output_shape = node.attrs.get("output_shape", None)
                conv_output_shape = [x_shape[storage_format.find("N")]]

                # calculate output shape
                if pad_mode == "NOTSET":
                    if output_shape is None:
                        conv_output_shape += [
                            strides[i] * x_spatial_shape[i] +
                            max(weights_shape[i] - strides[i], 0)
                            for i in list(range(spatial_size))
                        ]
                    else:
                        conv_output_shape += [
                            s + pads[i] + pads[spatial_size + i]
                            for i, s in enumerate(output_shape[-2:])
                        ]
                    conv_output_shape.insert(compute_c_idx, weights_shape[-2])

                    # make strides to match input rank
                    strides_full = [1] + strides
                    strides_full.insert(compute_c_idx, 1)

                    # get corresponding function in tf
                    if spatial_size == 1:
                        conv_func = tf.nn.conv1d_transpose
                        strides_full = strides[0]
                    elif spatial_size == 2:
                        conv_func = tf.nn.conv2d_transpose
                    elif spatial_size == 3:
                        conv_func = tf.nn.conv3d_transpose
                    else:
                        raise NotImplementedError(
                            "Transposed convolution for {}d is not implemented in Tensorflow"
                            .format(spatial_size))

                    # use raw input x to do transposed conv
                    conv_rs = conv_func(x,
                                        weight,
                                        conv_output_shape,
                                        strides_full,
                                        padding="VALID",
                                        data_format=compute_format)

                    # pad output first by output_padding attr
                    if "output_padding" in node.attrs and output_shape is None:
                        output_padding = [[
                            0, 0
                        ]] + [[0, p] for p in node.attrs["output_padding"]]
                        output_padding.insert(compute_c_idx, [0, 0])
                        conv_rs = tf.pad(conv_rs, output_padding)

                    # remove pads set in pads attr
                    conv_rs_shape = conv_rs.get_shape().as_list()
                    begin = [0] + pads[:spatial_size]
                    begin.insert(compute_c_idx, 0)
                    size = [
                        s if d in ["N", "C"] else s -
                        pads[spatial_format.find(d)] -
                        pads[spatial_format.find(d) + spatial_size]
                        for d, s in zip(compute_format, conv_rs_shape)
                    ]
                    conv_rs = tf.slice(conv_rs, begin=begin, size=size)

                    convolved.append(conv_rs)
                else:
                    # No need to check pads if auto_pad is specifically provided.
                    # The assumption is that once auto_pad is provided as either VALID
                    # or SAME_UPPER (SAME_LOWER is currently not supported in TF) the
                    # output_shape will always be inferred. That is, the output_shape
                    # and output_padding will not be used in this case.
                    if pad_mode == "VALID":
                        conv_output_shape += [
                            strides[i] * (x_spatial_shape[i] - 1) +
                            weights_shape[i] for i in list(range(spatial_size))
                        ]
                    else:
                        conv_output_shape += [
                            strides[i] * x_spatial_shape[i]
                            for i in list(range(spatial_size))
                        ]
                    conv_output_shape.insert(compute_c_idx, weights_shape[-2])

                    # make strides to match input rank
                    strides_full = [1] + strides
                    strides_full.insert(compute_c_idx, 1)

                    # get corresponding function in tf
                    if spatial_size == 1:
                        conv_func = tf.nn.conv1d_transpose
                        strides_full = strides[0]
                    elif spatial_size == 2:
                        conv_func = tf.nn.conv2d_transpose
                    elif spatial_size == 3:
                        conv_func = tf.nn.conv3d_transpose
                    else:
                        raise NotImplementedError(
                            "Transposed convolution for {}d is not implemented in Tensorflow"
                            .format(spatial_size))

                    # use raw input x to do transposed conv
                    conv_rs = conv_func(x,
                                        weight,
                                        conv_output_shape,
                                        strides_full,
                                        padding=pad_mode,
                                        data_format=compute_format)
                    convolved.append(conv_rs)

        else:
            convolved = [
                tf.nn.convolution(x,
                                  weight,
                                  padding=pad_mode,
                                  strides=strides,
                                  dilations=dilations,
                                  data_format=compute_format)
                for (x, weight) in zip(xs, weight_groups)
            ]

        if len(node.inputs) == 2:
            if support_cuda:
                output = tf.concat(convolved, axis=1)
            else:
                output = tf.concat(convolved, axis=-1)
                output = tf.transpose(output,
                                      perm=get_perm_from_formats(
                                          compute_format, storage_format))
        else:
            bias = input_dict[node.inputs[2]]
            bias = cls.explicit_broadcast([x, bias], compute_c_idx)

            if support_cuda:
                output = tf.concat(convolved, axis=1)
                output = tf.add(output, bias)
            else:
                output = tf.concat(convolved, axis=-1)
                output = tf.add(output, bias)
                output = tf.transpose(output,
                                      perm=get_perm_from_formats(
                                          compute_format, storage_format))

        return [output]
Example No. 7
    def pool(cls, node, input_dict, pool_func, pooling_type, strict=True):
        x = input_dict[node.inputs[0]]
        x_rank = len(x.get_shape())
        x_shape = x.get_shape().as_list()
        spatial_size = x_rank - 2

        support_cuda = supports_device("CUDA")
        storage_format, compute_format = get_data_format(x_rank)

        kernel_shape = node.attrs["kernel_shape"]
        strides = node.attrs.get("strides", [1] * spatial_size)
        pads = node.attrs.get("pads", None)
        pad = PAD_TF_INCOMPATIBLE
        # from version 7
        count_include_pad = node.attrs.get("count_include_pad", 0)

        # If explicit padding is specified, try to map it onto a TensorFlow
        # padding mode:
        if pads is not None:
            pad = cls._get_tf_pad(x_shape[2:], kernel_shape, strides, pads)
        else:
            # Neither pads nor auto_pad is specified, so assume no padding.
            if "auto_pad" not in node.attrs:
                pad = "VALID"
            # We consult auto_pad if pad is not specified and auto_pad
            # is available.
            else:
                if node.attrs["auto_pad"] == "SAME_UPPER":
                    pad = "SAME"
                elif node.attrs["auto_pad"] == "VALID":
                    pad = "VALID"
                elif node.attrs["auto_pad"] == "SAME_LOWER":
                    pad = PAD_TF_INCOMPATIBLE
                if count_include_pad == 1:
                    _, pads = cls._pool_get_shapes(node.attrs["auto_pad"],
                                                   x_shape[2:], kernel_shape,
                                                   strides,
                                                   [0] * spatial_size * 2)

        if strict and count_include_pad == 0:
            if pad is PAD_TF_INCOMPATIBLE:
                return cls._compatibility_pool(node, input_dict, pooling_type)
        else:
            if pads != [0] * spatial_size * 2:
                x = PadMixin.get_padding_as_op(x, pads)
            pad = "VALID"

        if support_cuda:
            pooled = pool_func(x,
                               kernel_shape,
                               padding=pad,
                               strides=strides,
                               data_format=compute_format)
        else:
            x = tf.transpose(x,
                             perm=get_perm_from_formats(
                                 storage_format, compute_format))
            pooled = pool_func(x,
                               kernel_shape,
                               padding=pad,
                               strides=strides,
                               data_format=compute_format)
            pooled = tf.transpose(pooled,
                                  perm=get_perm_from_formats(
                                      compute_format, storage_format))

        return [pooled]
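
cls._get_tf_pad is not shown in this example. A minimal sketch of the idea, assuming the ONNX pads layout [x1_begin, x2_begin, ..., x1_end, x2_end] (the body below is an assumption, not the actual implementation): explicit pads map to a TensorFlow mode only when they are all zero ("VALID") or exactly match what "SAME" would produce.

def get_tf_pad_sketch(input_spatial, kernel_shape, strides, pads):
    spatial = len(input_spatial)
    if pads == [0] * spatial * 2:
        return "VALID"
    # Padding that "SAME" would produce (SAME_UPPER split: the extra pixel,
    # if any, goes at the end).
    begins, ends = [], []
    for i in range(spatial):
        out = (input_spatial[i] + strides[i] - 1) // strides[i]  # ceil(in/s)
        total = max(
            (out - 1) * strides[i] + kernel_shape[i] - input_spatial[i], 0)
        begins.append(total // 2)
        ends.append(total - total // 2)
    return "SAME" if pads == begins + ends else PAD_TF_INCOMPATIBLE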
Example No. 8
    onnx_model = tensorflow_graph_to_onnx_model(graph_def, backend_output_names)

    tf_rep = prepare(onnx_model)
    output_onnx_tf = tf_rep.run(backend_feed_dict)

    assert len(output_tf) == len(output_onnx_tf)
    for tf_output, onnx_backend_output in zip(output_tf, output_onnx_tf):
      np.testing.assert_allclose(
          tf_output, onnx_backend_output, rtol=1e-3, atol=1e-7)

  return do_test_expected


dir_path = os.path.dirname(os.path.realpath(__file__))
with open(dir_path + "/test_model.yaml", 'r') as config:
  try:
    for test_model in yaml.safe_load_all(config):
      for device in test_model["devices"]:
        if supports_device(device):
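          # e.g. an entry {"name": "mnist", "devices": ["CPU"]} (illustrative
          # values) yields a method named "test_mnist_CPU" on TestModel.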
          test_method = create_test(test_model)
          test_name_parts = ["test", test_model["name"], device]
          test_name = "_".join(map(str, test_name_parts))
          test_method.__name__ = test_name
          setattr(TestModel, test_method.__name__, test_method)
  except yaml.YAMLError as exception:
    print(exception)

if __name__ == '__main__':
  unittest.main()
Example No. 9
    def pool(cls, node, input_dict, pool_func, pooling_type, strict=True):
        x = input_dict[node.inputs[0]]
        x_rank = len(x.get_shape())
        x_shape = x.get_shape().as_list()
        spatial_size = x_rank - 2

        if spatial_size > 3:
            exception.OP_UNSUPPORTED_EXCEPT(
                "MaxPool with {}D input".format(x_rank), "Tensorflow")

        support_cuda = supports_device("CUDA")
        storage_format, compute_format = get_data_format(x_rank)

        kernel_shape = node.attrs["kernel_shape"]
        strides = node.attrs.get("strides", [1] * spatial_size)
        pads = node.attrs.get("pads", None)
        pad = PAD_TF_INCOMPATIBLE
        # from version 7
        count_include_pad = node.attrs.get("count_include_pad", 0)

        auto_pad = node.attrs.get("auto_pad", "NOTSET")
        # if auto_pad is NOTSET, we check pads
        if auto_pad == "NOTSET":
            # If padding is specified, try to recover it from explicit padding
            # specification to tensorflow padding mode:
            if pads is not None:
                pad = cls._get_tf_pad(x_shape[2:], kernel_shape, strides, pads)
            else:
                pad = "VALID"
        else:
            if auto_pad == "SAME_UPPER":
                pad = "SAME"
            elif auto_pad == "VALID":
                pad = "VALID"
            elif auto_pad == "SAME_LOWER":
                pad = PAD_TF_INCOMPATIBLE
            if count_include_pad == 1:
                _, pads = cls._pool_get_shapes(auto_pad, x_shape[2:],
                                               kernel_shape, strides,
                                               [0] * spatial_size * 2)

        if pooling_type in ("AVG", "MAX"):
            if strict and count_include_pad == 0:
                if pad is PAD_TF_INCOMPATIBLE:
                    return cls._compatibility_pool(node, input_dict,
                                                   pooling_type)
            else:
                if pads != [0] * spatial_size * 2:
                    x = PadMixin.get_padding_as_op(x, pads)
                pad = "VALID"
        elif pooling_type == "MAX_WITH_ARGMAX":
            if pad is PAD_TF_INCOMPATIBLE:
                exception.OP_UNSUPPORTED_EXCEPT(
                    "MaxPoolWithArgmax with pad is None or incompatible mode",
                    "Tensorflow")
            if x_rank != 4:
                exception.OP_UNSUPPORTED_EXCEPT(
                    "MaxPoolWithArgmax with {}D input".format(x_rank),
                    "Tensorflow")
            if node.attrs.get("storage_order", 0) != 0:
                exception.OP_UNSUPPORTED_EXCEPT(
                    "MaxPoolWithArgmax with column major", "Tensorflow")

            need_trans = storage_format != "NHWC"
            if need_trans:
                x = tf.transpose(x,
                                 perm=get_perm_from_formats(
                                     storage_format, "NHWC"))
            pooled, argmax = pool_func(x, [1] + kernel_shape + [1],
                                       padding=pad,
                                       strides=[1] + strides + [1])
            if need_trans:
                pooled = tf.transpose(pooled,
                                      perm=get_perm_from_formats(
                                          "NHWC", storage_format))
                argmax = tf.transpose(argmax,
                                      perm=get_perm_from_formats(
                                          "NHWC", storage_format))

            return [pooled, argmax]

        if support_cuda:
            pooled = pool_func(x,
                               kernel_shape,
                               padding=pad,
                               strides=strides,
                               data_format=compute_format)
        else:
            x = tf.transpose(x,
                             perm=get_perm_from_formats(
                                 storage_format, compute_format))
            pooled = pool_func(x,
                               kernel_shape,
                               padding=pad,
                               strides=strides,
                               data_format=compute_format)
            pooled = tf.transpose(pooled,
                                  perm=get_perm_from_formats(
                                      compute_format, storage_format))

        return [pooled]