Ejemplo n.º 1
0
    def version_13(cls, node, **kwargs):
        """Dequantize ``x`` as ``(x - x_zero_point) * x_scale`` in float32.

        Args:
            node: ONNX node. ``node.inputs`` names x, x_scale and optionally
                x_zero_point; ``node.attrs`` may carry ``axis`` (default 1).
            **kwargs: must contain ``tensor_dict`` mapping input names to
                tensors.

        Returns:
            A one-element list holding the dequantized float32 tensor.
        """
        tensor_dict = kwargs["tensor_dict"]
        x = tensor_dict[node.inputs[0]]
        x_scale = tensor_dict[node.inputs[1]]
        axis = node.attrs.get("axis", 1)

        # Remember the incoming dtype before casting: once x is float32 the
        # int32 test guarding the zero-point subtraction could never be true.
        x_dtype = x.dtype
        x = tf.cast(x, tf.float32)

        x_shape = tf_shape(x)
        x_rank = len(x_shape)
        per_axis = len(tf_shape(x_scale)) == 1

        # Reshape process is needed for per-axis dequantization
        # when scale is a 1-D tensor
        if per_axis:
            # A negative axis counts from the back; normalise it so that the
            # right number of leading/trailing 1s is generated.
            if axis < 0:
                axis += x_rank
            shape_broadcast = ([1] * axis + [x_shape[axis]] +
                               [1] * (x_rank - axis - 1))
            x_scale = tf.reshape(x_scale, shape_broadcast)

        # int32 input carries no zero point (it is supposed to be 0),
        # so the subtraction is skipped for it.
        if len(node.inputs) == 3 and x_dtype != tf.int32:
            x_zero_point = tf.cast(tensor_dict[node.inputs[2]], tf.float32)
            if per_axis:
                x_zero_point = tf.reshape(x_zero_point, shape_broadcast)
            x = tf.subtract(x, x_zero_point)

        return [tf.multiply(x, x_scale)]
Ejemplo n.º 2
0
    def version_13(cls, node, **kwargs):
        """Quantize ``x`` as ``saturate(round(x / y_scale) + y_zero_point)``.

        Args:
            node: ONNX node. ``node.inputs`` names x, y_scale and optionally
                y_zero_point; ``node.attrs`` may carry ``axis`` (default 1).
            **kwargs: must contain ``tensor_dict`` mapping input names to
                tensors.

        Returns:
            A one-element list holding the quantized tensor. Its dtype is
            the dtype of y_zero_point, or uint8 when no zero point is given.
        """
        tensor_dict = kwargs["tensor_dict"]
        x = tf.cast(tensor_dict[node.inputs[0]], tf.float32)
        y_scale = tensor_dict[node.inputs[1]]
        axis = node.attrs.get("axis", 1)

        x_shape = tf_shape(x)
        x_rank = len(x_shape)
        per_axis = len(tf_shape(y_scale)) == 1

        # Reshape process is needed for per-axis quantization
        # when scale is a 1-D tensor
        if per_axis:
            # A negative axis counts from the back; normalise it so that the
            # right number of leading/trailing 1s is generated.
            if axis < 0:
                axis += x_rank
            shape_broadcast = ([1] * axis + [x_shape[axis]] +
                               [1] * (x_rank - axis - 1))
            y_scale = tf.reshape(y_scale, shape_broadcast)

        y = tf.round(tf.divide(x, y_scale))
        if len(node.inputs) == 3:
            y_zero_point = tensor_dict[node.inputs[2]]
            # the output takes the dtype of the zero point
            y_dtype = y_zero_point.dtype
            y_zero_point = tf.cast(y_zero_point, tf.float32)
            if per_axis:
                y_zero_point = tf.reshape(y_zero_point, shape_broadcast)
            y = tf.add(y, y_zero_point)
        else:  # y_zero_point default dtype = uint8
            y_dtype = tf.uint8

        return [tf.saturate_cast(y, y_dtype)]
Ejemplo n.º 3
0
    def create_nodes(boxes, scores, max_output_boxes_per_class, iou_threshold,
                     score_threshold, result):
      """Run non-max suppression for every (batch, class) pair.

      For each batch entry and each class, tf.image.non_max_suppression is
      applied to that batch's boxes and that class's scores. Every selected
      box index becomes one int64 row ``[batch_i, class_j, box_index]``.

      ``result`` is replaced by the rows of the first (batch 0, class 0)
      pair and the rows of every later pair are concatenated below it.
      The accumulated tensor is returned.
      """
      # get number of batches in boxes
      num_batches = tf_shape(boxes)[0]
      for batch_i in tf.range(num_batches):
        # get boxes in batch_i only
        tf_boxes = tf.squeeze(tf.gather(boxes, [batch_i]), axis=0)
        # get scores of all classes in batch_i only
        batch_i_scores = tf.squeeze(tf.gather(scores, [batch_i]), axis=0)
        # get number of classes in batch_i only
        num_classes = tf_shape(batch_i_scores)[0]
        for class_j in tf.range(num_classes):
          # get scores in class_j for batch_i only
          tf_scores = tf.squeeze(tf.gather(batch_i_scores, [class_j]), axis=0)
          # get the selected boxes indices
          selected_indices = tf.image.non_max_suppression(
              tf_boxes, tf_scores, max_output_boxes_per_class, iou_threshold,
              score_threshold)
          # add batch and class information into the indices:
          # turn the index vector into a column, then prepend one column
          # holding class_j and another one holding batch_i
          output = tf.transpose([tf.cast(selected_indices, dtype=tf.int64)])
          paddings = tf.constant([[0, 0], [1, 0]])
          output = tf.pad(output,
                          paddings,
                          constant_values=tf.cast(class_j, dtype=tf.int64))
          output = tf.pad(output,
                          paddings,
                          constant_values=tf.cast(batch_i, dtype=tf.int64))
          # tf.function will auto convert "result" from variable to placeholder
          # therefore don't need to use assign here
          result = output if tf.equal(batch_i, 0) and tf.equal(
              class_j, 0) else tf.concat([result, output], 0)

      return result
Ejemplo n.º 4
0
    def chk_idx_out_of_bounds(cls, data, indices, batch_dims=0):
        """Check indices out of bounds for ScatterND and GatherND.

        In the Tensorflow GPU version, if an out of bound index is found,
        a 0 is stored in the corresponding output value for GatherND;
        and the index is ignored for ScatterND/TensorScatterNDUpdate.
        But the ONNX spec states that it is an error if any index values
        are out of bounds. Therefore the converter needs to run this
        function to verify all the indices are in bounds before sending
        them to Tensorflow. If out of bound is detected then the caller
        of this function needs to throw an InvalidArgumentError exception.

        Args:
            data: tensor that is going to be indexed.
            indices: index tensor whose last dimension holds index tuples.
            batch_dims: number of leading batch dimensions of ``data`` that
                the index tuples do not address (default 0).

        Returns:
            A boolean scalar tensor, True when component ``i`` of every
            index tuple lies in ``[-d, d)`` where ``d`` is the size of
            ``data`` dimension ``i + batch_dims``.
        """
        data_shape = tf_shape(data)
        indices_shape = tf_shape(indices)
        if batch_dims > 0:
            # Collapse every leading dimension so that each row is one index
            # tuple. Letting reshape infer the row count also covers indices
            # that have extra dimensions between the batch dimensions and
            # the last one.
            indices = tf.reshape(indices, [-1, indices_shape[-1]])

        def _chk_idx_out_of_bounds(i, result):
            # all values stored at position i of the index tuples
            indices_i = tf.transpose(indices)[i]
            limit_i = tf.cast(data_shape, indices.dtype)[i + batch_dims]
            cond1 = tf.greater_equal(indices_i, tf.negative(limit_i))
            cond2 = tf.less(indices_i, limit_i)
            result = tf.reduce_all(tf.logical_and(cond1, cond2))
            return i + 1, result

        # stop as soon as one position is found to be out of bounds
        _, result = tf.while_loop(
            lambda i, result: tf.logical_and(tf.less(i, indices_shape[-1]),
                                             result), _chk_idx_out_of_bounds,
            [tf.zeros([], tf.int64), True])
        return result
Ejemplo n.º 5
0
    def version_11(cls, node, **kwargs):
        """Split the input along ``axis`` and pack the pieces as a sequence.

        Args:
            node: ONNX node. ``node.inputs`` names the input tensor and
                optionally a ``split`` tensor; ``node.attrs`` may carry
                ``axis`` (default 0) and ``keepdims`` (default 1).
            **kwargs: must contain ``tensor_dict`` mapping input names to
                tensors.

        Returns:
            A one-element list holding a ragged tensor whose leading
            dimension enumerates the pieces.

        Raises:
            RuntimeError: when ``split`` is given but is not 1-D.
        """
        # split the input first
        tensor_dict = kwargs["tensor_dict"]
        original_input = tensor_dict[node.inputs[0]]
        dtype = original_input.dtype
        axis = node.attrs.get("axis", 0)
        keepdims = node.attrs.get("keepdims", 1)

        if len(node.inputs) > 1:
            split = tensor_dict[node.inputs[1]]
            split_shape = tf_shape(split)
            # check if the split is 1-d or scalar
            if split_shape.shape[0] != 1:
                # A scalar split would require building the split sizes
                # [n, n, ..., m] (m = size mod n) from ONNX inputs. tf.split
                # does not take an unknown shape tensor for its
                # num_or_size_splits input, and that shape is unknown during
                # graph generation time, causing a Tensorflow exception.
                # Due to this limitation in tf.split, the option is
                # currently not supported.
                raise RuntimeError(
                    "Split to sequence with scalar split is not supported due to API limitations."
                )
            split_inputs = tf.split(original_input, split, axis=axis)
        else:
            # split is not provided, use default 1
            input_shape = tf_shape(original_input)
            split_sizes = tf.tile([1], tf.reshape(input_shape[axis], [1]))
            split_inputs = tf.split(original_input, split_sizes, axis=axis)
            if keepdims == 0:
                # Drop only the split axis; squeezing without an axis would
                # also remove every other dimension of size 1.
                split_inputs = [
                    tf.squeeze(piece, axis=axis) for piece in split_inputs
                ]

        # start from an empty sequence, so that an empty list of pieces
        # still yields a defined result
        output_seq = tf.ragged.constant([], dtype=dtype)

        # insert tensors at the end of sequence
        for piece in split_inputs:
            input_tensor = tf.expand_dims(piece, 0)
            if output_seq.shape[0] == 0:
                output_seq = tf.RaggedTensor.from_tensor(input_tensor)
            else:
                output_seq = tf.concat([output_seq, input_tensor], axis=0)

        return [output_seq]
Ejemplo n.º 6
0
 def process_neg_idx(cls, data, indices):
   """Map negative indices onto their non-negative equivalents.

   GatherND and ScatterND/TensorScatterNDUpdate in Tensorflow do not
   support negative indices, so they are converted here before being
   handed to Tensorflow: the size of the addressed dimension is added
   to every index component and the floor modulo by that size is taken.
   """
   # number of data dimensions addressed by one index tuple
   index_depth = tf_shape(indices)[-1]
   dim_sizes = tf.cast(tf_shape(data)[:index_depth], indices.dtype)
   shifted = tf.add(indices, dim_sizes)
   return tf_floormod(shifted, dim_sizes)
Ejemplo n.º 7
0
    def version_10(cls, node, **kwargs):
        """Resize the two trailing (H, W) axes of an NCHW input by ``scales``.

        The mode attribute "linear" selects bilinear resizing, anything
        else nearest neighbour. Input dtypes listed in ``cls.x_cast_map``
        are cast to the mapped dtype before resizing, and the result is
        cast back to the input dtype unless that dtype is float32.

        Returns:
            A one-element list holding the resized NCHW tensor.
        """
        # x, roi and scales are all in NCHW format
        tensor_dict = kwargs["tensor_dict"]
        x = tensor_dict[node.inputs[0]]
        scales = tensor_dict[node.inputs[1]]
        x_dtype = x.dtype

        # get the new size from scales
        spatial_dims = tf.cast(tf_shape(x)[2:], scales.dtype)
        new_h_w_shape = tf.cast(scales[2:] * spatial_dims, tf.int32)

        is_linear = node.attrs.get("mode", "nearest").lower() == "linear"
        method = (tf.image.ResizeMethod.BILINEAR
                  if is_linear else tf.image.ResizeMethod.NEAREST_NEIGHBOR)

        # process tf.image.resize unsupported datatype for x
        if x_dtype in cls.x_cast_map:
            x = tf.cast(x, cls.x_cast_map[x_dtype])

        # tf.image.resize only supports channel last data, so go
        # NCHW -> NHWC, resize, and then NHWC -> NCHW again.
        channels_last = tf.transpose(x, perm=[0, 2, 3, 1])
        resized = tf.image.resize(channels_last,
                                  size=new_h_w_shape,
                                  method=method)
        output = tf.transpose(resized, perm=[0, 3, 1, 2])

        # cast output back to the original x.dtype
        if x_dtype is not tf.float32:
            output = tf.cast(output, x_dtype)

        return [output]
Ejemplo n.º 8
0
 def _common(cls, node, **kwargs):
     """Split the input tensor into ``len(node.outputs)`` parts.

     The part sizes are taken from the ``split`` attribute, else from a
     second input, else the tensor is divided evenly. They are handed on
     as ``num_or_size_splits`` so that explicitly requested, possibly
     uneven, sizes are honoured.

     Returns:
         The list produced from ``cls.make_tensor_from_onnx_node``.

     Raises:
         ValueError: when no sizes are given, the input shape is fully
             defined and the axis cannot be divided evenly.
     """
     tensor_dict = kwargs["tensor_dict"]
     x = tensor_dict[node.inputs[0]]
     attrs = copy.deepcopy(node.attrs)
     num_outputs = len(node.outputs)

     if "split" in node.attrs:
         split = attrs["split"]
     elif len(node.inputs) == 2:  # since version 1
         split = tensor_dict[node.inputs[1]]
     else:
         if x.get_shape().is_fully_defined():
             # the shape is known up front, so an uneven division can be
             # reported here
             axis = attrs.get("axis", 0)
             axis = axis if axis >= 0 else len(x.get_shape()) + axis
             per_part = tf_shape(x)[axis] / num_outputs
             if int(per_part) != per_part:
                 raise ValueError("Split can not be evenly divided.")
         # an integer asks for that many equally sized parts
         split = num_outputs
     attrs["num_or_size_splits"] = split
     return list(
         cls.make_tensor_from_onnx_node(node,
                                        inputs=[x],
                                        attrs=attrs,
                                        **kwargs))
Ejemplo n.º 9
0
    def version_9(cls, node, **kwargs):
        """Upsample the two trailing (H, W) axes of the input by ``scales``.

        A runtime assertion requires the first two scale entries to be 1.
        The target size and the resize method are stored in the attrs as
        "size" and "method", and the node (with its scales input removed)
        is handed to ``cls.make_tensor_from_onnx_node``.

        Returns:
            A one-element list holding the result of that call.
        """
        tensor_dict = kwargs["tensor_dict"]
        x = tensor_dict[node.inputs[0]]
        x_shape = tf_shape(x)
        attrs = copy.deepcopy(node.attrs)
        scales = tensor_dict[node.inputs[1]]

        n_scale_is_one = tf.equal(scales[0], 1)
        c_scale_is_one = tf.equal(scales[1], 1)
        assert_n_c_scale_is_one = tf.Assert(
            tf.logical_and(n_scale_is_one, c_scale_is_one), [scales])

        with tf.control_dependencies([assert_n_c_scale_is_one]):
            spatial_dims = tf.cast(x_shape[2:], scales.dtype)
            new_h_w_shape = tf.cast(scales[2:] * spatial_dims, tf.int32)

            requested_mode = attrs.get("mode", "nearest").lower()
            if requested_mode in ("bilinear", "linear"):
                method = tf.image.ResizeMethod.BILINEAR
            else:
                method = tf.image.ResizeMethod.NEAREST_NEIGHBOR

            attrs["size"] = new_h_w_shape
            attrs["method"] = method

            # Remove scale.
            upsample_node = copy.deepcopy(node)
            del upsample_node.inputs[1]
            resized = cls.make_tensor_from_onnx_node(upsample_node,
                                                     attrs=attrs,
                                                     c_last_only=True,
                                                     **kwargs)
            return [resized]
Ejemplo n.º 10
0
  def __init__(self,
               input,
               kernel_shape,
               strides,
               dilations,
               padding="VALID",
               ceil_mode=False,
               count_include_pad=False,
               pooling_type="MAX"):
    """Store the pooling parameters and derive shape information.

    Args:
      input: value to pool; it is converted with tf.convert_to_tensor.
      kernel_shape: kernel size per spatial axis; its length defines the
        number of spatial axes.
      strides: stored as given.
      dilations: stored as given.
      padding: a string such as "VALID" or a list of explicit pads.
      ceil_mode: stored as given.
      count_include_pad: stored as given.
      pooling_type: pooling kind; stored upper-cased.
    """
    self.input = tf.convert_to_tensor(input)

    self.kernel_shape = kernel_shape
    self.strides = strides
    self.dilations = dilations
    self.padding = padding
    self.is_explicit_padding = isinstance(padding, list)
    self.ceil_mode = ceil_mode
    self.count_include_pad = count_include_pad
    self.pooling_type = pooling_type.upper()

    self.is_known_shape = self.input.shape.is_fully_defined()
    self.spatial_size = len(kernel_shape)
    self.input_rank = self.spatial_size + 2

    # if the rank is not defined, set it to the calculated input_rank
    # rank should be known for ops like tf.gather_nd
    # (the converted tensor is used throughout, so that inputs which are
    # not tensors themselves are handled as well)
    if self.input.shape.rank is None:
      self.input.set_shape([None] * self.input_rank)
    self.orig_input_shape = tf_shape(self.input)
    self.input_shape = self.orig_input_shape

    # max pooling pads with the smallest representable value so that the
    # padding never wins; compare the normalised (upper-cased) type
    if self.pooling_type.startswith("MAX"):
      self.padding_constant = self.input.dtype.min
    else:
      self.padding_constant = 0
Ejemplo n.º 11
0
  def _pad_input(self):
    """
    Pad the input according to the parameters.

    Updates ``self.pads`` and, when padding is actually applied, also
    ``self.input`` and ``self.input_shape``.

    Returns:
      The tuple ``(self.input, self.pads)`` on every path.
    """
    # check if we need to do any padding at all
    all_zero_pads = (isinstance(self.padding, list) and
                     self.padding == [0] * self.spatial_size * 2)
    if not self.ceil_mode and (all_zero_pads or self.padding == "VALID"):
      self.pads = np.array([0] * self.spatial_size * 2)
      return (self.input, self.pads)

    in_spatial_shape = self.input_shape[1:self.spatial_size + 1]
    pads = self._calc_pads(in_spatial_shape)

    if self.is_known_shape and np.count_nonzero(pads) == 0:
      self.pads = pads
      return (self.input, pads)

    # no padding on the first and last axis, a (begin, end) pair for
    # every spatial axis in between
    tf_paddings = [[0, 0]]
    tf_paddings += [[pads[i * 2], pads[i * 2 + 1]]
                    for i in range(self.spatial_size)]
    tf_paddings += [[0, 0]]

    self.input = tf.pad(
        self.input,
        tf_paddings,
        mode='CONSTANT',
        constant_values=self.padding_constant)
    # update input shape and pads values
    self.input_shape = tf_shape(self.input)
    self.pads = pads
    # return the same tuple as the early exits instead of None
    return (self.input, self.pads)
Ejemplo n.º 12
0
  def version_10(cls, node, **kwargs):
    """Resize the two trailing (H, W) axes of an NCHW input by ``scales``.

    The mode attribute "linear" selects bilinear resizing, anything else
    nearest neighbour.

    Returns:
      A one-element list holding the resized NCHW tensor.
    """
    # x, roi and scales are all in NCHW format
    tensor_dict = kwargs["tensor_dict"]
    x = tensor_dict[node.inputs[0]]
    scales = tensor_dict[node.inputs[1]]

    # target spatial size = scale * current spatial size, as int32
    spatial_dims = tf.cast(tf_shape(x)[2:], scales.dtype)
    new_h_w_shape = tf.cast(scales[2:] * spatial_dims, tf.int32)

    is_linear = node.attrs.get("mode", "nearest").lower() == "linear"
    method = (tf.image.ResizeMethod.BILINEAR
              if is_linear else tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    # tf.image.resize only supports channel last data, so go
    # NCHW -> NHWC, resize, and then NHWC -> NCHW again.
    channels_last = tf.transpose(x, perm=[0, 2, 3, 1])
    resized = tf.image.resize(channels_last, size=new_h_w_shape, method=method)

    return [tf.transpose(resized, perm=[0, 3, 1, 2])]
Ejemplo n.º 13
0
    def version_14(cls, node, **kwargs):
        """Keep the upper or lower triangular part of ``x``.

        ``k`` (second input, 0 when absent) shifts the boundary diagonal
        and the ``upper`` attribute (default 1) picks the triangle. The
        result is built from tf.linalg.band_part, either directly or by
        subtracting the complementary band from ``x``.

        Returns:
            A one-element list holding the resulting tensor.
        """
        x = kwargs["tensor_dict"][node.inputs[0]]
        if len(node.inputs) >= 2:
            k = kwargs["tensor_dict"][node.inputs[1]]
            # clamp k so that it stays within
            # [-x_shape[-2], x_shape[-1]]
            x_shape = tf_shape(x, dtype=k.dtype)
            if k > x_shape[-1]:
                k = x_shape[-1]
            elif k < 0 - x_shape[-2]:
                k = 0 - x_shape[-2]
        else:
            k = tf.constant(0, dtype=tf.int64)
        # -1 is passed to band_part on the side where nothing is cut off
        keep_triangle = tf.constant(-1, dtype=k.dtype)
        upper = node.attrs.get("upper", 1)

        if upper == 1:
            if k > 0:
                # remove the lower part and the first k - 1 superdiagonals
                return [
                    tf.subtract(x, tf.linalg.band_part(x, keep_triangle,
                                                       k - 1))
                ]
            else:
                # keep -k subdiagonals together with the upper part
                return [tf.linalg.band_part(x, -k, keep_triangle)]
        else:
            if k >= 0:
                # keep the lower part together with k superdiagonals
                return [tf.linalg.band_part(x, keep_triangle, k)]
            else:
                # remove the upper part and the first -1 - k subdiagonals
                return [
                    tf.subtract(x, tf.linalg.band_part(x, -1 - k,
                                                       keep_triangle))
                ]
Ejemplo n.º 14
0
    def max_unpool(cls, node, input_dict):
        """MaxUnpooling operation.

        Unpools the first input with the indices given as second input.
        The result is padded with zeros when an output shape (third input)
        or a "pads" attribute is available.

        Returns:
            A one-element list holding the unpooled tensor.
        """
        x = input_dict[node.inputs[0]]
        ind = input_dict[node.inputs[1]]
        has_output_shape = len(node.inputs) > 2
        output_shape = (input_dict.get(node.inputs[2], None)
                        if has_output_shape else None)

        kernel_shape = node.attrs["kernel_shape"]
        # if strides are not provided default is 1 along each spatial axis
        strides = node.attrs.get("strides", [1] * len(kernel_shape))
        pads = node.attrs.get("pads", None)

        input_shape = tf_shape(x)
        spatial_shape = cls._get_default_shape(input_shape, kernel_shape,
                                               strides)
        # first two dimensions are taken over from the input unchanged
        default_shape = [input_shape[0], input_shape[1]] + spatial_shape

        unpooled = cls._unpool(x, ind, default_shape)

        # an explicit output shape takes precedence over the pads attribute
        if output_shape is not None:
            pads = cls._get_pads_from_output_shape(unpooled, output_shape)
        if pads is not None:
            unpooled = cls._pad_output(unpooled, pads, 0)

        return [unpooled]
Ejemplo n.º 15
0
 def process_neg_idx_along_axis(cls, data, axis, indices):
     """Map negative indices along ``axis`` onto non-negative ones.

     ScatterND/TensorScatterNDUpdate in Tensorflow does not support
     negative indices, so they are converted here before being handed
     to Tensorflow: the size of ``data`` along ``axis`` is added to every
     index and the floor modulo by that size is taken.
     """
     axis_size = tf.cast(tf_shape(data)[axis], indices.dtype)
     shifted = tf.add(indices, axis_size)
     return tf.math.floormod(shifted, axis_size)
Ejemplo n.º 16
0
 def process_neg_idx(cls, data, indices, batch_dims=0):
     """Map negative indices onto their non-negative equivalents.

     GatherND and ScatterND/TensorScatterNDUpdate in Tensorflow do not
     support negative indices, so they are converted here before being
     handed to Tensorflow. Each index component is shifted by the size of
     the data dimension it addresses (dimensions after the first
     ``batch_dims`` ones) and reduced with a floor modulo by that size.
     """
     data_shape = tf_shape(data)
     # length of one index tuple: read from the static shape when the data
     # shape is fully defined, otherwise from tf_shape
     if data.get_shape().is_fully_defined():
         index_depth = indices.get_shape().as_list()[-1]
     else:
         index_depth = tf_shape(indices)[-1]

     if batch_dims > 0:
         addressed_dims = data_shape[batch_dims:index_depth + batch_dims]
     else:
         addressed_dims = data_shape[:index_depth]

     max_i = tf.cast(addressed_dims, indices.dtype)
     shifted = tf.add(indices, max_i)
     return tf.math.floormod(shifted, max_i)
Ejemplo n.º 17
0
    def version_11(cls, node, **kwargs):
        """Write ``updates`` into ``data`` at positions given along ``axis``.

        Args:
            node: ONNX node. ``node.inputs`` names data, indices and
                updates; ``node.attrs`` may carry ``axis`` (default 0).
            **kwargs: must contain ``tensor_dict`` mapping input names to
                tensors.

        Returns:
            A one-element list holding the result of
            tf.tensor_scatter_nd_update. The op depends on an assertion
            that all indices are in bounds.
        """
        axis = node.attrs.get("axis", 0)
        data = kwargs["tensor_dict"][node.inputs[0]]
        indices = kwargs["tensor_dict"][node.inputs[1]]
        updates = kwargs["tensor_dict"][node.inputs[2]]

        # process negative axis. The static rank is used so that axis stays
        # a plain Python int: it indexes a Python list further down, which
        # a tensor-valued axis cannot do in graph mode.
        if axis < 0:
            axis += len(data.shape)

        # check whether any indices are out of bounds
        result = cls.chk_idx_out_of_bounds_along_axis(data, axis, indices)
        msg = 'ScatterElements indices are out of bounds, please double check the indices and retry.'
        with tf.control_dependencies(
            [tf.compat.v1.assert_equal(result, True, message=msg)]):
            # process negative indices
            indices = cls.process_neg_idx_along_axis(data, axis, indices)

            # Calculate shape of the tensorflow version of indices tensor.
            sparsified_dense_idx_shape = tf_shape(updates)

            # Move on to convert ONNX indices to tensorflow indices in 2 steps:
            #
            # Step 1:
            #   What would the index tensors look like if updates are all
            #   dense? In other words, produce a coordinate tensor for updates:
            #
            #   coordinate[i, j, k ...] = [i, j, k ...]
            #   where the shape of "coordinate" tensor is same as that of updates.
            #
            # Step 2:
            #   But the coordinate tensor needs some correction because coord
            #   vector at position axis is wrong (since we assumed update is dense,
            #   but it is not at the axis specified).
            #   So we update coordinate vector tensor elements at position=axis with
            #   the sparse coordinate indices.

            idx_tensors_per_axis = tf.meshgrid(*[
                tf.range(dim, dtype=tf.dtypes.int64)
                for dim in sparsified_dense_idx_shape
            ],
                                               indexing='ij')
            # The grid is int64; cast the indices to match, since
            # tf.concat requires a single dtype.
            idx_tensors_per_axis[axis] = tf.cast(indices, tf.dtypes.int64)
            dim_expanded_idx_tensors_per_axis = [
                tf.expand_dims(idx, axis=-1) for idx in idx_tensors_per_axis
            ]
            coordinate = tf.concat(dim_expanded_idx_tensors_per_axis, axis=-1)

            # Now the coordinate tensor is in the shape
            # [updates.shape, updates.rank]
            # we need it to flattened into the shape:
            # [product(updates.shape), updates.rank]
            indices = tf.reshape(coordinate, [-1, tf.rank(data)])
            updates = tf.reshape(updates, [-1])

            return [tf.tensor_scatter_nd_update(data, indices, updates)]
Ejemplo n.º 18
0
    def max_unpool(cls, node, input_dict):
        """MaxUnpooling operation.

        Unpools the first input with the indices given as second input.
        When the storage format reported by get_data_format is not "NHWC",
        both are transposed to "NHWC" for the unpooling and the result is
        transposed back. The result is padded with zeros when an output
        shape (third input) or a "pads" attribute is available.

        Returns:
            A one-element list holding the unpooled tensor.
        """
        x = input_dict[node.inputs[0]]
        ind = input_dict[node.inputs[1]]
        has_output_shape = len(node.inputs) > 2
        output_shape = (input_dict.get(node.inputs[2], None)
                        if has_output_shape else None)

        kernel_shape = node.attrs["kernel_shape"]
        spatial_size = len(kernel_shape)
        storage_format, _ = get_data_format(spatial_size + 2)

        # if strides are not provided default is 1 along each spatial axis
        strides = node.attrs.get("strides", [1] * spatial_size)
        pads = node.attrs.get("pads", None)

        # the shape is read before any transposition of x
        input_shape = tf_shape(x)
        spatial_shape = cls._get_default_shape(input_shape, kernel_shape,
                                               strides)
        # default_shape to NHWC storage format
        default_shape = [input_shape[0]] + spatial_shape + [input_shape[1]]

        need_trans = storage_format != "NHWC"
        if need_trans:
            to_nhwc = get_perm_from_formats(storage_format, "NHWC")
            x = tf.transpose(x, perm=to_nhwc)
            ind = tf.transpose(ind, perm=to_nhwc)

        unpooled = cls._unpool(x, ind, default_shape)

        if need_trans:
            from_nhwc = get_perm_from_formats("NHWC", storage_format)
            unpooled = tf.transpose(unpooled, perm=from_nhwc)

        # an explicit output shape takes precedence over the pads attribute
        if output_shape is not None:
            pads = cls._get_pads_from_output_shape(unpooled, output_shape)
        if pads is not None:
            unpooled = cls._pad_output(unpooled, pads, 0)

        return [unpooled]
Ejemplo n.º 19
0
    def _common(cls, node, **kwargs):
        """Prepare the OneHot inputs and delegate to the generic builder.

        Reads indices, depth and the (off, on) value pair from
        ``kwargs["tensor_dict"]``, normalises axis, dtypes and negative
        indices, and sets the "dtype" and "axis" attrs.

        Returns:
            A one-element list holding the result of
            ``cls.make_tensor_from_onnx_node``.
        """
        attrs = copy.deepcopy(node.attrs)
        tensor_dict = kwargs["tensor_dict"]
        indices = tensor_dict[node.inputs[0]]
        depth = tensor_dict[node.inputs[1]]
        values = tensor_dict[node.inputs[2]]

        # process negative axis
        axis = attrs.get("axis", -1)
        if axis < 0:
            axis = len(tf_shape(indices)) + axis + 1

        # cast indices to tf.int64 and depth to tf.int32 if dtype is not
        # supported natively by Tensorflow. It is fairly safe since indices
        # and depth are integers
        if indices.dtype not in [tf.uint8, tf.int32, tf.int64]:
            indices = tf.cast(indices, tf.int64)
        if depth.dtype not in [tf.int32]:
            depth = tf.cast(depth, tf.int32)

        # depth can be either a scalar or a 1D tensor of size 1 according
        # to ONNX schema, although operators doc states only scalar.
        # So we support both now.
        if len(tf_shape(depth)) == 1:
            depth = tf.squeeze(depth)

        # process negative indices
        indices = cls.process_neg_indices(depth, indices)

        # the third input holds the off value first, the on value second
        off_value = values[0]
        on_value = values[1]
        attrs["dtype"] = on_value.dtype
        attrs["axis"] = axis

        one_hot = cls.make_tensor_from_onnx_node(
            node,
            inputs=[indices, depth, on_value, off_value],
            attrs=attrs,
            **kwargs)
        return [one_hot]
Ejemplo n.º 20
0
    def _common(cls, node, **kwargs):
        """Prepare the OneHot inputs and delegate to the generic builder.

        Reads indices, depth and the (off, on) value pair from
        ``kwargs["tensor_dict"]``, normalises axis, dtypes (through
        ``cls.indices_cast_map`` / ``cls.depth_cast_map``) and negative
        indices, and sets the "dtype" and "axis" attrs.

        Returns:
            A one-element list holding the result of
            ``cls.make_tensor_from_onnx_node``.
        """
        attrs = copy.deepcopy(node.attrs)
        tensor_dict = kwargs["tensor_dict"]
        indices = tensor_dict[node.inputs[0]]
        depth = tensor_dict[node.inputs[1]]
        values = tensor_dict[node.inputs[2]]

        # process negative axis
        axis = attrs.get("axis", -1)
        if axis < 0:
            axis = len(tf_shape(indices)) + axis + 1

        # process tf.one_hot unsupported datatype for indices
        if indices.dtype in cls.indices_cast_map:
            indices = tf.cast(indices, cls.indices_cast_map[indices.dtype])

        # process tf.one_hot unsupported datatype for depth
        if depth.dtype in cls.depth_cast_map:
            depth = tf.cast(depth, cls.depth_cast_map[depth.dtype])

        # depth can be either a scalar or a 1D tensor of size 1 according
        # to ONNX schema, although operators doc states only scalar.
        # So we support both now.
        if len(tf_shape(depth)) == 1:
            depth = tf.squeeze(depth)

        # process negative indices
        indices = cls.process_neg_indices(depth, indices)

        # the third input holds the off value first, the on value second
        off_value = values[0]
        on_value = values[1]
        attrs["dtype"] = on_value.dtype
        attrs["axis"] = axis

        one_hot = cls.make_tensor_from_onnx_node(
            node,
            inputs=[indices, depth, on_value, off_value],
            attrs=attrs,
            **kwargs)
        return [one_hot]
Ejemplo n.º 21
0
 def version_10(cls, node, **kwargs):
     """Build the infinity check, optionally for one sign only.

     When "detect_negative" is 0 the input is clamped from below at zero,
     when "detect_positive" is 0 it is clamped from above at zero, before
     being handed to ``cls.make_tensor_from_onnx_node``.

     Returns:
         A one-element list holding the result of that call.
     """
     attrs = node.attrs
     inp = kwargs["tensor_dict"][node.inputs[0]]
     zero = tf.zeros(tf_shape(inp), inp.dtype)

     # detecting only positive infinity, zero out elements < 0
     if attrs.get("detect_negative", 1) == 0:
         inp = tf.maximum(zero, inp)
     # detecting only negative infinity, zero out elements > 0
     if attrs.get("detect_positive", 1) == 0:
         inp = tf.minimum(zero, inp)

     detected = cls.make_tensor_from_onnx_node(node, inputs=[inp], **kwargs)
     return [detected]
Ejemplo n.º 22
0
 def chk_idx_out_of_bounds_along_axis(cls, data, axis, indices):
     """Check indices out of bounds for ScatterElement.

     In the Tensorflow GPU version, if an out of bound index is found,
     the index is ignored for ScatterND/TensorScatterNDUpdate.
     But the ONNX spec states that it is an error if any index values
     are out of bounds. Therefore the converter needs to run this
     function to verify all the indices are in bounds along the
     axis before sending them to Tensorflow. If out of bound is detected
     then the caller of this function needs to throw an
     InvalidArgumentError exception.

     Returns an elementwise boolean tensor that is True where the index
     lies in ``[-d, d)``, ``d`` being the size of ``data`` along ``axis``.
     """
     limit = tf.cast(tf_shape(data), indices.dtype)[axis]
     not_below = tf.greater_equal(indices, tf.negative(limit))
     not_above = tf.less(indices, limit)
     return tf.logical_and(not_below, not_above)
Ejemplo n.º 23
0
 def _common(cls, node, **kwargs):
   """Build the arg-max op, honouring select_last_index and keepdims.

   With "select_last_index" set, the input is reversed along the axis,
   the op is applied to the reversed tensor and the resulting positions
   are mapped back. With "keepdims" equal to 1 (default) the reduced axis
   is re-inserted with size 1.

   Returns a one-element list holding the index tensor.
   """
   attrs = node.attrs
   axis = attrs.get("axis", 0)

   if attrs.get("select_last_index", 0) == 0:
     arg_max = cls.make_tensor_from_onnx_node(node, **kwargs)
   else:
     # reverse the input and apply argmax on that to get last occurrence of max
     flipped = tf.reverse(kwargs["tensor_dict"][node.inputs[0]], axis=[axis])
     flipped_arg_max = cls.make_tensor_from_onnx_node(node,
                                                      inputs=[flipped],
                                                      **kwargs)
     # adjust indices to account for the reverse
     arg_max = tf_shape(flipped)[axis] - flipped_arg_max - 1

   if attrs.get("keepdims", 1) != 1:
     return [arg_max]
   return [tf.expand_dims(arg_max, axis=axis)]
Ejemplo n.º 24
0
      def convert_NHWC_indices_to_NCHW_indices(argmax):
        """Convert flattened NHWC argmax indices into NCHW indices.

        Notation:
          i - index in NCHW
          I - index in NHWC
          C - number of channels
          b - batch = I // CHW
          c - channel = I % C
          H - height
          W - width
          I = i - c(HW - 1) + (C - 1)(i - bCHW - cHW)
          i = (I + c(HW - 1) + (C - 1)(bCHW + cHW))/C

        A per-batch offset of b * CHW is added to the converted index
        at the end.
        """
        # x_shape will always be in NCHW format here,
        # because maxpool_with_argmax only support 2d input
        dims = tf_shape(x)
        batch, channels, height, width = dims[0], dims[1], dims[2], dims[3]
        plane = tf.math.multiply(height, width)
        volume = tf.math.multiply(channels, plane)

        batch_idx = tf.math.floordiv(argmax, volume)
        channel_idx = tf.math.floormod(argmax, channels)

        # c(HW - 1)
        channel_term = tf.math.multiply(channel_idx,
                                        tf.math.subtract(plane, 1))
        # (C - 1)(bCHW + cHW)
        offset_term = tf.math.multiply(
            tf.math.subtract(channels, 1),
            tf.math.add(tf.math.multiply(batch_idx, volume),
                        tf.math.multiply(channel_idx, plane)))
        numerator = tf.math.add(tf.math.add(argmax, channel_term),
                                offset_term)
        new_ind = tf.math.floordiv(numerator, channels)

        # add batch dimension into the argmax index
        batch_offsets = tf.math.multiply(
            tf.range(batch, dtype=new_ind.dtype), volume)
        # append trailing axes so the offsets broadcast against new_ind
        for _ in range(new_ind.shape.rank - 1):
          batch_offsets = tf.expand_dims(batch_offsets, -1)

        return tf.math.add(new_ind, batch_offsets)
Ejemplo n.º 25
0
    def version_10(cls, node, **kwargs):
        """Resize the (H, W) axes of a 4-D input, inferring the data layout.

        The layout is derived from ``scales``: a channel scale of 1 at
        position 1 is treated as NCHW, otherwise the input is handled as
        NHWC. A runtime assertion requires scales[0] to be 1 and one of
        scales[1] / scales[3] to be 1. The mode attribute "linear" selects
        bilinear resizing, anything else nearest neighbour.

        Returns:
            A one-element list holding the resized tensor.
        """
        tensor_dict = kwargs["tensor_dict"]
        x = tensor_dict[node.inputs[0]]
        x_shape = tf_shape(x)
        scales = tensor_dict[node.inputs[1]]

        n_in_scales_is_one = tf.equal(scales[0], 1)
        c_in_scales_is_one = tf.logical_or(tf.equal(scales[1], 1),
                                           tf.equal(scales[3], 1))
        scales_are_valid = tf.logical_and(n_in_scales_is_one,
                                          c_in_scales_is_one)
        assert_n_c_in_scales_are_ones = tf.Assert(scales_are_valid, [scales])

        with tf.control_dependencies([assert_n_c_in_scales_are_ones]):
            x_in_NCHW_format = tf.equal(scales[1], 1)
            # pick the spatial entries that belong to the detected layout
            h_w_scale = tf.where(x_in_NCHW_format, scales[2:], scales[1:3])
            h_w_shape = tf.where(x_in_NCHW_format, x_shape[2:], x_shape[1:3])
            new_h_w_shape = tf.cast(
                h_w_scale * tf.cast(h_w_shape, scales.dtype), tf.int32)

            is_linear = node.attrs.get("mode", "nearest").lower() == "linear"
            method = (tf.image.ResizeMethod.BILINEAR if is_linear else
                      tf.image.ResizeMethod.NEAREST_NEIGHBOR)

            def resize_channels_last(image):
                return tf.image.resize(image,
                                       size=new_h_w_shape,
                                       method=method)

            def resize_channels_first(image):
                # NCHW -> NHWC, resize, NHWC -> NCHW
                nhwc = tf.transpose(image, perm=[0, 2, 3, 1])
                resized = resize_channels_last(nhwc)
                return tf.transpose(resized, perm=[0, 3, 1, 2])

            output = tf.cond(x_in_NCHW_format,
                             lambda: resize_channels_first(x),
                             lambda: resize_channels_last(x))

            return [output]
Ejemplo n.º 26
0
  def _common(cls, node, **kwargs):
    """Build the output for a shape query, optionally sliced to [start, end).

    For handlers whose SINCE_VERSION is below 15 the result of
    `cls.make_tensor_from_onnx_node` is returned unchanged. From version 15
    on, the optional `start` / `end` attributes select a contiguous slice
    of that result.

    Negative `start` / `end` count from the back. After that adjustment
    both are clamped to [0, rank], so out-of-range values never reach
    `tf.slice`, and an `end` that is not greater than `start` yields an
    empty result instead of a negative slice size.

    Args:
      node: ONNX node; `inputs[0]` names the input tensor, `attrs` may
        carry the integer attributes `start` and `end`.
      **kwargs: must contain `tensor_dict`, mapping input names to tensors.

    Returns:
      A single-element list holding the (possibly sliced) result tensor.
    """
    if cls.SINCE_VERSION < 15:
      return [cls.make_tensor_from_onnx_node(node, **kwargs)]

    x = kwargs["tensor_dict"][node.inputs[0]]
    x_shape = tf_shape(x)
    x_rank = len(x_shape)

    start = node.attrs.get("start", 0)
    if start < 0:
      start += x_rank
    # Clamp to [0, rank] in case start is still out of range
    start = min(max(start, 0), x_rank)

    end = node.attrs.get("end", x_rank)
    if end < 0:
      end += x_rank
    # Clamp to [0, rank] in case end is still out of range
    end = min(max(end, 0), x_rank)

    result = cls.make_tensor_from_onnx_node(node, **kwargs)

    # tf.slice treats a size of -1 as "everything that is left", so a
    # negative length must be turned into an explicit empty slice.
    length = max(end - start, 0)

    return [tf.slice(result, [start], [length])]
Ejemplo n.º 27
0
  def version_13(cls, node, **kwargs):
    """Reduce the input over the axes supplied as an optional second input.

    The axes come from `node.inputs[1]` (when present) instead of from an
    attribute. All supplied axes are forwarded to the reduction; an absent
    or empty axes input means "no axes given".

    Args:
      node: ONNX node. `inputs[0]` names the data tensor and the optional
        `inputs[1]` the axes tensor. The attributes read here are
        `noop_with_empty_axes` (default 0) and `keepdims` (default 1).
      **kwargs: must contain `tensor_dict`, mapping input names to tensors.

    Returns:
      A single-element list. It holds the unmodified input when no axes
      are given and `noop_with_empty_axes` is set; otherwise the result of
      `cls.make_tensor_from_onnx_node` called with `axis` and `keepdims`.
    """
    tensor_dict = kwargs["tensor_dict"]
    x = tensor_dict[node.inputs[0]]
    # Work on a copy so the attributes stored on the node stay untouched
    attrs = copy.deepcopy(node.attrs)
    noop_with_empty_axes = attrs.pop("noop_with_empty_axes", 0)
    axis = None

    if len(node.inputs) > 1:
      axes = tensor_dict[node.inputs[1]]
      axes_shape = tf_shape(axes)
      if len(axes_shape) > 1:
        axis = axes
      else:
        # Forward the whole axes tensor, not just its first element, so
        # that reductions over several axes are honoured. An empty axes
        # tensor keeps axis as None.
        axis = axes if axes_shape[0] != 0 else axis

    # return the input tensor when axis is None and noop_with_empty_axes is True
    if axis is None and noop_with_empty_axes:
      return [x]

    attrs["axis"] = axis
    # https://github.com/onnx/onnx/issues/585
    attrs["keepdims"] = attrs.pop("keepdims", 1) == 1
    return [
        cls.make_tensor_from_onnx_node(node, inputs=[x], attrs=attrs, **kwargs)
    ]
Ejemplo n.º 28
0
  def conv(cls, node, input_dict, transpose=False):
    """ Convolution method for both conv and transposed conv

    For transposed conv,
      Attr pads is not used for input, but declares how much output is padded.
      Here, output means output from transposed conv which already pad output_padding if set.
      So the pseudo explanation for output should be:
        output = conv_transpose_output + output_padding - pads
      And conv_transpose_output shape should be:
        conv_transpose_output_shape[i] = strides[i] * (input_shape[i] - 1) + kernel_shape[i]

    Args:
      node: ONNX node. inputs[0] names the data tensor, inputs[1] the
        weights and the optional inputs[2] the bias. The attributes read
        here are kernel_shape, dilations, strides, pads, auto_pad, group,
        output_shape and output_padding.
      input_dict: mapping from input name to tensor.
      transpose: build a transposed convolution when True.

    Returns:
      A single-element list holding the output tensor. Unless the
      configured device is 'CUDA', the result is transposed from the
      compute format back to the storage format.

    Raises:
      ValueError: on an unknown auto_pad value, or an unexpected compute
        format in the depthwise path.
      RuntimeError: when a transposed conv is given dilations other than 1.
      NotImplementedError: for transposed conv with more than 3 spatial
        dimensions.
    """
    x = input_dict[node.inputs[0]]
    x_rank = len(x.get_shape())
    x_shape = tf_shape(x, tf.int32)
    spatial_size = x_rank - 2

    storage_format, compute_format = get_data_format(x_rank)
    compute_c_idx = compute_format.find("C")
    spatial_format = "".join([d for d in compute_format if d not in ["N", "C"]])

    in_weights = input_dict[node.inputs[1]]
    weights_rank = len(in_weights.get_shape())
    # Move the two leading weight axes behind the kernel axes, swapped:
    #   conv:            (M x C x KH x KW) -> (KH x KW X C X M)
    #   transposed conv: (C x M x KH x KW) -> (KH x KW X M X C)
    # The permutation is the same in both cases.
    perm = list(range(2, weights_rank)) + [1, 0]

    if "kernel_shape" in node.attrs:
      kernel_shape = node.attrs["kernel_shape"]
      if in_weights.get_shape().is_fully_defined():
        assert in_weights.get_shape().as_list()[2:] == kernel_shape, (
            "kernel_shape "
            "attr of convolution does not match the actual weight "
            "passed to this operation, attr {}, actual {}").format(
                kernel_shape,
                in_weights.get_shape().as_list())
    else:
      kernel_shape = tf_shape(in_weights, tf.int32)[2:]

    weights = tf.transpose(in_weights, perm)
    dilations = node.attrs.get("dilations", [1] * spatial_size)
    strides = node.attrs.get("strides", [1] * spatial_size)

    pads = node.attrs.get("pads", [0, 0] * spatial_size)

    # Check auto_pad nonexistent or NOTSET first
    if "auto_pad" not in node.attrs or node.attrs["auto_pad"] == "NOTSET":
      if not transpose:
        # Explicit pads are applied to the input up front, so the
        # convolution itself can run unpadded.
        if pads != [0, 0] * spatial_size:
          x = PadMixin.get_padding_as_op(x, pads)
        pad_mode = "VALID"
      else:
        pad_mode = "NOTSET"
    # Then we use auto_pad to setup pad_mode
    elif node.attrs["auto_pad"] == "SAME_UPPER":
      pad_mode = "SAME"
    elif node.attrs["auto_pad"] == "VALID":
      pad_mode = "VALID"
    elif node.attrs["auto_pad"] == "SAME_LOWER":
      pad_mode = PAD_TF_INCOMPATIBLE
    else:
      raise ValueError("Invalid auto_pad attribute: {}".format(
          node.attrs["auto_pad"]))

    # Currently auto_pad = SAME_LOWER is not supported
    if pad_mode is PAD_TF_INCOMPATIBLE:
      if transpose:
        exception.OP_UNSUPPORTED_EXCEPT(
            "ConvTranspose with auto_pad `SAME_LOWER`", "Tensorflow")
      else:
        exception.OP_UNSUPPORTED_EXCEPT("Conv with auto_pad `SAME_LOWER`",
                                        "Tensorflow")

    group = node.attrs.get("group", 1)
    weight_shape = weights.get_shape().as_list()
    # Is this convolution depthwise we can support?
    depthwise = (x_rank == 4 and len(weight_shape) == 4 and group != 1 and
                 not transpose and not (None in weight_shape))
    if depthwise and isinstance(x_shape, np.ndarray):
      # With a statically known input shape, only treat the op as
      # depthwise when there is exactly one group per input channel.
      depthwise = bool(group == x_shape[1])

    if depthwise is True:
      # Depthwise convolution.
      # The convolution kernel layout in tf.depthwise_conv is:
      # [filter_height, filter_width, in_channels, channel_multiplier]
      # Weight is now (KH x KW X C/g X M), or more precisely, (KH x KW X C/g X (g * M/g)),
      # we reshape it to (KH x KW x C x M/g)
      # NOTE: Assuming weight has fixed shape.

      depthwise_filter_shape = weight_shape[0:2] + [
          -1, weight_shape[3] // group
      ]
      weights = tf.reshape(weights, depthwise_filter_shape)

      if not sys_config.device == 'CUDA':
        # transpose input to NHWC layout
        x = tf.transpose(x,
                         perm=get_perm_from_formats(storage_format,
                                                    compute_format))
      weight_groups = [weights]
      xs = [x]
    else:
      # Grouped convolution is emulated with one convolution per group:
      # split the weights on the output-channel axis and the input on its
      # channel axis.
      weight_groups = tf.split(weights, num_or_size_splits=group, axis=-1)
      if sys_config.device == 'CUDA':
        if group == 1:
          xs = [x]
        else:
          xs = tf.split(x, num_or_size_splits=group, axis=1)
      else:
        x = tf.transpose(x,
                         perm=get_perm_from_formats(storage_format,
                                                    compute_format))
        if group == 1:
          xs = [x]
        else:
          xs = tf.split(x, num_or_size_splits=group, axis=-1)

    if transpose:
      if dilations != [1] * spatial_size:
        raise RuntimeError("Cannot set non-1 dilation for conv transpose.")
      convolved = []
      # this is a workaround for tensorflow AutoGraph not detecting
      # correctly x. This is fixed in tf>=2.2.0
      x = None
      for (x, weight) in zip(xs, weight_groups):
        x_spatial_shape = [
            x_shape[storage_format.find(d)] for d in spatial_format
        ]
        weights_shape = tf_shape(weights, tf.int32)
        output_shape = node.attrs.get("output_shape", None)
        conv_output_shape = [x_shape[storage_format.find("N")]]

        # calculate output shape
        if pad_mode == "NOTSET":
          if output_shape is None:
            conv_output_shape += [
                strides[i] * x_spatial_shape[i] - strides[i] +
                (kernel_shape[i] - 1) * dilations[i] + 1
                for i in range(spatial_size)
            ]
          else:
            # Take the trailing spatial_size entries so that 1-D and 3-D
            # transposed convolutions get the right number of spatial
            # sizes, whether output_shape lists only the spatial dims or
            # the full shape.
            conv_output_shape += [
                s + pads[i] + pads[spatial_size + i]
                for i, s in enumerate(output_shape[-spatial_size:])
            ]
          conv_output_shape.insert(compute_c_idx, weights_shape[-2])

          # make strides to match input rank
          strides_full = [1] + strides
          strides_full.insert(compute_c_idx, 1)

          # get corresponding function in tf
          if spatial_size == 1:
            conv_func = tf.nn.conv1d_transpose
            strides_full = strides[0]
          elif spatial_size == 2:
            conv_func = tf.nn.conv2d_transpose
          elif spatial_size == 3:
            conv_func = tf.nn.conv3d_transpose
          else:
            raise NotImplementedError(
                "Transposed convolution for {}d is not implemented in Tensorflow"
                .format(spatial_size))

          # use raw input x to do transposed conv
          conv_rs = conv_func(x,
                              weight,
                              conv_output_shape,
                              strides_full,
                              padding="VALID",
                              data_format=compute_format)

          # pad output first by output_padding attr
          if "output_padding" in node.attrs and output_shape is None:
            output_padding = [[0, 0]
                             ] + [[0, p] for p in node.attrs["output_padding"]]
            output_padding.insert(compute_c_idx, [0, 0])
            conv_rs = tf.pad(conv_rs, output_padding)

          # remove pads set in pads attr
          conv_rs_shape = tf_shape(conv_rs, tf.int32)
          conv_rs_shape_list = [
              conv_rs_shape[i] for i in range(conv_rs.shape.rank)
          ]
          begin = [0] + pads[:spatial_size]
          begin.insert(compute_c_idx, 0)
          size = [
              s if d in ["N", "C"] else s - pads[spatial_format.find(d)] -
              pads[spatial_format.find(d) + spatial_size]
              for d, s in zip(compute_format, conv_rs_shape_list)
          ]

          conv_rs = tf.slice(conv_rs, begin=begin, size=size)

          convolved.append(conv_rs)
        else:
          # No need to check pads if auto_pad is specifically provided.
          # The assumption is that once auto_pad is provided as either VALID
          # or SAME_UPPER (SAME_LOWER is currently not supported in TF) the
          # output_shape will always be inferred. That is, the output_shape
          # and output_padding will not be used in this case.
          if pad_mode == "VALID":
            conv_output_shape += [
                strides[i] * (x_spatial_shape[i] - 1) + weights_shape[i]
                for i in range(spatial_size)
            ]
          else:
            conv_output_shape += [
                strides[i] * x_spatial_shape[i]
                for i in range(spatial_size)
            ]
          conv_output_shape.insert(compute_c_idx, weights_shape[-2])

          # make strides to match input rank
          strides_full = [1] + strides
          strides_full.insert(compute_c_idx, 1)

          # get corresponding function in tf
          if spatial_size == 1:
            conv_func = tf.nn.conv1d_transpose
            strides_full = strides[0]
          elif spatial_size == 2:
            conv_func = tf.nn.conv2d_transpose
          elif spatial_size == 3:
            conv_func = tf.nn.conv3d_transpose
          else:
            raise NotImplementedError(
                "Transposed convolution for {}d is not implemented in Tensorflow"
                .format(spatial_size))

          # use raw input x to do transposed conv
          conv_rs = conv_func(x,
                              weight,
                              conv_output_shape,
                              strides_full,
                              padding=pad_mode,
                              data_format=compute_format)
          convolved.append(conv_rs)

    else:  # not transpose:
      if depthwise is True:
        # depthwise_conv2d expects strides for every input axis
        if compute_format == "NHWC":
          strides = [1] + strides + [1]
        elif compute_format == 'NCHW':
          strides = [1, 1] + strides
        else:
          raise ValueError("Invalid compute_format: {}".format(compute_format))

        convolved = [
            tf.nn.depthwise_conv2d(x,
                                   weight,
                                   padding=pad_mode,
                                   strides=strides,
                                   dilations=dilations,
                                   data_format=compute_format)
            for (x, weight) in zip(xs, weight_groups)
        ]

      else:
        convolved = [
            tf.nn.convolution(x,
                              weight,
                              padding=pad_mode,
                              strides=strides,
                              dilations=dilations,
                              data_format=compute_format)
            for (x, weight) in zip(xs, weight_groups)
        ]

    # Join the per-group results on the channel axis, add the bias when a
    # third input is given, and restore the storage layout if the input
    # was transposed to the compute layout.
    if len(node.inputs) == 2:
      if sys_config.device == 'CUDA':
        output = tf.concat(convolved, axis=1)
      else:
        output = tf.concat(convolved, axis=-1)
        output = tf.transpose(output,
                              perm=get_perm_from_formats(
                                  compute_format, storage_format))
    else:
      bias = input_dict[node.inputs[2]]
      bias = cls.explicit_broadcast([x, bias], compute_c_idx)

      if sys_config.device == 'CUDA':
        output = tf.concat(convolved, axis=1)
        output = tf.add(output, bias)
      else:
        output = tf.concat(convolved, axis=-1)
        output = tf.add(output, bias)
        output = tf.transpose(output,
                              perm=get_perm_from_formats(
                                  compute_format, storage_format))

    return [output]
Ejemplo n.º 29
0
  def version_11(cls, node, **kwargs):
    """Resize a 4-D NCHW input to a size given by `scales` or `sizes`.

    The input is transposed to NHWC, resized with the TensorFlow function
    matching `coordinate_transformation_mode`, and transposed back.

    Args:
      node: ONNX node. inputs[0] names the data tensor, inputs[1] the roi,
        inputs[2] the scales and the optional inputs[3] the sizes. The
        attributes read here are `coordinate_transformation_mode`
        (default "half_pixel"), `extrapolation_value` (default 0.0) and
        `mode` (default "nearest"; "linear" and "cubic" are also
        recognised, case-insensitively).
      **kwargs: must contain `tensor_dict`, mapping input names to tensors.

    Returns:
      A single-element list holding the resized tensor in NCHW layout.
    """
    # x, roi, scales and sizes are all in NCHW format
    tensor_dict = kwargs["tensor_dict"]
    x = tensor_dict[node.inputs[0]]
    x_shape = tf_shape(x)
    roi = tensor_dict[node.inputs[1]]
    scales = tensor_dict[node.inputs[2]]
    sizes = tensor_dict[node.inputs[3]] if len(
        node.inputs) == 4 else tf.constant([], dtype=tf.int64)
    coordinate_transformation_mode = node.attrs.get(
        "coordinate_transformation_mode", "half_pixel")
    extrapolation_value = node.attrs.get("extrapolation_value", 0.0)
    mode = node.attrs.get("mode", "nearest")

    if mode.lower() == "linear":
      resize_method = tf.image.ResizeMethod.BILINEAR
      tf_resize = tf.compat.v1.image.resize_bilinear
    elif mode.lower() == "cubic":
      resize_method = tf.image.ResizeMethod.BICUBIC
      tf_resize = tf.compat.v1.image.resize_bicubic
    else:
      resize_method = tf.image.ResizeMethod.NEAREST_NEIGHBOR
      tf_resize = tf.compat.v1.image.resize_nearest_neighbor

    if len(node.inputs) == 3:  # only scales is defined
      h_w_scale = scales[2:]
      h_w_shape = x_shape[2:]
      new_size = tf.cast(h_w_scale * tf.cast(h_w_shape, scales.dtype),
                         tf.int32)
    else:  # sizes is defined
      # The number of elements of 'sizes' should be the same as the rank of input 'X'
      sizes.set_shape(x_shape.shape)
      new_size = tf.cast(sizes[2:], tf.int32)
    # Tensorflow require the shape of "size" in the "tf.image.resize" must be known at
    # graph creation time. However in the dynamic shape situation, the shape of "new_size"
    # will be "None", the actual shape can only be determine at runtime. But we know
    # "new_size" should always contain [h, w], therefore the shape must be 2.
    new_size.set_shape([2])

    # The input image is in NCHW format. But tf.image.crop_and_resize,
    # tf.image.resize and tf.compat.v1.image.resize_xx only support
    # channel last data format. Therefore need to transpose to NHWC
    # format first then process resize and then transpose back to
    # NCHW format.
    x_t = tf.transpose(x, perm=[0, 2, 3, 1])
    if coordinate_transformation_mode == "tf_crop_and_resize":
      # get boxes for crop: pick the start and end entries of the spatial
      # axes out of roi, i.e. positions [2, 3, rank + 2, rank + 3] for a
      # 4-D input. This is only built here because roi is not used by the
      # other coordinate transformation modes.
      x_rank = len(x.get_shape())
      indices = list(range(2, x_rank)) + list(range(x_rank + 2, 2 * x_rank))
      box = tf.expand_dims(tf.gather(roi, indices, axis=0), 0)

      # tf.image.crop_and_resize needs one box per entry of box_indices,
      # so repeat the single roi box for every image in the batch.
      boxes = tf.tile(box, [x_shape[0], 1])

      # get box_indices for crop
      box_indices = tf.cast(tf.range(0, x_shape[0]), dtype=tf.int32)

      y = tf.image.crop_and_resize(x_t, boxes, box_indices, new_size,
                                   resize_method, extrapolation_value)
    elif coordinate_transformation_mode == "align_corners":
      y = tf_resize(x_t,
                    size=new_size,
                    align_corners=True,
                    half_pixel_centers=False)
    elif coordinate_transformation_mode == "asymmetric":
      y = tf_resize(x_t,
                    size=new_size,
                    align_corners=False,
                    half_pixel_centers=False)
    else:  # half_pixel or tf_half_pixel_for_nn
      y = tf.image.resize(x_t, size=new_size, method=resize_method)
    output = tf.transpose(y, perm=[0, 3, 1, 2])

    return [output]
Ejemplo n.º 30
0
  def _remove_dilations(self):
    """
    Rearrange the input so that plain pooling reproduces dilated pooling.

    For every sliding window the values selected by the dilations,
    strides and kernel size are gathered next to each other. A pooling
    operation with strides = kernel_shape applied to the returned tensor
    then yields the dilated pooling result. As a side effect
    self.output_shape is set to the shape of the returned tensor.

    The input is read as [batch, spatial dims..., channels].

    Example:
      Input:     [[  0,  1,  2,  3],
                  [  4,  5,  6,  7],
                  [  8,  9, 10, 11],
                  [ 12, 13, 14, 15]]

      Kernel:    [2, 2]
      Dilations: [2, 2]
      Strides:   [1, 1]

      Will return:
                 [[  0,  2,  1,  3],
                  [  8, 10,  9, 11],
                  [  4,  6,  5,  7],
                  [ 12, 14, 13, 15]]

      After max_pool2d with kernel_shape = strides = [2, 2]
      the result is:
                 [[ 10, 11],
                  [ 14, 15]]

    How the gather indices are built for that example:

      1. Start with the channel range as a column vector:
           gather_ind = [[0]]            # only 1 channel

      2. Walk the spatial dimensions from the innermost to the outermost.
         For each one, map the output positions to input positions with
         `_calc_input_ind` and combine them with the indices collected so
         far using a cartesian product.

         Innermost dimension (axis 1): filter_size = 3, output_size = 4
           dim_ind    = [[0] [2] [1] [3]]
           gather_ind = [[0 0] [2 0] [1 0] [3 0]]

         Outermost dimension (axis 0): filter_size = 3, output_size = 4
           dim_ind    = [[0] [2] [1] [3]]
           gather_ind = [[0 0 0] [0 2 0] [0 1 0] [0 3 0]
                         [2 0 0] [2 2 0] [2 1 0] [2 3 0]
                         [1 0 0] [1 2 0] [1 1 0] [1 3 0]
                         [3 0 0] [3 2 0] [3 1 0] [3 3 0]]

      3. These rows are the indices handed to gather_nd to collect the
         values from the input data.

    Returns:
      The rearranged tensor, reshaped to self.output_shape.
    """
    n_spatial = self.spatial_size

    input_shape = tf_shape(self.input)
    in_spatial_shape = input_shape[1:n_spatial + 1]
    channels_count = input_shape[n_spatial + 1]

    # Seed the indices with the channel range, e.g. [0 1], as a column
    # vector; all index lists below are column vectors as well.
    gather_ind = tf.expand_dims(tf.range(channels_count, dtype=tf.int64), 1)

    # Placeholder shape; the entries are filled in as they become known.
    self.output_shape = [0] * (n_spatial + 2)
    self.output_shape[0] = input_shape[0]

    for dim in reversed(range(n_spatial)):
      kernel = self.kernel_shape[dim]
      dilation = self.dilations[dim]
      stride = self.strides[dim]

      # Extent covered by one dilated window along this dimension.
      filter_size = (kernel - 1) * dilation + 1
      # Each window contributes `kernel` values to the output.
      output_size = ((
          (in_spatial_shape[dim] - filter_size) // stride) + 1) * kernel
      self.output_shape[dim + 1] = output_size

      # Output positions (e.g. [0, 1, 2, 3]) translated to the input
      # positions they read from (e.g. [0, 2, 1, 3]), as a column vector.
      dim_ind = self._calc_input_ind(tf.range(output_size), kernel, dilation,
                                     stride)
      dim_ind = tf.expand_dims(dim_ind, 1)

      # Cartesian product with the indices of the inner dimensions.
      gather_ind = tf_product(dim_ind, gather_ind)

    # For 2D data gather_ind is now
    # [[y1, x1, c], [y2, x2, c], ..., [yn, xm, c]] where n is the height,
    # m is the width and c is the channel number.

    self.output_shape[n_spatial + 1] = channels_count

    # Add leading axes so the rank matches the input rank + 1 ...
    for _ in range(n_spatial):
      gather_ind = tf.expand_dims(gather_ind, 0)
    # ... and repeat the same indices for every item of the batch.
    gather_ind = tf.tile(gather_ind,
                         [input_shape[0]] + [1] * (n_spatial + 1))

    # Collect the selected values and give them the shape computed above.
    gathered = tf.gather_nd(self.input, gather_ind, batch_dims=1)
    return tf.reshape(gathered, self.output_shape)