Example #1
class conv_quantized(conv):
    """
    Note: This is experimental and may change in the future.
    Supports weight quantization for parameters while performing convolution over input.
    ``W_float = W_quantized * scale + bias``.

    Parameters
    ----------
    In addition to convolutional layer parameters, the following additional parameters
    are required.

    quantization_type: const str (Required)
        * One of ``linear`` or ``lut``.

    nbits: const tensor<[], i32> (Optional. Default to 8)
        * Denotes the bit-width of the quantization. ``1 <= nbits <= 8``.

    quant_scale: tensor<*?, T> (Required)
        * Denotes the scale of quantization.

    quant_bias: tensor<*?, T> (Required)
        * Denotes the bias that is used to quantize/dequantize.

    Returns
    -------
    tensor<[n, C_out, *d_out], T>
        * Output activation has the same rank and spatial dimension as the input.
          That is, ``len(d_out) == len(d_in)``.

    Attributes
    ----------
    T: fp16, fp32
    """
    input_spec = InputSpec(
        x=TensorInputType(),
        weight=TensorInputType(),
        bias=TensorInputType(const=True, optional=True),
        quantization_type=StringInputType(const=True),
        nbits=IntInputType(const=True, optional=True),
        quant_scale=ScalarOrTensorInputType(const=True),
        quant_bias=ScalarOrTensorInputType(const=True),
        strides=IntTensorInputType(const=True, optional=True),
        pad_type=StringInputType(const=True, optional=True),
        pad=IntTensorInputType(const=True, optional=True),
        dilations=IntTensorInputType(const=True, optional=True),
        groups=IntInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return super().default_inputs() + \
            DefaultInputs(
                nbits=8,
                )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
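# A hedged sketch (not part of the op definition above): how the linear
# relationship ``W_float = W_quantized * scale + bias`` recovers float weights
# from nbits-wide integer codes. ``dequantize_linear`` and the sample values
# are illustrative assumptions, not coremltools API.
import numpy as np

def dequantize_linear(w_quantized, quant_scale, quant_bias):
    # Element-wise affine dequantization; scale and bias broadcast over the
    # quantized weight tensor.
    return w_quantized.astype(np.float32) * quant_scale + quant_bias

w_q = np.array([[0, 255], [128, 64]], dtype=np.uint8)  # nbits=8 codes
w_float = dequantize_linear(w_q, quant_scale=0.02, quant_bias=-2.55)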
Example #2
class instance_norm(Operation):
    """
    Apply instance normalization to the n-dimensional input tensor.

    Parameters
    ----------
    x: tensor<[n,C,*D], T>  (Required)
        * ``3 <= rank(x) <= 4``.
        * ``*D`` refers to the spatial dimensions, ``1 <= rank(*D) <= 2``.
        * ``n`` is the batch dimension.
    gamma: const tensor<[C], T> (Optional)
        * Optional scale applied to normalized tensor.
        * Default to all ones.
    beta: const tensor<[C], T> (Optional)
        * Optional offset applied to normalized tensor.
        * Default to all zeros.
    epsilon: const f32 (Optional)
        * Default to ``1e-5``.

    Returns
    -------
    tensor<[n,C,*D], T>
        * Output tensor has the same shape and type as the input ``x``.

    Attributes
    ----------
    T: fp16, fp32
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        gamma=TensorInputType(const=True, optional=True),
        beta=TensorInputType(const=True, optional=True),
        epsilon=FloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(
            gamma=None,
            beta=None,
            epsilon=1e-5,
        )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def type_inference(self):
        x_shape = self.x.shape
        return types.tensor(self.x.dtype, tuple(x_shape))
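# A hedged NumPy reference of the normalization this op applies (for
# intuition, not the runtime kernel): each (batch, channel) slice is
# normalized over its spatial dimensions, then scaled by gamma and shifted
# by beta. The helper name is an assumption.
import numpy as np

def instance_norm_ref(x, gamma=None, beta=None, epsilon=1e-5):
    # x: [n, C, *D]; statistics are computed over the spatial axes only.
    spatial_axes = tuple(range(2, x.ndim))
    mean = x.mean(axis=spatial_axes, keepdims=True)
    var = x.var(axis=spatial_axes, keepdims=True)
    out = (x - mean) / np.sqrt(var + epsilon)
    per_channel = (1, -1) + (1,) * (x.ndim - 2)
    if gamma is not None:
        out = out * gamma.reshape(per_channel)
    if beta is not None:
        out = out + beta.reshape(per_channel)
    return out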
Example #3
class crop(Operation):
    """
    Crop the spatial dimensions (last two dimensions) of the input by the
    specified amounts.

    Parameters
    ----------
    x: tensor<[\*D, H1, W1],T> (Required)
        * Must be at least rank ``3``.
    crop_height: const<2, i32> (Required)
        * Amount to be cropped from the top and bottom of the height dimension
          (``axis=-2``).
    crop_width: const<2, i32> (Required)
        * Amount to be cropped from the left and right sides of the width dimension (``axis=-1``).

    Returns
    -------
    tensor<[\*D, H2, W2],T>
        * Tensor with same type as the input.
        * ``H2`` = ``H1`` - crop_height[0] - crop_height[1].
        * ``W2`` = ``W1`` - crop_width[0] - crop_width[1].

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        crop_height=IntTensorInputType(const=True),
        crop_width=IntTensorInputType(const=True),
    )

    def __init__(self, **kwargs):
        super(crop, self).__init__(**kwargs)

    def type_inference(self):
        if self.x.rank < 3:
            raise ValueError(
                'input to the "crop" op must at least be of rank 3. Provided {}'
                .format(self.x.rank))

        crop_height = self.crop_height.val
        crop_width = self.crop_width.val

        if len(crop_height.flatten()) != 2:
            raise ValueError(
                "crop_height must have 2 elements. Provided {}".format(
                    len(crop_height.flatten())))

        if len(crop_width.flatten()) != 2:
            raise ValueError(
                "crop_width must have 2 elements. Provided {}".format(
                    len(crop_width.flatten())))

        input_shape = list(self.x.shape)
        ret_shape = (input_shape[:-2] +
                     [input_shape[-2] - crop_height[0] - crop_height[1]] +
                     [input_shape[-1] - crop_width[0] - crop_width[1]])
        return types.tensor(self.x.dtype, ret_shape)
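# A hedged sketch of the shape arithmetic in type_inference above:
# H2 = H1 - crop_height[0] - crop_height[1], and similarly for width.
# Plain NumPy slicing reproduces the behavior for concrete values.
import numpy as np

x = np.arange(2 * 6 * 8).reshape(2, 6, 8)        # [*D, H1, W1]
ch, cw = (1, 2), (3, 1)                          # (before, after) amounts
y = x[..., ch[0]:x.shape[-2] - ch[1], cw[0]:x.shape[-1] - cw[1]]
assert y.shape == (2, 6 - sum(ch), 8 - sum(cw))  # (2, 3, 4)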
Example #4
class upsample_nearest_neighbor(Operation):
    """
    Upsample the spatial dimensions (last two dimensions) of the input
    by integer scale factors using nearest-neighbor interpolation.

    Parameters
    ----------
    x: tensor<[\*D, H1, W1],T>  (Required)
        * Must be at least rank ``3``.
    scale_factor_height: const<i32> or const<fp32> (Optional, default=1)
        * Scale factor for the height dimension (``axis=-2``).
        * Can be either an integer or fractional.
    scale_factor_width: const<i32> or const<fp32> (Optional, default=1)
        * Scale factor for the width dimension (``axis=-1``).
        * Can be either an integer or fractional.

    Returns
    -------
    tensor<[\*D, H2, W2],T>
        * Tensor with same type as the input.
        * ``H2`` = floor(``H1`` * ``scale_factor_height``).
        * ``W2`` = floor(``W1`` * ``scale_factor_width``).

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        scale_factor_height=IntOrFloatInputType(const=True, optional=True),
        scale_factor_width=IntOrFloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(
            scale_factor_height=1,
            scale_factor_width=1,
        )

    def __init__(self, **kwargs):
        super(upsample_nearest_neighbor, self).__init__(**kwargs)

    def type_inference(self):
        if self.x.rank < 3:
            raise ValueError(
                'input to the "upsample_nearest_neighbor" op must have rank at least 3'
            )

        ret_shape = list(self.x.shape)
        if is_symbolic(ret_shape[-1]):
            ret_shape[-1] = get_new_symbol()
        else:
            ret_shape[-1] = int(np.floor(self.scale_factor_width.val * ret_shape[-1]))
        if is_symbolic(ret_shape[-2]):
            ret_shape[-2] = get_new_symbol()
        else:
            ret_shape[-2] = int(np.floor(self.scale_factor_height.val * ret_shape[-2]))
        return types.tensor(self.x.dtype, ret_shape)
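# A hedged sketch of nearest-neighbor upsampling with integer scale factors:
# each pixel is repeated along its spatial axis, so H2 = H1 * scale (the
# floor in the docstring only matters for fractional factors).
import numpy as np

x = np.array([[[1, 2], [3, 4]]])                 # [*D, H1, W1] = [1, 2, 2]
up = np.repeat(np.repeat(x, 2, axis=-2), 3, axis=-1)
assert up.shape == (1, 4, 6)                     # H2 = 2*2, W2 = 2*3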
Example #5
class local_response_norm(Operation):
    """
    Apply local response normalization to the n-dimensional input tensor:

    .. math::
       x_i \\leftarrow \\dfrac{x_i}{\\left ( k + \\dfrac{\\alpha}{\\text{size}} \\sum_j x_j^2 \\right )^\\beta}


    Parameters
    ----------
    x: tensor<[n,C,*D], T> (Required)
        * Input tensor, ``3 <= rank(x) <= 4``.
        * ``*D`` refers to the spatial dimensions, ``1 <= rank(*D) <= 2``.
        * ``n`` is the batch dimension.
    size: const i32 (Required)
        * Amount of neighboring channels to normalize.
    alpha: const fp32 (Optional)
        * Scale factor.
        * Default is ``1e-4``.
    beta: const fp32 (Optional)
        * An exponent.
        * Default is ``0.75``.
    k: const fp32 (Optional)
        * Additive factor.
        * Default is ``1.0``.

    Returns
    -------
    tensor<[n,C,*D], T>
        * Same type and shape as the input tensor ``x``.

    Attributes
    ----------
    T: fp16, fp32
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        size=IntInputType(const=True),
        alpha=FloatInputType(const=True, optional=True),
        beta=FloatInputType(const=True, optional=True),
        k=FloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(
            alpha=1e-4,
            beta=0.75,
            k=1.,
        )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def type_inference(self):
        x_shape = self.x.shape
        return types.tensor(self.x.dtype, tuple(x_shape))
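# A hedged NumPy reference of the LRN formula above: for each channel, the
# sum of squares runs over a window of ``size`` neighboring channels clipped
# at the channel boundaries. The exact window centering is an assumption
# based on common LRN conventions; the helper name is illustrative.
import numpy as np

def lrn_ref(x, size, alpha=1e-4, beta=0.75, k=1.0):
    # x: [n, C, *D]
    C = x.shape[1]
    out = np.empty_like(x, dtype=np.float64)
    half = size // 2
    for c in range(C):
        lo, hi = max(0, c - half), min(C, c + half + 1)
        sq_sum = np.sum(x[:, lo:hi] ** 2, axis=1)
        out[:, c] = x[:, c] / (k + alpha / size * sq_sum) ** beta
    return out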
Example #6
class resize_nearest_neighbor(Operation):
    """
    Resize the spatial (last two) dimensions to the specified target size
    using nearest neighbor interpolation. Although this op is similar to
    ``upsample_nearest_neighbor``, ``resize_nearest_neighbor`` works with
    a target size rather than with scale factors.

    Parameters
    ----------
    x: tensor<[\*D, H1, W1], T> (Required)
        * Must be at least rank ``3``.
    target_size_height: const<int32> (Required)
        * Target spatial size for the height dimension (``axis=-2``).
    target_size_width: const<int32> (Required)
        * Target spatial size for the width dimension (``axis=-1``).

    Notes
    -----
    See ``resize_bilinear`` for examples.

    See Also
    --------
    resize_bilinear

    Returns
    -------
    tensor<[\*D, H2, W2], T>
        * Tensor with same type as the input.
        * ``H2`` = ``target_size_height``.
        * ``W2`` = ``target_size_width``.

    Attributes
    ----------
    T: fp16, fp32
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        target_size_height=IntInputType(const=True),
        target_size_width=IntInputType(const=True),
    )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def type_inference(self):
        if self.x.rank < 3:
            raise ValueError(
                'input to the "resize_nearest_neighbor" op must have rank at least 3'
            )

        ret_shape = list(self.x.shape)
        ret_shape[-1] = int(self.target_size_width.val)
        ret_shape[-2] = int(self.target_size_height.val)
        return types.tensor(self.x.dtype, ret_shape)
Example #7
class l2_norm(Operation):
    """
    Apply L2 normalization to the n-dimensional input tensor. That is, divide the input
    tensor by the square root of the sum of squares of all elements of the input.

    .. math::
       x_i \\leftarrow \\dfrac{x_i}{\\sqrt{\\sum{x_i^2} + \\epsilon}}


    Parameters
    ----------
    x: tensor<[*D,C,H,W], T> (Required)
        * Input tensor, ``rank(x) >= 3``.
        * ``*D`` refers to the leading (batch) dimensions, ``rank(*D) >= 0``.
        * For ranks greater than 3, the leading dimensions, starting from ``0`` to ``-4`` (inclusive),
          are all treated as batch.
    epsilon: const fp32 (Optional)
        * Small constant to avoid division by ``0``.
        * Optional, defaults to ``1e-6``.

    Returns
    -------
    tensor<[\*D,C,H,W], T>
        * Same type and shape as the input tensor ``x``.

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        epsilon=FloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(epsilon=1e-6, )

    def __init__(self, **kwargs):
        super(l2_norm, self).__init__(**kwargs)

    def type_inference(self):
        x_shape = self.x.shape
        return types.tensor(self.x.dtype, tuple(x_shape))
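# A hedged sketch of the formula above for the rank-3 case (no batch
# dimensions): divide the whole tensor by the square root of its sum of
# squares plus epsilon. For higher ranks, the leading dimensions are folded
# into a batch axis and each batch row is normalized the same way.
import numpy as np

x = np.random.rand(3, 4, 5).astype(np.float32)   # [C, H, W]
y = x / np.sqrt(np.sum(x ** 2) + 1e-6)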
Example #8
class conv(Operation):
    """
    Perform convolution over input. Currently supports only 1-D and 2-D
    convolution.

    Parameters
    ----------
    x: tensor<[n, C_in, \*d_in], T> (Required)

        * ``d_in`` are (possibly runtime-determined) spatial dimensions. For example,
          ``d_in = [224, 224]`` for 2D convolution.
        * ``1 <= len(d_in) <= 2``: Only 1-D and 2-D convolution.
        * ``C_in`` is the number of input channels or depth dimensions.
        * ``n``  is the batch dimension.

    weight: tensor<[C_out, C_in/groups, \*K], T> (Required)

        * Filter weights.
        * ``C_in`` is the number of input channels.
        * ``C_in`` must be divisible by ``groups``.
        * ``K`` are kernel sizes. For example, ``K = [KH, KW]`` for 2-D conv.
        * When ``dilations`` is not all ``1``, ``weight`` has to be ``const``
          at compile time

    strides: const tensor<[S], i32> (Optional)

        * Default to one vector of length equal to the number of spatial dimensions.
        * Strides along each of the spatial dimensions.
        * ``S == len(d_in)``.

    pad_type: const str (Required)

        Must be one of the following:

            * ``valid``: No padding. This is equivalent to custom pad with
              ``pad[2*i] == pad[2*i+1] == 0, for i=0,...,len(d_in)-1``.
            * ``custom``: Specify custom padding in the parameter ``pad``.
            * ``same``: Input is padded such that output spatial shapes are
              ``d_out[i] = ceil(d_in[i] / strides[i])``.

        Specifically, for ``i = 0,...,len(d_in)-1``, the equivalent paddings are
        as follows, when the dilated kernel size (that is, ``(K[i]-1)*dilations[i]+1``) is even:

            * ``pad[2*i] = ceil[((K[i]-1)*dilations[i]+1)/2]``.
            * ``pad[2*i+1] = floor[((K[i]-1)*dilations[i]+1)/2]``.

        Otherwise, ``pad[2*i] = pad[2*i+1] = (K[i]-1) * dilations[i] / 2``.

    pad: const tensor<[P], i32> (Optional. Default to all zeros)

        * ``len(P) = 2 * len(d_in)``
        * ``pad`` should be specified if and only if ``pad_type == custom``,
          otherwise errors occur.
        * ``pad`` represents the number of elements to pad before and after each
          dimension. Specifically, ``pad[0], pad[1]`` are the pad size before / after
          spatial dimension 0, ``pad[2], pad[3]`` are the pad size before / after
          spatial dimension 1, etc.

    dilations: const tensor<[S], i32> (Optional. Default to all 1s)

        * Dilation value along each spatial dimension in ``d_in``.
          See `visualization <https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md>`_.
        * ``S == len(d_in)``.

    groups: const tensor<[], i32> (Optional, default to 1)

        * Input and output channels are split by ``groups``.
        * ``C_in`` must be divisible by ``groups``.
        * Maximum value for group is ``C_in``, in which case it is a depthwise
          convolution.

        For examples (assuming ``C_in = 16, C_out = 32``):

            * ``groups == 1``, ``weight`` has shape ``[32, 16, KH, KW]``: All input
              channels are convolved with the ``weight`` kernel to produce all output
              channels.
            * ``groups == 2``, ``weight`` has shape ``[32, 8, KH, KW]``: Input
              channels 0~7 are convolved with half of the ``weight`` kernel to produce
              output channels 0~15. Similarly, input channels 8~15 are convolved with
              the other half of ``weight`` to produce output channels 16~31.
            * ``groups == C_in``, ``weight`` has shape ``[32, 1, KH, KW]``: Each input
              channel is convolved with its own set of filters and each produces
              ``C_out / C_in = 2`` channels. This is equivalent to depthwise
              convolution.

    bias: const tensor<[C_out],T> (Optional, default to all 0)
        * Bias along output channels.

    Returns
    -------
    tensor<[n, C_out, \*d_out], T>
        * Output activation has the same rank and spatial dimension as the input.
          That is, ``len(d_out) == len(d_in)``.
        * For ``i=0,...,len(d_in)-1``: ``d_out[i] = floor [(d_in[i] + pad[2*i] +
          pad[2*i+1] - (K[i]-1)*dilations[i] - 1) / strides[i] ] + 1``

    Attributes
    ----------
    T: fp32

    See Also
    --------
    conv_transpose
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        weight=TensorInputType(),
        bias=TensorInputType(const=True, optional=True),
        strides=IntTensorInputType(const=True, optional=True),
        pad_type=StringInputType(const=True, optional=True),
        pad=IntTensorInputType(const=True, optional=True),
        dilations=IntTensorInputType(const=True, optional=True),
        groups=IntInputType(const=True, optional=True),
    )

    def default_inputs(self):
        num_spatial_dims = self.x.rank - 2
        return DefaultInputs(
            bias=None,
            strides=[1] * num_spatial_dims,
            pad_type="valid",
            pad=[0] * num_spatial_dims * 2,
            dilations=[1] * num_spatial_dims,
            groups=1,
        )

    def __init__(self, **kwargs):
        super(conv, self).__init__(**kwargs)

    def type_inference(self):
        inshape = self.x.shape
        f_shape = self.weight.shape
        kernel_shape = f_shape[2:]
        num_dims = len(inshape) - 2
        C_out = f_shape[0]
        C_in = self.x.shape[1]
        groups = self.groups.val

        if self.bias is not None and self.bias.val.shape[0] != C_out:
            msg = "# of bias values {} not equal to # output channels {}"
            raise ValueError(msg.format(self.bias.val.shape[0], C_out))
        if C_in % groups != 0:
            msg = "# of input channels {} not divisible by groups {}"
            raise ValueError(msg.format(C_in, groups))
        if C_in // groups != self.weight.shape[1]:
            msg = "C_in / groups = {}/{} != weight[1] ({})"
            raise ValueError(msg.format(C_in, groups, self.weight.shape[1]))

        strides = self.strides.val
        dilations = self.dilations.val
        # Ignore self.pad if pad_type != custom
        custom_pad = None if self.pad_type.val != 'custom' else self.pad.val

        if self.weight.val is None and any(d > 1 for d in dilations):
            raise ValueError(
                "Convolution with dynamic weights does not support dilations!")

        N = inshape[0]
        C_out = f_shape[0]
        # spatial dimensions
        d_out_shape = spatial_dimensions_out_shape(
            pad_type=self.pad_type.val,
            input_shape=inshape[2:],
            kernel_shape=kernel_shape,
            strides=strides,
            dilations=dilations,
            custom_pad=custom_pad,
        )
        retshape = [N, C_out] + d_out_shape
        return types.tensor(self.x.dtype, tuple(retshape))
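# A hedged helper spelling out the d_out formula from the Returns section
# for custom padding; the function name is illustrative, not coremltools API.
def conv_out_size(d_in, k, stride, dilation, pad_before, pad_after):
    # d_out = floor((d_in + pads - (k-1)*dilation - 1) / stride) + 1
    return (d_in + pad_before + pad_after - (k - 1) * dilation - 1) // stride + 1

# 224x224 input, 3x3 kernel, stride 2, dilation 1, pad 1 on every side:
assert conv_out_size(224, 3, 2, 1, 1, 1) == 112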
Example #9
class crop_resize(Operation):
    """
    Resize the spatial dimensions (last two dimensions) of the first input
    according to the bounding boxes specified in the second input, using
    bilinear interpolation.

    Parameters
    ----------

    x: tensor<[B, C, H, W],T> (Required)
        * The input, from which patches (regions of interest) are extracted
          and resized using bilinear interpolation.
        * Rank ``4``.

    roi: tensor<[N,1,4,1,1], T> or tensor<[N,1,5,1,1], T> (Required)
        * Regions of interest, or coordinates of the boxes. The above input
          represents coordinates of ``N`` boxes.
        * The convention for expressing coordinates depends on the value of
          the input ``box_coordinate_mode``.
        * Rank ``5``.
        * If ``tensor<[N,1,4,1,1], T>``: Resized images are computed for all
          ``B`` input images.
        * If ``tensor<[N,1,5,1,1], T>``: The first element along ``axis=-3``
          is the index of the input image to be resized; it must be within
          range ``[0, B)``.

    target_height: const<i32> (Optional, Default=1)
        * Target height for resizing each patch.

    target_width: const<i32> (Optional, Default=1)
        * Target width for resizing each patch.

    normalized_coordinates : const<bool> (Optional, default=False)
        * If true, the bounding box coordinates must be in the
          interval ``[0, 1]``. Scaling is based on the input spatial
          dimensions: ``(H_in - 1)`` for height and ``(W_in - 1)`` for width.
        * If false, the bounding box coordinates must be in the interval
          ``[0, H_in - 1]`` for height dimensions and ``[0, W_in - 1]`` for
          width dimensions.

    spatial_scale : const<fp32> (Optional, default=1.0)
        * Additional spatial scale that multiplies the bounding box coordinates.
          You would use this to implement the RoI Align layer, which typically
          uses unnormalized RoI coordinates along with a spatial scale that is
          less than or equal to 1.

    box_coordinate_mode: const<str> (Optional, default="CORNERS_HEIGHT_FIRST")
        * Specifies the convention for specifying the four bounding box
          coordinates for an image of size ``(Height, Width)``. The ``(0,0)``
          coordinate corresponds to the top-left corner of the image.
        * This parameter can take one of four values:

          "CORNERS_HEIGHT_FIRST": ``[h_start, w_start, h_end, w_end]``

          "CORNERS_WIDTH_FIRST": ``[w_start, h_start, w_end, h_end]``

          "CENTER_SIZE_HEIGHT_FIRST": ``[h_center, w_center, box_height, box_width]``

          "CENTER_SIZE_WIDTH_FIRST": ``[w_center, h_center, box_width, box_height]``

    sampling_mode : const<str> (Optional, default="DEFAULT")
        * This parameter can take ``"STRICT_ALIGN_CORNERS"``,
          ``"ALIGN_CORNERS"``, ``"DEFAULT"``, ``"OFFSET_CORNERS"``, or
          ``"UNALIGN_CORNERS"`` as values.
        * This same convention is used by the ``resize_bilinear`` op (see
          that op for details).

    See Also
    --------
    resize_bilinear

    Returns
    -------
    tensor<[N, B, C, target_height, target_width],T> or tensor<[N, 1, C, target_height, target_width],T>
        * Tensor with same type as the input.
        * If ``roi : tensor<[N,1,4,1,1], T>``, the output is
          ``tensor<[N, B, C, target_height, target_width],T>``.
          Total crops = ``N*B``; that is, ``N`` crops for each input in the batch.
        * If ``roi : tensor<[N,1,5,1,1], T>``, the output is
          ``tensor<[N, 1, C, target_height, target_width],T>``.
          Total crops = ``N``; that is, 1 crop for given input image index
          in the batch.

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        roi=TensorInputType(),
        target_height=IntInputType(const=True, optional=True),
        target_width=IntInputType(const=True, optional=True),
        normalized_coordinates=BoolInputType(const=True, optional=True),
        spatial_scale=FloatInputType(const=True, optional=True),
        box_coordinate_mode=StringInputType(const=True, optional=True),
        sampling_mode=StringInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(
            target_height=1,
            target_width=1,
            normalized_coordinates=False,
            spatial_scale=1.,
            box_coordinate_mode="CORNERS_HEIGHT_FIRST",
            sampling_mode="DEFAULT",
        )

    def __init__(self, **kwargs):
        super(crop_resize, self).__init__(**kwargs)

    def type_inference(self):
        if self.x.rank != 4:
            raise ValueError(
                'input to the "crop_resize" op must be of rank 4. Provided {}'.
                format(self.x.rank))

        if self.roi.rank != 5:
            raise ValueError(
                'ROI input to the "crop_resize" op must be of rank 5, provided {}'
                .format(self.roi.rank))

        if self.sampling_mode.val not in {
                "STRICT_ALIGN_CORNERS",
                "ALIGN_CORNERS",
                "UNALIGN_CORNERS",
                "DEFAULT",
                "OFFSET_CORNERS",
        }:
            raise ValueError(
                '"crop_resize" op: unrecognized sampling mode "{}"'.format(
                    self.sampling_mode.val))

        # ret_shape: [N] + [B, C, h_out, w_out]
        N, B, C = self.roi.shape[0], self.x.shape[0], self.x.shape[1]
        ret_shape = [N, B, C, self.target_height.val, self.target_width.val]
        return types.tensor(self.x.dtype, ret_shape)
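# A hedged restatement of the Returns section as concrete shapes (the values
# below are arbitrary): the ROI layout decides whether every box is applied
# to every image (N*B crops) or to one indexed image (N crops).
N, B, C, th, tw = 4, 2, 3, 7, 7
out_shape_roi4 = (N, B, C, th, tw)   # roi: [N,1,4,1,1] -> N*B total crops
out_shape_roi5 = (N, 1, C, th, tw)   # roi: [N,1,5,1,1] -> N total crops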
Example #10
class resize_bilinear(Operation):
    """
    Resize the spatial (last two) dimensions to the specified target size
    using bilinear interpolation. Although this op is similar to
    ``upsample_bilinear``, ``resize_bilinear`` works with a target size
    rather than with scale factors.

    Parameters
    ----------
    x: tensor<[\*D, H1, W1],T> (Required)
        * Must be at least rank ``3``.
    target_size_height: const<int32> (Optional, default=1)
        * Target spatial size for the height dimension (``axis=-2``).
    target_size_width: const<int32> (Optional, default=1)
        * Target spatial size for the width dimension (``axis=-1``).
    sampling_mode: const<str> (Optional, default="DEFAULT")
        * This parameter can take ``"STRICT_ALIGN_CORNERS"``, ``"ALIGN_CORNERS"``,
          ``"DEFAULT"``, ``"OFFSET_CORNERS"``, or ``"UNALIGN_CORNERS"`` as values.
          For details, see the Notes section.

    Notes
    -----
    To understand the ``sampling_mode`` parameter, consider the 1-D case.
    You need to sample a grid of pixels whose values are computed using
    linear interpolation. This parameter controls how the grid is sampled.
    If the input grid is ``[0, Xin-1]`` (corresponding to an input size of
    ``Xin``), and if the output size is ``Xout``, then the grid points are
    sampled in the following manner:

    .. sourcecode:: python

        # "STRICT_ALIGN_CORNERS":
        spacing = (Xin - 1) / (Xout - 1)
        grid_point[i] = min(Xin-1, max(0, i*spacing)), for i=0,1,...,Xout-1

        # "ALIGN_CORNERS": Same as "STRICT_ALIGN_CORNERS" unless Xout=1,
        # in which case:
        grid_point[0] = (Xin-1) / 2, if Xout==1

        # "DEFAULT":
        spacing = (Xin - Xin/Xout) / (Xout - 1)
        grid_point[i] = min(Xin-1, max(0, i*spacing)), for i=0,1,...,Xout-1

        # "OFFSET_CORNERS":
        delta = max(1, Xin - 1) / Xout
        spacing = ((Xout - 1) * delta) / (Xout - 1)
        grid_point[i] = min(Xin-1, max(0, 0.5*delta + i*spacing)), for
        ...   i=0,1,...,Xout-1

        # "UNALIGN_CORNERS":
        spacing = Xin / Xout
        grid_point[i] = min(Xin - 1, max(0, i*spacing + 0.5*spacing - 0.5)), for i=0,1,...,Xout-1

    For example:

    .. sourcecode:: python

        Xin = 2
        input_interval = [0,1]

    Grid points:

    .. sourcecode:: python

        [0., 0.1, 0.5, 0.9, 1.] (Xout = 5, UNALIGN_CORNERS)
        [0., 0.25, 0.5, 0.75, 1.] (Xout = 5, "STRICT_ALIGN_CORNERS" / "ALIGN_CORNERS")
        [0., 0.4, 0.8, 1., 1.] (Xout = 5, "DEFAULT")
        [0.1, 0.3, 0.5, 0.7, 0.9] (Xout = 5, "OFFSET_CORNERS")

        [0., 0., 0.33, 0.67, 1., 1.] (Xout = 6, UNALIGN_CORNERS)
        [0., 0.2, 0.4, 0.6, 0.8, 1.] (Xout = 6, "STRICT_ALIGN_CORNERS" / "ALIGN_CORNERS")
        [0., 0.33, 0.67, 1., 1., 1.] (Xout = 6, "DEFAULT")
        [0.08, 0.25, 0.42, 0.58, 0.75, 0.92] (Xout = 6, "OFFSET_CORNERS")

    Note the following similarities:

        * ``"DEFAULT"`` is same as
          ``tf.raw_ops.ResizeBilinear(align_corners=False,
          half_pixel_centers=False)``.
        * ``"STRICT_ALIGN_CORNERS"`` is same as
          ``tf.raw_ops.ResizeBilinear(align_corners=True,
          half_pixel_centers=False)``.

    Returns
    -------
    tensor<[\*D, H2, W2],T>
        * Tensor with same type as the input.
        * ``H2`` = ``target_size_height``.
        * ``W2`` = ``target_size_width``.

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        target_size_height=IntInputType(const=True, optional=True),
        target_size_width=IntInputType(const=True, optional=True),
        sampling_mode=StringInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(
            target_size_height=1,
            target_size_width=1,
            sampling_mode="DEFAULT",
        )

    def __init__(self, **kwargs):
        super(resize_bilinear, self).__init__(**kwargs)

    def type_inference(self):
        if self.x.rank < 3:
            raise ValueError(
                'input to the "resize_bilinear" op must have rank at least 3')

        if self.sampling_mode.val not in {
                "STRICT_ALIGN_CORNERS",
                "ALIGN_CORNERS",
                "UNALIGN_CORNERS",
                "DEFAULT",
                "OFFSET_CORNERS",
        }:
            raise ValueError(
                '"resize_bilinear" op: unrecognized sampling mode "{}"'.format(
                    self.sampling_mode.val))

        ret_shape = list(self.x.shape)
        ret_shape[-1] = self.target_size_width.val
        ret_shape[-2] = self.target_size_height.val
        return types.tensor(self.x.dtype, ret_shape)
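# A hedged sketch reproducing the 1-D grid-point tables from the Notes
# section (Xin = 2); only three of the five modes are shown, taken directly
# from the formulas above.
import numpy as np

def grid_points(x_in, x_out, mode):
    i = np.arange(x_out, dtype=np.float64)
    if mode == "STRICT_ALIGN_CORNERS":
        spacing = (x_in - 1) / (x_out - 1)
    elif mode == "DEFAULT":
        spacing = (x_in - x_in / x_out) / (x_out - 1)
    elif mode == "UNALIGN_CORNERS":
        spacing = x_in / x_out
        return np.clip(i * spacing + 0.5 * spacing - 0.5, 0, x_in - 1)
    else:
        raise ValueError(mode)
    return np.clip(i * spacing, 0, x_in - 1)

print(grid_points(2, 5, "UNALIGN_CORNERS"))       # [0.  0.1  0.5  0.9  1. ]
print(grid_points(2, 5, "STRICT_ALIGN_CORNERS"))  # [0.  0.25 0.5  0.75 1. ]
print(grid_points(2, 5, "DEFAULT"))               # [0.  0.4  0.8  1.   1. ]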
Example #11
class upsample_bilinear(Operation):
    """
    Upsample the spatial dimensions (last two dimensions) of the input
    by scale factors using bilinear interpolation.
    The ``upsample_bilinear`` operation in MIL corresponds to the
    ``recompute_scale_factor=True`` mode of the PyTorch bilinear interpolation
    op; that is, the scale factor is recomputed from the output size.
    Note that when ``scale_factor_height`` and ``scale_factor_width`` are
    floating point, this could result in a different effective scale factor
    due to rounding.

    Parameters
    ----------
    x: tensor<[\*D, H1, W1],T>  (Required)
        * Must be at least rank ``3``.
    scale_factor_height: const<T2> (Optional, default=1)
        * Scale factor for the height dimension (``axis=-2``).
    scale_factor_width: const<T2> (Optional, default=1)
        * Scale factor for the width dimension (``axis=-1``).
    align_corners: const<bool> (Optional, default=True)
        * This parameter determines how samples are chosen for bilinear
          interpolation. For details, see the Notes section.

    Notes
    -----
    To understand the ``align_corners`` parameter, consider the 1-D case.
    You need to sample a grid of pixels whose values are computed using linear
    interpolation. This parameter controls how the grid is sampled. If the
    input grid is ``[0, Xin-1]`` (corresponding to an input size of ``Xin``),
    and if the output size is ``Xout``, then the grid points are sampled in
    the following manner:

    .. sourcecode:: python

        # If align_corners == True:
        spacing = (Xin - 1) / (Xout - 1)
        grid_point[i] = min(Xin - 1, max(0, i*spacing)), for i=0,1,...,Xout-1

        # If align_corners == False:
        spacing = Xin / Xout
        grid_point[i] = min(Xin - 1, max(0, i*spacing + 0.5*spacing - 0.5)),
        ...   for i=0,1,...,Xout-1

    For example:

    .. sourcecode:: python

        Xin = 2
        input_interval = [0,1]

    Grid points:

    .. sourcecode:: python

        [0., 0.1, 0.5, 0.9, 1.] (Xout = 5, align_corners=False)
        [0., 0.25, 0.5, 0.75, 1.] (Xout = 5, align_corners=True)
        [0., 0., 0.33, 0.67, 1., 1.] (Xout = 6, align_corners=False)
        [0., 0.2, 0.4, 0.6, 0.8, 1.] (Xout = 6, align_corners=True)

    Note the following similarities:

    * ``align_corners=False`` is the same as
      ``tf.raw_ops.ResizeBilinear(align_corners=False, half_pixel_centers=True)``.

    * ``align_corners=True`` is the same as
      ``tf.raw_ops.ResizeBilinear(align_corners=True, half_pixel_centers=False)``.

    Returns
    -------
    tensor<[\*D, H2, W2],T>
        * Tensor with same type as the input.
        * ``H2`` = floor(``H1`` * ``scale_factor_height``).
        * ``W2`` = floor(``W1`` * ``scale_factor_width``).

    Attributes
    ----------
    T: fp32
    T2 : fp32 or int32
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        scale_factor_height=IntOrFloatInputType(const=True, optional=True),
        scale_factor_width=IntOrFloatInputType(const=True, optional=True),
        align_corners=BoolInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(
            scale_factor_height=1,
            scale_factor_width=1,
            align_corners=True,
        )

    def __init__(self, **kwargs):
        super(upsample_bilinear, self).__init__(**kwargs)

    def type_inference(self):
        if self.x.rank < 3:
            raise ValueError(
                'input to the "upsample_bilinear" op must have rank at least 3'
            )

        ret_shape = list(self.x.shape)
        if is_symbolic(ret_shape[-1]):
            ret_shape[-1] = get_new_symbol()
        else:
            ret_shape[-1] = int(np.floor(self.scale_factor_width.val * ret_shape[-1]))
        if is_symbolic(ret_shape[-2]):
            ret_shape[-2] = get_new_symbol()
        else:
            ret_shape[-2] = int(np.floor(self.scale_factor_height.val * ret_shape[-2]))
        return types.tensor(self.x.dtype, ret_shape)
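# A hedged 1-D check of the two align_corners formulas in the Notes section,
# for Xin = 2 and Xout = 6 (compare with the grid-point lists above).
import numpy as np

i = np.arange(6, dtype=np.float64)
aligned = np.clip(i * (2 - 1) / (6 - 1), 0, 1)      # align_corners=True
s = 2 / 6
unaligned = np.clip(i * s + 0.5 * s - 0.5, 0, 1)    # align_corners=False
print(aligned.round(2))    # [0.   0.2  0.4  0.6  0.8  1.  ]
print(unaligned.round(2))  # [0.   0.   0.33 0.67 1.   1.  ]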
Example #12
class affine(Operation):
    """
    Apply a linear affine transform to the input 2D image tensor. The value at
    the ``(x, y)`` (that is, ``(w, h)``) coordinate of the output is computed
    by first computing the coordinates ``x'`` and ``y'`` with the following
    equations, and then computing the value at the coordinate ``(x', y')`` in
    the input image using either bilinear or nearest-neighbor interpolation.
    If the point ``(x', y')`` falls outside the input image, then padding
    information is used to compute the value.

    * ``x' = a0 * x + a1 * y + a2``
    * ``y' = b0 * x + b1 * y + b2``

    Parameters
    ----------
    x: tensor<[B, C, H1, W1], T>
        * Must be rank ``4``.
    transform_matrix: tensor<[D, 6], T>
        * Must be rank ``2``
        * D can be either B or 1.
            when D == B, for each batch, there is a separate transform matrix
            when D == 1, the same matrix is used for all input batches
            for each batch: [a0, a1, a2, b0, b1, b2]
    output_height: const<i32>
        * Target output height
    output_width: const<i32>
        * Target output width
    sampling_mode: const<str>
        * Allowed values: "bilinear"
    padding_mode: const<str>
        * Allowed values: "constant"
        * Note that the following illustration is the 1-D case for brevity; the op only supports 2-D image input.
        * if ``padding_mode == "constant"``:
            the input image is assumed to be padded with the padding_value
            E.g., |1, 2, 3| -> |0, 0, 0, 1, 2, 3, 0, 0, 0|
    padding_value: const<T>
        * Currently non-zero values are not supported.
        * To be used only when ``padding_mode == "constant"``, ignored in other cases.
    coordinates_mode: const<str>
        * Allowed values: "normalized_minus_one_to_one"
        * If ``coordinates_mode == "normalized_minus_one_to_one"``, the in-image
            values are in ``[-1, 1]``. That is:
            * (-1, -1), i.e., (w=-1, h=-1), corresponds to the top-left pixel
            * (1, -1), i.e., (w=1, h=-1), corresponds to the top-right pixel
            * (-1, 1), i.e., (w=-1, h=1), corresponds to the bottom-left pixel
            * (1, 1), i.e., (w=1, h=1), corresponds to the bottom-right pixel
    align_corners: const<bool>
        * Currently align_corners=False is not supported.
        * To be used only when ``coordinates_mode != unnormalized``, ignored otherwise.
        * if ``align_corners == True``, the extrema coordinates correspond
            to the centers of the first and last corner pixels.
        * if ``align_corners == False``, the extrema coordinates correspond
            to the edges of the first and last corner pixels.

    Returns
    -------
    tensor<[B, C, output_height, output_width], T>

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        transform_matrix=TensorInputType(),
        output_height=IntInputType(const=True),
        output_width=IntInputType(const=True),
        sampling_mode=StringInputType(const=True),
        padding_mode=StringInputType(const=True),
        padding_value=FloatInputType(const=True),
        coordinates_mode=StringInputType(const=True),
        align_corners=BoolInputType(const=True),
    )

    def __init__(self, **kwargs):
        super(affine, self).__init__(**kwargs)

    def type_inference(self):
        if self.x.rank != 4:
            raise ValueError(
                'input "x" to the "affine" op must be a rank 4 tensor. '
                "Got rank {} tensor of shape {}".format(
                    self.x.rank, self.x.shape))
        if self.transform_matrix.rank != 2:
            raise ValueError(
                'input "transform_matrix" to the "affine" op must be a rank 2 tensor. '
                "Got rank {} tensor of shape {}".format(
                    self.transform_matrix.rank, self.transform_matrix.shape))
        if self.sampling_mode.val.lower() != "bilinear":
            raise NotImplementedError(
                'input "sampling_mode" to the "affine" not implemented. '
                'Got "{}"'.format(self.sampling_mode.val))
        if self.coordinates_mode.val.lower() != "normalized_minus_one_to_one":
            raise NotImplementedError(
                'input "coordinates_mode" to the "affine" not implemented. '
                'Got "{}"'.format(self.coordinates_mode.val))
        if self.padding_mode.val.lower(
        ) != "constant" or self.padding_value.val != 0.0:
            raise NotImplementedError(
                'input "padding_mode" to the "affine" not implemented. '
                'Got "{}" with "padding_value={}"'.format(
                    self.padding_mode.val, self.padding_value.val))

        input_shape = self.x.shape
        transform_matrix_shape = self.transform_matrix.shape
        if (not is_symbolic(transform_matrix_shape[-1])
                and transform_matrix_shape[-1] != 6):
            raise ValueError(
                'input "transform_matrix" to the "affine" op last dimension must be 6 '
                "[a0, a1, a2, b0, b1, b2], "
                "Got {} for last dimension".format(transform_matrix_shape[-1]))

        ret_shape = list(input_shape)
        ret_shape[2] = self.output_height.val
        ret_shape[3] = self.output_width.val
        return types.tensor(self.x.dtype, tuple(ret_shape))
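# A hedged sketch of the coordinate mapping in the docstring: for every
# output pixel (x, y), the sampled input location is x' = a0*x + a1*y + a2
# and y' = b0*x + b1*y + b2. Only the coordinate computation is shown; the
# bilinear sampling and constant padding are omitted, and the transform
# values are arbitrary.
import numpy as np

a0, a1, a2, b0, b1, b2 = 1.0, 0.0, 0.1, 0.0, 1.0, -0.1  # one row of transform_matrix
H_out, W_out = 4, 4
# Normalized output grid in [-1, 1] (coordinates_mode="normalized_minus_one_to_one",
# align_corners=True: the extrema land on the corner pixel centers).
ys, xs = np.meshgrid(np.linspace(-1, 1, H_out), np.linspace(-1, 1, W_out), indexing="ij")
x_src = a0 * xs + a1 * ys + a2
y_src = b0 * xs + b1 * ys + b2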
Example #13
class resample(Operation):
    """
    Resample the input image tensor ``x`` at the ``coordinates`` input. Since
    the coordinates may not correspond to exact pixels in the input image,
    this requires "resampling"; ``sampling_mode`` determines the algorithm
    used for resampling and computing the values.

    Parameters
    ----------
    x: tensor<[B, C, H1, W1], T>
        * Must be rank ``4``.
    coordinates: tensor<[B, H2, W2, 2], U>
        * Must be rank ``4``.
        * Coordinates are provided in the order (x, y), i.e., (w, h).
        * Value of each output location output[b, c, h, w] is calculated by
          sampling, from the input image x[b, c, :, :], the pixel at the (x, y)
          location corresponding to the length-2 vector: coordinates[b, h, w, :]
        * Coordinate (normalized or unnormalized) should be specified according
          to ``coordinates_mode``
    sampling_mode: const<str>
        * Allowed values: "bilinear" , "nearest"
    padding_mode: const<str>
        * Allowed values: "constant", "border", "reflection", "symmetric"
        * Note that the following illustration is the 1-D case for brevity; the op only supports 2-D image input.
        * if ``padding_mode == "constant"``:
            the input image is assumed to be padded with the padding_value
            E.g., |1, 2, 3| -> |0, 0, 0, 1, 2, 3, 0, 0, 0|
        * if ``padding_mode == "border"``:
            the input image is assumed to be padded with the values replicated
            from the values at the edge. This is also referred to as the
            "clamped" or "replication" mode, since the padded values are
            clamped to the border values.
            E.g., |1, 2, 3| -> |1, 1, 1, 1, 2, 3, 3, 3, 3|
        * if ``padding_mode == "reflection"``:
            the border values are reflected, *not* including the values at the edge/border
            E.g., |1, 2, 3| -> |2, 3, 2, 1, 2, 3, 2, 1, 2|
        * if ``padding_mode == "symmetric"``:
            values are reflected, including the border/edge values
            E.g., |1, 2, 3| -> |3, 2, 1 , 1, 2, 3, 3, 2, 1|
    padding_value: const<T>
        * To be used only when ``padding_mode == "constant"``, ignored in other cases.
    coordinates_mode: const<str>
        * allowed values: "unnormalized", "normalized_minus_one_to_one",
                          "normalized_zero_to_one"
        * if ``coordinates_mode == "unnormalized"``, the coordinate input values
            are interpreted to be in range ``[0, W - 1]`` / ``[0, H - 1]``, which
            corresponds to in-image points
        * if ``coordinates_mode == "normalized_minus_one_to_one"``, in-image values are [-1, 1]
        * if ``coordinates_mode == "normalized_zero_to_one"``, in-image values are [0, 1]
        * E.g., if ``coordinates_mode == "normalized_minus_one_to_one"``,
            the in-range values are ``[-1, 1]``. That is:
            * (-1, -1), i.e., (w=-1, h=-1), corresponds to the top-left pixel
            * (1, -1), i.e., (w=1, h=-1), corresponds to the top-right pixel
            * (-1, 1), i.e., (w=-1, h=1), corresponds to the bottom-left pixel
            * (1, 1), i.e., (w=1, h=1), corresponds to the bottom-right pixel
    align_corners: const<bool>
        * if ``align_corners == True``, the extrema coordinates correspond
            to the centers of the first and last corner pixels.
        * if ``align_corners == False``, the extrema coordinates correspond
            to the edges of the first and last corner pixels.

    Returns
    -------
    tensor<[B, C, H2, W2], T>

    Attributes
    ----------
    T: fp32
    U: fp32, i32, i64
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        coordinates=TensorInputType(),
        sampling_mode=StringInputType(const=True),
        padding_mode=StringInputType(const=True),
        padding_value=FloatInputType(const=True),
        coordinates_mode=StringInputType(const=True),
        align_corners=BoolInputType(const=True),
    )

    def __init__(self, **kwargs):
        super(resample, self).__init__(**kwargs)

    def type_inference(self):
        if self.x.rank != 4:
            raise ValueError(
                'input "x" to the "resample" op must be a rank 4 tensor. '
                "Got rank {} tensor of shape {}".format(
                    self.x.rank, self.x.shape))
        if self.coordinates.rank != 4:
            raise ValueError(
                'input "coordinates" to the "resample" op must be a rank 4 tensor. '
                "Got rank {} tensor of shape {}".format(
                    self.coordinates.rank, self.coordinates.shape))

        input_shape = self.x.shape
        coord_shape = self.coordinates.shape
        if (not is_symbolic(input_shape[0]) and not is_symbolic(coord_shape[0])
                and input_shape[0] != coord_shape[0]):
            raise ValueError(
                'input "x" and "coordinates" to the "resample" must agree on '
                "dimension of batch size: {} vs. {}".format(
                    input_shape[0], coord_shape[0]))
        if not is_symbolic(coord_shape[-1]) and coord_shape[-1] != 2:
            raise ValueError(
                'input "coordinates" to the "resample" op last dimension must be 2. '
                "Got {} for last dimension".format(coord_shape[-1]))

        ret_shape = list(input_shape)
        ret_shape[2] = coord_shape[1]  # Output height
        ret_shape[3] = coord_shape[2]  # Output width
        return types.tensor(self.x.dtype, tuple(ret_shape))
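# A hedged note: the four 1-D padding illustrations in the docstring line up
# with NumPy's padding modes, which makes them easy to verify.
import numpy as np

row = np.array([1, 2, 3])
print(np.pad(row, 3, mode="constant"))   # [0 0 0 1 2 3 0 0 0]  ("constant")
print(np.pad(row, 3, mode="edge"))       # [1 1 1 1 2 3 3 3 3]  ("border")
print(np.pad(row, 3, mode="reflect"))    # [2 3 2 1 2 3 2 1 2]  ("reflection")
print(np.pad(row, 3, mode="symmetric"))  # [3 2 1 1 2 3 3 2 1]  ("symmetric")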
Example #14
class matmul(Operation):
    """
    Perform N-D batch matrix multiplication with NumPy-style broadcasting
    based on the following rules:

    Rule 1. If both ``x, y`` are 1-D, return the scalar from the dot product.

    Rule 2. If both ``x, y`` are 2-D or higher, perform a broadcast on the batch dimensions
    (all dimensions except the last ``2``).

    For example:

    * ``x.shape == (10, 4, 3)``
    * ``y.shape == (5, 10, 3, 2)``
    * ``matmul(x, y).shape == (5, 10, 4, 2)``

    Conventional matrix multiplication is a special case where both ``x, y`` are
    exactly 2-D. For example:

    * ``x.shape == (4, 3)``
    * ``y.shape == (3, 2)``
    * ``matmul(x, y).shape == (4, 2)``

    If ``x`` is 1-D, and ``y`` is N-D where ``N >= 2``, ``x`` is first promoted to
    matrix ``xm`` by prepending a ``1`` to its dimension, and the resulting ``xm`` is
    broadcast to ``y`` following Rule 2 above. After this, remove the inserted dimension.
    For example:

    * ``x.shape == (4,)``
    * ``y.shape == (10, 4, 3)``
    * ``xm.shape == (1, 4)``
    * ``matmul(xm, y).shape == (10, 1, 3)``
    * Removing the inserted dimension results in ``matmul(x, y).shape == (10, 3)``.
    * Note: ``xm`` and ``matmul(xm, y)`` are for illustration only.

    If ``x`` is N-D where ``N >= 2``, and ``y`` is 1-D, ``y`` is first promoted to
    matrix ``ym`` by appending a ``1`` to its dimension, and the resulting ``ym`` is
    broadcast to ``x`` following Rule 2 above. After this, remove the inserted dimension.
    For example:

    * ``x.shape == (10, 3, 4)``
    * ``y.shape == (4,)``
    * ``ym.shape == (4, 1)``
    * ``matmul(x, ym).shape == (10, 3, 1)``
    * Removing the inserted dimension results in ``matmul(x, y).shape == (10, 3)``.
    * Note: ``ym`` and ``matmul(x, ym)`` are for illustration only.

    Parameters
    ----------
    x: tensor<[\*,K1], T> (Required)
        * ``x`` must be 1-D or higher.
    y: tensor<[\*,K2], T> (Required)
        * ``y`` must be 1-D or higher.
    transpose_x: const bool (Optional)
        * Default to ``False``.
        * Use ``True`` to transpose the last two dimensions of ``x`` before multiplication.
          It has no effect when ``x`` is 1-D.
    transpose_y: const bool (Optional)
        * Default to ``False``.
        * Use ``True`` to transpose the last two dimensions of ``y`` before multiplication.
          It has no effect when ``y`` is 1-D.

    Returns
    -------
    tensor<\*, T>
        * Scalar or tensor output.

    Attributes
    ----------
    T: fp16, fp32, i32
    """
    input_spec = InputSpec(
        x=TensorInputType(),
        y=TensorInputType(),
        transpose_x=BoolInputType(const=True, optional=True),
        transpose_y=BoolInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(
            transpose_x=False,
            transpose_y=False,
        )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def type_inference(self):
        x_type = self.x.dtype
        x_shape = list(self.x.shape)
        y_shape = list(self.y.shape)
        x_rank = len(x_shape)

        if x_rank == 1 and self.transpose_x.val:
            msg = "Op {} (matmul): x is rank 1, but transpose_x is True, which is not allowed."
            raise ValueError(msg.format(self.name))

        if self.transpose_x.val:
            x_shape = list(x_shape)
            x_shape[-1], x_shape[-2] = x_shape[-2], x_shape[-1]
            x_shape = tuple(x_shape)
        if self.transpose_y.val:
            y_shape = list(y_shape)
            y_shape[-1], y_shape[-2] = y_shape[-2], y_shape[-1]
            y_shape = tuple(y_shape)
        if not (x_shape[-1] == y_shape[-2] or is_symbolic(x_shape[-1])
                or is_symbolic(y_shape[-2])):
            msg = "Op {} (matmul): x {}, y {} are not broadcastable"
            raise ValueError(msg.format(self.name, self.x.shape, self.y.shape))

        if x_rank == 1:
            # promote shape of x to rank 2
            x_shape = list((1, ) + tuple(x_shape))
        ret_shape = list(broadcast_shapes(x_shape[:-2], y_shape[:-2]))
        ret_shape += [x_shape[-2], y_shape[-1]]
        if x_rank == 1:
            # remove the first dimension of the returned shape
            return types.tensor(x_type, tuple(ret_shape[1:]))
        else:
            return types.tensor(x_type, tuple(ret_shape))

    @precondition(allow=VALUE)
    def value_inference(self):
        x = self.x.val
        if self.transpose_x.val:
            # Swap only the last two axes; np.transpose with no axes argument
            # would reverse all axes for inputs of rank > 2.
            x = np.swapaxes(x, -1, -2)
        y = self.y.val
        if self.transpose_y.val:
            y = np.swapaxes(y, -1, -2)
        return np.matmul(x, y)
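# A hedged check of the broadcasting rules above using NumPy, whose matmul
# semantics this op mirrors.
import numpy as np

x = np.zeros((10, 4, 3))
y = np.zeros((5, 10, 3, 2))
assert np.matmul(x, y).shape == (5, 10, 4, 2)  # Rule 2: batch broadcast

v = np.zeros((4,))
m = np.zeros((10, 4, 3))
assert np.matmul(v, m).shape == (10, 3)        # 1-D x: prepend 1, multiply, squeeze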
Example #15
class l2_norm(Operation):
    """
    Apply L2 normalization to the n-dimensional input tensor. That is, divide the input
    tensor by the square root of the sum of squares of all elements of the input.

    .. math::
       x_i \\leftarrow \\dfrac{x_i}{\\sqrt{\\sum{x_i^2} + \\epsilon}}


    Parameters
    ----------
    x: tensor<[\*B, \*D], T> (Required)
        * Input tensor, ``rank(x) >= 3``.
        * ``*B`` refers to the leading dimensions.
        * ``*D`` refers to the spatial dimensions to be normalized. Must be rank 3: ``rank(*D) == 3``.
        * When ``rank(x) == 3`` (that is, ``rank(*B) == 0`` and ``rank(*D) == 3``),
          the input is divided by the square root of the sum of squares of all elements.
        * For ranks greater than 3 (that is, ``rank(*B) >= 1`` and ``rank(*D) == 3``),
          the leading dimensions \*B, starting from ``0`` to ``-4`` (inclusive),
          are all treated as batch. The L2 normalization is then done batch-wise.
    epsilon: const fp32 (Optional)
        * Small constant to avoid division by ``0``.
        * Optional, defaults to ``1e-6``.

    Returns
    -------
    tensor<[\*B, \*D], T>
        * Same type and shape as the input tensor ``x``.

    Attributes
    ----------
    T: fp16, fp32
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        epsilon=FloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(epsilon=1e-6, )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def type_inference(self):
        if self.x.rank < 3:
            msg = "Input rank of l2_norm must be at least 3. Got {}".format(
                self.x.rank)
            raise ValueError(msg)
        x_shape = self.x.shape
        return types.tensor(self.x.dtype, tuple(x_shape))

    @precondition(allow=VALUE)
    def value_inference(self):
        val = self.x.val
        eps = self.epsilon.val
        shape = self.x.shape
        rank = self.x.rank
        batch_dims = rank - 3
        if batch_dims == 0:
            square_sum = np.sum(val**2)
            output = val / np.power(square_sum + eps, 0.5)
        else:
            batch_dim_prod = np.prod(shape[:batch_dims])
            reshape_val = np.reshape(val, (batch_dim_prod, -1))
            square_sum = np.sum(
                reshape_val * reshape_val, axis=1, keepdims=True) + eps
            output = reshape_val / np.power(square_sum, 0.5)
            output = np.reshape(output, shape)
        return output
Example #16
class layer_norm(Operation):
    """
    Apply layer normalization to the n-dimensional input tensor:

    .. math::
       out = gamma * (input - E[x]) / sqrt(Var[x] + epsilon) + beta


    Parameters
    ----------
    x: tensor<\*?, T> (Required)
        * Input tensor.

    axes: const<[K], i32> (Optional)
        * Dimensions to perform layer normalization.
        * Default is ``None`` (all dimensions).

    gamma: const tensor<\*?, T> (Optional)
        * If provided, the shape must be ``x.shape[axes]``. For instance, if
          input ``x`` has shape ``(3,4,5,6)`` and ``axes = [2,3]``, gamma must have
          shape ``(5,6)``.
        * Default is all ones.

    beta: const tensor<\*?, T> (Optional)
        * Same shape as gamma.
        * Default is all zeros.

    epsilon: const fp32 (Optional)
        * Small constant to avoid division by ``0``.
        * Default is ``1e-5``.


    Returns
    -------
    tensor<\*?, T>:
     * Tensor with same shape and type as the input tensor ``x``.

    Attributes
    ----------
    T: fp16, fp32
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        axes=IntTensorInputType(const=True, optional=True),
        gamma=TensorInputType(const=True, optional=True),
        beta=TensorInputType(const=True, optional=True),
        epsilon=FloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(
            axes=range(self.x.rank),
            gamma=None,
            beta=None,
            epsilon=1e-5,
        )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @staticmethod
    def _is_compatible_shape(shapea, shapeb):
        if not len(shapea) == len(shapeb):
            return False
        for a, b in zip(shapea, shapeb):
            if any_symbolic([a, b]):
                continue
            if a != b:
                return False
        return True

    def type_inference(self):
        rank = self.x.rank

        # check valid axes
        positive_axes = [
            axis + rank if axis < 0 else axis for axis in self.axes.val
        ]
        if not all([axis >= 0 and axis < rank for axis in positive_axes]):
            raise ValueError("axes must in the range of [-x.rank, x.rank-1].")

        # check shape of gamma and beta
        normalized_shape = [
            self.x.shape[i] for i in range(rank) if i in positive_axes
        ]
        if self.gamma is not None and not layer_norm._is_compatible_shape(
                list(self.gamma.shape), normalized_shape):
            raise ValueError(
                "Expect shape {} for gamma, but get shape {} instead".format(
                    normalized_shape, self.gamma.shape))

        if self.beta is not None and not layer_norm._is_compatible_shape(
                list(self.beta.shape), normalized_shape):
            raise ValueError(
                "Expect shape {} for beta, but get shape {} instead".format(
                    normalized_shape, self.beta.shape))

        x_shape = self.x.shape
        return types.tensor(self.x.dtype, tuple(x_shape))

    @precondition(allow=VALUE)
    def value_inference(self):
        def np_layer_norm(x, axes, gamma, beta, epsilon=1e-5):
            rank = len(x.shape)
            axes = [axis + rank if axis < 0 else axis for axis in axes]
            normalized_shape = [
                x.shape[i] if i in axes else 1 for i in range(rank)
            ]
            gamma = np.ones(
                shape=normalized_shape) if gamma is None else np.reshape(
                    gamma, normalized_shape)
            beta = np.zeros(
                shape=normalized_shape) if beta is None else np.reshape(
                    beta, normalized_shape)
            num = x - np.mean(x, axis=tuple(axes), keepdims=True)
            dem = np.sqrt(
                np.sum(np.square(num), axis=tuple(axes), keepdims=True) /
                np.prod(normalized_shape) + epsilon)
            return num / dem * gamma + beta

        _axes = list(range(self.x.rank)) if self.axes is None else self.axes.val
        _gamma = None if self.gamma is None else self.gamma.val
        _beta = None if self.beta is None else self.beta.val
        return np_layer_norm(self.x.val, _axes, _gamma, _beta,
                             self.epsilon.val)
Example #17
class batch_norm(Operation):
    """
    Normalize input tensor ``x`` by ``mean`` and ``variance``, and optionally apply a
    scale ``gamma`` and an offset ``beta``:

    .. math::
       y_i = \\gamma_i \\dfrac{(x_i - mean_i)}{\\sqrt{variance_i + epsilon}} + \\beta_i \\;,\\; i=1,\\ldots,C

    The ``mean``, ``variance``, ``gamma``, and ``beta``
    must be 1-D tensors whose lengths are equal to the second axis (the "depth"
    or "channel" dimension) of ``x``.

    Parameters
    ----------
    x: tensor<[n,C,*D], T> (Required)
        * ``3 <= rank <= 5``.
        * ``*D`` refers to the spatial dimensions, ``1 <= rank(*D) <= 3``.
        * ``n`` is the batch dimension.
    mean: const tensor<[C], T> (Required)
    variance: const tensor<[C], T> (Required)
    gamma: const tensor<[C], T> (Optional)
        * Optional scale applied to normalized tensor.
        * Default is all ones.
    beta: const tensor<[C], T> (Optional)
        * Optional offset applied to normalized tensor.
        * Default is all zeros.
    epsilon: const fp32 (Optional)
        * Default is ``1e-5``.

    Returns
    -------
    tensor<[n,C,*D], T>
        * Output tensor has the same shape and type as the input ``x``.

    Attributes
    ----------
    T: fp16, fp32
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        mean=TensorInputType(const=True),
        variance=TensorInputType(const=True),
        gamma=TensorInputType(const=True, optional=True),
        beta=TensorInputType(const=True, optional=True),
        epsilon=FloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(
            gamma=None,
            beta=None,
            epsilon=1e-5,
        )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def type_inference(self):
        x_shape = self.x.shape
        return types.tensor(self.x.dtype, tuple(x_shape))
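
Because ``mean`` and ``variance`` are compile-time constants, ``batch_norm`` at
inference time reduces to a per-channel affine transform. A hedged NumPy sketch
of the formula above (all values here are illustrative):

import numpy as np

n, C, H, W = 2, 3, 4, 4
x = np.random.rand(n, C, H, W).astype(np.float32)
mean = np.random.rand(C).astype(np.float32)
variance = np.random.rand(C).astype(np.float32) + 0.1
gamma = np.ones(C, dtype=np.float32)          # default scale
beta = np.zeros(C, dtype=np.float32)          # default offset

# Reshape the [C] parameters to [1, C, 1, 1] so they broadcast over n and *D.
p = (1, C, 1, 1)
y = (gamma.reshape(p) * (x - mean.reshape(p))
     / np.sqrt(variance.reshape(p) + 1e-5) + beta.reshape(p))
assert y.shape == x.shape
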
Example #18
class conv_transpose(Operation):
    """
    Perform transposed convolution (also known as deconvolution or fractionally
    strided convolution) over the input. ``conv_transpose`` can also be used to
    compute the gradient of ``conv``. Supports 1-D, 2-D, and 3-D convolution.

    Parameters
    ----------

    x: tensor<[n,C_in,*D_in],T> (Required)
        * Input data.
        * ``D_in`` are spatial dimensions.
        * ``1 <= len(D_in) <= 3``.
        * ``C_in`` is the number of input channels.

    weight: const tensor<[C_in,C_out/groups,*K], T> (Required)
        * Filter weights. ``C_in``, ``C_out`` are the number of input and output channels,
          respectively.
        * ``K`` are the kernel spatial dimensions. ``1 <= len(K) <= 3``.

    bias: const tensor<[C_out],T> (Optional, default to all 0)
        * Bias added along output channels.

    pad: const tensor<[P],i32> (Optional, default to all 0s)
        * Number of elements to pad before and after each dimension.
        * ``P == 2 * len(D_in)``.
        * ``pad[2*i], pad[2*i+1]`` are pad sizes before and after
          dimension ``i``, where ``0 <= i < len(D_in)``.

    output_shape: const tensor<[rank(x)],i32> (Optional, default None)
        * Expected output shape. The first two dimensions must be ``[n, C_out]``.
        * The output shape of ``conv_transpose`` is underdetermined in general,
          because ``conv`` can map multiple input shapes to a single output shape.
          For example, for ``same`` padding mode, ``conv_out = ceil(conv_in/stride)``.
          Hence we need ``output_shape`` when this occurs.

    pad_type: const str (Optional, default ``valid``)
        * One of ``same``, ``valid``, or ``custom``.

    strides: const tensor<[S],i32> (Optional. Default to all 1s)
        * Stride along each of the spatial dimensions.
        * ``S == len(D_in)``.

    dilations: const tensor<[S],i32> (Optional. Default to all 1s)
        * Dilation value along each spatial dimension in ``D_in``. See ``conv``.
        * ``S == len(D_in)``.

    groups: const tensor<[], i32> (Optional. Default to 1)
        * Input and output channels are separated into ``groups``.
        * ``C_in`` and ``C_out`` must be divisible by the number of groups.
          See ``conv`` for examples.

    Returns
    -------
    tensor<[n,C_out,*D_out],T>
        * If ``output_shape`` is not ``None``:

             ``D_out = output_shape``

        * If ``pad_type == "custom"``:

             ``D_out[i] = (D_in[i]-1)*stride[i] + (K[i]-1)*dilation[i] + 1 - pad[2*i] - pad[2*i+1]``

        * If ``pad_type == "valid"``:

             ``D_out[i] = (D_in[i]-1)*stride[i] + (K[i]-1)*dilation[i] + 1``

        * If ``pad_type == "same"``:

             ``D_out[i] = D_in[i] * stride[i]``

    Attributes
    ----------
    T: fp16, fp32

    See Also
    --------
    conv
    """

    input_spec = InputSpec(
        x=FloatTensorInputType(),  # [n, C_in, spatial_dims]
        weight=FloatTensorInputType(const=True),  # [C_in, C_out/groups, spatial_dims]
        bias=FloatTensorInputType(const=True, optional=True),
        pad=IntTensorInputType(const=True, optional=True),
        output_shape=IntTensorInputType(const=True, optional=True),
        pad_type=StringInputType(const=True, optional=True),
        strides=TensorInputType(const=True, optional=True),
        dilations=TensorInputType(const=True, optional=True),
        groups=IntInputType(const=True, optional=True),
    )

    def default_inputs(self):
        num_spatial_dims = self.x.rank - 2
        return DefaultInputs(
            bias=None,
            pad=[0] * 2 * num_spatial_dims,
            output_shape=None,
            pad_type="valid",
            strides=[1] * num_spatial_dims,
            dilations=[1] * num_spatial_dims,
            groups=1,
        )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def type_inference(self):
        # Input shape is [n, C_in, spatial_dims]
        in_shape = self.x.shape
        # Weight shape is [C_in, C_out/group, spatial_dims]
        f_shape = self.weight.shape
        kernel_shape = f_shape[2:]
        spatial_dim_rank = len(in_shape) - 2
        N = in_shape[0]
        C_in = in_shape[1]
        groups = self.groups.val
        C_out = f_shape[1] * groups

        if self.bias is not None and self.bias.val.shape[0] != C_out:
            msg = "# of bias values {} not equal to # output channels {}"
            raise ValueError(msg.format(self.bias.val.shape[0], C_out))
        if not is_symbolic(C_in) and C_in % groups != 0:
            msg = "# of input channels {} not divisible by groups {}"
            raise ValueError(msg.format(C_in, groups))

        # If output shape is given, return it
        if self.output_shape is not None:
            output_shape = self.output_shape.val
            assert output_shape[0] == N
            assert output_shape[1] == C_out
            return types.tensor(self.x.dtype, tuple(output_shape))

        strides = self.strides.val
        dilations = self.dilations.val
        kernel_shape = [(kernel_shape[r] - 1) * dilations[r] + 1
                        for r in range(spatial_dim_rank)]

        D_in = in_shape[2:]  # spatial dimensions

        # Deconv's output shape is non-deterministic, we follow TF shape logic here.
        if self.pad_type.val == "same":
            d_out_shape = [
                strides[r] * D_in[r] for r in range(spatial_dim_rank)
            ]
        elif self.pad_type.val == "valid":
            d_out_shape = [
                strides[r] * (D_in[r] - 1) + kernel_shape[r]
                for r in range(spatial_dim_rank)
            ]
        elif self.pad_type.val == "custom":
            if self.pad is None:
                raise ValueError("self.pad must exist if pad_type is custom")
            pad = self.pad.val
            d_out_shape = [
                strides[r] * (D_in[r] - 1) + kernel_shape[r] - pad[2 * r] -
                pad[2 * r + 1] for r in range(spatial_dim_rank)
            ]
        else:
            raise ValueError("Invalid pad_type: {}".format(self.pad_type.val))

        retshape = [N, C_out] + d_out_shape
        return types.tensor(self.x.dtype, tuple(retshape))
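
The shape arithmetic in ``type_inference`` can be summarized as a standalone
helper. ``deconv_out_shape`` below is a hypothetical name, sketching the same
TF-style rules under the same assumptions; it is not part of the op:

def deconv_out_shape(d_in, kernel, strides, dilations, pad_type, pad=None):
    # Mirrors the per-pad_type rules used by conv_transpose.type_inference.
    d_out = []
    for r in range(len(d_in)):
        k_eff = (kernel[r] - 1) * dilations[r] + 1  # dilated kernel extent
        if pad_type == "same":
            d_out.append(strides[r] * d_in[r])
        elif pad_type == "valid":
            d_out.append(strides[r] * (d_in[r] - 1) + k_eff)
        elif pad_type == "custom":
            d_out.append(strides[r] * (d_in[r] - 1) + k_eff
                         - pad[2 * r] - pad[2 * r + 1])
        else:
            raise ValueError("unsupported pad_type: {}".format(pad_type))
    return d_out

# 2-D deconv, 3x3 kernel, stride 2, valid padding: (4-1)*2 + 3 = 9 per dim.
assert deconv_out_shape([4, 4], [3, 3], [2, 2], [1, 1], "valid") == [9, 9]
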
Example #19
class linear(Operation):
    """
    Perform ``x * weight.T + bias``, where ``weight`` and ``bias`` are constant at
    compile time.

    Parameters
    ----------
    x: tensor<[\*D,D_in], T> (Required)
        * ``1 <= rank <= 3``.
        * ``0 <= rank(*D) <= 2``.
    weight: const tensor<[D_out,D_in], T> (Required)
    bias: const tensor<[D_out],T> (Optional)
        * Default to ``0``.

    Returns
    -------
    tensor<[\*D,D_out], T>
        * Same rank as the input ``x``.

    Attributes
    ----------
    T: fp16, fp32, i32
    """
    input_spec = InputSpec(
        x=TensorInputType(),
        weight=TensorInputType(const=True),
        bias=TensorInputType(const=True, optional=True),
    )

    def default_inputs(self):
        Dout = self.weight.shape[0]
        return DefaultInputs(bias=[0.] * Dout, )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def type_inference(self):
        x_type = self.x.dtype
        x_shape = self.x.shape
        weight_shape = self.weight.shape
        assert len(weight_shape) == 2
        if not (x_shape[-1] == weight_shape[-1] or is_symbolic(x_shape[-1])
                or is_symbolic(weight_shape[-1])):
            msg = "Op '{}' (linear op): Size of the last dimension of x, which is {}, " \
                  "does not match the last dimension of weights, which is {}"
            raise ValueError(
                msg.format(self.name, x_shape[-1], weight_shape[-1]))
        if self.bias is not None:
            assert len(self.bias.shape) == 1
            if len(self.bias.val) != weight_shape[-2]:
                msg = "Op '{}' (linear op): Size of the bias, which is {}, " \
                      "does not match the first dimension of weights, which is {}"
                raise ValueError(
                    msg.format(self.name, len(self.bias.val),
                               weight_shape[-2]))
        shape = list(x_shape)
        shape[-1] = weight_shape[0]
        return types.tensor(x_type, tuple(shape))

    @precondition(allow=VALUE)
    def value_inference(self):
        res = np.matmul(self.x.val, np.transpose(self.weight.val))
        if self.bias is not None:
            res += self.bias.val
        return res
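
``value_inference`` above is already plain NumPy, so a usage sketch is direct
(shapes here are illustrative):

import numpy as np

D_in, D_out = 4, 3
x = np.random.rand(2, D_in).astype(np.float32)           # [*D, D_in]
weight = np.random.rand(D_out, D_in).astype(np.float32)  # [D_out, D_in]
bias = np.zeros(D_out, dtype=np.float32)                 # default: all zeros

y = np.matmul(x, weight.T) + bias                        # x * weight.T + bias
assert y.shape == (2, D_out)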