Ejemplo n.º 1
0
    def _bilinear(*indices):
        """Produce one bilinearly interpolated output value.

        The output index is decoded with ``_get_indices``, mapped to a
        fractional source coordinate (shifted by half a pixel when
        ``coordinate_transformation_mode`` is "half_pixel"), and the four
        neighbouring samples read through ``_get_pixel`` are blended with
        ``_lerp`` along x and then along y.  The ratios, the mode and the
        helper functions all come from the enclosing scope.
        """
        n, c, y, x, cc = _get_indices(*indices)

        # Map the output position back into source coordinates.
        if coordinate_transformation_mode == "half_pixel":
            src_y = y_ratio * (y + 0.5) - 0.5
            src_x = x_ratio * (x + 0.5) - 0.5
        else:
            src_y = y_ratio * y
            src_x = x_ratio * x

        def _split(coord):
            # Integer cell index plus the fractional offset inside that cell.
            return tvm.floor(coord).astype('int32'), coord - tvm.floor(coord)

        left, x_weight = _split(src_x)
        top, y_weight = _split(src_y)

        # Blend horizontally on the two rows, then vertically between them.
        upper = _lerp(_get_pixel(n, c, top, left, cc),
                      _get_pixel(n, c, top, left + 1, cc),
                      x_weight)
        lower = _lerp(_get_pixel(n, c, top + 1, left, cc),
                      _get_pixel(n, c, top + 1, left + 1, cc),
                      x_weight)
        return _cast_output(_lerp(upper, lower, y_weight))
Ejemplo n.º 2
0
    def _nearest_neighbor(*indices):
        """Select the source sample for one output position.

        The output y/x indices are scaled by ``y_ratio`` / ``x_ratio``.  When
        ``align_corners`` is set the scaled coordinate is rounded; otherwise
        it is floored after adding a small epsilon.  The chosen sample is read
        with ``_get_pixel`` and passed through ``_cast_output``.
        """
        n, c, y, x, cc = _get_indices(*indices)

        src_y = y_ratio * y
        src_x = x_ratio * x

        if align_corners:
            def _to_index(coord):
                return tvm.round(coord).astype('int32')
        else:
            def _to_index(coord):
                # Add epsilon to floor to prevent gpu rounding errors.
                return tvm.floor(coord + 1e-5).astype('int32')

        nearest = _get_pixel(n, c, _to_index(src_y), _to_index(src_x), cc)
        return _cast_output(nearest)
Ejemplo n.º 3
0
    def _bicubic(*indices):
        """Produce one bicubically interpolated output value.

        The output index is mapped to a fractional source coordinate (with a
        half-pixel shift when ``coordinate_transformation_mode`` is
        "half_pixel").  A 4x4 block of samples at offsets -1, 0, +1 and +2
        around the floored coordinate is read through ``_get_pixel``; each row
        is reduced with ``_cubic_kernel`` using the x fraction, and the four
        row results are reduced again using the y fraction.
        """
        n, c, y, x, cc = _get_indices(*indices)

        # Map the output position back into source coordinates.
        if coordinate_transformation_mode == "half_pixel":
            src_y = y_ratio * (y + 0.5) - 0.5
            src_x = x_ratio * (x + 0.5) - 0.5
        else:
            src_y = y_ratio * y
            src_x = x_ratio * x

        base_x = tvm.floor(src_x).astype('int32')
        frac_x = src_x - tvm.floor(src_x)

        base_y = tvm.floor(src_y).astype('int32')
        frac_y = src_y - tvm.floor(src_y)

        # Row / column positions of the 4x4 neighbourhood.
        row_ids = (base_y - 1, base_y + 0, base_y + 1, base_y + 2)
        col_ids = (base_x - 1, base_x + 0, base_x + 1, base_x + 2)

        # Interpolate along x inside every row first ...
        row_values = []
        for row in row_ids:
            p0, p1, p2, p3 = [_get_pixel(n, c, row, col, cc) for col in col_ids]
            row_values.append(_cubic_kernel(p0, p1, p2, p3, frac_x))

        # ... then along y across the four row results.
        r0, r1, r2, r3 = row_values
        return _cast_output(_cubic_kernel(r0, r1, r2, r3, frac_y))
Ejemplo n.º 4
0
    def _bilinear(*indices):
        """Produce one bilinearly interpolated output value.

        The output y/x indices are scaled by ``y_ratio`` / ``x_ratio`` to get
        a fractional source coordinate.  The four samples around it are read
        through ``_get_pixel`` and blended with ``_lerp``, first along x and
        then along y.
        """
        n, c, y, x, cc = _get_indices(*indices)

        src_y = y_ratio * y
        src_x = x_ratio * x

        # Integer cell and fractional offset for each axis.
        col = tvm.floor(src_x).astype('int32')
        col_weight = src_x - tvm.floor(src_x)
        row = tvm.floor(src_y).astype('int32')
        row_weight = src_y - tvm.floor(src_y)

        # Blend the upper pair and the lower pair horizontally.
        upper = _lerp(_get_pixel(n, c, row, col, cc),
                      _get_pixel(n, c, row, col + 1, cc),
                      col_weight)
        lower = _lerp(_get_pixel(n, c, row + 1, col, cc),
                      _get_pixel(n, c, row + 1, col + 1, cc),
                      col_weight)

        # Blend the two intermediate results vertically.
        blended = _lerp(upper, lower, row_weight)
        return _cast_output(blended)
Ejemplo n.º 5
0
    def _pool(i, c, ph, pw):
        """Max-reduce ``data`` over one bin of one region of interest.

        ``i`` selects the row of ``rois``; element 0 of that row is used as
        the batch index and elements 1..4 as start_w, start_h, end_w, end_h.
        ``c`` is the channel index and ``(ph, pw)`` the bin position inside
        the pooled output.  A bin that ends up empty after clamping to the
        feature map yields 0.
        """
        roi = rois[i]
        batch_index = roi[0].astype('int32')

        def _scaled(coord):
            # Bring an ROI coordinate onto the feature-map grid.
            return tvm.round(coord * spatial_scale).astype('int32')

        left = _scaled(roi[1])
        top = _scaled(roi[2])
        right = _scaled(roi[3])
        bottom = _scaled(roi[4])

        # force malformed ROIs to be 1x1
        roi_h = tvm.max(bottom - top + 1, tvm.const(1, 'int32'))
        roi_w = tvm.max(right - left + 1, tvm.const(1, 'int32'))

        bin_h = roi_h.astype(dtype) / pooled_size_h
        bin_w = roi_w.astype(dtype) / pooled_size_w

        # use epsilon to prevent floating point precision loss in floor/ceil
        eps = tvm.const(0.00001, dtype)

        def _clamp(value, upper):
            # Keep a window edge inside [0, upper].
            return tvm.min(tvm.max(value, 0), upper)

        hstart = _clamp(tvm.floor(ph * bin_h + eps).astype('int32') + top, height)
        wstart = _clamp(tvm.floor(pw * bin_w + eps).astype('int32') + left, width)
        hend = _clamp(tvm.ceil((ph + 1) * bin_h - eps).astype('int32') + top, height)
        wend = _clamp(tvm.ceil((pw + 1) * bin_w - eps).astype('int32') + left, width)

        non_empty = tvm.all(hstart < hend, wstart < wend)

        def _identity(elem_dtype):
            # Identity of the reduction: the dtype minimum for a non-empty
            # window, 0 for an empty one.
            return tvm.if_then_else(
                non_empty, tvm.min_value(elem_dtype), tvm.const(0.0, elem_dtype))

        def _combine(lhs, rhs):
            return tvm.make._OpMax(lhs, rhs)

        window_max = tvm.comm_reducer(_combine, _identity, name='max')

        rh = tvm.reduce_axis((0, hend - hstart), 'rh')
        rw = tvm.reduce_axis((0, wend - wstart), 'rw')
        return window_max(data[batch_index, c, hstart + rh, wstart + rw],
                          axis=[rh, rw])
Ejemplo n.º 6
0
    def _nearest_neighbor(*indices):
        """Select the source sample for one output position of a 3-D resize.

        The output z/y/x indices are scaled by ``z_ratio`` / ``y_ratio`` /
        ``x_ratio``.  For "align_corners" the scaled coordinate is rounded;
        for "asymmetric" and "half_pixel" it is floored after adding a small
        epsilon.

        Raises
        ------
        ValueError
            If ``coordinate_transformation_mode`` is none of the three modes
            above.
        """
        n, c, z, y, x, cc = _get_indices(*indices)

        src_z = z_ratio * z
        src_y = y_ratio * y
        src_x = x_ratio * x

        if coordinate_transformation_mode == "align_corners":
            def _to_index(coord):
                return tvm.round(coord).astype('int32')
        elif coordinate_transformation_mode in ["asymmetric", "half_pixel"]:
            def _to_index(coord):
                # Add epsilon to floor to prevent gpu rounding errors.
                return tvm.floor(coord + 1e-5).astype('int32')
        else:
            raise ValueError("Unsupported coordinate_transformation_mode: {}".format(
                coordinate_transformation_mode))

        nearest = _get_pixel(n, c,
                             _to_index(src_z), _to_index(src_y), _to_index(src_x),
                             cc)
        return _cast_output(nearest)
Ejemplo n.º 7
0
def floor(x):
    """Compute the element-wise floor of a tensor.

    Parameters
    ----------
    x : tvm.Tensor
        Tensor whose elements are floored.

    Returns
    -------
    y : tvm.Tensor
        Tensor with the same shape as ``x`` holding ``floor`` of each element.
    """
    def _floor_at(*idx):
        # Read the input at the same position and floor it.
        return tvm.floor(x(*idx))

    return tvm.compute(x.shape, _floor_at)
Ejemplo n.º 8
0
def floor(x):
    """Return a tensor holding the floor of every element of ``x``.

    Parameters
    ----------
    x : tvm.Tensor
        Input tensor.

    Returns
    -------
    y : tvm.Tensor
        Result tensor, computed over the shape of ``x``.
    """
    out_shape = x.shape
    return tvm.compute(out_shape, lambda *position: tvm.floor(x(*position)))
Ejemplo n.º 9
0
def resize_bicubic(indices, data, image_height, image_width,
                   target_height, target_width, boxes=None,
                   box_indices=None, extrapolation_value=None, layout='NCHW',
                   coordinate_transformation_mode="align_corners",
                   out_dtype=None):
    """Compute one output element of a bicubic resize.

    The output position given by ``indices`` is mapped to a fractional
    coordinate in the source image, the surrounding 4x4 block of samples is
    read, and a cubic polynomial is evaluated along x and then along y.
    Background: https://en.wikipedia.org/wiki/Bicubic_interpolation.

    Parameters
    ----------
    indices : tuple
        Position of the output element, ordered according to ``layout``.

    data : tvm.Tensor
        Source tensor, indexed as [batch, channel, in_height, in_width] for
        "NCHW", [batch, in_height, in_width, channel] for "NHWC", and with
        one additional trailing index for any other layout.

    image_height : integer
        Height of the source image (``.astype`` is called on it, so a TVM
        integer expression is expected).

    image_width : integer
        Width of the source image (same remark as ``image_height``).

    target_height : integer
        Height of the resized image.

    target_width : integer
        Width of the resized image.

    boxes : tvm.Tensor, optional
        A 2-D tensor of shape [num_boxes, 4]; each row holds the coordinates
        of one box, read here as (y1, x1, y2, x2).  When given, the source
        coordinate is derived from the box and source indices are not clamped
        to the image.

    box_indices : tvm.Tensor, optional
        A 1-D tensor of shape [num_boxes]; ``box_indices[i]`` names the entry
        of ``data`` that the i-th box refers to.

    extrapolation_value: float, optional
        Value returned when the source coordinate is below 0 or above
        ``image_height - 1`` / ``image_width - 1``.

    layout: string, optional
        "NCHW", "NHWC", or "NCHWc".

    coordinate_transformation_mode: string, optional
        How an output coordinate is mapped to a source coordinate; one of
        "half_pixel", "align_corners" and "asymmetric" (see the ONNX Resize
        operator specification).  Only consulted when ``boxes`` is None.

    out_dtype: string, optional
        Type of the result.  If left None the type of ``data`` is used.

    Returns
    -------
    output : out_dtype
        The interpolated value, cast to ``out_dtype``.

    Raises
    ------
    ValueError
        If ``boxes`` is None and ``coordinate_transformation_mode`` is not
        one of the supported modes.
    """
    # Decode the output position according to the layout.
    cc = None
    if layout == 'NHWC':
        n, y, x, c = indices
    elif layout == 'NCHW':
        n, c, y, x = indices
    else:
        n, c, y, x, cc = indices

    box_idx = n if box_indices is None else box_indices(n)

    def _cubic(s0, s1, s2, s3, t):
        # Cubic polynomial through four consecutive samples, evaluated at t.
        coef3 = -s0 / 2.0 + (3.0 * s1) / 2.0 - (3.0 * s2) / 2.0 + s3 / 2.0
        coef2 = s0 - (5.0 * s1) / 2.0 + 2.0 * s2 - s3 / 2.0
        coef1 = -s0 / 2.0 + s2 / 2.0
        coef0 = s1
        return coef3 * t * t * t + coef2 * t * t + coef1 * t + coef0

    def _fetch(row, col):
        # Read one source sample as float; clamp to the image unless boxes
        # are in use.
        if boxes is None:
            row = tvm.max(tvm.min(row, image_height - 1), 0)
            col = tvm.max(tvm.min(col, image_width - 1), 0)
        if layout == 'NHWC':
            sample = data(box_idx, row, col, c)
        elif layout == 'NCHW':
            sample = data(box_idx, c, row, col)
        else:
            # else must be NCHWxc
            sample = data(box_idx, c, row, col, cc)
        return sample.astype('float')

    # Map the output position to a fractional source coordinate.
    if boxes is not None:
        y1, x1 = boxes(n, 0), boxes(n, 1)
        y2, x2 = boxes(n, 2), boxes(n, 3)

        in_h = (image_height - 1) * (y2 - y1)
        in_w = (image_width - 1) * (x2 - x1)
        h_scale = in_h.astype('float') / (target_height - 1)
        w_scale = in_w.astype('float') / (target_width - 1)

        in_y = y1 * (image_height - 1) + h_scale * y
        in_x = x1 * (image_width - 1) + w_scale * x
    elif coordinate_transformation_mode == "align_corners":
        h_scale = (image_height - 1).astype('float') / (target_height - 1)
        w_scale = (image_width - 1).astype('float') / (target_width - 1)
        in_y = h_scale * y
        in_x = w_scale * x
    elif coordinate_transformation_mode == "half_pixel":
        h_scale = image_height.astype('float') / target_height
        w_scale = image_width.astype('float') / target_width
        in_y = h_scale * (y + 0.5) - 0.5
        in_x = w_scale * (x + 0.5) - 0.5
    elif coordinate_transformation_mode == "asymmetric":
        h_scale = image_height.astype('float') / target_height
        w_scale = image_width.astype('float') / target_width
        in_y = h_scale * y
        in_x = w_scale * x
    else:
        raise ValueError("Unsupported coordinate_transformation_mode: {}".format(
            coordinate_transformation_mode))

    xint = tvm.floor(in_x).astype('int32')
    xfract = in_x - tvm.floor(in_x)

    yint = tvm.floor(in_y).astype('int32')
    yfract = in_y - tvm.floor(in_y)

    # Positions of the 4x4 neighbourhood around the floored coordinate.
    row_ids = (yint - 1, yint + 0, yint + 1, yint + 2)
    col_ids = (xint - 1, xint + 0, xint + 1, xint + 2)

    # Interpolate along x inside every row, then along y across the rows.
    row_values = []
    for row in row_ids:
        s0, s1, s2, s3 = [_fetch(row, col) for col in col_ids]
        row_values.append(_cubic(s0, s1, s2, s3, xfract))
    value = _cubic(row_values[0], row_values[1], row_values[2], row_values[3],
                   yfract)

    # use extrapolation_value if in_y/in_x is out of boundary
    if extrapolation_value is not None:
        above_y = tvm.if_then_else(in_y > image_height - 1,
                                   extrapolation_value,
                                   value)
        y_checked = tvm.if_then_else(in_y < 0, extrapolation_value, above_y)
        above_x = tvm.if_then_else(in_x > image_width - 1,
                                   extrapolation_value,
                                   y_checked)
        value = tvm.if_then_else(in_x < 0, extrapolation_value, above_x)

    return value.astype(out_dtype if out_dtype else data.dtype)
Ejemplo n.º 10
0
def resize_nearest_neighbor(indices, data, image_height, image_width,
                            target_height, target_width, boxes=None,
                            box_indices=None, extrapolation_value=None, layout='NCHW',
                            coordinate_transformation_mode="align_corners",
                            out_dtype=None):
    """Compute one output element of a nearest-neighbor resize.

    The output position given by ``indices`` is mapped to a fractional
    coordinate in the source image and the sample at the closest integer
    position is returned.
    Background: https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation.

    Parameters
    ----------
    indices : tuple
        Position of the output element, ordered according to ``layout``.

    data : tvm.Tensor
        Source tensor, indexed as [batch, channel, in_height, in_width] for
        "NCHW", [batch, in_height, in_width, channel] for "NHWC", and with
        one additional trailing index for any other layout.

    image_height : integer
        Height of the source image (``.astype`` is called on it, so a TVM
        integer expression is expected).

    image_width : integer
        Width of the source image (same remark as ``image_height``).

    target_height : integer
        Height of the resized image.

    target_width : integer
        Width of the resized image.

    boxes : tvm.Tensor, optional
        A 2-D tensor of shape [num_boxes, 4]; each row holds the coordinates
        of one box, read here as (y1, x1, y2, x2).  When given, the source
        coordinate is derived from the box, it is rounded to the closest
        index, and source indices are not clamped to the image.

    box_indices : tvm.Tensor, optional
        A 1-D tensor of shape [num_boxes]; ``box_indices[i]`` names the entry
        of ``data`` that the i-th box refers to.

    extrapolation_value: float, optional
        Value returned when the source coordinate is below 0 or above
        ``image_height - 1`` / ``image_width - 1``.

    layout: string, optional
        "NCHW", "NHWC", or "NCHWc".

    coordinate_transformation_mode: string, optional
        How an output coordinate is mapped to a source coordinate; one of
        "half_pixel", "align_corners" and "asymmetric" (see the ONNX Resize
        operator specification).  "align_corners" rounds the coordinate, the
        other two floor it.

    out_dtype: string, optional
        Type of the result.  If left None the type of ``data`` is used.

    Returns
    -------
    output : out_dtype
        The selected value, cast to ``out_dtype``.

    Raises
    ------
    ValueError
        If ``boxes`` is None and ``coordinate_transformation_mode`` is not
        one of the supported modes.
    """
    # Decode the output position according to the layout.
    cc = None
    if layout == 'NHWC':
        n, y, x, c = indices
    elif layout == 'NCHW':
        n, c, y, x = indices
    else:
        n, c, y, x, cc = indices

    box_idx = n if box_indices is None else box_indices(n)

    # Scale factors between output and source coordinates.
    if boxes is not None:
        y1, x1 = boxes(n, 0), boxes(n, 1)
        y2, x2 = boxes(n, 2), boxes(n, 3)

        in_h = (image_height - 1) * (y2 - y1)
        in_w = (image_width - 1) * (x2 - x1)
        h_scale = in_h.astype('float') / (target_height - 1)
        w_scale = in_w.astype('float') / (target_width - 1)

        in_y = y1 * (image_height - 1) + h_scale * y
        in_x = x1 * (image_width - 1) + w_scale * x
    else:
        if coordinate_transformation_mode == "align_corners":
            h_scale = (image_height - 1).astype('float') / (target_height - 1)
            w_scale = (image_width - 1).astype('float') / (target_width - 1)
        elif coordinate_transformation_mode in ["asymmetric", "half_pixel"]:
            h_scale = image_height.astype('float') / target_height
            w_scale = image_width.astype('float') / target_width
        else:
            raise ValueError("Unsupported coordinate_transformation_mode: {}".format(
                coordinate_transformation_mode))
        in_y = h_scale * y
        in_x = w_scale * x

    # Turn the fractional coordinate into the index of the closest sample.
    use_round = coordinate_transformation_mode == "align_corners" or boxes is not None
    if use_round:
        col = tvm.round(in_x).astype("int32")
        row = tvm.round(in_y).astype("int32")
    else:
        # Add epsilon to floor to prevent gpu rounding errors.
        row = tvm.floor(in_y + 1e-5).astype('int32')
        col = tvm.floor(in_x + 1e-5).astype('int32')

    # Clamp to the image unless boxes are in use.
    if boxes is None:
        row = tvm.max(tvm.min(row, image_height - 1), 0)
        col = tvm.max(tvm.min(col, image_width - 1), 0)

    if layout == 'NHWC':
        sample = data(box_idx, row, col, c)
    elif layout == 'NCHW':
        sample = data(box_idx, c, row, col)
    else:
        # else must be NCHWxc
        sample = data(box_idx, c, row, col, cc)
    value = sample.astype('float')

    # use extrapolation_value if in_y/in_x is out of boundary
    if extrapolation_value is not None:
        above_y = tvm.if_then_else(in_y > image_height - 1,
                                   extrapolation_value,
                                   value)
        y_checked = tvm.if_then_else(in_y < 0, extrapolation_value, above_y)
        above_x = tvm.if_then_else(in_x > image_width - 1,
                                   extrapolation_value,
                                   y_checked)
        value = tvm.if_then_else(in_x < 0, extrapolation_value, above_x)

    return value.astype(out_dtype if out_dtype else data.dtype)
Ejemplo n.º 11
0
def sort_ir(data, index, output, axis, is_descend):
    """Low level IR to do sorting on the GPU, same usage as tvm.contrib.sort.argsort on the CPU.

    The statement is assembled with ``tvm.ir_builder`` in five stages, each
    guarded by ``tid < axis_mul_before * axis_mul_after``: copy the per-slice
    counts from ``index``, accumulate them into running offsets, gather the
    values to sort into scratch storage, sort each slice with odd-even
    transposition passes, and write the resulting indices to ``output``.

    Parameters
    ----------
    data: Buffer
        2D Buffer of input boxes' score with shape [batch_size, num_anchors].
        Must have dtype "float32" (asserted below).

    index : Buffer
        Buffer of number of valid number of boxes; one count is read per
        thread id.

    output : Buffer
        Output buffer of indices of sorted tensor.

    axis : int
        The axis used for sorting.  Negative values are counted from the end.

    is_descend : bool
        If the sorted data is in descending order.

    Returns
    -------
    stmt : Stmt
        The result IR statement.
    """

    # Threads per block come from the current target; a target must be set.
    max_threads = int(
        tvm.target.current_target(allow_none=False).max_num_threads)
    tx = tvm.thread_axis("threadIdx.x")
    bx = tvm.thread_axis("blockIdx.x")
    ib = tvm.ir_builder.create()
    p_data = ib.buffer_ptr(data)
    p_index = ib.buffer_ptr(index)
    p_out = ib.buffer_ptr(output)
    ndim = len(data.shape)
    assert data.dtype == "float32", "Currently only supports input dtype to be float32"
    # NOTE(review): this check runs before negative axes are normalised below,
    # so it only bounds the axis from above -- confirm that is sufficient.
    assert axis < ndim, "Axis out of boundary for input ndim %d" % ndim

    # Products of the dimensions before / after the sort axis.
    axis_mul_before = 1
    axis_mul_after = 1
    if axis < 0:
        axis = ndim + axis
    for i in range(0, ndim):
        if i < axis:
            axis_mul_before *= data.shape[i]
        elif i > axis:
            axis_mul_after *= data.shape[i]

    # Scratch size: the larger of the summed dimensions of `index` and the
    # number of slices to sort.
    dshape = 0
    for i in range(0, len(index.shape)):
        dshape += index.shape[i]
    dshape = tvm.select(dshape > axis_mul_before * axis_mul_after, dshape,
                        axis_mul_before * axis_mul_after)

    # Scratch buffers, all with `dshape` elements.
    sizes_temp = ib.allocate("int32",
                             dshape,
                             name="sizes_temp",
                             scope="global")
    sizes = ib.allocate("int32", dshape, name="sizes", scope="global")
    temp_index = ib.allocate("int32", dshape, name="temp_index", scope="local")
    temp_data = ib.allocate("float32", dshape, name="temp_data", scope="local")
    data_new = ib.allocate("float32", dshape, name="data_new", scope="global")
    index_new = ib.allocate("int32", dshape, name="index_new", scope="global")
    # Launch configuration: full blocks of max_threads, enough to cover dshape.
    nthread_tx = max_threads
    nthread_bx = dshape // max_threads + 1
    ib.scope_attr(tx, "thread_extent", nthread_tx)
    ib.scope_attr(bx, "thread_extent", nthread_bx)
    # Flat thread id; each thread handles one slice along the sort axis.
    tid = bx * max_threads + tx

    # Stage 1: copy the per-slice element counts into both size buffers.
    with ib.if_scope(tid < axis_mul_before * axis_mul_after):
        sizes[tid] = p_index[tid]
        sizes_temp[tid] = p_index[tid]

    # Stage 2: accumulate the counts with strides of 1 << k, alternating
    # between `sizes` and `sizes_temp` -- presumably a running (prefix) sum so
    # that sizes[t] ends up as the end offset of slice t; verify.
    # NOTE(review): the loop extent is built from floor(sqrt(...)) of a
    # float32 value with no cast back to an integer type -- confirm intended.
    with ib.if_scope(tid < axis_mul_before * axis_mul_after):
        with ib.for_range(0, tvm.floor(tvm.sqrt((axis_mul_before * axis_mul_after) \
             .astype("float32"))) + 1, name="k") as k:
            with ib.if_scope(tid - (tvm.const(1, "int32") << k) >= 0):
                with ib.if_scope(k % 2 == 0):
                    sizes[tid] += sizes_temp[tid -
                                             (tvm.const(1, "int32") << k)]
                    sizes_temp[tid] = sizes[tid]
                with ib.else_scope():
                    sizes_temp[tid] += sizes[tid -
                                             (tvm.const(1, "int32") << k)]
                    sizes[tid] = sizes_temp[tid]

    # Stage 3: gather this slice's values into data_new starting at `start`,
    # and initialise index_new with the positions 0..count-1.
    with ib.if_scope(tid < axis_mul_before * axis_mul_after):
        # NOTE(review): `/` on index expressions -- presumably integer
        # division is intended; confirm for the TVM version in use.
        i = tid / axis_mul_after
        j = tid % axis_mul_after
        current_sort_num = p_index[tid]
        base_idx = i * data.shape[axis] * axis_mul_after + j
        with ib.for_range(0, current_sort_num, name="k") as k:
            full_idx = base_idx + k * axis_mul_after
            # NOTE(review): the two assignments below rebind the Python name
            # `start`; they do not emit IR.  Both `with` bodies run while the
            # IR is being built, so `start` is always `sizes[tid - 1]` when it
            # is used, including for tid == 0 -- confirm this is intended.
            with ib.if_scope(tid == 0):
                start = 0
            with ib.else_scope():
                start = sizes[tid - 1]
            index_new[start + k] = k
            data_new[start + k] = p_data[full_idx]

    # Stage 4: sort the slice in place inside data_new / index_new.
    with ib.if_scope(tid < axis_mul_before * axis_mul_after):
        # NOTE(review): same Python-level rebinding of `start` as in stage 3.
        with ib.if_scope(tid == 0):
            start = 0
        with ib.else_scope():
            start = sizes[tid - 1]
        # OddEvenTransposeSort
        with ib.for_range(0, p_index[tid], name="k") as k:
            with ib.for_range(0, p_index[tid] - 1, name="i") as i:
                # Pass k only touches pairs whose left index has parity k & 1.
                with ib.if_scope(i % 2 == (k & 1)):
                    # Swap the neighbouring pair (values and indices) when it
                    # is out of order for the requested direction.
                    with ib.if_scope(
                        ((data_new[i + start] < data_new[i + start + 1])
                         ^ is_descend) == False):
                        temp_data[tid] = data_new[i + start]
                        data_new[i + start] = data_new[i + start + 1]
                        data_new[i + start + 1] = temp_data[tid]
                        temp_index[tid] = index_new[i + start]
                        index_new[i + start] = index_new[i + start + 1]
                        index_new[i + start + 1] = temp_index[tid]

    # Stage 5: write the sorted indices back along the sort axis; positions at
    # or beyond the slice's element count receive their own position k.
    with ib.if_scope(tid < axis_mul_before * axis_mul_after):
        i = tid / axis_mul_after
        j = tid % axis_mul_after
        current_sort_num = p_index[tid]
        base_idx = i * data.shape[axis] * axis_mul_after + j
        with ib.for_range(0, data.shape[axis], name="k") as k:
            # NOTE(review): same Python-level rebinding of `start` as in stage 3.
            with ib.if_scope(tid == 0):
                start = 0
            with ib.else_scope():
                start = sizes[tid - 1]
            p_out[base_idx + k * axis_mul_after] = tvm.select(
                k < current_sort_num, index_new[k + start], k)
    body = ib.get()
    return body
Ejemplo n.º 12
0
def resize_bilinear(indices,
                    data,
                    image_height,
                    image_width,
                    target_height,
                    target_width,
                    boxes=None,
                    box_indices=None,
                    extrapolation_value=None,
                    layout='NCHW',
                    coordinate_transformation_mode="align_corners",
                    out_dtype=None):
    """Compute one output element of a bilinear resize.

    The output position given by ``indices`` is mapped to a fractional
    coordinate in the source image; the samples at the floor/ceil positions
    around it are read with ``get_2d_pixel`` and blended linearly along x and
    then along y.
    Background: https://en.wikipedia.org/wiki/Bilinear_interpolation.

    Parameters
    ----------
    indices : tuple
        Position of the output element; decoded by ``get_2d_indices``
        according to ``layout``.

    data : tvm.Tensor
        inputs is a 4-D tensor with shape
        [batch, channel, in_height, in_width]
        or  [batch, in_height, in_width, channel]

    image_height : integer
        Height of the source image (``.astype`` is called on it, so a TVM
        integer expression is expected).

    image_width : integer
        Width of the source image (same remark as ``image_height``).

    target_height : integer
        Height of the resized image.

    target_width : integer
        Width of the resized image.

    boxes : tvm.Tensor, optional
        A 2-D tensor of shape [num_boxes, 4]; each row holds the coordinates
        of one box, read here as (y1, x1, y2, x2).  When given, the source
        coordinate is derived from the box.

    box_indices : tvm.Tensor, optional
        A 1-D tensor of shape [num_boxes]; ``box_indices[i]`` names the entry
        of ``data`` that the i-th box refers to.

    extrapolation_value: float, optional
        Value returned when the source coordinate is below 0 or above
        ``image_height - 1`` / ``image_width - 1``.

    layout: string, optional
        "NCHW", "NHWC", or "NCHWc".

    coordinate_transformation_mode: string, optional
        How an output coordinate is mapped to a source coordinate; one of
        "half_pixel", "align_corners" and "asymmetric" (see the ONNX Resize
        operator specification).  Only consulted when ``boxes`` is None.

    out_dtype: string, optional
        Type of the result.  If left None the type of ``data`` is used.

    Returns
    -------
    output : out_dtype
        The interpolated value, cast to ``out_dtype``.

    Raises
    ------
    ValueError
        If ``boxes`` is None and ``coordinate_transformation_mode`` is not
        one of the supported modes.
    """
    n, c, y, x, cc, inum, ic = get_2d_indices(indices, layout=layout)
    box_idx = n if box_indices is None else box_indices(n)

    def _blend(first, second, weight):
        # Linear interpolation between two samples.
        return first * (1.0 - weight) + second * weight

    def _sample(row, col):
        # Read one source sample through the shared 2-D pixel accessor.
        return get_2d_pixel(data, layout, boxes, image_height, image_width,
                            box_idx, c, row, col, cc, inum, ic)

    # Map the output position to a fractional source coordinate.
    if boxes is not None:
        y1, x1 = boxes(n, 0), boxes(n, 1)
        y2, x2 = boxes(n, 2), boxes(n, 3)

        in_h = (image_height - 1) * (y2 - y1)
        in_w = (image_width - 1) * (x2 - x1)
        h_scale = in_h.astype('float') / (target_height - 1)
        w_scale = in_w.astype('float') / (target_width - 1)

        in_y = y1 * (image_height - 1) + h_scale * y
        in_x = x1 * (image_width - 1) + w_scale * x
    elif coordinate_transformation_mode == "align_corners":
        h_scale = (image_height - 1).astype('float') / (target_height - 1)
        w_scale = (image_width - 1).astype('float') / (target_width - 1)
        in_y = h_scale * y
        in_x = w_scale * x
    elif coordinate_transformation_mode == "half_pixel":
        h_scale = image_height.astype('float') / target_height
        w_scale = image_width.astype('float') / target_width
        in_y = h_scale * (y + 0.5) - 0.5
        in_x = w_scale * (x + 0.5) - 0.5
    elif coordinate_transformation_mode == "asymmetric":
        h_scale = image_height.astype('float') / target_height
        w_scale = image_width.astype('float') / target_width
        in_y = h_scale * y
        in_x = w_scale * x
    else:
        raise ValueError(
            "Unsupported coordinate_transformation_mode: {}".format(
                coordinate_transformation_mode))

    # Rows above/below and columns left/right of the source coordinate, plus
    # the blend weights measured from the top-left sample.
    row_above = tvm.floor(in_y).astype('int32')
    row_below = tvm.ceil(in_y).astype('int32')
    y_weight = in_y - row_above

    col_left = tvm.floor(in_x).astype('int32')
    col_right = tvm.ceil(in_x).astype('int32')
    x_weight = in_x - col_left

    upper = _blend(_sample(row_above, col_left),
                   _sample(row_above, col_right),
                   x_weight)
    lower = _blend(_sample(row_below, col_left),
                   _sample(row_below, col_right),
                   x_weight)
    value = _blend(upper, lower, y_weight)

    # use extrapolation_value if in_y/in_x is out of boundary
    if extrapolation_value is not None:
        above_y = tvm.if_then_else(in_y > image_height - 1,
                                   extrapolation_value, value)
        y_checked = tvm.if_then_else(in_y < 0, extrapolation_value, above_y)
        above_x = tvm.if_then_else(in_x > image_width - 1,
                                   extrapolation_value, y_checked)
        value = tvm.if_then_else(in_x < 0, extrapolation_value, above_x)

    return value.astype(out_dtype if out_dtype else data.dtype)