def _bilinear(*indices):
    """Compute one bilinearly interpolated output element.

    Uses ``coordinate_transformation_mode``, ``y_ratio``, ``x_ratio`` and the
    helpers ``_get_indices``, ``_get_pixel``, ``_lerp`` and ``_cast_output``,
    none of which are defined in this function.

    Parameters
    ----------
    *indices : tuple
        Output element indices, forwarded unchanged to ``_get_indices``.

    Returns
    -------
    value : Expr
        Interpolated value after ``_cast_output``.
    """
    n, c, y, x, cc = _get_indices(*indices)

    # Map the output coordinate back to a (fractional) input coordinate.
    if coordinate_transformation_mode == "half_pixel":
        src_y = y_ratio * (y + 0.5) - 0.5
        src_x = x_ratio * (x + 0.5) - 0.5
    else:
        src_y = y_ratio * y
        src_x = x_ratio * x

    # Integer corner of the 2x2 neighbourhood and the fractional weights.
    x0 = tvm.floor(src_x).astype('int32')
    wx = src_x - tvm.floor(src_x)
    y0 = tvm.floor(src_y).astype('int32')
    wy = src_y - tvm.floor(src_y)

    # Blend along x on each of the two rows, then along y.
    upper = _lerp(_get_pixel(n, c, y0, x0, cc),
                  _get_pixel(n, c, y0, x0 + 1, cc),
                  wx)
    lower = _lerp(_get_pixel(n, c, y0 + 1, x0, cc),
                  _get_pixel(n, c, y0 + 1, x0 + 1, cc),
                  wx)
    return _cast_output(_lerp(upper, lower, wy))
def _nearest_neighbor(*indices):
    """Fetch the input pixel nearest to one output element.

    Uses ``y_ratio``, ``x_ratio``, ``align_corners`` and the helpers
    ``_get_indices``, ``_get_pixel`` and ``_cast_output``, none of which are
    defined in this function.

    Parameters
    ----------
    *indices : tuple
        Output element indices, forwarded unchanged to ``_get_indices``.

    Returns
    -------
    value : Expr
        The selected pixel after ``_cast_output``.
    """
    def _snap(coord):
        # With align_corners the coordinate is rounded; otherwise it is
        # floored after adding an epsilon to prevent gpu rounding errors.
        if align_corners:
            return tvm.round(coord).astype('int32')
        epsilon = 1e-5
        return tvm.floor(coord + epsilon).astype('int32')

    n, c, y, x, cc = _get_indices(*indices)
    row = _snap(y_ratio * y)
    col = _snap(x_ratio * x)
    return _cast_output(_get_pixel(n, c, row, col, cc))
def _bicubic(*indices):
    """Compute one bicubically interpolated output element.

    Uses ``coordinate_transformation_mode``, ``y_ratio``, ``x_ratio`` and the
    helpers ``_get_indices``, ``_get_pixel``, ``_cubic_kernel`` and
    ``_cast_output``, none of which are defined in this function.

    Parameters
    ----------
    *indices : tuple
        Output element indices, forwarded unchanged to ``_get_indices``.

    Returns
    -------
    value : Expr
        Interpolated value after ``_cast_output``.
    """
    n, c, y, x, cc = _get_indices(*indices)

    # Map the output coordinate back to a (fractional) input coordinate.
    if coordinate_transformation_mode == "half_pixel":
        src_y = y_ratio * (y + 0.5) - 0.5
        src_x = x_ratio * (x + 0.5) - 0.5
    else:
        src_y = y_ratio * y
        src_x = x_ratio * x

    x0 = tvm.floor(src_x).astype('int32')
    tx = src_x - tvm.floor(src_x)
    y0 = tvm.floor(src_y).astype('int32')
    ty = src_y - tvm.floor(src_y)

    # Rows and columns of the 4x4 neighbourhood around (y0, x0).
    rows = (y0 - 1, y0 + 0, y0 + 1, y0 + 2)
    cols = (x0 - 1, x0 + 0, x0 + 1, x0 + 2)

    # Interpolate along x within every row first ...
    per_row = []
    for row in rows:
        q0, q1, q2, q3 = [_get_pixel(n, c, row, col, cc) for col in cols]
        per_row.append(_cubic_kernel(q0, q1, q2, q3, tx))

    # ... then along y across the four row results.
    r0, r1, r2, r3 = per_row
    return _cast_output(_cubic_kernel(r0, r1, r2, r3, ty))
def _bilinear(*indices):
    """Compute one bilinearly interpolated output element.

    The source coordinate is the output coordinate multiplied by ``y_ratio``
    / ``x_ratio``. Those ratios and the helpers ``_get_indices``,
    ``_get_pixel``, ``_lerp`` and ``_cast_output`` are not defined in this
    function.

    Parameters
    ----------
    *indices : tuple
        Output element indices, forwarded unchanged to ``_get_indices``.

    Returns
    -------
    value : Expr
        Interpolated value after ``_cast_output``.
    """
    n, c, y, x, cc = _get_indices(*indices)
    src_y = y_ratio * y
    src_x = x_ratio * x

    # Integer corner of the 2x2 neighbourhood and the fractional weights.
    left = tvm.floor(src_x).astype('int32')
    frac_x = src_x - tvm.floor(src_x)
    top = tvm.floor(src_y).astype('int32')
    frac_y = src_y - tvm.floor(src_y)

    # Horizontal blend for the top and bottom rows, then the vertical blend.
    blended_rows = [
        _lerp(_get_pixel(n, c, row, left, cc),
              _get_pixel(n, c, row, left + 1, cc),
              frac_x)
        for row in (top, top + 1)
    ]
    return _cast_output(_lerp(blended_rows[0], blended_rows[1], frac_y))
def _pool(i, c, ph, pw):
    """Max-pool one output bin of one region of interest.

    Uses ``rois``, ``data``, ``spatial_scale``, ``pooled_size_h``,
    ``pooled_size_w``, ``height``, ``width`` and ``dtype``, none of which are
    defined in this function.

    Parameters
    ----------
    i : Expr
        Index into ``rois``.
    c : Expr
        Channel index into ``data``.
    ph : Expr
        Bin index along the pooled height.
    pw : Expr
        Bin index along the pooled width.

    Returns
    -------
    value : Expr
        Maximum of ``data`` over the bin, or 0 when the bin is empty.
    """
    def _scale(coord):
        # ROI coordinate -> rounded integer coordinate after spatial scaling.
        return tvm.round(coord * spatial_scale).astype('int32')

    def _clip(value, upper):
        # Clamp into [0, upper].
        return tvm.min(tvm.max(value, 0), upper)

    box = rois[i]
    batch_index = box[0].astype('int32')
    # Elements 1..4 of a ROI are read as start_w, start_h, end_w, end_h.
    left = _scale(box[1])
    top = _scale(box[2])
    right = _scale(box[3])
    bottom = _scale(box[4])

    # force malformed ROIs to be 1x1
    box_h = tvm.max(bottom - top + 1, tvm.const(1, 'int32'))
    box_w = tvm.max(right - left + 1, tvm.const(1, 'int32'))

    cell_h = box_h.astype(dtype) / pooled_size_h
    cell_w = box_w.astype(dtype) / pooled_size_w

    # use epsilon to prevent floating point precision loss in floor/ceil
    eps = tvm.const(0.00001, dtype)
    h_lo = _clip(tvm.floor(ph * cell_h + eps).astype('int32') + top, height)
    w_lo = _clip(tvm.floor(pw * cell_w + eps).astype('int32') + left, width)
    h_hi = _clip(tvm.ceil((ph + 1) * cell_h - eps).astype('int32') + top,
                 height)
    w_hi = _clip(tvm.ceil((pw + 1) * cell_w - eps).astype('int32') + left,
                 width)

    has_pixels = tvm.all(h_lo < h_hi, w_lo < w_hi)

    def _identity(elem_dtype):
        # Identity element of the reduction: the dtype minimum for a
        # non-empty bin, 0 for an empty one.
        return tvm.if_then_else(has_pixels,
                                tvm.min_value(elem_dtype),
                                tvm.const(0.0, elem_dtype))

    # Parameter names x / y are kept from the original lambda.
    # NOTE(review): presumably the reducer inspects them - confirm.
    def _combine(x, y):
        return tvm.make._OpMax(x, y)

    _max = tvm.comm_reducer(_combine, _identity, name='max')

    rh = tvm.reduce_axis((0, h_hi - h_lo), 'rh')
    rw = tvm.reduce_axis((0, w_hi - w_lo), 'rw')
    return _max(data[batch_index, c, h_lo + rh, w_lo + rw], axis=[rh, rw])
def _nearest_neighbor(*indices):
    """Fetch the input voxel nearest to one output element (3-D variant).

    Uses ``z_ratio``, ``y_ratio``, ``x_ratio``,
    ``coordinate_transformation_mode`` and the helpers ``_get_indices``,
    ``_get_pixel`` and ``_cast_output``, none of which are defined in this
    function.

    Parameters
    ----------
    *indices : tuple
        Output element indices, forwarded unchanged to ``_get_indices``.

    Returns
    -------
    value : Expr
        The selected voxel after ``_cast_output``.

    Raises
    ------
    ValueError
        If ``coordinate_transformation_mode`` is not "align_corners",
        "asymmetric" or "half_pixel".
    """
    n, c, z, y, x, cc = _get_indices(*indices)
    src_z = z_ratio * z
    src_y = y_ratio * y
    src_x = x_ratio * x

    if coordinate_transformation_mode == "align_corners":
        def _snap(coord):
            return tvm.round(coord).astype('int32')
    elif coordinate_transformation_mode in ("asymmetric", "half_pixel"):
        def _snap(coord):
            # Add epsilon to floor to prevent gpu rounding errors.
            epsilon = 1e-5
            return tvm.floor(coord + epsilon).astype('int32')
    else:
        raise ValueError("Unsupported coordinate_transformation_mode: {}".format(
            coordinate_transformation_mode))

    depth_idx = _snap(src_z)
    row_idx = _snap(src_y)
    col_idx = _snap(src_x)
    return _cast_output(_get_pixel(n, c, depth_idx, row_idx, col_idx, cc))
def floor(x):
    """Apply ``tvm.floor`` element-wise to ``x``.

    Parameters
    ----------
    x : tvm.Tensor
        Input argument.

    Returns
    -------
    y : tvm.Tensor
        Tensor with the same shape as ``x`` holding the floored values.
    """
    def _floor_at(*idx):
        # Floor of the single element addressed by idx.
        return tvm.floor(x(*idx))

    return tvm.compute(x.shape, _floor_at)
def resize_bicubic(indices, data, image_height, image_width,
                   target_height, target_width, boxes=None,
                   box_indices=None, extrapolation_value=None, layout='NCHW',
                   coordinate_transformation_mode="align_corners",
                   out_dtype=None):
    """Build the bicubic-resize expression for a single output element.

    For background on bicubic interpolation see
    https://en.wikipedia.org/wiki/Bicubic_interpolation.

    Parameters
    ----------
    indices : tuple
        The indices of the output element; unpacked according to `layout`.

    data : tvm.Tensor
        4-D tensor with shape [batch, channel, in_height, in_width]
        or [batch, in_height, in_width, channel] (5-D for any other layout).

    image_height : integer
        Input image height. When `boxes` is None the code calls ``.astype``
        on it, so it must be an expression object rather than a plain int.

    image_width : integer
        Input image width (same remark as `image_height`).

    target_height : integer
        The target resized image height.

    target_width : integer
        The target resized image width.

    boxes : tvm.Tensor, optional
        A 2-D tensor of shape [num_boxes, 4]; each row is read as
        (y1, x1, y2, x2). When given, `coordinate_transformation_mode` is
        not consulted and sample positions are not clamped to the image.

    box_indices : tvm.Tensor, optional
        A 1-D tensor of shape [num_boxes]; box_indices[i] selects the batch
        entry of `data` that the i-th box refers to.

    extrapolation_value : float, optional
        Value returned when the source coordinate is below 0 or above
        ``image_height - 1`` / ``image_width - 1``.

    layout : string, optional
        "NCHW", "NHWC", or "NCHWc".

    coordinate_transformation_mode : string, optional
        How an output coordinate maps to an input coordinate (see the ONNX
        Resize operator). One of "half_pixel", "align_corners", "asymmetric".

    out_dtype : string, optional
        Type to return. If left None will be same as input type.

    Returns
    -------
    output : out_dtype
        The computed result with type out_dtype.

    Raises
    ------
    ValueError
        If `boxes` is None and `coordinate_transformation_mode` is unknown.
    """
    def _cubic_kernel(p0, p1, p2, p3, t):
        # Cubic through four consecutive samples, evaluated at fraction t.
        coef3 = -p0 / 2.0 + (3.0 * p1) / 2.0 - (3.0 * p2) / 2.0 + p3 / 2.0
        coef2 = p0 - (5.0 * p1) / 2.0 + 2.0 * p2 - p3 / 2.0
        coef1 = -p0 / 2.0 + p2 / 2.0
        coef0 = p1
        return coef3 * t * t * t + coef2 * t * t + coef1 * t + coef0

    def _pixel(row, col):
        # Without boxes the sample position is clamped into the image.
        if boxes is None:
            row = tvm.max(tvm.min(row, image_height - 1), 0)
            col = tvm.max(tvm.min(col, image_width - 1), 0)
        if layout == 'NHWC':
            sample = data(batch, row, col, c)
        elif layout == 'NCHW':
            sample = data(batch, c, row, col)
        else:
            # else must be NCHWxc
            sample = data(batch, c, row, col, cc)
        return sample.astype('float')

    def _outside_guard(coord, extent, inside_value):
        # extrapolation_value when coord is outside [0, extent - 1].
        return tvm.if_then_else(
            coord < 0, extrapolation_value,
            tvm.if_then_else(coord > extent - 1, extrapolation_value,
                             inside_value))

    # Unpack the output indices according to the layout.
    cc = None
    if layout == 'NHWC':
        n, y, x, c = indices
    elif layout == 'NCHW':
        n, c, y, x = indices
    else:
        n, c, y, x, cc = indices

    batch = n if box_indices is None else box_indices(n)

    # Source coordinate of this output element.
    if boxes is None:
        if coordinate_transformation_mode == "align_corners":
            h_scale = (image_height - 1).astype('float') / (target_height - 1)
            w_scale = (image_width - 1).astype('float') / (target_width - 1)
        elif coordinate_transformation_mode in ("asymmetric", "half_pixel"):
            h_scale = image_height.astype('float') / target_height
            w_scale = image_width.astype('float') / target_width
        else:
            raise ValueError("Unsupported coordinate_transformation_mode: {}".format(
                coordinate_transformation_mode))

        if coordinate_transformation_mode == "half_pixel":
            in_y = h_scale * (y + 0.5) - 0.5
            in_x = w_scale * (x + 0.5) - 0.5
        else:
            in_y = h_scale * y
            in_x = w_scale * x
    else:
        y1, x1 = boxes(n, 0), boxes(n, 1)
        y2, x2 = boxes(n, 2), boxes(n, 3)

        box_h = (image_height - 1) * (y2 - y1)
        box_w = (image_width - 1) * (x2 - x1)
        h_scale = box_h.astype('float') / (target_height - 1)
        w_scale = box_w.astype('float') / (target_width - 1)

        in_y = y1 * (image_height - 1) + h_scale * y
        in_x = x1 * (image_width - 1) + w_scale * x

    x0 = tvm.floor(in_x).astype('int32')
    tx = in_x - tvm.floor(in_x)
    y0 = tvm.floor(in_y).astype('int32')
    ty = in_y - tvm.floor(in_y)

    # Rows and columns of the 4x4 neighbourhood around (y0, x0).
    rows = (y0 - 1, y0 + 0, y0 + 1, y0 + 2)
    cols = (x0 - 1, x0 + 0, x0 + 1, x0 + 2)

    # Interpolate bicubically: along x inside each row, then along y.
    per_row = []
    for row in rows:
        q0, q1, q2, q3 = [_pixel(row, col) for col in cols]
        per_row.append(_cubic_kernel(q0, q1, q2, q3, tx))
    r0, r1, r2, r3 = per_row
    value = _cubic_kernel(r0, r1, r2, r3, ty)

    # use extrapolation_value if in_y/in_x is out of boundary
    if extrapolation_value is not None:
        value = _outside_guard(in_x, image_width,
                               _outside_guard(in_y, image_height, value))

    result_dtype = data.dtype
    if out_dtype:
        result_dtype = out_dtype
    return value.astype(result_dtype)
def resize_nearest_neighbor(indices, data, image_height, image_width,
                            target_height, target_width, boxes=None,
                            box_indices=None, extrapolation_value=None,
                            layout='NCHW',
                            coordinate_transformation_mode="align_corners",
                            out_dtype=None):
    """Build the nearest-neighbor-resize expression for one output element.

    For background on nearest-neighbor interpolation see
    https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation.

    Parameters
    ----------
    indices : tuple
        The indices of the output element; unpacked according to `layout`.

    data : tvm.Tensor
        4-D tensor with shape [batch, channel, in_height, in_width]
        or [batch, in_height, in_width, channel] (5-D for any other layout).

    image_height : integer
        Input image height. When `boxes` is None the code calls ``.astype``
        on it, so it must be an expression object rather than a plain int.

    image_width : integer
        Input image width (same remark as `image_height`).

    target_height : integer
        The target resized image height.

    target_width : integer
        The target resized image width.

    boxes : tvm.Tensor, optional
        A 2-D tensor of shape [num_boxes, 4]; each row is read as
        (y1, x1, y2, x2). When given, the coordinate is rounded and the
        sample position is not clamped to the image.

    box_indices : tvm.Tensor, optional
        A 1-D tensor of shape [num_boxes]; box_indices[i] selects the batch
        entry of `data` that the i-th box refers to.

    extrapolation_value : float, optional
        Value returned when the source coordinate is below 0 or above
        ``image_height - 1`` / ``image_width - 1``.

    layout : string, optional
        "NCHW", "NHWC", or "NCHWc".

    coordinate_transformation_mode : string, optional
        One of "half_pixel", "align_corners", "asymmetric". In this function
        "half_pixel" and "asymmetric" produce the same source coordinate
        (scale * index, floored); "align_corners" rounds instead.

    out_dtype : string, optional
        Type to return. If left None will be same as input type.

    Returns
    -------
    output : out_dtype
        The computed result with type out_dtype.

    Raises
    ------
    ValueError
        If `boxes` is None and `coordinate_transformation_mode` is unknown.
    """
    def _outside_guard(coord, extent, inside_value):
        # extrapolation_value when coord is outside [0, extent - 1].
        return tvm.if_then_else(
            coord < 0, extrapolation_value,
            tvm.if_then_else(coord > extent - 1, extrapolation_value,
                             inside_value))

    # Unpack the output indices according to the layout.
    cc = None
    if layout == 'NHWC':
        n, y, x, c = indices
    elif layout == 'NCHW':
        n, c, y, x = indices
    else:
        n, c, y, x, cc = indices

    batch = n if box_indices is None else box_indices(n)

    # Source coordinate of this output element.
    if boxes is None:
        if coordinate_transformation_mode == "align_corners":
            h_scale = (image_height - 1).astype('float') / (target_height - 1)
            w_scale = (image_width - 1).astype('float') / (target_width - 1)
        elif coordinate_transformation_mode in ("asymmetric", "half_pixel"):
            h_scale = image_height.astype('float') / target_height
            w_scale = image_width.astype('float') / target_width
        else:
            raise ValueError("Unsupported coordinate_transformation_mode: {}".format(
                coordinate_transformation_mode))

        in_y = h_scale * y
        in_x = w_scale * x
    else:
        y1, x1 = boxes(n, 0), boxes(n, 1)
        y2, x2 = boxes(n, 2), boxes(n, 3)

        box_h = (image_height - 1) * (y2 - y1)
        box_w = (image_width - 1) * (x2 - x1)
        h_scale = box_h.astype('float') / (target_height - 1)
        w_scale = box_w.astype('float') / (target_width - 1)

        in_y = y1 * (image_height - 1) + h_scale * y
        in_x = x1 * (image_width - 1) + w_scale * x

    # Pick the integer sample position.
    use_rounding = (coordinate_transformation_mode == "align_corners"
                    or boxes is not None)
    if use_rounding:
        col = tvm.round(in_x).astype("int32")
        row = tvm.round(in_y).astype("int32")
    else:
        # Add epsilon to floor to prevent gpu rounding errors.
        epsilon = 1e-5
        row = tvm.floor(in_y + epsilon).astype('int32')
        col = tvm.floor(in_x + epsilon).astype('int32')

    # Without boxes the sample position is clamped into the image.
    if boxes is None:
        row = tvm.max(tvm.min(row, image_height - 1), 0)
        col = tvm.max(tvm.min(col, image_width - 1), 0)

    if layout == 'NHWC':
        sample = data(batch, row, col, c)
    elif layout == 'NCHW':
        sample = data(batch, c, row, col)
    else:
        # else must be NCHWxc
        sample = data(batch, c, row, col, cc)
    value = sample.astype('float')

    # use extrapolation_value if in_y/in_x is out of boundary
    if extrapolation_value is not None:
        value = _outside_guard(in_x, image_width,
                               _outside_guard(in_y, image_height, value))

    result_dtype = data.dtype
    if out_dtype:
        result_dtype = out_dtype
    return value.astype(result_dtype)
def sort_ir(data, index, output, axis, is_descend):
    """Low level IR to do sorting on the GPU, same usage as tvm.contrib.sort.argsort on the CPU.

    One thread (``tid``) handles one 1-D slice of ``data`` along ``axis``. The
    kernel (1) builds a running total of the per-slice element counts in
    ``sizes``, (2) packs each slice's valid elements and their positions into
    ``data_new`` / ``index_new``, (3) sorts each packed segment with an
    odd-even transposition sort and (4) writes the resulting indices to
    ``output``; positions past the valid count receive their own position.

    Parameters
    ----------
    data: Buffer
        2D Buffer of input boxes' score with shape [batch_size, num_anchors].
        Must have dtype float32.

    index : Buffer
        Buffer holding, per slice, the number of valid elements to sort
        (read as ``index[tid]``).

    output : Buffer
        Output buffer of indices of sorted tensor.

    axis : int
        The axis used for sorting. Negative values count from the last axis.

    is_descend : bool
        If the sorted data is in descending order.

    Returns
    -------
    stmt : Stmt
        The result IR statement.
    """
    max_threads = int(
        tvm.target.current_target(allow_none=False).max_num_threads)
    tx = tvm.thread_axis("threadIdx.x")
    bx = tvm.thread_axis("blockIdx.x")
    ib = tvm.ir_builder.create()
    p_data = ib.buffer_ptr(data)
    p_index = ib.buffer_ptr(index)
    p_out = ib.buffer_ptr(output)
    ndim = len(data.shape)
    assert data.dtype == "float32", "Currently only supports input dtype to be float32"
    # Check both ends of the range before normalising: the previous
    # ``axis < ndim`` check let ``axis < -ndim`` through, which left a negative
    # axis after normalisation.
    assert -ndim <= axis < ndim, "Axis out of boundary for input ndim %d" % ndim
    if axis < 0:
        axis = ndim + axis

    # Product of the dimensions before / after the sort axis; together they
    # give the number of independent slices.
    axis_mul_before = 1
    axis_mul_after = 1
    for i in range(0, ndim):
        if i < axis:
            axis_mul_before *= data.shape[i]
        elif i > axis:
            axis_mul_after *= data.shape[i]

    # Scratch buffer extent: the larger of the summed dimensions of ``index``
    # and the number of slices.
    dshape = 0
    for i in range(0, len(index.shape)):
        dshape += index.shape[i]
    dshape = tvm.select(dshape > axis_mul_before * axis_mul_after, dshape,
                        axis_mul_before * axis_mul_after)

    sizes_temp = ib.allocate(
        "int32", dshape, name="sizes_temp", scope="global")
    sizes = ib.allocate("int32", dshape, name="sizes", scope="global")
    temp_index = ib.allocate("int32", dshape, name="temp_index", scope="local")
    temp_data = ib.allocate("float32", dshape, name="temp_data", scope="local")
    data_new = ib.allocate("float32", dshape, name="data_new", scope="global")
    index_new = ib.allocate("int32", dshape, name="index_new", scope="global")
    nthread_tx = max_threads
    nthread_bx = dshape // max_threads + 1
    ib.scope_attr(tx, "thread_extent", nthread_tx)
    ib.scope_attr(bx, "thread_extent", nthread_bx)
    tid = bx * max_threads + tx

    # Phase 1a: seed both ping-pong buffers with this slice's element count.
    with ib.if_scope(tid < axis_mul_before * axis_mul_after):
        sizes[tid] = p_index[tid]
        sizes_temp[tid] = p_index[tid]

    # Phase 1b: running total of the counts. Step k adds the value found
    # (1 << k) positions to the left, alternating between the two buffers.
    # NOTE(review): the step count is floor(sqrt(N)) + 1 although the stride
    # doubles each step, and no barrier is visible between steps or phases;
    # confirm both are intended.
    with ib.if_scope(tid < axis_mul_before * axis_mul_after):
        scan_steps = tvm.floor(tvm.sqrt(
            (axis_mul_before * axis_mul_after).astype("float32"))) + 1
        with ib.for_range(0, scan_steps, name="k") as k:
            with ib.if_scope(tid - (tvm.const(1, "int32") << k) >= 0):
                with ib.if_scope(k % 2 == 0):
                    sizes[tid] += sizes_temp[tid - (
                        tvm.const(1, "int32") << k)]
                    sizes_temp[tid] = sizes[tid]
                with ib.else_scope():
                    sizes_temp[tid] += sizes[tid - (
                        tvm.const(1, "int32") << k)]
                    sizes[tid] = sizes_temp[tid]

    # Offset of this thread's segment inside data_new / index_new: 0 for the
    # first slice, otherwise the running total of the preceding slices.
    # This must be an IR-level conditional. Assigning a Python variable inside
    # ``ib.if_scope`` / ``ib.else_scope`` runs both bodies while the IR is
    # being built, so the last assignment always wins and thread 0 would read
    # ``sizes[-1]``.
    start = tvm.if_then_else(tid == 0, 0, sizes[tid - 1])

    # Phase 2: pack the valid elements of this slice and their positions.
    with ib.if_scope(tid < axis_mul_before * axis_mul_after):
        i = tid / axis_mul_after
        j = tid % axis_mul_after
        current_sort_num = p_index[tid]
        base_idx = i * data.shape[axis] * axis_mul_after + j
        with ib.for_range(0, current_sort_num, name="k") as k:
            full_idx = base_idx + k * axis_mul_after
            index_new[start + k] = k
            data_new[start + k] = p_data[full_idx]

    # Phase 3: sort this thread's packed segment.
    with ib.if_scope(tid < axis_mul_before * axis_mul_after):
        # OddEvenTransposeSort
        with ib.for_range(0, p_index[tid], name="k") as k:
            with ib.for_range(0, p_index[tid] - 1, name="i") as i:
                # Pass k only touches pairs whose left index has parity k & 1.
                with ib.if_scope(i % 2 == (k & 1)):
                    # ``== False`` builds an IR comparison; a Python ``not``
                    # cannot be applied to an IR expression here.
                    with ib.if_scope(
                            ((data_new[i + start] < data_new[i + start + 1]) ^
                             is_descend) == False):
                        temp_data[tid] = data_new[i + start]
                        data_new[i + start] = data_new[i + start + 1]
                        data_new[i + start + 1] = temp_data[tid]
                        temp_index[tid] = index_new[i + start]
                        index_new[i + start] = index_new[i + start + 1]
                        index_new[i + start + 1] = temp_index[tid]

    # Phase 4: write the sorted indices back along the axis; positions past
    # the valid count receive their own position k.
    with ib.if_scope(tid < axis_mul_before * axis_mul_after):
        i = tid / axis_mul_after
        j = tid % axis_mul_after
        current_sort_num = p_index[tid]
        base_idx = i * data.shape[axis] * axis_mul_after + j
        with ib.for_range(0, data.shape[axis], name="k") as k:
            p_out[base_idx + k * axis_mul_after] = tvm.select(
                k < current_sort_num,
                index_new[k + start], k)
    return ib.get()
def resize_bilinear(indices, data, image_height, image_width,
                    target_height, target_width, boxes=None,
                    box_indices=None, extrapolation_value=None, layout='NCHW',
                    coordinate_transformation_mode="align_corners",
                    out_dtype=None):
    """Build the bilinear-resize expression for a single output element.

    For background on bilinear interpolation see
    https://en.wikipedia.org/wiki/Bilinear_interpolation.

    Index unpacking and pixel access are delegated to ``get_2d_indices`` and
    ``get_2d_pixel``, which are defined outside this function.

    Parameters
    ----------
    indices : tuple
        The indices of the output element, passed to ``get_2d_indices``.

    data : tvm.Tensor
        4-D tensor with shape [batch, channel, in_height, in_width]
        or [batch, in_height, in_width, channel].

    image_height : integer
        Input image height. When `boxes` is None the code calls ``.astype``
        on it, so it must be an expression object rather than a plain int.

    image_width : integer
        Input image width (same remark as `image_height`).

    target_height : integer
        The target resized image height.

    target_width : integer
        The target resized image width.

    boxes : tvm.Tensor, optional
        A 2-D tensor of shape [num_boxes, 4]; each row is read as
        (y1, x1, y2, x2). When given, `coordinate_transformation_mode` is
        not consulted.

    box_indices : tvm.Tensor, optional
        A 1-D tensor of shape [num_boxes]; box_indices[i] selects the batch
        entry of `data` that the i-th box refers to.

    extrapolation_value : float, optional
        Value returned when the source coordinate is below 0 or above
        ``image_height - 1`` / ``image_width - 1``.

    layout : string, optional
        "NCHW", "NHWC", or "NCHWc".

    coordinate_transformation_mode : string, optional
        How an output coordinate maps to an input coordinate (see the ONNX
        Resize operator). One of "half_pixel", "align_corners", "asymmetric".

    out_dtype : string, optional
        Type to return. If left None will be same as input type.

    Returns
    -------
    output : out_dtype
        The computed result with type out_dtype.

    Raises
    ------
    ValueError
        If `boxes` is None and `coordinate_transformation_mode` is unknown.
    """
    def _lerp(first, second, weight):
        # Linear blend: weight 0 -> first, weight 1 -> second.
        return first * (1.0 - weight) + second * weight

    def _pixel(row, col):
        return get_2d_pixel(data, layout, boxes, image_height, image_width,
                            batch, c, row, col, cc, inum, ic)

    def _outside_guard(coord, extent, inside_value):
        # extrapolation_value when coord is outside [0, extent - 1].
        return tvm.if_then_else(
            coord < 0, extrapolation_value,
            tvm.if_then_else(coord > extent - 1, extrapolation_value,
                             inside_value))

    n, c, y, x, cc, inum, ic = get_2d_indices(indices, layout=layout)
    batch = n if box_indices is None else box_indices(n)

    # Source coordinate of this output element.
    if boxes is None:
        if coordinate_transformation_mode == "align_corners":
            h_scale = (image_height - 1).astype('float') / (target_height - 1)
            w_scale = (image_width - 1).astype('float') / (target_width - 1)
        elif coordinate_transformation_mode in ("asymmetric", "half_pixel"):
            h_scale = image_height.astype('float') / target_height
            w_scale = image_width.astype('float') / target_width
        else:
            raise ValueError(
                "Unsupported coordinate_transformation_mode: {}".format(
                    coordinate_transformation_mode))

        if coordinate_transformation_mode == "half_pixel":
            in_y = h_scale * (y + 0.5) - 0.5
            in_x = w_scale * (x + 0.5) - 0.5
        else:
            in_y = h_scale * y
            in_x = w_scale * x
    else:
        y1, x1 = boxes(n, 0), boxes(n, 1)
        y2, x2 = boxes(n, 2), boxes(n, 3)

        box_h = (image_height - 1) * (y2 - y1)
        box_w = (image_width - 1) * (x2 - x1)
        h_scale = box_h.astype('float') / (target_height - 1)
        w_scale = box_w.astype('float') / (target_width - 1)

        in_y = y1 * (image_height - 1) + h_scale * y
        in_x = x1 * (image_width - 1) + w_scale * x

    # Bracketing rows / columns and the fractional weights between them.
    row_lo = tvm.floor(in_y).astype('int32')
    row_hi = tvm.ceil(in_y).astype('int32')
    weight_y = in_y - row_lo

    col_lo = tvm.floor(in_x).astype('int32')
    col_hi = tvm.ceil(in_x).astype('int32')
    weight_x = in_x - col_lo

    # Blend along x on the upper and lower rows, then along y.
    upper = _lerp(_pixel(row_lo, col_lo), _pixel(row_lo, col_hi), weight_x)
    lower = _lerp(_pixel(row_hi, col_lo), _pixel(row_hi, col_hi), weight_x)
    value = _lerp(upper, lower, weight_y)

    # use extrapolation_value if in_y/in_x is out of boundary
    if extrapolation_value is not None:
        value = _outside_guard(in_x, image_width,
                               _outside_guard(in_y, image_height, value))

    result_dtype = data.dtype
    if out_dtype:
        result_dtype = out_dtype
    return value.astype(result_dtype)