def compute_conv2d_transpose(attrs, inputs, _):
    """Compute definition of conv2d_transpose"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    out_dtype = attrs.get_string("out_dtype")
    layout = attrs["layout"]
    out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype

    assert layout == "NCHW", "only support nchw for now"
    assert dilation == (1, 1), "not support dilate now"
    assert groups == 1, "only support groups == 1 for now"

    with tvm.target.create(attrs.get_string("target")):
        out = topi.nn.conv2d_transpose_nchw(inputs[0], inputs[1], strides,
                                            padding, out_dtype)
        if attrs.get_bool("use_bias"):
            bias = inputs[2]
            bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
            out = topi.add(out, bias)
        output_padding = attrs.get_int_tuple("output_padding")
        out = topi.nn.pad(out, [0, 0, 0, 0],
                          [0, 0, output_padding[0], output_padding[1]])
        return out

def compute_contrib_conv2d_NCHWc(attrs, inputs, _):
    """Compute definition of conv2d NCHWc"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    out_channel = attrs.get_int("channels")
    groups = attrs.get_int("groups")
    layout = attrs.get_str("layout")
    out_layout = attrs.get_str("out_layout")
    out_dtype = attrs.get_str("out_dtype")
    out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype

    if layout == "NCHW":
        _, in_channel, _, _ = get_const_tuple(inputs[0].shape)
    else:
        _, in_channel_chunk, _, _, in_channel_block = get_const_tuple(inputs[0].shape)
        in_channel = in_channel_chunk * in_channel_block

    assert dilation == (1, 1), "not support dilate now"
    if groups == 1:
        # pylint: disable=assignment-from-no-return
        out = topi.nn.conv2d_NCHWc(inputs[0], inputs[1], strides, padding,
                                   dilation, layout, out_layout, out_dtype)
        # pylint: enable=assignment-from-no-return
    elif groups == in_channel and groups == out_channel:
        # pylint: disable=assignment-from-no-return
        out = topi.nn.depthwise_conv2d_NCHWc(inputs[0], inputs[1], strides, padding,
                                             dilation, layout, out_layout, out_dtype)
        # pylint: enable=assignment-from-no-return
    else:
        raise ValueError("not support arbitrary group number > 1 for now")

    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
        out = topi.add(out, bias)
    return out

def verify_expand_dims(in_shape, out_shape, axis, num_newaxis):
    A = tvm.placeholder(shape=in_shape, name="A")
    B = topi.expand_dims(A, axis, num_newaxis)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_broadcast(B)
        foo = tvm.build(s, [A, B], device, name="expand_dims")
        data_npy = np.random.uniform(size=in_shape).astype(A.dtype)
        out_npy = data_npy.reshape(out_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), ctx)
        foo(data_nd, out_nd)
        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in ["llvm", "nvptx", "cuda", "opencl", "metal",
                   "rocm", "vulkan", "sdaccel"]:
        check_device(device)

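# A minimal driver for verify_expand_dims above. This is a sketch; the
# concrete shape/axis combinations are illustrative, not taken from the
# original test suite. Any (in_shape, out_shape) pair where out_shape is
# in_shape with `num_newaxis` length-1 axes inserted at `axis` is valid.
def test_expand_dims():
    # (3, 10) -> (3, 10, 1, 1): two new axes inserted at position 2
    verify_expand_dims((3, 10), (3, 10, 1, 1), 2, 2)
    # (3, 10, 4) -> (1, 3, 10, 4): one new axis inserted at position 0
    verify_expand_dims((3, 10, 4), (1, 3, 10, 4), 0, 1)
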
def compute_conv2d(attrs, inputs, _):
    """Compute definition of conv2d"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    channels = attrs.get_int("channels")
    layout = attrs["layout"]
    assert layout == "NCHW" or layout == "NHWC"

    (dilation_h, dilation_w) = dilation
    if dilation_h < 1 or dilation_w < 1:
        raise ValueError("dilation should be positive value")
    elif dilation == (1, 1):
        kernel = inputs[1]
    elif layout == "NCHW":
        kernel = topi.nn.dilate(inputs[1], [1, 1, dilation_h, dilation_w])
    else:  # layout == NHWC
        kernel = topi.nn.dilate(inputs[1], [1, dilation_h, dilation_w, 1])

    if groups == 1:
        out = topi.nn.conv2d(inputs[0], kernel, strides, padding, layout)
    elif groups == get_const_int(inputs[0].shape[1]) and groups == channels:
        out = topi.nn.depthwise_conv2d_nchw(inputs[0], kernel, strides, padding)
    else:
        raise ValueError("not support arbitrary group number for now")

    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        expand_axis = 1 if layout == "NCHW" else 0
        bias = topi.expand_dims(bias, axis=expand_axis, num_newaxis=2)
        out = topi.broadcast_add(out, bias)
    return out

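# Pure-numpy sketch of what the topi.nn.dilate call above does to an NCHW
# kernel (assumed semantics: out[..., i*dh, j*dw] = kernel[..., i, j], zeros
# elsewhere). Pre-dilating the kernel this way is how compute_conv2d reduces
# a dilated convolution to an ordinary one.
import numpy as np

def dilate_hw(kernel, dilation_h, dilation_w):
    o, i, kh, kw = kernel.shape
    out = np.zeros((o, i,
                    dilation_h * (kh - 1) + 1,
                    dilation_w * (kw - 1) + 1), kernel.dtype)
    out[:, :, ::dilation_h, ::dilation_w] = kernel
    return out

k = np.ones((8, 3, 3, 3), "float32")
assert dilate_hw(k, 2, 2).shape == (8, 3, 5, 5)  # 3x3 taps spread over 5x5
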
def compute_contrib_conv2d_winograd_without_weight_transform(attrs, inputs, _):
    """Compute definition of conv2d winograd without weight transform"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    layout = attrs.get_string("layout")
    out_dtype = attrs.get_string("out_dtype")
    target = attrs.get_string("target")
    tile_size = attrs.get_int("tile_size")
    out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype

    assert dilation == (1, 1), "Do not support dilate now"
    assert groups == 1, "Do not support arbitrary group number"

    with tvm.target.create(target):
        # pylint: disable=assignment-from-no-return
        out = topi.nn.conv2d_winograd_without_weight_transform(
            inputs[0], inputs[1], strides, padding, layout, out_dtype, tile_size)
        if attrs.get_bool("use_bias"):
            bias = inputs[2]
            bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
            out = topi.add(out, bias)
        return out

def compute_contrib_conv2d_NCHWc(attrs, inputs, _):
    """Compute definition of conv2d NCHWc"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    kh, kw = attrs.get_int_tuple('kernel_size')
    groups = attrs.get_int("groups")
    channels = attrs.get_int("channels")
    layout = attrs.get_string("layout")
    out_layout = attrs.get_string("out_layout")

    assert dilation == (1, 1), "not support dilate now"
    with tvm.target.create(attrs.get_string("target")):
        if groups == 1:
            # pylint: disable=assignment-from-no-return
            out = topi.nn.conv2d_NCHWc(inputs[0], inputs[1], channels, (kh, kw),
                                       strides, padding, layout, out_layout)
            # pylint: enable=assignment-from-no-return
        else:
            raise ValueError("not support arbitrary group number > 1 for now")
        if attrs.get_bool("use_bias"):
            bias = inputs[2]
            bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
            out = topi.add(out, bias)
        return out

def compute_conv2d(attrs, inputs, _):
    """Compute definition of conv2d"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    channels = attrs.get_int("channels")
    layout = attrs["layout"]
    kernel_layout = attrs["kernel_layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype

    assert layout in ["NCHW", "NHWC", "NCHW4c"]
    (dilation_h, dilation_w) = dilation
    if dilation_h < 1 or dilation_w < 1:
        raise ValueError("dilation should be positive value")
    elif layout == "NCHW4c" and (dilation_h > 1 or dilation_w > 1):
        raise ValueError("not support dilate now")
    elif dilation == (1, 1):
        kernel = inputs[1]
    elif layout == "NCHW":
        kernel = topi.nn.dilate(inputs[1], [1, 1, dilation_h, dilation_w])
    else:  # layout == NHWC
        kernel = topi.nn.dilate(inputs[1], [1, dilation_h, dilation_w, 1])

    with tvm.target.create(attrs.get_string("target")):
        if groups == 1:
            out = topi.nn.conv2d(inputs[0], kernel, strides, padding, layout,
                                 out_dtype=out_dtype)
        elif layout == "NCHW" and \
             groups == get_const_int(inputs[0].shape[1]) and \
             groups == channels:
            out = topi.nn.depthwise_conv2d_nchw(inputs[0], kernel, strides,
                                                padding, out_dtype=out_dtype)
        elif layout == "NHWC" and \
             kernel_layout == "HWOI" and \
             groups == get_const_int(inputs[0].shape[3]) and \
             groups == channels:
            out = topi.nn.depthwise_conv2d_nhwc(inputs[0], kernel, strides,
                                                padding, out_dtype=out_dtype)
        else:
            raise ValueError("not support arbitrary group number for now")

        if attrs.get_bool("use_bias"):
            bias = inputs[2]
            expand_axis = 1 if layout == "NCHW" else 0
            bias = topi.expand_dims(bias, axis=expand_axis, num_newaxis=2)
            out = topi.add(out, bias)
        return out

def compute_bias_add(attrs, inputs, out_dtype, target):
    """Compute definition of bias_add"""
    axis = attrs.axis
    bias = inputs[1]
    data_ndim = len(inputs[0].shape)
    if axis < 0:
        axis = axis + data_ndim
    num_newaxis = data_ndim - axis - 1
    if num_newaxis:
        bias = topi.expand_dims(bias, axis=1, num_newaxis=num_newaxis)
    return [topi.add(inputs[0], bias)]

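# Numpy sketch of the broadcast that compute_bias_add relies on: for 4-D
# NCHW data and axis=1, num_newaxis = 4 - 1 - 1 = 2, so a (C,) bias is
# expanded to (C, 1, 1), which broadcasts against (N, C, H, W).
import numpy as np

data = np.zeros((2, 3, 4, 4), "float32")   # N, C, H, W
bias = np.arange(3, dtype="float32")       # one value per channel
num_newaxis = data.ndim - 1 - 1            # axis == 1
expanded = bias.reshape(bias.shape + (1,) * num_newaxis)  # (3, 1, 1)
out = data + expanded                      # adds bias[c] to every (h, w)
assert out[0, 1, 0, 0] == 1.0 and out[1, 2, 3, 3] == 2.0
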
def compute_conv2d(attrs, inputs, _):
    """Compute definition of conv2d"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    channels = attrs.get_int("channels")
    layout = attrs["layout"]
    kernel_layout = attrs["kernel_layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype

    assert layout in ["NCHW", "NHWC", "NCHW4c"]
    (dilation_h, dilation_w) = dilation
    if dilation_h < 1 or dilation_w < 1:
        raise ValueError("dilation should be positive value")

    with tvm.target.create(attrs.get_str("target")):
        if groups == 1 and layout == 'NCHW4c' and inputs[0].dtype == 'int8':
            # pylint: disable=assignment-from-no-return
            out = topi.nn.conv2d(inputs[0], inputs[1], strides, padding,
                                 dilation, layout, out_dtype)
            # pylint: enable=assignment-from-no-return
        elif groups == 1:
            out = topi.nn.conv2d(inputs[0], inputs[1], strides, padding,
                                 dilation, layout, out_dtype)
        elif layout == "NCHW" and \
             groups == get_const_int(inputs[0].shape[1]) and \
             groups == channels:
            out = topi.nn.depthwise_conv2d_nchw(inputs[0], inputs[1], strides,
                                                padding, dilation, out_dtype)
        elif layout in ["NCHW", "NCHW4c"]:
            out = topi.nn.group_conv2d_nchw(inputs[0], inputs[1], strides,
                                            padding, dilation, groups, out_dtype)
        elif layout == "NHWC" and \
             kernel_layout == "HWOI" and \
             groups == get_const_int(inputs[0].shape[3]) and \
             groups == channels:
            out = topi.nn.depthwise_conv2d_nhwc(inputs[0], inputs[1], strides,
                                                padding, dilation, out_dtype)
        else:
            raise ValueError("not support arbitrary group number for now")

        if attrs.get_bool("use_bias"):
            bias = inputs[2]
            expand_axis = 1 if layout == "NCHW" else 0
            bias = topi.expand_dims(bias, axis=expand_axis, num_newaxis=2)
            out = topi.add(out, bias)
        return out

def compute_conv2d(attrs, inputs, _):
    """Compute definition of conv2d"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    channels = attrs.get_int("channels")
    layout = attrs["layout"]
    kernel_layout = attrs["kernel_layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype

    assert layout in ["NCHW", "NHWC", "NCHW4c"]
    (dilation_h, dilation_w) = dilation
    if dilation_h < 1 or dilation_w < 1:
        raise ValueError("dilation should be positive value")

    if groups == 1 and layout == 'NCHW4c' and inputs[0].dtype == 'int8':
        # pylint: disable=assignment-from-no-return
        out = topi.nn.conv2d(inputs[0], inputs[1], strides, padding,
                             dilation, layout, out_dtype=out_dtype)
        # pylint: enable=assignment-from-no-return
    elif groups == 1:
        out = topi.nn.conv2d(
            inputs[0], inputs[1], strides, padding, dilation, layout,
            out_dtype=out_dtype)
    elif layout == "NCHW" and \
         groups == get_const_int(inputs[0].shape[1]) and \
         groups == channels:
        out = topi.nn.depthwise_conv2d_nchw(
            inputs[0], inputs[1], strides, padding, dilation,
            out_dtype=out_dtype)
    elif layout in ["NCHW", "NCHW4c"]:
        out = topi.nn.group_conv2d_nchw(inputs[0], inputs[1], strides, padding,
                                        dilation, groups, out_dtype=out_dtype)
    elif layout == "NHWC" and \
         kernel_layout == "HWOI" and \
         groups == get_const_int(inputs[0].shape[3]) and \
         groups == channels:
        out = topi.nn.depthwise_conv2d_nhwc(
            inputs[0], inputs[1], strides, padding, dilation,
            out_dtype=out_dtype)
    else:
        raise ValueError("not support arbitrary group number for now")

    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        expand_axis = 1 if layout in ["NCHW", "NCHW4c"] else 0
        bias = topi.expand_dims(bias, axis=expand_axis, num_newaxis=2)
        out = topi.add(out, bias)
    return out

def compute_contrib_conv2d_NCHWc(attrs, inputs, _):
    """Compute definition of conv2d NCHWc"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    kh, kw = attrs.get_int_tuple('kernel_size')
    groups = attrs.get_int("groups")
    channels = attrs.get_int("channels")

    assert dilation == (1, 1), "not support dilate now"
    if groups == 1:
        out = topi.nn.conv2d_NCHWc(inputs[0], inputs[1], channels, (kh, kw),
                                   strides, padding)
    else:
        raise ValueError("not support arbitrary group number > 1 for now")
    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
        out = topi.broadcast_add(out, bias)
    return out

def compute_conv2d_transpose(attrs, inputs, _):
    """Compute definition of conv2d_transpose"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    layout = attrs["layout"]

    assert layout == "NCHW", "only support nchw for now"
    assert dilation == (1, 1), "not support dilate now"
    assert groups == 1, "only support groups == 1 for now"

    out = topi.nn.conv2d_transpose_nchw(inputs[0], inputs[1], strides, padding)
    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
        out = topi.broadcast_add(out, bias)
    output_padding = attrs.get_int_tuple("output_padding")
    out = topi.nn.pad(out, [0, 0, 0, 0],
                      [0, 0, output_padding[0], output_padding[1]])
    return out

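# Numpy sketch of the output_padding step above: padding with
# pad_before=[0, 0, 0, 0] and pad_after=[0, 0, oph, opw] zero-pads only the
# bottom and right of the NCHW result, so the final spatial size works out
# to (in - 1) * stride - 2 * pad + kernel + output_padding per dimension.
import numpy as np

out = np.ones((1, 8, 6, 6), "float32")   # result of conv2d_transpose_nchw
oph, opw = 1, 1                          # output_padding
padded = np.pad(out, [(0, 0), (0, 0), (0, oph), (0, opw)])
assert padded.shape == (1, 8, 7, 7)
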
def compute_conv2d(attrs, inputs, _):
    """Compute definition of conv2d"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    channels = attrs.get_int("channels")
    layout = attrs["layout"]

    assert layout == "NCHW", "only support nchw for now"
    assert dilation == (1, 1), "not support dilate now"
    if groups == 1:
        out = topi.nn.conv2d(inputs[0], inputs[1], strides, padding)
    elif groups == get_const_int(inputs[0].shape[1]) and groups == channels:
        out = topi.nn.depthwise_conv2d_nchw(inputs[0], inputs[1], strides, padding)
    else:
        raise ValueError("not support arbitrary group number for now")
    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
        out = topi.broadcast_add(out, bias)
    return out

def verify_expand_dims(in_shape, out_shape, axis, num_newaxis):
    A = tvm.placeholder(shape=in_shape, name="A")
    B = topi.expand_dims(A, axis, num_newaxis)
    s = topi.cuda.schedule_broadcast(B)

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
        foo = tvm.build(s, [A, B], device, name="expand_dims")
        data_npy = np.random.uniform(size=in_shape).astype(A.dtype)
        out_npy = data_npy.reshape(out_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), ctx)
        foo(data_nd, out_nd)
        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    check_device("opencl")
    check_device("cuda")
    check_device("metal")

def verify_expand_dims(in_shape, out_shape, axis, num_newaxis):
    A = tvm.placeholder(shape=in_shape, name="A")
    B = topi.expand_dims(A, axis, num_newaxis)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_broadcast(B)
        foo = tvm.build(s, [A, B], device, name="expand_dims")
        data_npy = np.random.uniform(size=in_shape).astype(A.dtype)
        out_npy = data_npy.reshape(out_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), ctx)
        foo(data_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)

def verify_expand_dims(in_shape, out_shape, axis, num_newaxis):
    A = te.placeholder(shape=in_shape, name="A")
    B = topi.expand_dims(A, axis, num_newaxis)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.testing.get_broadcast_schedule(device)(B)
        foo = tvm.build(s, [A, B], device, name="expand_dims")
        data_npy = np.random.uniform(size=in_shape).astype(A.dtype)
        out_npy = data_npy.reshape(out_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), ctx)
        foo(data_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)

def Stack(device="llvm", lib_path="./", ndim=None, dtype=None,
          input_num=None, axis=None):
    '''
    stack

    Args:
        device:
        lib_path:
        ndim:
        dtype:
        input_num:
        axis:

    Returns:
    '''
    if axis > ndim:
        return
    shape = [tvm.var("n" + str(i)) for i in range(ndim)]
    shapes = [shape] * input_num
    in_tensor = [tvm.placeholder(shape, dtype=dtype, name='in_tensor%d' % i)
                 for i, shape in enumerate(shapes)]
    opname = "Stack_ndim%d_%s_input_num%d_axis%d" % (ndim, dtype, input_num, axis)
    print(opname)

    input_tensor = [topi.expand_dims(ai, axis) for ai in in_tensor]
    out_tensor = topi.concatenate(tuple(input_tensor), axis=axis)
    tensor_list = in_tensor + [out_tensor]
    if ndim < 4:
        s = topi.generic.schedule_concatenate(out_tensor)
    else:
        s = tvm.create_schedule(out_tensor.op)
    Genlib(s, tensor_list, device, opname, lib_path)

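# Numpy sketch of the decomposition Stack uses above: stacking N tensors
# along a new axis is expand_dims on each input followed by concatenate
# along that same axis.
import numpy as np

xs = [np.full((2, 3), i, "float32") for i in range(4)]
stacked = np.concatenate([np.expand_dims(x, 1) for x in xs], axis=1)
assert stacked.shape == (2, 4, 3)
assert np.array_equal(stacked, np.stack(xs, axis=1))
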
def compute_contrib_conv2d_winograd_without_weight_transform(attrs, inputs, _):
    """Compute definition of conv2d winograd without weight transform"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    layout = attrs.get_str("layout")
    out_dtype = attrs.get_str("out_dtype")
    tile_size = attrs.get_int("tile_size")
    out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype

    assert dilation == (1, 1), "Do not support dilate now"
    assert groups == 1, "Do not support arbitrary group number"

    # pylint: disable=assignment-from-no-return
    out = topi.nn.conv2d_winograd_without_weight_transform(
        inputs[0], inputs[1], strides, padding, dilation, layout, out_dtype,
        tile_size)
    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
        out = topi.add(out, bias)
    return out

def ExpandDims(device="llvm", lib_path="./", ndim=None, axis=None, dtype=None):
    '''
    expand dims

    Args:
        device:
        lib_path:
        ndim:
        axis:
        dtype:

    Returns:
    '''
    if axis > ndim:
        return
    shape = [tvm.var("n" + str(i)) for i in range(ndim)]
    opname = "ExpandDim_ndim%d_%s_axis%d" % (ndim, dtype, axis)
    print(opname)

    # define compute
    in_tensor = tvm.placeholder(shape, dtype=dtype, name='in_tensor')
    out_tensor = topi.expand_dims(in_tensor, axis=axis)
    tensor_list = [in_tensor, out_tensor]
    s = topi.generic.schedule_injective(out_tensor)
    Genlib(s, tensor_list, device, opname, lib_path)

def compute_expand_dims(attrs, inputs, out_info):
    """Compute definition of expand_dims"""
    return topi.expand_dims(inputs[0], attrs.get_int("axis"),
                            num_newaxis=attrs.get_int("num_newaxis"))

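# Numpy sketch of the expand_dims semantics registered above: insert
# `num_newaxis` length-1 axes starting at position `axis`, leaving the
# data itself untouched.
import numpy as np

x = np.random.rand(3, 10)
axis, num_newaxis = 2, 2
y = x.reshape(x.shape[:axis] + (1,) * num_newaxis + x.shape[axis:])
assert y.shape == (3, 10, 1, 1)
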
def _interpolate(im, im_shape, x, y, out_size, dtype):
    num_batch = im_shape[0]
    height = im_shape[1]
    width = im_shape[2]
    channels = im_shape[3]

    out_height = out_size[0]
    out_width = out_size[1]
    max_y = int(im_shape[1] - 1)
    max_x = int(im_shape[2] - 1)

    # [-1, 1] -> [0, width-1]
    x = topi.multiply(topi.add(x, tvm.const(1, dtype=dtype)),
                      width / tvm.const(2, dtype=dtype))
    y = topi.multiply(topi.add(y, tvm.const(1, dtype=dtype)),
                      height / tvm.const(2, dtype=dtype))

    # do sampling
    dim3 = out_height * out_width * num_batch
    x0 = topi.cast(topi.floor(x), 'int32')
    y0 = topi.cast(topi.floor(y), 'int32')
    x1 = topi.add(x0, tvm.const(1, dtype="int32"))
    y1 = topi.add(y0, tvm.const(1, dtype="int32"))
    x0 = topi.clip(x0, 0, max_x)
    x1 = topi.clip(x1, 0, max_x)
    y0 = topi.clip(y0, 0, max_y)
    y1 = topi.clip(y1, 0, max_y)

    dim2 = width
    dim1 = width * height
    base = tvm.compute((dim3,),
                       lambda i: (i // (out_height * out_width)) * width * height,
                       name='base')
    base_y0 = topi.add(base, topi.multiply(y0, dim2))
    base_y1 = topi.add(base, topi.multiply(y1, dim2))
    idx_a = topi.add(base_y0, x0)
    idx_b = topi.add(base_y1, x0)
    idx_c = topi.add(base_y0, x1)
    idx_d = topi.add(base_y1, x1)

    im_flat = topi.reshape(im, (num_batch * height * width, channels))
    im_flat = topi.cast(im_flat, dtype)
    Ia = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_a[i], j], name='Ia')
    Ib = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_b[i], j], name='Ib')
    Ic = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_c[i], j], name='Ic')
    Id = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_d[i], j], name='Id')

    x0_f = topi.cast(x0, dtype)
    x1_f = topi.cast(x1, dtype)
    y0_f = topi.cast(y0, dtype)
    y1_f = topi.cast(y1, dtype)
    wa = topi.expand_dims(topi.multiply(topi.subtract(x1_f, x), topi.subtract(y1_f, y)), 1)
    wb = topi.expand_dims(topi.multiply(topi.subtract(x1_f, x), topi.subtract(y, y0_f)), 1)
    wc = topi.expand_dims(topi.multiply(topi.subtract(x, x0_f), topi.subtract(y1_f, y)), 1)
    wd = topi.expand_dims(topi.multiply(topi.subtract(x, x0_f), topi.subtract(y, y0_f)), 1)

    output = topi.add(topi.add(topi.add(topi.multiply(wa, Ia), topi.multiply(wb, Ib)),
                               topi.multiply(wc, Ic)),
                      topi.multiply(wd, Id))
    return output

def _interpolate(im, im_shape, x, y, out_size, dtype):
    num_batch = im_shape[0]
    height = im_shape[1]
    width = im_shape[2]
    channels = im_shape[3]

    out_height = out_size[0]
    out_width = out_size[1]
    max_y = int(im_shape[1] - 1)
    max_x = int(im_shape[2] - 1)

    # [-1, 1] -> [0, width-1]
    x_temp = topi.multiply(topi.add(x, tvm.const(1, dtype=dtype)),
                           width / tvm.const(2, dtype=dtype))
    y_temp = topi.multiply(topi.add(y, tvm.const(1, dtype=dtype)),
                           height / tvm.const(2, dtype=dtype))

    # do sampling
    dim3 = out_height * out_width * num_batch
    x_zero = topi.cast(topi.floor(x_temp), 'int32')
    y_zero = topi.cast(topi.floor(y_temp), 'int32')
    x_one = topi.add(x_zero, tvm.const(1, dtype="int32"))
    y_one = topi.add(y_zero, tvm.const(1, dtype="int32"))
    x_zero = topi.clip(x_zero, 0, max_x)
    x_one = topi.clip(x_one, 0, max_x)
    y_zero = topi.clip(y_zero, 0, max_y)
    y_one = topi.clip(y_one, 0, max_y)

    dim2 = width
    base = tvm.compute((dim3,),
                       lambda i: (i // (out_height * out_width)) * width * height,
                       name='base')
    base_y0 = topi.add(base, topi.multiply(y_zero, dim2))
    base_y1 = topi.add(base, topi.multiply(y_one, dim2))
    idx_a = topi.add(base_y0, x_zero)
    idx_b = topi.add(base_y1, x_zero)
    idx_c = topi.add(base_y0, x_one)
    idx_d = topi.add(base_y1, x_one)

    im_flat = topi.reshape(im, (num_batch * height * width, channels))
    im_flat = topi.cast(im_flat, dtype)
    i_a = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_a[i], j], name='Ia')
    i_b = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_b[i], j], name='Ib')
    i_c = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_c[i], j], name='Ic')
    i_d = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_d[i], j], name='Id')

    x0_f = topi.cast(x_zero, dtype)
    x1_f = topi.cast(x_one, dtype)
    y0_f = topi.cast(y_zero, dtype)
    y1_f = topi.cast(y_one, dtype)
    w_a = topi.expand_dims(
        topi.multiply(topi.subtract(x1_f, x_temp), topi.subtract(y1_f, y_temp)), 1)
    w_b = topi.expand_dims(
        topi.multiply(topi.subtract(x1_f, x_temp), topi.subtract(y_temp, y0_f)), 1)
    w_c = topi.expand_dims(
        topi.multiply(topi.subtract(x_temp, x0_f), topi.subtract(y1_f, y_temp)), 1)
    w_d = topi.expand_dims(
        topi.multiply(topi.subtract(x_temp, x0_f), topi.subtract(y_temp, y0_f)), 1)

    output = topi.add(
        topi.add(
            topi.add(topi.multiply(w_a, i_a), topi.multiply(w_b, i_b)),
            topi.multiply(w_c, i_c)),
        topi.multiply(w_d, i_d))
    return output

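# Scalar numpy sketch of the bilinear weights computed in _interpolate:
# each corner weight is the product of the fractional distances to the
# opposite corner, and the four weights sum to 1.
import numpy as np

x, y = 2.3, 4.7                       # sampling point in source coordinates
x0, y0 = int(np.floor(x)), int(np.floor(y))
x1, y1 = x0 + 1, y0 + 1
wa = (x1 - x) * (y1 - y)              # weight for corner (x0, y0)
wb = (x1 - x) * (y - y0)              # weight for corner (x0, y1)
wc = (x - x0) * (y1 - y)              # weight for corner (x1, y0)
wd = (x - x0) * (y - y0)              # weight for corner (x1, y1)
assert abs(wa + wb + wc + wd - 1.0) < 1e-9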