def _meshgrid(height, width):
    y_zero = tvm.compute((height, ), lambda i: -1 + i * 2.0 / (height - 1), name='y0')
    x_zero = tvm.compute((width, ), lambda i: -1 + i * 2.0 / (width - 1), name='x0')
    y_temp = tvm.compute((height * width, ), lambda i: y_zero[i // width], name='y')
    x_temp = tvm.compute((height * width, ), lambda i: x_zero[i % width], name='x')
    y_temp = topi.reshape(y_temp, (1, height * width))
    x_temp = topi.reshape(x_temp, (1, height * width))
    ones = tvm.compute((1, height * width), lambda i, j: 1, name='ones')
    grid = tvm.compute(
        (3, height * width),
        lambda i, j: 0.5 * (i - 1) * (i - 2) * x_temp[0, j] +
        i * (2 - i) * y_temp[0, j] +
        0.5 * i * (i - 1) * ones[0, j],
        name='grid')
    # grid = topi.concatenate((x, y, ones), 0)  # cannot use topi.concatenate here
    return grid
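
# --- Illustrative usage sketch (not from the original source): builds the
# 3 x (H*W) grid produced by _meshgrid above and checks that its rows are the
# normalised x coordinates, the normalised y coordinates, and ones. Assumes an
# LLVM-enabled build of the pre-0.7 tvm/topi API used throughout this file.
def _meshgrid_example(height=4, width=5):
    import numpy as np
    grid = _meshgrid(height, width)
    s = tvm.create_schedule(grid.op)
    f = tvm.build(s, [grid], 'llvm', name='meshgrid')
    out = tvm.nd.empty((3, height * width), dtype=grid.dtype)
    f(out)
    xs, ys, ones = out.asnumpy()
    np.testing.assert_allclose(xs[:width], np.linspace(-1, 1, width), atol=1e-6)
    np.testing.assert_allclose(ys[::width], np.linspace(-1, 1, height), atol=1e-6)
    np.testing.assert_allclose(ones, np.ones(height * width), atol=1e-6)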
def _transform(theta, input_dim, out_size, input_shape, dtype):
    num_batch = input_shape[0]
    height = input_shape[1]
    width = input_shape[2]
    num_channels = input_shape[3]
    theta = topi.reshape(theta, (num_batch, 2, 3))
    theta = topi.cast(theta, dtype)
    out_height = out_size[0]
    out_width = out_size[1]
    grid = _meshgrid(out_height, out_width)
    grid = topi.reshape(grid, (num_batch, 3, out_height * out_width))
    grid = topi.cast(grid, dtype=dtype)

    k = tvm.reduce_axis((0, 3), 'k')
    T_g = tvm.compute((num_batch, 2, out_height * out_width),
                      lambda b, y, x: tvm.sum(theta[b, y, k] * grid[b, k, x], axis=k),
                      name='T_g')
    x_s = tvm.compute((num_batch, 1, out_height * out_width),
                      lambda i, j, k: T_g[i, 0, k], name='x_s')
    y_s = tvm.compute((num_batch, 1, out_height * out_width),
                      lambda i, j, k: T_g[i, 1, k], name='y_s')
    x_s_flat = topi.reshape(x_s, (num_batch * out_height * out_width,))
    y_s_flat = topi.reshape(y_s, (num_batch * out_height * out_width,))

    input_transformed = _interpolate(input_dim, input_shape, x_s_flat, y_s_flat,
                                     out_size, dtype)
    output = topi.reshape(input_transformed,
                          [num_batch, out_height, out_width, num_channels])
    return output
def forward_(x_op, name):
    # 2D-only specialized implementation of topi.nn.softmax
    if x_op.ndim == 1:
        x = topi.reshape(x_op.tvm_tensor, (1, x_op.size))
        m = 1
        n = x_op.shape[0]
    elif x_op.ndim == 2:
        x = x_op.tvm_tensor
        m, n = x_op.shape
    else:
        raise ValueError(f'Given ndim {x_op.ndim} is not supported')

    k = tvm.reduce_axis((0, n), name='k')
    max_elem = tvm.compute((m, ),
                           lambda i: tvm.max(x[i, k], axis=k),
                           name=f'{name}:max_elem')
    k = tvm.reduce_axis((0, n), name='k')
    expsum = tvm.compute(
        (m, ),
        lambda i: tvm.sum(tvm.exp(x[i, k] - max_elem[i]), axis=k),
        name=f'{name}:expsum')
    softmax = tvm.compute(
        x.shape,
        lambda i, j: tvm.exp(x[i, j] - max_elem[i]) / expsum[i],
        name=f'{name}:softmax')
    if x_op.ndim == 1:
        softmax = topi.reshape(softmax, x_op.shape)
    return softmax, max_elem, expsum
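
# --- Illustrative NumPy reference (not from the original source) mirroring the
# three stages above (max_elem, expsum, softmax); handy for cross-checking the
# generated kernel on concrete data.
def softmax_reference(x):
    import numpy as np
    x = np.atleast_2d(x)                     # mirrors the ndim == 1 reshape above
    max_elem = x.max(axis=1, keepdims=True)  # row-wise max for numerical stability
    exp = np.exp(x - max_elem)
    expsum = exp.sum(axis=1, keepdims=True)
    return exp / expsum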
def reshape(tensor, shape, sph=None, dst_scope='buffer0'):
    res = topi.reshape(tensor, shape)
    MarkScope(res, dst_scope)
    PragmaCopy(res)
    return res
def verify_reshape(src_shape, dst_shape):
    A = tvm.placeholder(shape=src_shape, name="A")
    B = topi.reshape(A, dst_shape)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)
        foo = tvm.build(s, [A, B], device, name="reshape")
        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
        out_npy = np.reshape(data_npy, newshape=dst_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.empty(dst_shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm", "vulkan", "sdaccel"]:
        check_device(device)
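
# --- Illustrative invocations (not from the original source); the shapes are
# chosen arbitrarily, the only requirement being that src and dst hold the same
# number of elements. Backends that are not enabled are skipped in check_device.
def test_reshape():
    verify_reshape((1, 2, 3, 4), (2, 12))
    verify_reshape((4, 2, 3, 4), (2, 4, 12))
    verify_reshape((16,), (2, 2, 2, 2))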
def FullConnection(device="llvm", lib_path="./", ndim_a=None, dtype=None, has_bias=None):
    '''
    full connection

    Args:
        device:
        lib_path:
        ndim_a:
        dtype:
        has_bias:

    Returns:
    '''
    n_dim, ci, h_dim, kernel_tensor = (tvm.var("n_dim"), tvm.var("out_tensor"),
                                       tvm.var("h_dim"), tvm.var("kernel_tensor"))
    co = tvm.var("co")
    if ndim_a == 4:
        shape_a = (n_dim, ci, h_dim, kernel_tensor)
        chw = ci * h_dim * kernel_tensor
    else:
        shape_a = (n_dim, ci)
        chw = ci
    shape_w = (co, chw)

    opname = "FullConnection_ndimA%d_%s_%s" % (ndim_a, dtype,
                                               "hasBias" if has_bias else "notHasBias")
    is_var = True
    vh, vw, vc = 1, 1, 1
    print(opname)

    in_tensor = tvm.placeholder(shape_a, dtype=dtype, name='in_tensor')
    kernel_tensor = tvm.placeholder(shape_w, dtype=dtype, name='kernel_tensor')
    input_tensor = topi.reshape(in_tensor, (n_dim, chw)) if len(shape_a) == 4 else in_tensor
    out_tensor = _matmul_spatial_pack_asm((is_var, 0, ci, vh, vw, vc), input_tensor,
                                          kernel_tensor, layout='NC', out_dtype=dtype)
    if has_bias:
        bias = tvm.placeholder((co, ), dtype=dtype, name='bias')
        out_tensor = tvm.compute((n_dim, co),
                                 lambda n, co: out_tensor[n, co] + bias[co],
                                 tag='injective')
    tensor_list = [in_tensor, kernel_tensor, bias, out_tensor] if has_bias \
        else [in_tensor, kernel_tensor, out_tensor]

    cfg = {
        'is_var': is_var,
        'is_transpose': 0,
        'core_id': 0,
        'CI': ci,
        'VH': vh,
        'VW': vw,
        'VC': vc
    }
    s = _matmul_schedule_asm(cfg, [out_tensor])
    Genlib(s, tensor_list, device, opname, lib_path)
def __call__(self, inputs):
    outputs = topi.nn.conv2d(inputs, self.weight, self.stride, self.padding,
                             self.dilation)
    if self.bias:
        # TODO: check bias shape
        reshaped_bias = topi.reshape(
            self.bias, (self.in_channels, self.out_channels, 1, 1))
        outputs += reshaped_bias
    return outputs
def algorithm_forward(self):
    assert self.x.size == self.t.size, \
        "only supports one-hot vector"
    self.softmax, self.max_elem, self.expsum = Softmax.forward_(
        self.x, name=f'{self.label}')
    if self.x.ndim == 1:
        y = topi.reshape(self.softmax, (1, self.x.size))
        t = topi.reshape(self.t.tvm_tensor, (1, self.t.size))
        m = 1
        n = self.x.shape[0]
    elif self.x.ndim == 2:
        y = self.softmax
        t = self.t.tvm_tensor
        m, n = self.x.shape
    else:
        raise NotImplementedError

    self.ty = tvm.compute((m, n),
                          lambda i, j: tvm.log(y[i, j]) * t[i, j],
                          name=f'{self.label}:ty')
    k = tvm.reduce_axis((0, n), name='k')
    self.sum_ty = tvm.compute((m, ),
                              lambda i: tvm.sum(self.ty[i, k], axis=k),
                              name=f'{self.label}:sum_ty')

    # TODO: need to validate the shape and keepdims
    # self.shape would be something like (1, 1, 1), whose size is 1
    expected_size = 1
    assert self.size == expected_size, \
        f'size of SoftmaxWithCrossEntropyLoss must be {expected_size}, not {self.size}'
    k = tvm.reduce_axis((0, m), name='k')
    self.total = tvm.compute(self.shape,
                             lambda *idxs: tvm.sum(self.sum_ty[k], axis=k),
                             name=f'{self.label}:total')
    self.tvm_tensor = tvm.compute(self.shape,
                                  lambda *idxs: -self.total[idxs] / m,
                                  name=f'{self.label}:tensor')
def _declaration_conv_NCHWc_int8(wkl, sch, data, kernel):
    """Declaration for int8 conv"""
    out_dtype = wkl.out_dtype
    HPAD, WPAD = wkl.hpad, wkl.wpad
    HSTR, WSTR = wkl.hstride, wkl.wstride

    batch_size = data.shape[0]
    out_height = (wkl.height + 2 * HPAD - wkl.hkernel) // HSTR + 1
    out_width = (wkl.width + 2 * WPAD - wkl.wkernel) // WSTR + 1

    DOPAD = (HPAD != 0 or WPAD != 0)
    if DOPAD:
        data_pad = pad(data, (0, 0, HPAD, WPAD, 0), name="data_pad")
    else:
        data_pad = data

    oshape = (batch_size, wkl.out_filter // sch.oc_bn, out_height, out_width, sch.oc_bn)

    # The Intel int8 instruction performs a dot product over groups of 4 int8 values
    n_elems = 4
    assert sch.ic_bn % n_elems == 0

    ic_outer = tvm.reduce_axis((0, wkl.in_filter // sch.ic_bn), name='ic_outer')
    ic_f_inner = tvm.reduce_axis((0, sch.ic_bn // n_elems), name='ic_f_inner')
    ic_s_inner = tvm.reduce_axis((0, n_elems), name='ic_s_inner')

    # Reshaping kernel as the last 2 dimensions are 1x1 (k_h x k_w)
    k_shape = kernel.shape
    kernel = topi.reshape(kernel, (k_shape[0], k_shape[1], k_shape[2], k_shape[3],
                                   k_shape[4] * k_shape[5] * k_shape[6]))

    conv = tvm.compute(
        oshape,
        lambda n, oc_chunk, oh, ow, oc_block: tvm.sum(
            data_pad[n, ic_outer, oh * HSTR, ow * WSTR,
                     ic_f_inner * n_elems + ic_s_inner].astype(out_dtype) *
            kernel[oc_chunk, ic_outer, ic_f_inner, oc_block,
                   ic_s_inner].astype(out_dtype),
            axis=[ic_outer, ic_f_inner, ic_s_inner]),
        name='conv2d_NCHWc_int8',
        tag="conv2d_NCHWc_int8")

    return conv
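
# --- Illustrative index-math check (not from the original source): in the
# blocked layout above, the original input channel is recovered from the three
# reduction axes as ic = ic_outer * ic_bn + ic_f_inner * n_elems + ic_s_inner;
# the inner part, ic_f_inner * n_elems + ic_s_inner, is exactly the last index
# used on data_pad. Block sizes below are example values, not the schedule's.
def _unblock_ic(ic_outer, ic_f_inner, ic_s_inner, ic_bn=16, n_elems=4):
    return ic_outer * ic_bn + ic_f_inner * n_elems + ic_s_inner

assert _unblock_ic(1, 2, 3) == 27  # 1*16 + 2*4 + 3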
def verify_reshape(src_shape, dst_shape):
    A = tvm.placeholder(shape=src_shape, name="A")
    B = topi.reshape(A, dst_shape)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)
        foo = tvm.build(s, [A, B], device, name="reshape")
        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
        out_npy = np.reshape(data_npy, newshape=dst_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.empty(dst_shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)
def verify_reshape(src_shape, dst_shape):
    A = te.placeholder(shape=src_shape, name="A")
    B = topi.reshape(A, dst_shape)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.testing.get_injective_schedule(device)(B)
        foo = tvm.build(s, [A, B], device, name="reshape")
        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
        out_npy = np.reshape(data_npy, newshape=dst_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.empty(dst_shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)
def verify_reshape(src_shape, dst_shape):
    A = tvm.placeholder(shape=src_shape, name="A")
    B = topi.reshape(A, dst_shape)
    s = topi.cuda.schedule_injective(B)

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
        foo = tvm.build(s, [A, B], device, name="reshape")
        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
        out_npy = np.reshape(data_npy, newshape=dst_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.empty(dst_shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    check_device("cuda")
    check_device("opencl")
    check_device("metal")
inits = [(np.zeros, 'shape'), (np.zeros, 'shape'), (np.zeros, 'shape'),
         (np.zeros, 'shape'), (np.zeros, 'shape'), (np.ones, 'shape'),
         (np.zeros, 'shape'), (np.zeros, 'shape'),
         (np.random.normal, 'size'), (np.random.normal, 'size')]

# Graph input
x = tvm.placeholder((batch_size, num_timesteps * num_input), 'float32')
y = tvm.placeholder((batch_size, num_classes), 'float32')
s = tvm.placeholder((batch_size, num_hidden), 'float32')
h = tvm.placeholder((batch_size, num_hidden), 'float32')

# Tensors and vars for training graph
weights = [tvm.placeholder(x, 'float32') for x in sizes]

# Construct model
xs = topi.split(topi.reshape(x, (batch_size, num_timesteps, num_input)),
                num_timesteps, axis=1)
xs = [topi.reshape(x, (batch_size, num_input)) for x in xs]
new_s = s
new_h = h
for i in range(num_timesteps):
    inp = topi.concatenate([xs[i], new_h], 1)
    g = topi.tanh(topi.matmul(inp, weights[0]) + weights[1])
    j = topi.sigmoid(topi.matmul(inp, weights[2]) + weights[3])
    f = topi.sigmoid(topi.matmul(inp, weights[4]) + weights[5])
    o = topi.sigmoid(topi.matmul(inp, weights[6]) + weights[7])
    new_s = new_s * f + g * j
    new_h = topi.tanh(new_s) * o
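
# --- Illustrative NumPy reference (not from the original source) for one cell
# update in the loop above; weights[2k] is a weight matrix, weights[2k+1] the
# matching bias, and g/j/f/o are the candidate, input, forget and output gates.
def lstm_cell_reference(x_t, h_prev, s_prev, weights):
    import numpy as np
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    inp = np.concatenate([x_t, h_prev], axis=1)
    g = np.tanh(inp @ weights[0] + weights[1])
    j = sigmoid(inp @ weights[2] + weights[3])
    f = sigmoid(inp @ weights[4] + weights[5])
    o = sigmoid(inp @ weights[6] + weights[7])
    new_s = s_prev * f + g * j
    new_h = np.tanh(new_s) * o
    return new_h, new_s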
def compute_reshape_like(attrs, inputs, out_info):
    """Compute definition of reshape_like"""
    return topi.reshape(inputs[0], inputs[1].shape)
def ConvVar(device="llvm", lib_path="./", optype=None,\ ndim=None, layout=None, dtype=None, kernels=None,\ strides=None, pad=None, dilations=None,\ hasbias=None, activation_type=None,\ config_entity=None, impl_dtype=None, channel_multiplier=None,\ use_arm32=False, cfg=None): ''' convolution Args: device: lib_path: optype: ndim: layout: dtype: kernels: strides: pad: dilations: hasbias: activationType: configEntity: impl_dtype: channel_multiplier: use_arm32: cfg: Returns: ''' use_depthwise = optype == 'ConvolutionDepthwise' use_deconv = optype == 'Deconvolution' use_deconv_depthwise = optype == 'DeConvolutionDepthwise' has_bias = hasbias ow = 1 if cfg is None else cfg['VW'] oh = 1 if cfg is None else cfg['VH'] oc = 1 if cfg is None else cfg['VC'] kh, kw = kernels op_name = "%s_ndim%d_%s_k%d_s%d_p%d%d%d%d_d%d_act%d_vc%d_vh%d_vw%d_hasbias%d" % ( \ map_conv[optype], ndim, dtype, \ kh, strides[0], pad[0], pad[1], pad[2], pad[3], dilations[0], \ activation_enum_map[activation_type], oc, oh, ow, hasbias) batch = tvm.var("batch") in_channel = tvm.var("in_channel") in_height, in_width = tvm.var("in_height"), tvm.var("in_width") pad_up, pad_down, pad_left, pad_right = pad opname = op_name print("Conv", opname, config_entity) if impl_dtype is None: impl_dtype = dtype if use_depthwise: multiplier = channel_multiplier out_channel = in_channel * multiplier elif use_deconv_depthwise: multiplier = channel_multiplier out_channel = in_channel * multiplier else: out_channel = tvm.var("out_channel") # define placeholder input_tensor = in_tensor = tvm.placeholder( (batch, in_channel, in_height, in_width), dtype=dtype, name='in_tensor') if use_depthwise: temp_tensor = kernel_tensor = tvm.placeholder((in_channel, multiplier, kh, kw), dtype=dtype,\ name='kernel_tensor') elif use_deconv: temp_tensor = kernel_tensor = tvm.placeholder((in_channel, out_channel, kh, kw), dtype=dtype,\ name='kernel_tensor') elif use_deconv_depthwise: temp_tensor = kernel_tensor = tvm.placeholder((in_channel, multiplier, kh, kw), dtype=dtype,\ name='kernel_tensor') else: temp_tensor = kernel_tensor = tvm.placeholder((out_channel, in_channel, kh, kw), dtype=dtype,\ name='kernel_tensor') if has_bias: bias = tvm.placeholder((out_channel, ), dtype=dtype, name='bias') bias1 = topi.reshape(bias, (out_channel, 1, 1)) if impl_dtype != dtype: input_tensor = AsType(input_tensor, impl_dtype) temp_tensor = AsType(temp_tensor, impl_dtype) if has_bias: bias1 = AsType(bias1, impl_dtype) # define compute & schedule if pad_up != pad_down or pad_left != pad_right: input_tensor = topi.nn.pad(input_tensor, [0, 0, pad_up, pad_left], [0, 0, pad_down, pad_right], name='data_pad') padding = 0, 0 else: padding = pad_up, pad_left if use_depthwise: cfg1 = (True, 1, 1, 1) if cfg is None else (True, cfg["tile_oh"], cfg["tile_ow"], cfg["tile_co"]) out_tensor = _depthwise_spatial_pack(cfg1, input_tensor, temp_tensor, strides, padding, dilations,\ out_dtype=impl_dtype) elif use_deconv: def GetInput(input_tensor, temp_tensor, padding): _, out_c, filter_h, filter_w = temp_tensor.shape if out_c is None: print("temp_tensor.shape err") stride_h, stride_w = strides # dilate stage dilated_input = topi.nn.dilate(input_tensor, [1, 1, stride_h, stride_w], name='DilatedInput') # padding stage fpad_top, fpad_left, fpad_bottom, fpad_right = topi.nn.get_pad_tuple( padding, (filter_h, filter_w)) bpad_top = filter_h - 1 - fpad_top bpad_bottom = filter_h - 1 - fpad_bottom bpad_left = filter_w - 1 - fpad_left bpad_right = filter_w - 1 - fpad_right padded_input = 
topi.nn.pad(dilated_input, \ [0, 0, bpad_top, bpad_left], \ [0, 0, bpad_bottom, bpad_right], \ name='PaddedInput') return padded_input special_deconv = kh == 2 and kw == 2 and strides[0] == 2 and strides[ 1] == 2 # special_deconv = False if special_deconv: out_tensor = OptimalOut(input_tensor, temp_tensor, in_channel) else: out_tensor = BaseImplementation(input_tensor, temp_tensor, GetInput, layout, padding) elif use_deconv_depthwise: def GetInput(input_tensor, temp_tensor, padding): _, out_c, filter_h, filter_w = temp_tensor.shape if out_c is None: print("temp_tensor.shape err") stride_h, stride_w = strides # dilate stage dilated_input = topi.nn.dilate(input_tensor, [1, 1, stride_h, stride_w], name='DilatedInput') # padding stage fpad_top, fpad_left, fpad_bottom, fpad_right = topi.nn.get_pad_tuple( padding, (filter_h, filter_w)) bpad_top = filter_h - 1 - fpad_top bpad_bottom = filter_h - 1 - fpad_bottom bpad_left = filter_w - 1 - fpad_left bpad_right = filter_w - 1 - fpad_right padded_input = topi.nn.pad(dilated_input, \ [0, 0, bpad_top, bpad_left], \ [0, 0, bpad_bottom, bpad_right], \ name='PaddedInput') return padded_input temp_tensor = topi.flip(temp_tensor, axis=-1) temp_tensor = topi.flip(temp_tensor, axis=-2) out_tensor = topi.nn.depthwise_conv2d_nchw(GetInput(input_tensor, temp_tensor, padding), temp_tensor, (1, 1), \ padding, (1, 1), out_dtype=input_tensor.dtype) else: cfg1 = (True, 1, 1, 1) if cfg is None else (True, cfg["tile_oh"], cfg["tile_ow"], cfg["tile_co"]) out_tensor = _conv_spatial_pack_asm(cfg1, input_tensor, temp_tensor, strides, padding, dilations,\ out_dtype=impl_dtype) if has_bias: out_tensor = tvm.compute(out_tensor.shape, lambda n, co, h, w: out_tensor[n, co, h, w] + bias1[co][0][0],\ tag="injective") out_tensor = TopiActivation(out_tensor, activation_type) if impl_dtype != dtype: out_tensor = AsType(out_tensor, dtype) # create schedule if use_arm32: s = tvm.create_schedule(out_tensor.op) elif use_depthwise: s = schedule_depthwise_conv2d_nchw_arm(cfg, [out_tensor]) elif use_deconv: if special_deconv: s = tvm.create_schedule([out_tensor.op]) else: s = topi.generic.schedule_conv2d_nchw([out_tensor]) elif use_deconv_depthwise: s = tvm.create_schedule([out_tensor.op]) else: s = schedule_conv2d_nchw_arm_cpu([out_tensor]) # generate lib attr = [ batch, in_channel, in_height, in_width, out_channel, in_tensor, kernel_tensor ] tensor_list = [*attr, bias, out_tensor ] if has_bias else [*attr, out_tensor] Genlib(s, tensor_list, device, opname, lib_path)
def flatten_topi(inputs):
    N, C, H, W = inputs.shape
    return topi.reshape(inputs, [N, C * H * W])
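
# --- Illustrative build-and-run sketch (not from the original source); assumes
# an LLVM-enabled build of the pre-0.7 tvm/topi API used elsewhere in this file.
def _flatten_example():
    import numpy as np
    inputs = tvm.placeholder((2, 3, 4, 5), name='inputs')
    flat = flatten_topi(inputs)
    with tvm.target.create('llvm'):
        s = topi.generic.schedule_injective(flat)
    f = tvm.build(s, [inputs, flat], 'llvm', name='flatten')
    x = np.random.uniform(size=(2, 3, 4, 5)).astype(inputs.dtype)
    out = tvm.nd.empty((2, 60), dtype=flat.dtype)
    f(tvm.nd.array(x), out)
    np.testing.assert_allclose(out.asnumpy(), x.reshape(2, 60))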
packed_output_shape = (N, G, K // G, P, Q)
output_shape = (N, K, P, Q)

I = te.placeholder(input_shape, name="I")
W = te.placeholder(weight_shape, name="W")

### reductions
rc = te.reduce_axis((0, C // G), name='rc')
ry = te.reduce_axis((0, R), name='ry')
rx = te.reduce_axis((0, S), name='rx')

ig = C // G
og = K // G

### (K, C//G, R, S) to (G, K//G, C//G, R, S)
W_pack = topi.reshape(W, packed_weight_shape)

O = te.compute(
    packed_output_shape,
    lambda n, g, co, x, y: te.sum(
        I[n, rc + (g * ig), x + rx, y + ry] * W_pack[g, co, rc, rx, ry],
        axis=[rc, ry, rx]))

s = te.create_schedule(O.op)
s[W_pack].compute_inline()

ir = tvm.lower(s, [I, W, O])
print(ir)

### COMPILE AND RUN
tgt_host = "llvm"
tgt = "llvm"
def Deconv(device="llvm", lib_path="./", optype=None, ndim=None, dtype=None, kernels=None, strides=None, pad=None, dilations=None, hasbias=None, activation_type=None, config_entity=None, impl_dtype=None, use_arm32=False, cfg=None): ''' Deconvolution Args: device: lib_path: optype: ndim: dtype: kernels: strides: pad: dilations: hasbias: activationType: configEntity: impl_dtype: use_arm32: cfg: Returns: ''' if cfg is None: cfg = { 'CI': tvm.var('ci'), 'VH': 2, 'VW': 2, 'VC': 4, 'VI': 4, 'tile_oh': 2, 'tile_ow': 2, 'tile_co': 4, 'ann_reduce': ['none', 'none'], "ann_spatial": ['none', 'none', 'none'] } has_bias = hasbias batch = tvm.var("batch") in_channel = tvm.var("in_channel") in_height, in_width = tvm.var("in_height"), tvm.var("in_width") kh, kw = kernels ow = cfg['VW'] oh = cfg['VH'] oc = cfg['VC'] op_name = "%s_ndim%d_%s_k%d_s%d_p%d%d%d%d_d%d_act%d_vc%d_vh%d_vw%d_hasbias%d" % (\ map_conv[optype], ndim, dtype,\ kh, strides[0], pad[0], pad[1], pad[2], pad[3], dilations[0],\ activation_enum_map[activation_type], oc, oh, ow, hasbias) opname = op_name print("DEconv", opname, config_entity) if impl_dtype is None: impl_dtype = dtype out_channel = tvm.var("out_channel") # define placeholder input_tensor = in_tensor = tvm.placeholder((batch, in_channel, in_height, in_width, 4), \ dtype=dtype, name='in_tensor') temp_tensor = kernel_tensor = tvm.placeholder((in_channel*4, out_channel, kh, kw), dtype=dtype, \ name='kernel_tensor') if has_bias: bias = tvm.placeholder((out_channel, ), dtype=dtype, name='bias') bias1 = topi.reshape(bias, (out_channel, 1, 1)) if impl_dtype != dtype: input_tensor = AsType(input_tensor, impl_dtype) temp_tensor = AsType(temp_tensor, impl_dtype) if has_bias: bias1 = AsType(bias1, impl_dtype) # define compute & schedule cfg1 = (True, 1, 1, 1) if cfg is None else (True, cfg["tile_oh"], cfg["tile_ow"], cfg["tile_co"]) out_tensor = _conv_spatial_pack_deconv(cfg1, input_tensor, temp_tensor, out_dtype=impl_dtype) if has_bias: out_tensor = tvm.compute(out_tensor.shape, lambda n, co, h, w, c4: \ out_tensor[n, co, h, w, c4] + bias1[co*4 + c4][0][0], tag="injective") out_tensor = TopiActivation(out_tensor, activation_type) if impl_dtype != dtype: out_tensor = AsType(out_tensor, dtype) # create schedule if use_arm32: s = tvm.create_schedule(out_tensor.op) else: s = schedule_conv2d_nchw_arm_cpu_deconv(cfg, [out_tensor]) attr = [ batch, in_channel, in_height, in_width, out_channel, in_tensor, kernel_tensor ] if has_bias: attr.append(bias) attr.append(out_tensor) tensor_list = attr Genlib(s, tensor_list, device, opname, lib_path)
def _interpolate(im, im_shape, x, y, out_size, dtype):
    num_batch = im_shape[0]
    height = im_shape[1]
    width = im_shape[2]
    channels = im_shape[3]

    out_height = out_size[0]
    out_width = out_size[1]
    max_y = int(im_shape[1] - 1)
    max_x = int(im_shape[2] - 1)

    # [-1, 1] -> [0, width-1]
    x = topi.multiply(topi.add(x, tvm.const(1, dtype=dtype)),
                      width / tvm.const(2, dtype=dtype))
    y = topi.multiply(topi.add(y, tvm.const(1, dtype=dtype)),
                      height / tvm.const(2, dtype=dtype))

    # do sampling
    dim3 = out_height * out_width * num_batch

    x0 = topi.cast(topi.floor(x), 'int32')
    y0 = topi.cast(topi.floor(y), 'int32')
    x1 = topi.add(x0, tvm.const(1, dtype="int32"))
    y1 = topi.add(y0, tvm.const(1, dtype="int32"))

    x0 = topi.clip(x0, 0, max_x)
    x1 = topi.clip(x1, 0, max_x)
    y0 = topi.clip(y0, 0, max_y)
    y1 = topi.clip(y1, 0, max_y)

    dim2 = width
    dim1 = width * height
    base = tvm.compute((dim3,),
                       lambda i: (i // (out_height * out_width)) * width * height,
                       name='base')
    base_y0 = topi.add(base, topi.multiply(y0, dim2))
    base_y1 = topi.add(base, topi.multiply(y1, dim2))

    idx_a = topi.add(base_y0, x0)
    idx_b = topi.add(base_y1, x0)
    idx_c = topi.add(base_y0, x1)
    idx_d = topi.add(base_y1, x1)

    im_flat = topi.reshape(im, (num_batch * height * width, channels))
    im_flat = topi.cast(im_flat, dtype)

    Ia = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_a[i], j], name='Ia')
    Ib = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_b[i], j], name='Ib')
    Ic = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_c[i], j], name='Ic')
    Id = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_d[i], j], name='Id')

    x0_f = topi.cast(x0, dtype)
    x1_f = topi.cast(x1, dtype)
    y0_f = topi.cast(y0, dtype)
    y1_f = topi.cast(y1, dtype)
    wa = topi.expand_dims(topi.multiply(topi.subtract(x1_f, x), topi.subtract(y1_f, y)), 1)
    wb = topi.expand_dims(topi.multiply(topi.subtract(x1_f, x), topi.subtract(y, y0_f)), 1)
    wc = topi.expand_dims(topi.multiply(topi.subtract(x, x0_f), topi.subtract(y1_f, y)), 1)
    wd = topi.expand_dims(topi.multiply(topi.subtract(x, x0_f), topi.subtract(y, y0_f)), 1)

    output = topi.add(topi.add(topi.add(topi.multiply(wa, Ia), topi.multiply(wb, Ib)),
                               topi.multiply(wc, Ic)),
                      topi.multiply(wd, Id))
    return output
def _interpolate(im, im_shape, x, y, out_size, dtype):
    num_batch = im_shape[0]
    height = im_shape[1]
    width = im_shape[2]
    channels = im_shape[3]

    out_height = out_size[0]
    out_width = out_size[1]
    max_y = int(im_shape[1] - 1)
    max_x = int(im_shape[2] - 1)

    # [-1, 1] -> [0, width-1]
    x_temp = topi.multiply(topi.add(x, tvm.const(1, dtype=dtype)),
                           width / tvm.const(2, dtype=dtype))
    y_temp = topi.multiply(topi.add(y, tvm.const(1, dtype=dtype)),
                           height / tvm.const(2, dtype=dtype))

    # do sampling
    dim3 = out_height * out_width * num_batch

    x_zero = topi.cast(topi.floor(x_temp), 'int32')
    y_zero = topi.cast(topi.floor(y_temp), 'int32')
    x_one = topi.add(x_zero, tvm.const(1, dtype="int32"))
    y_one = topi.add(y_zero, tvm.const(1, dtype="int32"))

    x_zero = topi.clip(x_zero, 0, max_x)
    x_one = topi.clip(x_one, 0, max_x)
    y_zero = topi.clip(y_zero, 0, max_y)
    y_one = topi.clip(y_one, 0, max_y)

    dim2 = width
    base = tvm.compute((dim3, ),
                       lambda i: (i // (out_height * out_width)) * width * height,
                       name='base')
    base_y0 = topi.add(base, topi.multiply(y_zero, dim2))
    base_y1 = topi.add(base, topi.multiply(y_one, dim2))

    idx_a = topi.add(base_y0, x_zero)
    idx_b = topi.add(base_y1, x_zero)
    idx_c = topi.add(base_y0, x_one)
    idx_d = topi.add(base_y1, x_one)

    im_flat = topi.reshape(im, (num_batch * height * width, channels))
    im_flat = topi.cast(im_flat, dtype)

    i_a = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_a[i], j], name='Ia')
    i_b = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_b[i], j], name='Ib')
    i_c = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_c[i], j], name='Ic')
    i_d = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_d[i], j], name='Id')

    x0_f = topi.cast(x_zero, dtype)
    x1_f = topi.cast(x_one, dtype)
    y0_f = topi.cast(y_zero, dtype)
    y1_f = topi.cast(y_one, dtype)
    w_a = topi.expand_dims(
        topi.multiply(topi.subtract(x1_f, x_temp), topi.subtract(y1_f, y_temp)), 1)
    w_b = topi.expand_dims(
        topi.multiply(topi.subtract(x1_f, x_temp), topi.subtract(y_temp, y0_f)), 1)
    w_c = topi.expand_dims(
        topi.multiply(topi.subtract(x_temp, x0_f), topi.subtract(y1_f, y_temp)), 1)
    w_d = topi.expand_dims(
        topi.multiply(topi.subtract(x_temp, x0_f), topi.subtract(y_temp, y0_f)), 1)

    output = topi.add(
        topi.add(
            topi.add(topi.multiply(w_a, i_a), topi.multiply(w_b, i_b)),
            topi.multiply(w_c, i_c)),
        topi.multiply(w_d, i_d))
    return output
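
# --- Illustrative scalar NumPy reference (not from the original source) for the
# bilinear weights computed above: a sample at normalised coordinates (x, y) in
# [-1, 1]^2 is a weighted sum of its four neighbouring pixels, with weights
# w_a, w_b, w_c, w_d exactly as in _interpolate.
def bilinear_reference(im, x, y):
    import numpy as np
    h, w = im.shape                       # single-channel (H, W) image
    xf = (x + 1.0) * w / 2.0              # [-1, 1] -> [0, w]
    yf = (y + 1.0) * h / 2.0
    x0, y0 = int(np.floor(xf)), int(np.floor(yf))
    x1, y1 = x0 + 1, y0 + 1
    x0, x1 = np.clip([x0, x1], 0, w - 1)
    y0, y1 = np.clip([y0, y1], 0, h - 1)
    w_a = (x1 - xf) * (y1 - yf)           # weight for im[y0, x0]
    w_b = (x1 - xf) * (yf - y0)           # weight for im[y1, x0]
    w_c = (xf - x0) * (y1 - yf)           # weight for im[y0, x1]
    w_d = (xf - x0) * (yf - y0)           # weight for im[y1, x1]
    return w_a * im[y0, x0] + w_b * im[y1, x0] + w_c * im[y0, x1] + w_d * im[y1, x1]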
def compute_reshape(attrs, inputs, out_info):
    """Compute definition of reshape"""
    oshape = out_info[0].shape
    return topi.reshape(inputs[0], oshape)
    (np.zeros, 'shape'), (np.ones, 'shape'), (np.zeros, 'shape'),
    (np.zeros, 'shape'), (np.random.normal, 'size'), (np.random.normal, 'size')
]

x = tvm.placeholder((batch_size, num_timesteps * num_input), 'float32')
y = tvm.placeholder((batch_size, num_classes), 'float32')
s = tvm.placeholder((batch_size, num_hidden), 'float32')
h = tvm.placeholder((batch_size, num_hidden), 'float32')

weights = [tvm.placeholder(x, 'float32', name="weights") for x in sizes]

xs = topi.split(topi.reshape(x, (batch_size, num_timesteps, num_input)),
                num_timesteps, axis=1)
xs = [topi.reshape(x, (batch_size, num_input)) for x in xs]
new_s = s
new_h = h
for i in range(num_timesteps):
    inp = topi.concatenate([xs[i], new_h], 1)
    g = topi.tanh(topi.matmul(inp, weights[0]) + weights[1])
    j = topi.sigmoid(topi.matmul(inp, weights[2]) + weights[3])
    f = topi.sigmoid(topi.matmul(inp, weights[4]) + weights[5])
    o = topi.sigmoid(topi.matmul(inp, weights[6]) + weights[7])
    new_s = new_s * f + g * j
    new_h = topi.tanh(new_s) * o

logits = topi.matmul(new_h, weights[8]) + weights[9]