import numpy as np
import tvm
import tvm.testing
import topi


def BaseImplementation(input_tensor, temp_tensor, get_input, layout, padding):
    # Express the transposed convolution as an ordinary stride-1 convolution:
    # flip the kernel along both spatial axes, swap its channel axes into
    # OIHW order, then convolve the dilated-and-padded input that get_input
    # produces.
    temp_tensor = topi.flip(temp_tensor, axis=-1)
    temp_tensor = topi.flip(temp_tensor, axis=-2)
    temp_tensor = topi.transpose(temp_tensor, axes=(1, 0, 2, 3))
    out_tensor = topi.nn.conv2d(get_input(input_tensor, temp_tensor, padding),
                                temp_tensor, (1, 1), padding, (1, 1),
                                layout=layout, out_dtype=input_tensor.dtype)
    return out_tensor
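# A minimal NumPy sketch (an illustration added here, not part of the TVM
# module) of the identity BaseImplementation relies on: a stride-s transposed
# convolution equals a stride-1 correlation of the zero-dilated, (k-1)-padded
# input with the spatially flipped kernel. All names are local to this sketch.
def _check_deconv_identity():
    rng = np.random.RandomState(0)
    x = rng.randn(4, 5)                    # input feature map
    k = rng.randn(3, 3)                    # kernel
    s = 2                                  # stride of the transposed conv
    kh, kw = k.shape
    # Zero-dilate the input by the stride, then pad by kernel-1 on each side.
    d = np.zeros(((x.shape[0] - 1) * s + 1, (x.shape[1] - 1) * s + 1))
    d[::s, ::s] = x
    p = np.pad(d, ((kh - 1, kh - 1), (kw - 1, kw - 1)), mode='constant')
    # Stride-1 correlation with the flipped kernel.
    kf = k[::-1, ::-1]
    out = np.empty((p.shape[0] - kh + 1, p.shape[1] - kw + 1))
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            out[i, j] = (p[i:i + kh, j:j + kw] * kf).sum()
    # Direct transposed convolution: scatter-add x[i, j] * k.
    ref = np.zeros_like(out)
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            ref[i * s:i * s + kh, j * s:j * s + kw] += x[i, j] * k
    np.testing.assert_allclose(out, ref)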
def verify_flip(in_shape, axis):
    A = tvm.placeholder(shape=in_shape, name="A")
    B = topi.flip(A, axis) + 1

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)

        foo = tvm.build(s, [A, B], device, name="reverse")
        x_np = np.random.uniform(size=in_shape).astype(A.dtype)
        out_npy = np.flip(x_np, axis) + 1
        data_nd = tvm.nd.array(x_np, ctx)
        out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=A.dtype)
        foo(data_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in ["llvm", "cuda", "opencl", "sdaccel", "aocl_sw_emu"]:
        check_device(device)
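# Illustrative driver for verify_flip; the shape and axis values below are
# assumptions chosen to exercise positive, zero and negative axes, not a
# canonical test list.
def test_flip():
    verify_flip((3, 4, 3), 1)
    verify_flip((3, 4, 3), 0)
    verify_flip((3, 4, 3), 2)
    verify_flip((3, 4, 3), -1)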
def ConvVar(device="llvm", lib_path="./", optype=None,
            ndim=None, layout=None, dtype=None, kernels=None,
            strides=None, pad=None, dilations=None,
            hasbias=None, activation_type=None,
            config_entity=None, impl_dtype=None, channel_multiplier=None,
            use_arm32=False, cfg=None):
    '''convolution

    Args:
        device: target backend, e.g. "llvm"
        lib_path: directory the generated library is written to
        optype: operator type string; 'ConvolutionDepthwise', 'Deconvolution'
            and 'DeConvolutionDepthwise' select special paths, anything else
            is treated as a plain convolution
        ndim: input rank (used only in the generated op name)
        layout: data layout, e.g. "NCHW"
        dtype: input/kernel/output data type
        kernels: (kernel_h, kernel_w)
        strides: (stride_h, stride_w)
        pad: (pad_up, pad_down, pad_left, pad_right)
        dilations: (dilation_h, dilation_w)
        hasbias: whether a bias input is added to the output
        activation_type: fused activation, a key of activation_enum_map
        config_entity: configuration description, printed for logging
        impl_dtype: dtype used for the internal computation; defaults to dtype
        channel_multiplier: channel multiplier of the depthwise variants
        use_arm32: emit a plain schedule for arm32
        cfg: tiling configuration with keys 'VW', 'VH', 'VC', 'tile_oh',
            'tile_ow' and 'tile_co'; defaults are used when None

    Returns:
        None; the compiled library is emitted via Genlib
    '''
    use_depthwise = optype == 'ConvolutionDepthwise'
    use_deconv = optype == 'Deconvolution'
    use_deconv_depthwise = optype == 'DeConvolutionDepthwise'
    has_bias = hasbias

    ow = 1 if cfg is None else cfg['VW']
    oh = 1 if cfg is None else cfg['VH']
    oc = 1 if cfg is None else cfg['VC']
    kh, kw = kernels
    op_name = "%s_ndim%d_%s_k%d_s%d_p%d%d%d%d_d%d_act%d_vc%d_vh%d_vw%d_hasbias%d" % (
        map_conv[optype], ndim, dtype,
        kh, strides[0], pad[0], pad[1], pad[2], pad[3], dilations[0],
        activation_enum_map[activation_type], oc, oh, ow, hasbias)

    batch = tvm.var("batch")
    in_channel = tvm.var("in_channel")
    in_height, in_width = tvm.var("in_height"), tvm.var("in_width")
    pad_up, pad_down, pad_left, pad_right = pad
    print("Conv", op_name, config_entity)

    if impl_dtype is None:
        impl_dtype = dtype

    if use_depthwise or use_deconv_depthwise:
        multiplier = channel_multiplier
        out_channel = in_channel * multiplier
    else:
        out_channel = tvm.var("out_channel")

    # define placeholders
    input_tensor = in_tensor = tvm.placeholder(
        (batch, in_channel, in_height, in_width), dtype=dtype, name='in_tensor')
    if use_depthwise or use_deconv_depthwise:
        temp_tensor = kernel_tensor = tvm.placeholder(
            (in_channel, multiplier, kh, kw), dtype=dtype, name='kernel_tensor')
    elif use_deconv:
        temp_tensor = kernel_tensor = tvm.placeholder(
            (in_channel, out_channel, kh, kw), dtype=dtype, name='kernel_tensor')
    else:
        temp_tensor = kernel_tensor = tvm.placeholder(
            (out_channel, in_channel, kh, kw), dtype=dtype, name='kernel_tensor')
    if has_bias:
        bias = tvm.placeholder((out_channel,), dtype=dtype, name='bias')
        bias1 = topi.reshape(bias, (out_channel, 1, 1))
    if impl_dtype != dtype:
        input_tensor = AsType(input_tensor, impl_dtype)
        temp_tensor = AsType(temp_tensor, impl_dtype)
        if has_bias:
            bias1 = AsType(bias1, impl_dtype)

    # define compute & schedule
    if pad_up != pad_down or pad_left != pad_right:
        # asymmetric padding is folded into an explicit pad stage
        input_tensor = topi.nn.pad(input_tensor, [0, 0, pad_up, pad_left],
                                   [0, 0, pad_down, pad_right], name='data_pad')
        padding = 0, 0
    else:
        padding = pad_up, pad_left

    def GetInput(input_tensor, temp_tensor, padding):
        # Shared by both deconvolution branches: zero-dilate the input by the
        # stride, then pad it so that a stride-1 convolution with the flipped
        # kernel realizes the transposed convolution.
        _, out_c, filter_h, filter_w = temp_tensor.shape
        if out_c is None:
            print("temp_tensor.shape err")
        stride_h, stride_w = strides
        # dilate stage
        dilated_input = topi.nn.dilate(input_tensor, [1, 1, stride_h, stride_w],
                                       name='DilatedInput')
        # padding stage
        fpad_top, fpad_left, fpad_bottom, fpad_right = topi.nn.get_pad_tuple(
            padding, (filter_h, filter_w))
        bpad_top = filter_h - 1 - fpad_top
        bpad_bottom = filter_h - 1 - fpad_bottom
        bpad_left = filter_w - 1 - fpad_left
        bpad_right = filter_w - 1 - fpad_right
        padded_input = topi.nn.pad(dilated_input,
                                   [0, 0, bpad_top, bpad_left],
                                   [0, 0, bpad_bottom, bpad_right],
                                   name='PaddedInput')
        return padded_input

    if use_depthwise:
        cfg1 = (True, 1, 1, 1) if cfg is None else \
            (True, cfg["tile_oh"], cfg["tile_ow"], cfg["tile_co"])
        out_tensor = _depthwise_spatial_pack(cfg1, input_tensor, temp_tensor,
                                             strides, padding, dilations,
                                             out_dtype=impl_dtype)
    elif use_deconv:
        special_deconv = kh == 2 and kw == 2 and strides[0] == 2 and strides[1] == 2
        if special_deconv:
            out_tensor = OptimalOut(input_tensor, temp_tensor, in_channel)
        else:
            out_tensor = BaseImplementation(input_tensor, temp_tensor, GetInput,
                                            layout, padding)
    elif use_deconv_depthwise:
        # flip the kernel spatially, then run a stride-1 depthwise convolution
        temp_tensor = topi.flip(temp_tensor, axis=-1)
        temp_tensor = topi.flip(temp_tensor, axis=-2)
        out_tensor = topi.nn.depthwise_conv2d_nchw(
            GetInput(input_tensor, temp_tensor, padding), temp_tensor, (1, 1),
            padding, (1, 1), out_dtype=input_tensor.dtype)
    else:
        cfg1 = (True, 1, 1, 1) if cfg is None else \
            (True, cfg["tile_oh"], cfg["tile_ow"], cfg["tile_co"])
        out_tensor = _conv_spatial_pack_asm(cfg1, input_tensor, temp_tensor,
                                            strides, padding, dilations,
                                            out_dtype=impl_dtype)

    if has_bias:
        out_tensor = tvm.compute(
            out_tensor.shape,
            lambda n, co, h, w: out_tensor[n, co, h, w] + bias1[co][0][0],
            tag="injective")
    out_tensor = TopiActivation(out_tensor, activation_type)
    if impl_dtype != dtype:
        out_tensor = AsType(out_tensor, dtype)

    # create schedule
    if use_arm32:
        s = tvm.create_schedule(out_tensor.op)
    elif use_depthwise:
        s = schedule_depthwise_conv2d_nchw_arm(cfg, [out_tensor])
    elif use_deconv:
        if special_deconv:
            s = tvm.create_schedule([out_tensor.op])
        else:
            s = topi.generic.schedule_conv2d_nchw([out_tensor])
    elif use_deconv_depthwise:
        s = tvm.create_schedule([out_tensor.op])
    else:
        s = schedule_conv2d_nchw_arm_cpu([out_tensor])

    # generate lib
    attr = [batch, in_channel, in_height, in_width, out_channel,
            in_tensor, kernel_tensor]
    tensor_list = [*attr, bias, out_tensor] if has_bias else [*attr, out_tensor]
    Genlib(s, tensor_list, device, op_name, lib_path)
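# A minimal sketch of a ConvVar invocation; every value below is an assumption
# for illustration, and in particular the optype and activation_type strings
# must match keys of the module-level map_conv and activation_enum_map:
#
#   ConvVar(device="llvm", lib_path="./out", optype="Convolution",
#           ndim=4, layout="NCHW", dtype="float32", kernels=(3, 3),
#           strides=(1, 1), pad=(1, 1, 1, 1), dilations=(1, 1),
#           hasbias=True, activation_type="NO_ACTIVATION",
#           config_entity=None)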