def test_ethosu_conv2d():
    """Lower a single ethosu_conv2d to TE and check the compute and its inputs.

    Builds a relay function around one ethosu_conv2d op, infers types, lowers
    it to TE and verifies the resulting compute name plus the shapes of the
    four inputs (ifm, weight, scale_bias, lut).
    """
    ifm = relay.var("ifm", shape=(1, 10, 20, 30), dtype="uint8")
    weight = relay.var("weight", shape=(40, 3, 3, 30), dtype="uint8")
    scale_bias = relay.var("scale_bias", shape=(40, 10), dtype="uint8")
    lut = relay.var("lut", shape=(), dtype="uint8")
    conv = ethosu_ops.ethosu_conv2d(
        ifm,
        weight,
        scale_bias,
        lut,
        ifm_scale=0.5,
        ifm_zero_point=10,
        weight_zero_point=12,
        ofm_scale=0.25,
        ofm_zero_point=14,
        ofm_channels=40,
        padding=(1, 1, 1, 1),
        kernel_shape=(3, 3),
        strides=(1, 1),
        dilation=(1, 1),
    )
    expr = relay.Function(relay.analysis.free_vars(conv), conv)
    mod = tvm.IRModule.from_expr(expr)
    mod = relay.transform.InferType()(mod)
    lowered = lower_to_te(mod["main"])
    assert len(lowered.outputs) == 1
    assert len(lowered.inputs) == 4
    conv2d_compute = Convolution2DCompute.from_output(lowered.outputs[0])
    assert conv2d_compute.conv2d.name == "ethosu_conv2d"
    # The inputs carry no guaranteed ordering, so compare shapes as a set.
    input_shapes = {tuple(x.value for x in inp.shape) for inp in lowered.inputs}
    assert input_shapes == {(40, 10), (1, 10, 20, 30), (40, 3, 3, 30), ()}
def callback(self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map) -> tvm.relay.Expr:
    """Replace a matched qnn.conv2d composite function with an ethosu_conv2d op."""
    params = ethosu_patterns.QnnConv2DParams(post.op.body)
    params.ifm.tensor = post.args[0]

    # Index of the channel axis for each supported data layout.
    channels_map = {
        "NHWC": 3,
    }
    # (kernel_h, kernel_w) slice of the weight shape for each weight layout.
    kernel_size_map = {
        "HWIO": params.weights.shape[0:2],
        "OHWI": params.weights.shape[1:3],
        "HWOI": params.weights.shape[0:2],
    }

    # The NPU op expects its weights in OHWI ordering.
    weight_to_ohwi_transform_map = {"HWIO": [3, 0, 1, 2]}
    ohwi_weights = np.transpose(
        params.weights.values, weight_to_ohwi_transform_map[str(params.weights.layout)]
    )

    # A fused clip maps onto the op's CLIP activation; otherwise no activation.
    activation, clip_min, clip_max = "NONE", 0, 0
    if params.activation:
        activation = {"clip": "CLIP"}[params.activation.op.name]
        clip_min = int(params.activation.attrs.a_min)
        clip_max = int(params.activation.attrs.a_max)

    # Pack biases together with the quantization rescale parameters.
    scale_bias = vela_api.pack_biases(
        biases=params.biases.tensor.data.asnumpy(),
        ifm_scale=params.ifm.q_params.scale_f32,
        ifm_dtype=np.dtype(params.ifm.dtype),
        weight_scales=params.weights.q_params.scale_f32,
        ofm_scale=params.ofm.q_params.scale_f32,
        is_activation_tanh_or_sigmoid=activation in ["TANH", "SIGMOID"],
    )

    return ethosu_ops.ethosu_conv2d(
        ifm=post.args[0],
        weight=relay.const(ohwi_weights, params.weights.values.dtype),
        scale_bias=relay.const(scale_bias, "uint8"),
        lut=relay.const([], dtype="int8"),
        ifm_scale=float(params.ifm.q_params.scale_f32),
        ifm_zero_point=int(params.ifm.q_params.zero_point),
        weight_zero_point=int(params.weights.q_params.zero_point),
        ofm_scale=float(params.ofm.q_params.scale_f32),
        ofm_zero_point=int(params.ofm.q_params.zero_point),
        kernel_shape=kernel_size_map[str(params.weights.layout)],
        ofm_channels=params.ofm.shape[channels_map[str(params.ofm.layout)]],
        strides=params.strides,
        padding=params.padding,
        dilation=params.dilation,
        activation=activation,
        clip_min=clip_min,
        clip_max=clip_max,
        upscale="NONE",
        ifm_layout=str(params.ifm.layout),
        ofm_layout=str(params.ofm.layout),
    )
def callback(self, pre, post, node_map):
    """Replace a matched fully-connected composite with a 1x1 ethosu_conv2d."""
    params = ethosu_patterns.FullyConnectedParams(post.op.body)
    params.ifm.tensor = post.args[0]

    # Reshape the IFM into NHWC (1, 1, 1, C) unless it already has that form.
    ifm = post.args[0]
    ifm_shape = params.ifm.shape
    if len(ifm_shape) != 4 or not ifm_shape[1] == ifm_shape[2] == 1:
        ifm = relay.reshape(ifm, (1, 1, 1, ifm_shape[-1]))

    # Dense weights (O, I) become a 1x1 OHWI convolution kernel (O, 1, 1, I).
    ohwi_weights = np.expand_dims(params.weights.values, axis=(1, 2))

    activation, clip_min, clip_max = "NONE", 0, 0
    if params.activation:
        activation = "CLIP"
        clip_min = int(params.activation.attrs.a_min)
        clip_max = int(params.activation.attrs.a_max)

    # Fall back to zero biases (one per OFM channel) when none were fused.
    if params.biases:
        bias_values = params.biases.tensor.data.asnumpy()
    else:
        bias_values = np.zeros((params.ofm.shape[-1]))
    scale_bias = vela_api.pack_biases(
        biases=bias_values,
        ifm_scale=params.ifm.q_params.scale_f32,
        ifm_dtype=np.dtype(params.ifm.dtype),
        weight_scales=params.weights.q_params.scale_f32,
        ofm_scale=params.ofm.q_params.scale_f32,
        is_activation_tanh_or_sigmoid=False,
    )

    ethosu_fc = ethosu_ops.ethosu_conv2d(
        ifm=ifm,
        weight=relay.const(ohwi_weights, params.weights.values.dtype),
        scale_bias=relay.const(scale_bias, "uint8"),
        lut=relay.const([], dtype="int8"),
        ifm_scale=float(params.ifm.q_params.scale_f32),
        ifm_zero_point=int(params.ifm.q_params.zero_point),
        weight_zero_point=int(params.weights.q_params.zero_point),
        ofm_scale=float(params.ofm.q_params.scale_f32),
        ofm_zero_point=int(params.ofm.q_params.zero_point),
        kernel_shape=[1, 1],
        ofm_channels=params.weights.shape[0],
        strides=(1, 1),
        padding=(0, 0, 0, 0),
        dilation=(1, 1),
        activation=activation,
        clip_min=clip_min,
        clip_max=clip_max,
        upscale="NONE",
        ifm_layout="NHWC",
        ofm_layout="NHWC",
    )

    # Restore the expected OFM shape when it is not already NHWC (1, 1, 1, C).
    ofm_shape = params.ofm.shape
    if len(ofm_shape) != 4 or not ofm_shape[1] == ofm_shape[2] == 1:
        ethosu_fc = relay.reshape(ethosu_fc, ofm_shape)
    return ethosu_fc
def make_ethosu_conv2d(
    ifm,
    ifm_channels,
    ofm_channels,
    kernel_shape,
    padding,
    strides,
    dilation,
    lut=relay.const([], dtype="int8"),
    activation="NONE",
    ifm_layout="NHWC",
    ofm_layout="NHWC",
    weight_dtype="int8",
    scale_bias_dtype="uint8",
    rounding_mode="TFL",
    upscale="NONE",
):
    """Construct an ethosu_conv2d call with generated weight and scale-bias constants."""
    # OHWI weight shape for the requested kernel and channel counts.
    weight_shape = (ofm_channels, kernel_shape[0], kernel_shape[1], ifm_channels)
    padding = get_pad_tuple(padding, kernel_shape)
    # NOTE: generation order (scale_bias first, then weights) is kept so a
    # stateful data generator produces the same constants as before.
    scale_bias = relay.const(
        generate_weights_data((weight_shape[0], 10), scale_bias_dtype),
        dtype=scale_bias_dtype,
    )
    weight = relay.const(generate_weights_data(weight_shape, weight_dtype), dtype=weight_dtype)
    clip = activation == "CLIP"
    return ethosu_ops.ethosu_conv2d(
        ifm,
        weight,
        scale_bias,
        lut=lut,
        ifm_scale=0.5,
        ifm_zero_point=10,
        weight_zero_point=12,
        ofm_scale=0.25,
        ofm_zero_point=14,
        kernel_shape=kernel_shape,
        ofm_channels=ofm_channels,
        strides=strides,
        padding=padding,
        dilation=dilation,
        activation=activation,
        clip_min=10 if clip else 0,
        clip_max=100 if clip else 0,
        rounding_mode=rounding_mode,
        upscale=upscale,
        ifm_layout=ifm_layout,
        ofm_layout=ofm_layout,
    )
def callback(self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map) -> tvm.relay.Expr:
    """Replace a matched qnn.conv2d_transpose composite with an ethosu_conv2d.

    The transpose convolution is legalized as a direct convolution over a
    zero-upscaled IFM (upscale="ZEROS") using a spatially-flipped kernel,
    then cropped back to the expected output size with strided_slice.
    """
    params = ethosu_patterns.QnnConv2DTransposeParams(post.op.body)
    params.ifm.tensor = post.args[0]

    ofm_shape = params.ofm.shape
    legalize_padding = params.legalize_padding

    # Convert the IOHW transpose-conv kernel to OHWI and flip H/W, which is
    # the equivalent direct-convolution kernel.
    weight_to_ohwi_transform_map = {"IOHW": [1, 2, 3, 0]}
    weights_values = params.weights.values
    weights_values_ohwi = np.transpose(
        weights_values, weight_to_ohwi_transform_map[str(params.weights.layout)]
    )
    weights_values_ohwi = np.flip(weights_values_ohwi, (1, 2))
    weights = relay.const(weights_values_ohwi, dtype=params.weights.values.dtype)

    # Biases are packed one per OFM channel, so the zero-bias fallback must be
    # sized by the output channels (previously params.ifm.shape[-1], which is
    # wrong whenever the transpose conv changes the channel count; this also
    # matches the fully-connected rewriter).
    bias_values = (
        params.biases.tensor.data.asnumpy()
        if params.biases
        else np.zeros((params.ofm.shape[-1]))
    )
    scale_bias = vela_api.pack_biases(
        biases=bias_values,
        ifm_scale=params.ifm.q_params.scale_f32,
        ifm_dtype=np.dtype(params.ifm.dtype),
        weight_scales=params.weights.q_params.scale_f32,
        ofm_scale=params.ofm.q_params.scale_f32,
        is_activation_tanh_or_sigmoid=False,
    )

    reduced_op = ethosu_ops.ethosu_conv2d(
        ifm=post.args[0],
        weight=weights,
        scale_bias=relay.const(scale_bias, "uint8"),
        lut=relay.const([], dtype="int8"),
        ifm_scale=float(params.ifm.q_params.scale_f32),
        ifm_zero_point=int(params.ifm.q_params.zero_point),
        weight_zero_point=int(params.weights.q_params.zero_point),
        ofm_scale=float(params.ofm.q_params.scale_f32),
        ofm_zero_point=int(params.ofm.q_params.zero_point),
        kernel_shape=params.kernel_shape,
        ofm_channels=int(ofm_shape[-1]),
        strides=(1, 1),
        padding=legalize_padding,
        dilation=params.dilation,
        ifm_layout=str(params.ifm.layout),
        ofm_layout=str(params.ofm.layout),
        upscale="ZEROS",
    )

    # Remove additional padding by 'cropping' back to expected size
    return relay.strided_slice(reduced_op, (0, 0, 0, 0), ofm_shape)