def _create_npu_feature_map( serial_feature_map: spec.SerialFeatureMap) -> vapi.NpuFeatureMap: """This is a helper function to capture a list of arguments to create Vela NpuFeatureMap object. """ layout_map = { "NHWC": vapi.NpuLayout.NHWC, "NHCWB16": vapi.NpuLayout.NHCWB16 } datatype_map = { "uint8": vapi.NpuDataType.UINT8, "int8": vapi.NpuDataType.INT8, "uint16": vapi.NpuDataType.UINT16, "int16": vapi.NpuDataType.INT16, "int32": vapi.NpuDataType.INT32, } layout = str(serial_feature_map.layout.value) data_type = str(serial_feature_map.data_type.value) date_type_bytes = np.iinfo(np.dtype(data_type)).bits // 8 assert layout in layout_map.keys() assert data_type in datatype_map.keys() nfm = vapi.NpuFeatureMap() nfm.data_type = datatype_map[data_type] nfm.shape = vapi.NpuShape3D( int(serial_feature_map.height), int(serial_feature_map.width), int(serial_feature_map.channels), ) nfm.tiles = vapi.NpuTileBox( int(serial_feature_map.tile_height_0), int(serial_feature_map.tile_height_1), int(serial_feature_map.tile_width_0), [ serial_feature_map.tile_address_0, serial_feature_map.tile_address_1, serial_feature_map.tile_address_2, serial_feature_map.tile_address_3, ], ) nfm.quantization = _create_npu_quantization(serial_feature_map.scale, serial_feature_map.zero_point) nfm.layout = layout_map[layout] nfm.strides = vapi.NpuShape3D( int(serial_feature_map.stride_h.value) * date_type_bytes, int(serial_feature_map.stride_w.value) * date_type_bytes, int(serial_feature_map.stride_c.value) * date_type_bytes, ) return nfm
def test_translate_ethosu_depthwise_conv2d(): def extract_ethosu_depthwise_conv2d_extern_call(mod): # There should only be a single function assert len(mod.functions.items()) == 1 primfunc = mod.functions.items()[0][1] ethosu_depthwise_conv2d_calls = list() def populate_ethosu_depthwise_conv2d_calls(stmt): if ( isinstance(stmt, tvm.tir.Call) and stmt.op.name == "tir.call_extern" and stmt.args[0] == "ethosu_depthwise_conv2d" ): ethosu_depthwise_conv2d_calls.append(stmt) stmt_functor.post_order_visit(primfunc.body, populate_ethosu_depthwise_conv2d_calls) return ethosu_depthwise_conv2d_calls[0] depthwise_conv2d_call = extract_ethosu_depthwise_conv2d_extern_call(SingleEthosuDepthwiseConv2D) npu_op, w_zero_point = tir_to_cs_translator.translate_ethosu_depthwise_conv2d( depthwise_conv2d_call ) assert npu_op.ifm.data_type == vapi.NpuDataType.INT8 assert npu_op.ifm.shape == vapi.NpuShape3D(8, 8, 3) assert npu_op.ifm.tiles.height_0 == vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]).height_0 assert npu_op.ifm.tiles.height_1 == vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]).height_1 assert npu_op.ifm.tiles.width_0 == vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]).width_0 assert npu_op.ifm.quantization == pytest.approx(vapi.NpuQuantization(0.6, 11)) assert npu_op.ifm.layout == vapi.NpuLayout.NHWC assert npu_op.ifm.strides == vapi.NpuShape3D(24, 3, 1) # Compare OFM assert npu_op.ofm.data_type == vapi.NpuDataType.INT8 assert npu_op.ofm.shape == vapi.NpuShape3D(6, 7, 3) assert npu_op.ofm.tiles.height_0 == vapi.NpuTileBox(6, 0, 8, [0, 0, 0, 0]).height_0 assert npu_op.ofm.tiles.height_1 == vapi.NpuTileBox(6, 0, 7, [0, 0, 0, 0]).height_1 assert npu_op.ofm.tiles.width_0 == vapi.NpuTileBox(6, 0, 7, [0, 0, 0, 0]).width_0 assert npu_op.ofm.quantization == pytest.approx(vapi.NpuQuantization(0.26, 15)) assert npu_op.ofm.layout == vapi.NpuLayout.NHWC assert npu_op.ofm.strides == vapi.NpuShape3D(21, 3, 1) # Compare kernel and padding assert ( npu_op.kernel.__dict__ == vapi.NpuKernel(w=2, h=3, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1).__dict__ ) assert npu_op.padding == vapi.NpuPadding(top=0, left=0, bottom=0, right=0) # Compare activation assert npu_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU assert npu_op.activation.min == 0 assert npu_op.activation.max == pytest.approx(23.4) # Compare ifm upscaling assert npu_op.ifm_upscale == vapi.NpuResamplingMode.NONE # Compare weight quantization parameters assert w_zero_point == 13
def test_translate_ethosu_conv2d(): test_cases = [ { # Stimulus "tir_module": SingleEthosUConv2D, "param_dict": { 1: np.random.randint( np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 3, 16], "uint8" ), 2: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [16], "int32"), }, # Reference outputs "ref": [ { "ifm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(8, 8, 3), "tiles": vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.5, 10), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(24, 3, 1), }, "ofm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(8, 8, 16), "tiles": vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.25, 14), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(128, 16, 1), }, "kernel": vapi.NpuKernel( w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1 ), "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0), "activation": { "op": vapi.NpuActivationOp.NONE_OR_RELU, "min": -3.5, "max": 60.25, }, "ifm_upscale": vapi.NpuResamplingMode.NONE, "w_zero_point": 12, } ], }, { "tir_module": MultiEthosUConv2D, "param_dict": { 1: np.random.randint( np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 3, 32], "uint8" ), 2: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [32], "int32"), 3: np.random.randint( np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 32, 8], "uint8" ), 4: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [8], "int32"), }, # Reference Outputs "ref": [ { "ifm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(4, 8, 3), "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.5, 10), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(24, 3, 1), }, "ofm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(4, 8, 32), "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.25, 14), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(256, 32, 1), }, "kernel": vapi.NpuKernel( w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1 ), "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0), "activation": {"op": None}, "ifm_upscale": vapi.NpuResamplingMode.NONE, "w_zero_point": 12, }, { "ifm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(4, 8, 32), "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.5, 10), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(256, 32, 1), }, "ofm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(4, 8, 8), "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.25, 14), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(64, 8, 1), }, "kernel": vapi.NpuKernel( w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1 ), "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0), "activation": { "op": vapi.NpuActivationOp.NONE_OR_RELU, "min": -3.5, "max": 60.25, }, "ifm_upscale": vapi.NpuResamplingMode.NONE, "w_zero_point": 12, }, { "ifm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(4, 8, 3), "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.5, 10), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(24, 3, 1), }, "ofm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(4, 8, 32), "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.25, 14), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(256, 32, 1), }, "kernel": vapi.NpuKernel( w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1 ), "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0), "activation": { "op": vapi.NpuActivationOp.NONE_OR_RELU, "min": -3.5, "max": 60.25, }, "ifm_upscale": vapi.NpuResamplingMode.NONE, "w_zero_point": 12, }, { "ifm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(4, 8, 32), "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.5, 10), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(256, 32, 1), }, "ofm": { "data_type": vapi.NpuDataType.UINT8, "shape": vapi.NpuShape3D(4, 8, 8), "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]), "quantization": vapi.NpuQuantization(0.25, 14), "layout": vapi.NpuLayout.NHWC, "strides": vapi.NpuShape3D(64, 8, 1), }, "kernel": vapi.NpuKernel( w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1 ), "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0), "activation": { "op": vapi.NpuActivationOp.NONE_OR_RELU, "min": -3.5, "max": 60.25, }, "ifm_upscale": vapi.NpuResamplingMode.NONE, "w_zero_point": 12, }, ], }, ] def extract_ethosu_conv2d_extern_calls(mod): """This function will obtain all ethosu_conv2d calls from a NPU TIR module Parameters ---------- mod : tvm.IRModule This is a NPU TIR Module Returns ------- list of tvm.tir.Call objects that are tir extern calls for ethosu_conv2d """ # There should only be a single function assert len(mod.functions.items()) == 1 primfunc = mod.functions.items()[0][1] ethosu_conv2d_calls = list() def populate_ethosu_conv2d_calls(stmt): if ( isinstance(stmt, tvm.tir.Call) and stmt.op.name == "tir.call_extern" and stmt.args[0] == "ethosu_conv2d" ): ethosu_conv2d_calls.append(stmt) stmt_functor.post_order_visit(primfunc.body, populate_ethosu_conv2d_calls) return ethosu_conv2d_calls for test_case in test_cases: ethosu_conv2d_calls = extract_ethosu_conv2d_extern_calls(test_case["tir_module"]) for idx, ethosu_conv2d_call in enumerate(ethosu_conv2d_calls): ref = test_case["ref"][idx] npu_op, w_zero_point = tir_to_cs_translator.translate_ethosu_conv2d(ethosu_conv2d_call) # Compare IFM assert npu_op.ifm.data_type == ref["ifm"]["data_type"] assert npu_op.ifm.shape == ref["ifm"]["shape"] assert npu_op.ifm.tiles.height_0 == ref["ifm"]["tiles"].height_0 assert npu_op.ifm.tiles.height_1 == ref["ifm"]["tiles"].height_1 assert npu_op.ifm.tiles.width_0 == ref["ifm"]["tiles"].width_0 assert npu_op.ifm.quantization == ref["ifm"]["quantization"] assert npu_op.ifm.layout == ref["ifm"]["layout"] assert npu_op.ifm.strides == ref["ifm"]["strides"] # Compare OFM assert npu_op.ofm.data_type == ref["ofm"]["data_type"] assert npu_op.ofm.shape == ref["ofm"]["shape"] assert npu_op.ofm.tiles.height_0 == ref["ofm"]["tiles"].height_0 assert npu_op.ofm.tiles.height_1 == ref["ofm"]["tiles"].height_1 assert npu_op.ofm.tiles.width_0 == ref["ofm"]["tiles"].width_0 assert npu_op.ofm.quantization == ref["ofm"]["quantization"] assert npu_op.ofm.layout == ref["ofm"]["layout"] assert npu_op.ofm.strides == ref["ofm"]["strides"] # Compare kernel and padding assert npu_op.kernel.__dict__ == ref["kernel"].__dict__ assert npu_op.padding == ref["padding"] # Compare activation if ref["activation"]["op"] is None: assert npu_op.activation is None else: assert npu_op.activation.op_type == ref["activation"]["op"] assert npu_op.activation.min == ref["activation"]["min"] assert npu_op.activation.max == ref["activation"]["max"] # Compare ifm upscaling assert npu_op.ifm_upscale == ref["ifm_upscale"] # Compare weight quantization parameters assert w_zero_point == ref["w_zero_point"]