def _compile(ext_func):
    """
    This is the main wrapper that accepts an external
    relay function and runs all the passes to lower it down
    to a command stream.

    Parameters
    ----------
    ext_func : tvm.relay.function.Function
        The partitioned relay function

    Returns
    -------
    cmms : str
        A hex string of the bytes of the command stream
    encoded_constants : str
        A hex string of the bytes that includes the concatenated
        encoded weights, encoded biases and scales.
    scratch_size : int
        The size of the scratch buffer needed.
    """
    mod = tvm.IRModule()
    mod["main"] = ext_func
    mod = LegalizeEthosU()(mod)
    mod = relay.transform.InferType()(mod)
    # We are currently using the copy_constants scheduler. In the long run,
    # this should be a single intelligent and composite scheduler
    # that can perform scheduling based on user inputs such as
    # scratch memory size.
    tir_mod, params = lower_to_tir(mod["main"], copy_constants())
    cmms, encoded_constants, scratch_size = tir_to_cs_translator.translate(tir_mod, params)
    return cmms, encoded_constants, scratch_size
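
# Illustrative usage sketch only (the helper and variable names below are
# hypothetical, not part of this module): `_compile` is intended to be called
# once per partitioned NPU function, e.g. on each "ethos-u" external function
# of a module produced by `partition_for_ethosu`.
def _example_compile_npu_funcs(partitioned_mod):
    """Sketch: run _compile over every "ethos-u" external function in a
    partitioned module and collect each (cmms, encoded_constants,
    scratch_size) triple keyed by the function's global symbol."""
    results = {}
    for gvar, func in partitioned_mod.functions.items():
        # Only partitioned external functions carry a "Compiler" attribute.
        if isinstance(func, relay.Function) and func.attrs and "Compiler" in func.attrs:
            if func.attrs["Compiler"] == "ethos-u":
                results[gvar.name_hint] = _compile(func)
    return results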
def relay_to_tir(mod: tvm.ir.IRModule) -> tvm.ir.IRModule:
    """
    This is the hook for python-based lowering of a Relay module
    which lowers NPU external functions to TIR.

    Parameters
    ----------
    mod : tvm.ir.IRModule
        This is the Relay module.

    Returns
    -------
    mod : tvm.ir.IRModule
        The Relay module with scheduled NPU external functions.
    """
    mod = OutlineCompilerFunctions("ethos-u")(mod)
    mod = LegalizeEthosU()(mod)
    mod = LUTsOptimizer()(mod)
    mod = relay.transform.InferType()(mod)
    mod = IdentityOptimizer()(mod)
    mod = LayoutOptimizer()(mod)
    mod = relay.transform.InferType()(mod)

    device_contexts = {
        gv: "ethos-u" for gv, _ in filter(lambda x: util.is_npu_func(x[1]), mod.functions.items())
    }
    mod = mod.with_attr("device_contexts", device_contexts)

    # Use the cascader if it is enabled for the U55 accelerator, otherwise use
    # the copy_constants scheduler.
    if util.is_cascader_enabled():
        if util.get_accelerator_config() == "ethos-u65-256":
            raise ValueError("Cascading is not supported for the U65 accelerator")

        workspace_memory_pools = mod.attrs["workspace_memory_pools"]
        if not workspace_memory_pools:
            raise ValueError("Workspace memory pool needs to be provided for the U55 cascader")
        if len(workspace_memory_pools.pools) != 1:
            raise ValueError("Exactly one workspace pool needs to be provided for the U55 cascader")

        memory_pressure = _calculate_memory_pressure(mod)
        sram = extract_memory_info(workspace_memory_pools.pools[0], memory_pressure)
        tir_mod = LowerToTIR(_ethos_u55_cascader(sram, util.is_striping_enabled()))(mod)
    else:
        tir_mod = LowerToTIR(copy_constants())(mod)

    return tir_mod
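
# Hedged sketch (assuming the tvm.ir.memory_pools API; the helper name, pool
# name and target below are illustrative): the cascader branch above expects
# the module to carry exactly one pool in its "workspace_memory_pools"
# attribute, which could be attached along these lines before lowering.
def _example_attach_sram_pool(mod, size_bytes):
    """Sketch: attach a single SRAM workspace pool so the U55 cascader path
    in relay_to_tir can extract its memory description from it."""
    from tvm.ir.memory_pools import PoolInfoProperties, WorkspaceMemoryPools, WorkspacePoolInfo

    sram = WorkspacePoolInfo(
        "SRAM",
        [tvm.target.Target("c")],
        PoolInfoProperties(size_hint_bytes=size_bytes),
    )
    # relay_to_tir reads this attribute back out via mod.attrs["workspace_memory_pools"].
    return mod.with_attr("workspace_memory_pools", WorkspaceMemoryPools([sram]))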
def test_schedule_diamond_graph():
    ifm_a = relay.var("IFM_A", shape=(1, 56, 56, 96), dtype="int8")
    conv_a = make_ethosu_conv2d(ifm_a, 96, 24, (1, 1), (0, 0), (1, 1), (1, 1))
    conv_b = make_ethosu_conv2d(conv_a, 24, 24, (1, 1), (0, 0), (1, 1), (1, 1))
    add = make_ethosu_binary_elementwise(conv_a, conv_b, 24, 24, "ADD", "int8")

    func = relay.Function(relay.analysis.free_vars(add), add)
    func = run_opt_pass(func, relay.transform.InferType())

    test_mod, _ = _lower_to_tir(func, copy_constants())
    reference_mod = DiamondGraphTir
    tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True)
def test_copy_luts():
    ifm_shape = (1, 33, 33, 11)
    ifm = relay.var("IFM", shape=ifm_shape, dtype="int8")
    lut = relay.const([i for i in range(256)], dtype="int8")
    conv = make_ethosu_conv2d(
        ifm, ifm_shape[3], 8, (3, 3), (0, 0), (1, 1), (1, 1), lut=lut, activation="TANH"
    )
    identity = make_ethosu_identity(conv, lut=lut, activation="TANH")

    func = relay.Function(relay.analysis.free_vars(identity), identity)
    func = run_opt_pass(func, relay.transform.InferType())

    func, const_dict = extract_constants(func)
    te_graph = lower_to_te(func)

    sch = te.create_schedule([te_graph.outputs[0].op])
    copy_constants()(te_graph, const_dict, sch)
    copy_luts()(te_graph, const_dict, sch)

    assert len(sch.stages) == 17
    assert ".global" in sch.stages[5].op.name
    assert ".global" in sch.stages[7].op.name
    assert ".local" in sch.stages[9].op.name
    assert ".local" in sch.stages[10].op.name
def test_constant_as_input():
    """Test to check that constants specified as inputs aren't
    interpreted as an encoded constant."""

    def get_graph():
        dtype = "uint8"
        ifm = relay.var("ifm", shape=(1, 16, 16, 32), dtype=dtype)
        conv1 = make_ethosu_conv2d(
            ifm,
            32,
            16,
            (1, 1),
            (0, 0),
            (1, 1),
            (1, 1),
        )
        scalar = relay.const(np.ones((1, 1, 1, 1), dtype=dtype), dtype=dtype)
        add1 = make_ethosu_binary_elementwise(
            conv1, scalar, ifm_channels=32, ifm2_channels=1, operator_type="ADD", ofm_dtype=dtype
        )
        func = relay.Function(relay.analysis.free_vars(add1), add1)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    tir_mod, params = _lower_to_tir(get_graph(), copy_constants())

    # Check that the tile address for the scalar constant input hasn't been
    # overwritten.
    extern_calls = tir_mod["main"].body.body.body.body.body
    binary_elementwise = extern_calls[-1].value
    args = binary_elementwise.args

    reason = "Tile address overwritten"
    assert args[26] == 0, reason
    assert args[27] == 0, reason
    assert args[28] == 0, reason

    # More generally, check that it compiles successfully to make sure
    # nothing else was overwritten. With target hooks the TIR module needs
    # a target attached and must be lowered via the unpacked API.
    tir_mod["main"] = tir_mod["main"].with_attr("target", tvm.target.Target("ethos-u"))
    tir_mod = tvm.tir.transform.MakeUnpackedAPI()(tir_mod)
    tir_to_cs_translator.translate(tir_mod, params)
def test_copy_constants():
    ifm_a = relay.var("IFM_A", shape=(1, 26, 26, 32), dtype="int8")
    conv_a = make_ethosu_conv2d(ifm_a, 32, 8, (3, 3), (0, 0), (1, 1), (1, 1))
    conv_b = make_ethosu_conv2d(conv_a, 8, 4, (1, 1), (0, 0), (1, 1), (1, 1))

    func = relay.Function(relay.analysis.free_vars(conv_b), conv_b)
    func = run_opt_pass(func, relay.transform.InferType())

    func, const_dict = extract_constants(func)
    cached_func = lower_to_te(func)

    sch = te.create_schedule([cached_func.outputs[0].op])
    planner = copy_constants()
    planner(cached_func, const_dict, sch)

    assert len(sch.stages) == 23
    assert ".global" in sch.stages[6].op.name
    assert ".global" in sch.stages[8].op.name
    assert ".global" in sch.stages[17].op.name
    assert ".global" in sch.stages[19].op.name
def relay_to_tir_func(ext_func: relay.Function) -> tvm.tir.PrimFunc:
    """
    This is the hook for python-based lowering of a relay function
    that gets offloaded to the microNPU.

    Parameters
    ----------
    ext_func : relay.Function
        This is the partitioned relay function

    Returns
    -------
    primfunc : tir.PrimFunc
        This returns the scheduled PrimFunc
    """
    assert len(ext_func.params) == 1
    input_size = util.calculate_size_bytes(ext_func.params[0])
    output_size = util.calculate_size_bytes(ext_func.body)
    mod = tvm.IRModule()
    mod["main"] = ext_func
    mod = LegalizeEthosU()(mod)
    mod = LUTsOptimizer()(mod)
    mod = LayoutOptimizer()(mod)
    mod = relay.transform.InferType()(mod)
    # We are currently using the copy_constants scheduler. In the long run,
    # this should be a single intelligent and composite scheduler
    # that can perform scheduling based on user inputs such as
    # scratch memory size.
    tir_mod, const_dict = lower_to_tir(mod["main"], copy_constants())

    for idx in const_dict.keys():
        const_dict[idx] = tvm.nd.array(const_dict[idx])

    primfunc = tir_mod["main"]
    primfunc = primfunc.with_attr("global_symbol", ext_func.attrs["global_symbol"])
    primfunc = primfunc.with_attr("ethos-u.constants", const_dict)
    primfunc = primfunc.with_attr("ethos-u.input_size", input_size)
    primfunc = primfunc.with_attr("ethos-u.output_size", output_size)
    return primfunc
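
# Illustrative usage sketch (the helper name is hypothetical): the returned
# PrimFunc carries its encoded constants and I/O sizes as attributes, which
# the downstream NPU codegen reads back out.
def _example_lower_and_inspect(ext_func):
    """Sketch: lower one partitioned NPU function and report what was
    recorded on the resulting PrimFunc."""
    primfunc = relay_to_tir_func(ext_func)
    return {
        "global_symbol": primfunc.attrs["global_symbol"],
        "input_size": primfunc.attrs["ethos-u.input_size"],
        "output_size": primfunc.attrs["ethos-u.output_size"],
        "num_constants": len(primfunc.attrs["ethos-u.constants"]),
    }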
def test_copy():
    def _get_func():
        data = relay.var("data", shape=(1, 16, 16, 32), dtype="int8")
        conv = make_ethosu_conv2d(
            data,
            32,
            8,
            (1, 1),
            (0, 0),
            (1, 1),
            (1, 1),
        )
        func = relay.Function(relay.analysis.free_vars(conv), conv)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    func = _get_func()
    mod, _ = lower_to_tir(func, cascader=copy_constants())

    script = mod.script(show_meta=True)
    test_mod = tvm.script.from_source(script)
    reference_mod = ReferenceModule
    tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True)