def transform_module(
    self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
) -> tvm.ir.IRModule:
    """Apply the DepthwiseConv2DRewriter to every function in the module.

    Parameters
    ----------
    mod : tvm.ir.IRModule
        The module whose functions are rewritten in place.
    ctx : tvm.ir.transform.PassContext
        The pass context (unused by this pass).

    Returns
    -------
    tvm.ir.IRModule
        The same module with each function rewritten.
    """
    for gvar, function in mod.functions.items():
        rewritten = rewrite(DepthwiseConv2DRewriter(), function)
        mod.update_func(gvar, rewritten)
    return mod
def transform_module(
    self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
) -> tvm.ir.IRModule:
    """Run the split rewriters over every function in the module.

    PartitionedSplitRewriter is applied first, then SplitRewriter, and the
    result replaces the function in the module.

    Parameters
    ----------
    mod : tvm.ir.IRModule
        The module whose functions are rewritten in place.
    ctx : tvm.ir.transform.PassContext
        The pass context (unused by this pass).

    Returns
    -------
    tvm.ir.IRModule
        The same module with each function rewritten.
    """
    for gvar, function in mod.functions.items():
        rewritten = rewrite(PartitionedSplitRewriter(), function)
        rewritten = rewrite(SplitRewriter(), rewritten)
        mod.update_func(gvar, rewritten)
    return mod
def transform_module(self, mod: tvm.ir.IRModule, _) -> tvm.ir.IRModule:
    """Apply a freshly constructed NPU pass to every NPU function.

    A new ``npu_pass_class`` instance is created per function so passes do
    not share state across functions.

    Parameters
    ----------
    mod : tvm.ir.IRModule
        The module whose NPU functions are transformed in place.

    Returns
    -------
    tvm.ir.IRModule
        The same module with transformed NPU functions.
    """
    for gvar, function in mod.functions.items():
        # Only functions identified as NPU functions are touched.
        if not is_npu_func(function):
            continue
        npu_pass = npu_pass_class(*self.args, **self.kwargs)
        transformed = npu_pass.transform_npu_function(gvar, function)
        mod.update_func(gvar, transformed)
    return mod
def transform_module(self, mod: tvm.ir.IRModule, _) -> tvm.IRModule:
    """A pass to optimize the layout of NPU operations.

    If both the producer and consumer of a tensor are NPU operators, the
    layout is converted from NHWC to NHCWB16, the layout the NPU uses
    internally.

    Parameters
    ----------
    mod : tvm.ir.IRModule
        A module containing exactly one function.

    Returns
    -------
    tvm.IRModule
        The module with its single function layout-optimized.
    """
    items = mod.functions.items()
    assert len(items) == 1, "Module can only contain one function."
    gvar, function = items[0]
    mod.update_func(gvar, LayoutOptimization().visit(function))
    return mod
def relay_to_tir(mod: tvm.ir.IRModule) -> tvm.ir.IRModule:
    """
    This is the hook for python-based lowering of a Relay module which
    lowers NPU external functions to TIR.

    Parameters
    ----------
    mod : tvm.ir.IRModule
        This is the Relay module.

    Returns
    -------
    mod : tvm.ir.IRModule
        The Relay module with scheduled NPU external functions.
    """
    mod = OutlineCompilerFunctions("ethos-u")(mod)
    # Relay-level optimization pipeline, applied in order.
    for relay_pass in (
        LegalizeEthosU(),
        LUTsOptimizer(),
        relay.transform.InferType(),
        IdentityOptimizer(),
        LayoutOptimizer(),
        relay.transform.InferType(),
    ):
        mod = relay_pass(mod)

    device_contexts = {
        gvar: "ethos-u"
        for gvar, function in mod.functions.items()
        if util.is_npu_func(function)
    }
    mod = mod.with_attr("device_contexts", device_contexts)

    # Use the cascader if it is enabled for the U55 accelerator, otherwise use
    # copy_constants scheduler
    if not util.is_cascader_enabled():
        return LowerToTIR(copy_constants())(mod)

    if util.get_accelerator_config() == "ethos-u65-256":
        raise ValueError("Cascading is not supported for the U65 accelerator")
    workspace_memory_pools = mod.attrs["workspace_memory_pools"]
    if not workspace_memory_pools:
        raise ValueError(
            "Workspace memory pool needs to be provided for the U55 cascader"
        )
    if len(workspace_memory_pools.pools) != 1:
        raise ValueError(
            "Exactly one workspace pool needs to be provided for the U55 cascader"
        )
    memory_pressure = _calculate_memory_pressure(mod)
    sram = extract_memory_info(workspace_memory_pools.pools[0], memory_pressure)
    return LowerToTIR(_ethos_u55_cascader(sram, util.is_striping_enabled()))(mod)
def transform_module(self, mod: tvm.ir.IRModule, _) -> tvm.IRModule:
    """Apply the multiple-LUTs optimization to the module's single function.

    Parameters
    ----------
    mod : tvm.IRModule
        The module to apply the optimization pass for multiple LUTs to.
        Must contain exactly one function.

    Returns
    -------
    mod : tvm.IRModule
        New module with optimized LUTs.
    """
    items = mod.functions.items()
    assert len(items) == 1, "Module can only contain one function."
    gvar, function = items[0]
    mod.update_func(gvar, OptimizeLUTs().visit(function))
    return mod
def relay_to_tir(self, mod: tvm.ir.IRModule) -> tvm.ir.IRModule:
    """
    This is the hook for python-based lowering of a Relay module which
    lowers NPU external functions to TIR.

    Parameters
    ----------
    mod : tvm.ir.IRModule
        This is the Relay module.

    Returns
    -------
    mod : tvm.ir.IRModule
        The Relay module with scheduled NPU external functions.
    """
    mod = _ffi_api.OutlineCompilerFunctions(self.target_name)(mod)
    for gvar, function in mod.functions.items():
        # Skip functions not marked for this compiler target.
        attrs = function.attrs
        if "Compiler" not in attrs or attrs["Compiler"] != self.target_name:
            continue
        lowered = self._lower_relay_to_tir(function)
        lowered = self._lower_stir_to_nstir(lowered)
        mod.update_func(gvar, lowered)
    return mod
def relay_to_tir(mod: tvm.ir.IRModule) -> tvm.ir.IRModule:
    """
    This is the hook for python-based lowering of a Relay module which
    lowers NPU external functions to TIR.

    Parameters
    ----------
    mod : tvm.ir.IRModule
        This is the Relay module.

    Returns
    -------
    mod : tvm.ir.IRModule
        The Relay module with scheduled NPU external functions.
    """
    mod = OutlineCompilerFunctions("ethos-u")(mod)
    # Relay-level optimization pipeline, applied in order.
    for relay_pass in (
        LegalizeEthosU(),
        LUTsOptimizer(),
        relay.transform.InferType(),
        IdentityOptimizer(),
        LayoutOptimizer(),
        relay.transform.InferType(),
    ):
        mod = relay_pass(mod)

    device_contexts = {
        gvar: "ethos-u"
        for gvar, function in mod.functions.items()
        if util.is_npu_func(function)
    }
    mod = mod.with_attr("device_contexts", device_contexts)

    # We are currently using copy_constants scheduler In the long run,
    # this should be a single intelligent and a composite scheduler
    # that can perform scheduling based on user inputs such as
    # scratch memory size.
    return LowerToTIR(copy_constants)(mod)