def translate(tir_module, params): """This will take an tir module for the NPU and compile to command stream Parameters ---------- tir_module : tvm.IRModule The TIR module containing ethosu extern calls params : dict A dictionary containing TIR primfunc argument ordering idx to constant NDArray map accel_type : ethosu.vela.api.NpuAccelerator the accelerator variant the tir module needs to compiled to Returns ------- cs : str An hex string of the bytes of command stream encoded_constants : str An hex string of the bytes that includes concat'd encoded weights, encoded biases and scales. base_addresses : List[util.BaseAddress] base addresses to be used by the driver """ # The NPU has 6 usable regions ranging from 0-6 # The regions 0, 3, and 4 is already used for input, # output and constant, respectively (See _get_regions()). # Thus, for scratch we are left with 5, 2 and 1. candidate_regions_for_scratch = [5, 2, 1] ( scratch_region_map, dynamic_allocation_size, dynamic_allocation_region, ) = analyze_scratch_memory_acesses(tir_module, candidate_regions_for_scratch) buffer_info = extract_buffer_info(tir_module, params) call_extern_list = extract_call_extern_list(tir_module) _npu_ops = list() for call_extern in call_extern_list: _npu_ops.append(translate_ethosu_tir_call_extern(call_extern)) _npu_ops, constant_data = assign_addresses(buffer_info, _npu_ops, scratch_region_map) base_addresses = extract_param_base_addresses(tir_module, buffer_info, scratch_region_map) if dynamic_allocation_size: base_addresses.append( util.BaseAddress( name="dynamic_allocation", primfunc_param_idx=None, region=dynamic_allocation_region, size=dynamic_allocation_size, is_runtime_allocation=True, )) target_accel_config = vela_api.get_accelerator_config() cmds = vapi.npu_generate_register_command_stream(_npu_ops, target_accel_config) payload = vapi.npu_create_driver_payload(cmds, target_accel_config) return payload.hex(), constant_data, base_addresses
def translate(tir_module, params): """This will take an tir module for the NPU and compile to command stream Parameters ---------- tir_module : tvm.IRModule The TIR module containing ethosu extern calls params : dict A dictionary containing TIR primfunc argument ordering idx to constant NDArray map accel_type : ethosu.vela.api.NpuAccelerator the accelerator variant the tir module needs to compiled to Returns ------- cs : str An hex string of the bytes of command stream encoded_constants : str An hex string of the bytes that includes concat'd encoded weights, encoded biases and scales. base_addresses : List[util.BaseAddress] base addresses to be used by the driver """ buffer_info = extract_buffer_info(tir_module, params) call_extern_list = extract_call_extern_list(tir_module) _npu_ops = list() for call_extern in call_extern_list: _npu_ops.append(translate_ethosu_tir_call_extern(call_extern)) _npu_ops, constant_data, scratch_size = assign_addresses(buffer_info, _npu_ops) base_addresses = extract_param_base_addresses(tir_module, buffer_info) if scratch_size > 0: base_addresses.append( util.BaseAddress( "scratch", None, _REGION_MAP[BufferType.scratch], scratch_size, True, ) ) target_accel_config = vela_api.get_accelerator_config() cmds = vapi.npu_generate_register_command_stream(_npu_ops, target_accel_config) payload = vapi.npu_create_driver_payload(cmds, target_accel_config) return payload.hex(), constant_data, base_addresses
def extract_param_base_addresses(mod, buffer_info, scratch_region_map) -> List[util.BaseAddress]: """This function extracts base addresses to be used by the driver Parameters ---------- mod : tvm.IRModule The TIR Module for NPU buffer_info : Dict[tvm.tir.Var, BufferInfo] Information regarding buffer vars used in the PrimFunc Returns ------- List[util.BaseAddress] base addresses to be used by the driver """ # There should only be a single function assert len(mod.functions.items()) == 1 primfunc = mod.functions.items()[0][1] base_addresses = list() idx = 0 for param in primfunc.params: # constants are pooled together and handled specially # this will change after tir.allocate_const. # For now, we are skipping generating buffer addresses here if buffer_info[param].btype == BufferType.constant: continue buffer = primfunc.buffer_map[param] dtype = buffer.dtype element_size_bytes = np.iinfo(dtype).bits // 8 size_bytes = element_size_bytes * np.prod(list(buffer.shape)) base_addresses.append( util.BaseAddress( param.name, idx, _get_region(buffer_info[param].btype, param, scratch_region_map), size_bytes, )) idx += 1 return base_addresses