def translate(tir_module, params): """This will take an tir module for the NPU and compile to command stream Parameters ---------- tir_module : tvm.IRModule The TIR module containing ethosu extern calls params : dict A dictionary containing TIR primfunc argument ordering idx to constant NDArray map accel_type : ethosu.vela.api.NpuAccelerator the accelerator variant the tir module needs to compiled to Returns ------- cs : str An hex string of the bytes of command stream encoded_constants : str An hex string of the bytes that includes concat'd encoded weights, encoded biases and scales. base_addresses : List[util.BaseAddress] base addresses to be used by the driver """ # The NPU has 6 usable regions ranging from 0-6 # The regions 0, 3, and 4 is already used for input, # output and constant, respectively (See _get_regions()). # Thus, for scratch we are left with 5, 2 and 1. candidate_regions_for_scratch = [5, 2, 1] ( scratch_region_map, dynamic_allocation_size, dynamic_allocation_region, ) = analyze_scratch_memory_acesses(tir_module, candidate_regions_for_scratch) buffer_info = extract_buffer_info(tir_module, params) call_extern_list = extract_call_extern_list(tir_module) _npu_ops = list() for call_extern in call_extern_list: _npu_ops.append(translate_ethosu_tir_call_extern(call_extern)) _npu_ops, constant_data = assign_addresses(buffer_info, _npu_ops, scratch_region_map) base_addresses = extract_param_base_addresses(tir_module, buffer_info, scratch_region_map) if dynamic_allocation_size: base_addresses.append( util.BaseAddress( name="dynamic_allocation", primfunc_param_idx=None, region=dynamic_allocation_region, size=dynamic_allocation_size, is_runtime_allocation=True, )) target_accel_config = vela_api.get_accelerator_config() cmds = vapi.npu_generate_register_command_stream(_npu_ops, target_accel_config) payload = vapi.npu_create_driver_payload(cmds, target_accel_config) return payload.hex(), constant_data, base_addresses
def test_create_driver_payload(accelerator: NpuAccelerator): """Tests npu_create_driver_payload""" # Generate a random command stream with defined beginning and end random.seed(0) num_commands = 793 register_command_stream = random.choices(range(1 << 32), k=num_commands) register_command_stream[0] = 0xFEDCBA98 register_command_stream[-1] = 0xA0B1C2D3 payload = npu_create_driver_payload(register_command_stream, accelerator) header_size = 32 # expected driver header size in bytes assert len(payload) == header_size + 4 * num_commands # Check that the first register command is located directly after the header assert list(payload[header_size:header_size + 4]) == [0x98, 0xBA, 0xDC, 0xFE] # Check that the last register command is present in the payload assert list(payload[-4:]) == [0xD3, 0xC2, 0xB1, 0xA0]
def translate(tir_module, params): """This will take an tir module for the NPU and compile to command stream Parameters ---------- tir_module : tvm.IRModule The TIR module containing ethosu extern calls params : dict A dictionary containing TIR primfunc argument ordering idx to constant NDArray map accel_type : ethosu.vela.api.NpuAccelerator the accelerator variant the tir module needs to compiled to Returns ------- cs : str An hex string of the bytes of command stream encoded_constants : str An hex string of the bytes that includes concat'd encoded weights, encoded biases and scales. base_addresses : List[util.BaseAddress] base addresses to be used by the driver """ buffer_info = extract_buffer_info(tir_module, params) call_extern_list = extract_call_extern_list(tir_module) _npu_ops = list() for call_extern in call_extern_list: _npu_ops.append(translate_ethosu_tir_call_extern(call_extern)) _npu_ops, constant_data, scratch_size = assign_addresses(buffer_info, _npu_ops) base_addresses = extract_param_base_addresses(tir_module, buffer_info) if scratch_size > 0: base_addresses.append( util.BaseAddress( "scratch", None, _REGION_MAP[BufferType.scratch], scratch_size, True, ) ) target_accel_config = vela_api.get_accelerator_config() cmds = vapi.npu_generate_register_command_stream(_npu_ops, target_accel_config) payload = vapi.npu_create_driver_payload(cmds, target_accel_config) return payload.hex(), constant_data, base_addresses
def translate(tir_module, params): """This will take an tir module for the NPU and compile to command stream Parameters ---------- tir_module : tvm.IRModule The TIR module containing ethosu extern calls params : dict A dictionary containing TIR primfunc argument ordering idx to constant NDArray map accel_type : ethosu.vela.api.NpuAccelerator the accelerator variant the tir module needs to compiled to Returns ------- cs : str An hex string of the bytes of command stream encoded_constants : str An hex string of the bytes that includes concat'd encoded weights, encoded biases and scales. scratch_size : int The size of the scratch buffer needed. """ buffer_info = extract_buffer_info(tir_module, params) call_extern_list = extract_call_extern_list(tir_module) _npu_ops = list() for call_extern in call_extern_list: _npu_ops.append(translate_ethosu_tir_call_extern(call_extern)) _npu_ops, constant_tensor, scratch_size = assign_addresses( buffer_info, _npu_ops) target_accel_config = vela_api.get_accelerator_config() cmds = vapi.npu_generate_register_command_stream(_npu_ops, target_accel_config) payload = vapi.npu_create_driver_payload(cmds, target_accel_config) hex_value = "" if constant_tensor is None else constant_tensor.tobytes( ).hex() return payload.hex(), hex_value, scratch_size