コード例 #1
0
ファイル: codegen.py プロジェクト: chenghanpeng/tvm
def _ethos_u55_cascader(sram, enable_striping) -> Callable:
    # TODO(ekalda): Extract the flash info from ConstantPools once it is implemented
    flash = MemoryRegion(name="FLASH",
                         size=10**7,
                         read_bandwidth=4,
                         write_bandwidth=4)

    device_config = EthosuDeviceConfig(util.get_accelerator_config())
    cascader_options = CascaderOptions(
        cascade_region=sram,
        max_proposals=64,
        stripe_factors=5,
        max_plan_size=10,
        always_copy_size=1024,
        max_open_plans=8,
        max_closed_plans=32,
        enable_striping=enable_striping,
    )
    return _create_cascader(
        options=cascader_options,
        io_region=sram,
        constant_region=flash,
        working_regions=[sram],
        device_config=device_config,
    )
コード例 #2
0
def test_compiler_attr_default():
    default_config = {
        "accelerator_config": "ethos-u55-256",
    }
    with tvm.transform.PassContext(opt_level=3):
        with tvm.target.Target("c -device=micro_dev"):
            assert util.get_accelerator_config(
            ) == default_config["accelerator_config"]
コード例 #3
0
ファイル: codegen.py プロジェクト: chenghanpeng/tvm
def relay_to_tir(mod: tvm.ir.IRModule) -> tvm.ir.IRModule:
    """
    This is the hook for python-based lowering of a Relay module which lowers NPU
    external functions to TIR.

    Parameters
    ----------
    mod : tvm.ir.IRModule
        This is the Relay module.

    Returns
    -------
    mod : tvm.ir.IRModule
        The Relay module with scheduled NPU external functions.
    """
    mod = OutlineCompilerFunctions("ethos-u")(mod)
    mod = LegalizeEthosU()(mod)
    mod = LUTsOptimizer()(mod)
    mod = relay.transform.InferType()(mod)
    mod = IdentityOptimizer()(mod)
    mod = LayoutOptimizer()(mod)
    mod = relay.transform.InferType()(mod)

    device_contexts = {
        gv: "ethos-u"
        for gv, _ in filter(lambda x: util.is_npu_func(x[1]),
                            mod.functions.items())
    }
    mod = mod.with_attr("device_contexts", device_contexts)

    # Use the cascader if it is enabled for the U55 accelerator, otherwise use copy_constants
    # scheduler
    if util.is_cascader_enabled():
        if util.get_accelerator_config() == "ethos-u65-256":
            raise ValueError(
                "Cascading is not supported for the U65 accelerator")

        workspace_memory_pools = mod.attrs["workspace_memory_pools"]

        if not workspace_memory_pools:
            raise ValueError(
                "Workspace memory pool needs to be provided for the U55 cascader"
            )
        if len(workspace_memory_pools.pools) != 1:
            raise ValueError(
                "Exactly one workspace pool needs to be provided for the U55 cascader"
            )

        memory_pressure = _calculate_memory_pressure(mod)
        sram = extract_memory_info(workspace_memory_pools.pools[0],
                                   memory_pressure)
        tir_mod = LowerToTIR(
            _ethos_u55_cascader(sram, util.is_striping_enabled()))(mod)
    else:
        tir_mod = LowerToTIR(copy_constants())(mod)

    return tir_mod
コード例 #4
0
def test_compiler_attr():
    config = {
        "accelerator_config": "ethos-u55-32",
    }
    with tvm.transform.PassContext(opt_level=3,
                                   config={"relay.ext.ethosu.options":
                                           config}):
        with tvm.target.Target("c -device=micro_dev"):
            assert util.get_accelerator_config(
            ) == config["accelerator_config"]
コード例 #5
0
ファイル: vela_api.py プロジェクト: stjordanis/tvm
def get_target_accel_type():
    """This is a helper function to convert cli accelerator type str argument
    to NpuAccelerator"""
    npu_accel_str_map = {
        "ethos-u55-256": vapi.NpuAccelerator.Ethos_U55_256,
        "ethos-u55-128": vapi.NpuAccelerator.Ethos_U55_128,
        "ethos-u55-64": vapi.NpuAccelerator.Ethos_U55_64,
        "ethos-u55-32": vapi.NpuAccelerator.Ethos_U55_32,
    }
    accel_type_str = util.get_accelerator_config()
    assert accel_type_str in npu_accel_str_map.keys(
    ), f"{accel_type_str} is not supported"
    return npu_accel_str_map[accel_type_str]
コード例 #6
0
def assign_addresses(buffer_info, npu_ops, scratch_region_map):
    """This function will assign addresses to tensors
    within two buffers : scratch and constants.
    The scratch is the buffer created to hold all intermediary data
    The constants is the buffer created via unifying all the constant data
    (post-encoding).
    Parameters
    ----------
    buffer_info : dict
        This is the dictionary obtained via calling extract_buffer_info.
        The key is the buffer name to BufferInfo
    npu_ops : list
        A list of Vela NpuOps with tir.BufferLoads for addresses
        A list of Vela NpuOps with tir.Loads for addresses
    scratch_region_map : Dict[tvm.tir.Var, RegionOffset]
        A buffer_var to region and offset map.
    Returns
    -------
    npu_ops : list
        A list of Vela NpuOps with addesses within scratch and constant buffers
    constant_tensor : NDArray
        A unified constant data array of uint8 as the constant buffer
    """
    def replace_npu_fm_with_address(npu_fm):
        assert isinstance(npu_fm.tiles.addresses[0], tvm.tir.BufferLoad)
        buffer = npu_fm.tiles.addresses[0].buffer.data
        if buffer in scratch_region_map.keys():
            address = scratch_region_map[buffer].offset
            region = scratch_region_map[buffer].region
        else:
            assert buffer in buffer_addresses.keys()
            address, buffer_type = buffer_addresses[buffer]
            region = _get_region(buffer_type)
        assert (len(npu_fm.tiles.addresses[0].indices) == 1
                ), "Ethos-U translation expects flattened buffers"
        index = npu_fm.tiles.addresses[0].indices[0] * (
            np.iinfo(np.dtype(npu_fm.tiles.addresses[0])).bits // 8)
        npu_fm.tiles.addresses[0] = address + int(index)
        npu_fm.tiles.addresses[1] = (address if isinstance(
            npu_fm.tiles.addresses[1], tvm.tir.BufferLoad) else 0)
        npu_fm.tiles.addresses[2] = (address if isinstance(
            npu_fm.tiles.addresses[2], tvm.tir.BufferLoad) else 0)
        npu_fm.tiles.addresses[3] = 0
        npu_fm.region = region
        return npu_fm

    def replace_npu_address_range_with_address(npu_addr_range):
        assert isinstance(npu_addr_range.address, tvm.tir.BufferLoad)
        buffer = npu_addr_range.address.buffer.data
        index = int(npu_addr_range.address.indices[0] *
                    (np.iinfo(np.dtype(npu_addr_range.address)).bits // 8))
        if buffer in scratch_region_map.keys():
            return vapi.NpuAddressRange(
                scratch_region_map[buffer].region,
                scratch_region_map[buffer].offset + index,
                npu_addr_range.length,
            )
        assert buffer in buffer_addresses.keys(
        ), f"searching for buffer : {buffer}, but not found"
        address, buffer_type = buffer_addresses[buffer]
        address = address + int(npu_addr_range.address.indices[0].value)
        return vapi.NpuAddressRange(_get_region(buffer_type), address,
                                    npu_addr_range.length)

    def replace_tir_loads(npu_object):
        if isinstance(npu_object, vapi.NpuFeatureMap):
            return replace_npu_fm_with_address(npu_object)
        if isinstance(npu_object, vapi.NpuAddressRange):
            return replace_npu_address_range_with_address(npu_object)
        return npu_object

    def classify_io(buffer):
        for _npu_op in npu_ops:
            if issubclass(type(_npu_op), vapi.NpuBlockOperation):
                if _npu_op.ifm and _npu_op.ifm.tiles.addresses[
                        0].buffer.data == buffer:
                    return BufferType.input
                if _npu_op.ifm2 and _npu_op.ifm2.tiles.addresses[
                        0].buffer.data == buffer:
                    return BufferType.input
                if _npu_op.ofm and _npu_op.ofm.tiles.addresses[
                        0].buffer.data == buffer:
                    return BufferType.output

        raise ValueError(f"Unused IO : {buffer} in tir module.")

    constant_hex_data = []
    total_constant_len = 0
    buffer_addresses = dict()
    for _buffer, info in buffer_info.items():
        if info.values is not None:
            assert info.btype == BufferType.constant
            assert len(info.shape) == 1
            buffer_addresses[_buffer] = ((total_constant_len,
                                          info.btype) if constant_hex_data else
                                         (0, info.btype))
            dtype_bytes = np.iinfo(np.dtype(info.dtype)).bits // 8
            size_in_bytes = dtype_bytes * np.prod(list(info.shape))
            # Every memory address the NPU access have to be 16 byte aligned
            size_in_bytes = util.round_up(size_in_bytes, 16)
            constant_tensor = np.resize(info.values,
                                        size_in_bytes // dtype_bytes)
            constant_tensor = constant_tensor.tobytes().hex()
            constant_hex_data.append(constant_tensor)
            total_constant_len += len(constant_tensor) // 2
        else:
            if info.btype == BufferType.input_or_output or info.btype == BufferType.input:
                buffer_type = info.btype
                if info.btype == BufferType.input_or_output:
                    buffer_type = classify_io(_buffer)
                assert buffer_type in (BufferType.input, BufferType.output)
                address = 0
                buffer_addresses[_buffer] = (address, buffer_type)
                buffer_info[_buffer] = BufferInfo(values=None,
                                                  shape=info.dtype,
                                                  dtype=info.dtype,
                                                  btype=buffer_type)
            elif info.btype == BufferType.shram:
                accl_config = util.get_accelerator_config()
                arch_config = get_accelerator_arch_config(accl_config)
                address = arch_config.lut_start_address
                buffer_addresses[_buffer] = (address, info.btype)
            else:
                # These buffer_vars are already updated in scratch_region_map
                assert info.btype == BufferType.scratch

    for npu_op in npu_ops:
        for attr_name, attr in npu_op.__dict__.items():
            if isinstance(attr, list):
                new_attr = list()
                for attr_ in attr:
                    new_attr.append(replace_tir_loads(attr_))
                setattr(npu_op, attr_name, new_attr)
            else:
                setattr(npu_op, attr_name, replace_tir_loads(attr))

    constant_data = "".join(constant_hex_data)
    return (npu_ops, constant_data)
コード例 #7
0
def assign_addresses(buffer_info, npu_ops):
    """This function will assign addresses to tensors
    within two buffers : scratch and constants.
    The scratch is the buffer created to hold all intermediary data
    The constants is the buffer created via unifying all the constant data
    (post-encoding).
    Parameters
    ----------
    buffer_info : dict
        This is the dictionary obtained via calling extract_buffer_info.
        The key is the buffer name to BufferInfo
    npu_ops : list
        A list of Vela NpuOps with tir.Loads for addresses
    Returns
    -------
    npu_ops : list
        A list of Vela NpuOps with addesses within scratch and constant buffers
    constant_tensor : NDArray
        A unified constant data array of uint8 as the constant buffer
    scratch_size : int
        The size of the scratch tensor.
    """
    def replace_npu_fm_with_address(npu_fm):
        assert isinstance(npu_fm.tiles.addresses[0], tvm.tir.Load)
        # We currently does not support tiles
        # Change this when tiles are needed
        # (i.e. when using rolling buffers)
        assert npu_fm.tiles.addresses[1:] == [0, 0, 0]
        npu_fm.tiles.addresses[1:] = [0, 0, 0]
        buffer = npu_fm.tiles.addresses[0].buffer_var
        assert buffer in buffer_addresses.keys()
        address, buffer_type = buffer_addresses[buffer]
        index = npu_fm.tiles.addresses[0].index * (
            np.iinfo(np.dtype(npu_fm.tiles.addresses[0])).bits // 8)
        npu_fm.tiles.addresses[0] = address + int(index)
        npu_fm.region = _REGION_MAP[buffer_type]
        return npu_fm

    def replace_npu_address_range_with_address(npu_addr_range):
        assert isinstance(npu_addr_range.address, tvm.tir.Load)
        buffer = npu_addr_range.address.buffer_var
        assert buffer in buffer_addresses.keys(
        ), f"searching for buffer : {buffer}, but not found"
        address, buffer_type = buffer_addresses[buffer]
        return vapi.NpuAddressRange(_REGION_MAP[buffer_type], address,
                                    npu_addr_range.length)

    def replace_tir_loads(npu_object):
        if isinstance(npu_object, vapi.NpuFeatureMap):
            return replace_npu_fm_with_address(npu_object)
        if isinstance(npu_object, vapi.NpuAddressRange):
            return replace_npu_address_range_with_address(npu_object)
        return npu_object

    def classify_io(buffer):
        for _npu_op in npu_ops:
            if issubclass(type(_npu_op), vapi.NpuBlockOperation):
                if _npu_op.ifm and _npu_op.ifm.tiles.addresses[
                        0].buffer_var == buffer:
                    return BufferType.input
                if _npu_op.ifm2 and _npu_op.ifm2.tiles.addresses[
                        0].buffer_var == buffer:
                    return BufferType.input
                if _npu_op.ofm and _npu_op.ofm.tiles.addresses[
                        0].buffer_var == buffer:
                    return BufferType.output

        raise ValueError(f"Unused IO : {buffer} in tir module.")

    scratch_size = 0
    constant_tensor = None
    buffer_addresses = dict()
    for _buffer, info in buffer_info.items():
        if info.values is not None:
            assert np.dtype(info.dtype) == np.uint8
            assert info.btype == BufferType.constant
            assert len(info.shape) == 1
            if constant_tensor is None:
                buffer_addresses[_buffer] = (0, info.btype)
                assert info.values.dtype == np.uint8
                size_in_bytes = info.values.size
                # Every memory address the NPU access have to be 16 byte aligned
                size_in_bytes = util.round_up(size_in_bytes, 16)
                constant_tensor = np.resize(info.values, size_in_bytes)
            else:
                buffer_addresses[_buffer] = (constant_tensor.size, info.btype)
                assert info.values.dtype == np.uint8
                size_in_bytes = info.values.size
                # Every memory address the NPU access have to be 16 byte aligned
                size_in_bytes = util.round_up(size_in_bytes, 16)
                constant_tensor = np.append(
                    constant_tensor, np.resize(info.values, size_in_bytes))
        else:
            if info.btype == BufferType.input_or_output:
                buffer_type = classify_io(_buffer)
                assert buffer_type in (BufferType.input, BufferType.output)
                address = 0
                buffer_addresses[_buffer] = (address, buffer_type)
            elif info.btype == BufferType.shram:
                accl_config = util.get_accelerator_config()
                arch_config = get_accelerator_arch_config(accl_config)
                address = arch_config.lut_start_address
                buffer_addresses[_buffer] = (address, info.btype)
            else:
                size_in_bytes = int(
                    (np.iinfo(np.dtype(info.dtype)).bits // 8) *
                    np.prod(list(info.shape)))
                # Every memory address the NPU access have to be 16 byte aligned
                size_in_bytes = util.round_up(size_in_bytes, 16)
                assert info.btype == BufferType.scratch
                address = scratch_size
                scratch_size += size_in_bytes
                buffer_addresses[_buffer] = (address, info.btype)

    for npu_op in npu_ops:
        for attr_name, attr in npu_op.__dict__.items():
            if isinstance(attr, list):
                new_attr = list()
                for attr_ in attr:
                    new_attr.append(replace_tir_loads(attr_))
                setattr(npu_op, attr_name, new_attr)
            else:
                setattr(npu_op, attr_name, replace_tir_loads(attr))

    return npu_ops, constant_tensor, scratch_size