Example #1
    def add(self,
            name: str,
            dtype: DefinedType,
            ctype: str,
            ancestor: int = 0,
            allow_shadowing: bool = False):
        if not isinstance(name, str):
            raise TypeError('Variable name type cannot be %s' %
                            type(name).__name__)

        for _, scope, can_access_parent in reversed(self._scopes):
            if name in scope:
                err_str = "Shadowing variable {} from type {} to {}".format(
                    name, scope[name], dtype)
                if (allow_shadowing or config.Config.get_bool(
                        "compiler", "allow_shadowing")):
                    if not allow_shadowing:
                        print("WARNING: " + err_str)
                else:
                    raise cgx.CodegenError(err_str)
            if not can_access_parent:
                break
        self._scopes[-1 - ancestor][1][name] = (dtype, ctype)
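
The scope handling above can be illustrated in isolation. Below is a minimal, self-contained sketch (the ScopedSymbols class and its simplified (dtype, ctype) payload are illustrative, not part of DaCe); it mirrors how add() walks the scope stack and rejects shadowing unless it is explicitly allowed:

class ScopedSymbols:
    """Minimal sketch of a scope stack with shadowing checks."""
    def __init__(self):
        # Each scope: (owner, {name: (dtype, ctype)}, can_access_parent)
        self._scopes = [(None, {}, True)]

    def enter_scope(self, owner, can_access_parent=True):
        self._scopes.append((owner, {}, can_access_parent))

    def exit_scope(self):
        self._scopes.pop()

    def add(self, name, dtype, ctype, ancestor=0, allow_shadowing=False):
        # Walk from the innermost scope outwards, stopping at scopes that
        # cannot see their parent
        for _, scope, can_access_parent in reversed(self._scopes):
            if name in scope and not allow_shadowing:
                raise ValueError(f"Shadowing variable {name}")
            if not can_access_parent:
                break
        self._scopes[-1 - ancestor][1][name] = (dtype, ctype)


symbols = ScopedSymbols()
symbols.add("x", "Pointer", "float*")
symbols.enter_scope("kernel")
symbols.add("x", "Pointer", "float*", allow_shadowing=True)  # OK: explicit
# symbols.add("x", "Pointer", "float*")  # would raise: shadows outer "x"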
Example #2
    def get_generated_codeobjects(self):

        execution_mode = Config.get("compiler", "xilinx", "mode")

        kernel_file_name = "DACE_BINARY_DIR \"/{}".format(self._program_name)
        if execution_mode == "software_emulation":
            kernel_file_name += "_sw_emu.xclbin\""
            xcl_emulation_mode = "\"sw_emu\""
            xilinx_sdx = "DACE_VITIS_DIR"
        elif execution_mode == "hardware_emulation":
            kernel_file_name += "_hw_emu.xclbin\""
            xcl_emulation_mode = "\"hw_emu\""
            xilinx_sdx = "DACE_VITIS_DIR"
        elif execution_mode == "hardware" or execution_mode == "simulation":
            kernel_file_name += "_hw.xclbin\""
            xcl_emulation_mode = None
            xilinx_sdx = None
        else:
            raise cgx.CodegenError(
                "Unknown Xilinx execution mode: {}".format(execution_mode))

        set_env_vars = ""
        set_str = "dace::set_environment_variable(\"{}\", {});\n"
        unset_str = "dace::unset_environment_variable(\"{}\");\n"
        set_env_vars += (set_str.format("XCL_EMULATION_MODE",
                                        xcl_emulation_mode)
                         if xcl_emulation_mode is not None else
                         unset_str.format("XCL_EMULATION_MODE"))
        set_env_vars += (set_str.format("XILINX_SDX", xilinx_sdx) if xilinx_sdx
                         is not None else unset_str.format("XILINX_SDX"))
        set_env_vars += set_str.format(
            "EMCONFIG_PATH", "DACE_BINARY_DIR"
        ) if execution_mode == 'hardware_emulation' else unset_str.format(
            "EMCONFIG_PATH")

        host_code = CodeIOStream()
        host_code.write("""\
#include "dace/xilinx/host.h"
#include "dace/dace.h"
""")
        if len(self._dispatcher.instrumentation) > 1:
            host_code.write("""\
#include "dace/perf/reporting.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <limits>
""")
        host_code.write("\n\n")

        self._frame.generate_fileheader(self._global_sdfg, host_code,
                                        'xilinx_host')

        params_comma = self._global_sdfg.signature(with_arrays=False)
        if params_comma:
            params_comma = ', ' + params_comma

        host_code.write("""
DACE_EXPORTED int __dace_init_xilinx({sdfg.name}_t *__state{signature}) {{
    {environment_variables}

    __state->fpga_context = new dace::fpga::Context();
    __state->fpga_context->Get().MakeProgram({kernel_file_name});
    return 0;
}}

DACE_EXPORTED void __dace_exit_xilinx({sdfg.name}_t *__state) {{
    delete __state->fpga_context;
}}

{host_code}""".format(signature=params_comma,
                      sdfg=self._global_sdfg,
                      environment_variables=set_env_vars,
                      kernel_file_name=kernel_file_name,
                      host_code="".join([
                          "{separator}\n// Kernel: {kernel_name}"
                          "\n{separator}\n\n{code}\n\n".format(
                              separator="/" * 79, kernel_name=name, code=code)
                          for (name, code) in self._host_codes
                      ])))

        host_code_obj = CodeObject(self._program_name,
                                   host_code.getvalue(),
                                   "cpp",
                                   XilinxCodeGen,
                                   "Xilinx",
                                   target_type="host")

        kernel_code_objs = [
            CodeObject(kernel_name,
                       code,
                       "cpp",
                       XilinxCodeGen,
                       "Xilinx",
                       target_type="device")
            for (kernel_name, code) in self._kernel_codes
        ]

        # Memory bank and streaming interfaces connectivity configuration file
        link_cfg = CodeIOStream()
        self._other_codes["link.cfg"] = link_cfg
        link_cfg.write("[connectivity]")
        are_assigned = [v is not None for v in self._bank_assignments.values()]
        if any(are_assigned):
            if not all(are_assigned):
                raise RuntimeError("Some, but not all global memory arrays "
                                   "were assigned to memory banks: {}".format(
                                       self._bank_assignments))
            # Emit mapping from kernel memory interfaces to DRAM banks
            for (kernel_name, interface_name), (
                    memory_type,
                    memory_bank) in self._bank_assignments.items():
                link_cfg.write(
                    f"sp={kernel_name}_1.m_axi_{interface_name}:{memory_type}[{memory_bank}]"
                )
        # Emit mapping between inter-kernel streaming interfaces
        for _, (src, dst) in self._stream_connections.items():
            link_cfg.write(f"stream_connect={src}:{dst}")

        other_objs = []
        for name, code in self._other_codes.items():
            name = name.split(".")
            other_objs.append(
                CodeObject(name[0],
                           code.getvalue(),
                           ".".join(name[1:]),
                           XilinxCodeGen,
                           "Xilinx",
                           target_type="device"))

        return [host_code_obj] + kernel_code_objs + other_objs
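
The link.cfg emitted at the end is intended for the Vitis linker's connectivity configuration. A small sketch of what its contents end up looking like, using hypothetical kernel, interface, and stream names in place of self._bank_assignments and self._stream_connections:

# Hypothetical assignments mirroring the dictionaries used above
bank_assignments = {("vadd", "gmem0"): ("DDR", 0),
                    ("vadd", "gmem1"): ("DDR", 1)}
stream_connections = {
    "pipe": ("producer_top_1.m_axis_out", "consumer_top_1.s_axis_in")
}

lines = ["[connectivity]"]
for (kernel_name, interface_name), (memory_type,
                                    memory_bank) in bank_assignments.items():
    lines.append(
        f"sp={kernel_name}_1.m_axi_{interface_name}:{memory_type}[{memory_bank}]")
for src, dst in stream_connections.values():
    lines.append(f"stream_connect={src}:{dst}")
print("\n".join(lines))
# [connectivity]
# sp=vadd_1.m_axi_gmem0:DDR[0]
# sp=vadd_1.m_axi_gmem1:DDR[1]
# stream_connect=producer_top_1.m_axis_out:consumer_top_1.s_axis_in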
Example #3
    def get_generated_codeobjects(self):

        execution_mode = Config.get("compiler", "xilinx", "mode")

        kernel_file_name = "DACE_BINARY_DIR \"/{}".format(self._program_name)
        if execution_mode == "software_emulation":
            kernel_file_name += "_sw_emu.xclbin\""
            xcl_emulation_mode = "\"sw_emu\""
            xilinx_sdx = "DACE_VITIS_DIR"
        elif execution_mode == "hardware_emulation":
            kernel_file_name += "_hw_emu.xclbin\""
            xcl_emulation_mode = "\"hw_emu\""
            xilinx_sdx = "DACE_VITIS_DIR"
        elif execution_mode == "hardware" or execution_mode == "simulation":
            kernel_file_name += "_hw.xclbin\""
            xcl_emulation_mode = None
            xilinx_sdx = None
        else:
            raise cgx.CodegenError(
                "Unknown Xilinx execution mode: {}".format(execution_mode))

        set_env_vars = ""
        set_str = "dace::set_environment_variable(\"{}\", {});\n"
        unset_str = "dace::unset_environment_variable(\"{}\");\n"
        set_env_vars += (set_str.format("XCL_EMULATION_MODE",
                                        xcl_emulation_mode)
                         if xcl_emulation_mode is not None else
                         unset_str.format("XCL_EMULATION_MODE"))
        set_env_vars += (set_str.format("XILINX_SDX", xilinx_sdx) if xilinx_sdx
                         is not None else unset_str.format("XILINX_SDX"))

        host_code = CodeIOStream()
        host_code.write("""\
#include "dace/xilinx/host.h"
#include "dace/dace.h"
#include <iostream>\n\n""")

        self._frame.generate_fileheader(self._global_sdfg, host_code,
                                        'xilinx_host')

        params_comma = self._global_sdfg.signature(with_arrays=False)
        if params_comma:
            params_comma = ', ' + params_comma

        host_code.write("""
DACE_EXPORTED int __dace_init_xilinx({sdfg.name}_t *__state{signature}) {{
    {environment_variables}

    __state->fpga_context = new dace::fpga::Context();
    __state->fpga_context->Get().MakeProgram({kernel_file_name});
    return 0;
}}

DACE_EXPORTED void __dace_exit_xilinx({sdfg.name}_t *__state) {{
    delete __state->fpga_context;
}}

{host_code}""".format(signature=params_comma,
                      sdfg=self._global_sdfg,
                      environment_variables=set_env_vars,
                      kernel_file_name=kernel_file_name,
                      host_code="".join([
                          "{separator}\n// Kernel: {kernel_name}"
                          "\n{separator}\n\n{code}\n\n".format(
                              separator="/" * 79, kernel_name=name, code=code)
                          for (name, code) in self._host_codes
                      ])))

        host_code_obj = CodeObject(self._program_name,
                                   host_code.getvalue(),
                                   "cpp",
                                   XilinxCodeGen,
                                   "Xilinx",
                                   target_type="host")

        kernel_code_objs = [
            CodeObject(kernel_name,
                       code,
                       "cpp",
                       XilinxCodeGen,
                       "Xilinx",
                       target_type="device")
            for (kernel_name, code) in self._kernel_codes
        ]

        # Configuration file with interface assignments
        are_assigned = [
            v is not None for v in self._interface_assignments.values()
        ]
        if any(are_assigned):
            if not all(are_assigned):
                raise RuntimeError("Some, but not all global memory arrays "
                                   "were assigned to memory banks: {}".format(
                                       self._interface_assignments))
            are_assigned = True
        else:
            are_assigned = False
        for name, _ in self._host_codes:
            # Reset per kernel so each CSV only lists its own interfaces
            bank_assignment_code = []
            # Only iterate over assignments if any exist
            if are_assigned:
                for (kernel_name, interface_name), (
                        memory_type,
                        memory_bank) in self._interface_assignments.items():
                    if kernel_name != name:
                        continue
                    bank_assignment_code.append("{},{},{}".format(
                        interface_name, memory_type.name, memory_bank))
            # Create file even if there are no assignments
            kernel_code_objs.append(
                CodeObject("{}_memory_interfaces".format(name),
                           "\n".join(bank_assignment_code),
                           "csv",
                           XilinxCodeGen,
                           "Xilinx",
                           target_type="device"))

        return [host_code_obj] + kernel_code_objs
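
This older variant emits one <kernel>_memory_interfaces.csv per kernel instead of a link.cfg. A short sketch of the row format it produces, with hypothetical kernel and interface names and a stand-in memory-type enum:

from enum import Enum

class MemoryType(Enum):  # stand-in for the memory-type enum used above
    DDR = "DDR"
    HBM = "HBM"

# Mirrors self._interface_assignments: (kernel, interface) -> (type, bank)
interface_assignments = {("vadd", "gmem0"): (MemoryType.DDR, 0),
                         ("vadd", "gmem1"): (MemoryType.HBM, 3)}

rows = ["{},{},{}".format(interface, mem_type.name, bank)
        for (kernel, interface), (mem_type, bank)
        in interface_assignments.items() if kernel == "vadd"]
print("\n".join(rows))
# gmem0,DDR,0
# gmem1,HBM,3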
Example #4
    def generate_module(self, sdfg, state, kernel_name, name, subgraph,
                        parameters, module_stream, entry_stream, host_stream,
                        instrumentation_stream):
        """Generates a module that will run as a dataflow function in the FPGA
           kernel."""

        state_id = sdfg.node_id(state)
        dfg = sdfg.nodes()[state_id]

        kernel_args_call = []
        kernel_args_module = []
        for is_output, pname, p, interface_ids in parameters:
            if isinstance(p, dt.Array):
                for bank, interface_id in fpga.iterate_hbm_interface_ids(
                        p, interface_ids):
                    arr_name = fpga.fpga_ptr(pname,
                                             p,
                                             sdfg,
                                             bank,
                                             is_output,
                                             is_array_interface=True)
                    # Add interface ID to called module, but not to the module
                    # arguments
                    argname = fpga.fpga_ptr(pname,
                                            p,
                                            sdfg,
                                            bank,
                                            is_output,
                                            is_array_interface=True,
                                            interface_id=interface_id)

                    kernel_args_call.append(argname)
                    dtype = p.dtype
                    kernel_args_module.append("{} {}*{}".format(
                        dtype.ctype, "const " if not is_output else "",
                        arr_name))
            else:
                if isinstance(p, dt.Stream):
                    kernel_args_call.append(
                        p.as_arg(with_types=False, name=pname))
                    if p.is_stream_array():
                        kernel_args_module.append(
                            "dace::FIFO<{}, {}, {}> {}[{}]".format(
                                p.dtype.base_type.ctype, p.veclen,
                                p.buffer_size, pname, p.size_string()))
                    else:
                        kernel_args_module.append(
                            "dace::FIFO<{}, {}, {}> &{}".format(
                                p.dtype.base_type.ctype, p.veclen,
                                p.buffer_size, pname))
                else:
                    kernel_args_call.append(
                        p.as_arg(with_types=False, name=pname))
                    kernel_args_module.append(
                        p.as_arg(with_types=True, name=pname))

        # Check if we are generating an RTL module, in which case only the
        # accesses to the streams should be handled
        rtl_tasklet = None
        for n in subgraph.nodes():
            if (isinstance(n, dace.nodes.Tasklet)
                    and n.language == dace.dtypes.Language.SystemVerilog):
                rtl_tasklet = n
                break
        if rtl_tasklet:
            entry_stream.write(
                f'// [RTL] HLSLIB_DATAFLOW_FUNCTION({name}, {", ".join(kernel_args_call)});'
            )
            module_stream.write(
                f'// [RTL] void {name}({", ".join(kernel_args_module)});\n\n')

            # The "_1" in the names comes from Vitis naming kernel instances <kernel>_1
            for node in subgraph.source_nodes():
                if isinstance(sdfg.arrays[node.data], dt.Stream):
                    if node.data not in self._stream_connections:
                        self._stream_connections[node.data] = [None, None]
                    for edge in state.out_edges(node):
                        rtl_name = "{}_{}_{}_{}".format(
                            edge.dst, sdfg.sdfg_id, sdfg.node_id(state),
                            state.node_id(edge.dst))
                        self._stream_connections[
                            node.data][1] = '{}_top_1.s_axis_{}'.format(
                                rtl_name, edge.dst_conn)

            for node in subgraph.sink_nodes():
                if isinstance(sdfg.arrays[node.data], dt.Stream):
                    if node.data not in self._stream_connections:
                        self._stream_connections[node.data] = [None, None]
                    for edge in state.in_edges(node):
                        rtl_name = "{}_{}_{}_{}".format(
                            edge.src, sdfg.sdfg_id, sdfg.node_id(state),
                            state.node_id(edge.src))
                        self._stream_connections[
                            node.data][0] = '{}_top_1.m_axis_{}'.format(
                                rtl_name, edge.src_conn)

            # Make the dispatcher trigger generation of the RTL module, but
            # ignore the generated code, as the RTL codegen will generate the
            # appropriate files.
            ignore_stream = CodeIOStream()
            self._dispatcher.dispatch_subgraph(sdfg,
                                               subgraph,
                                               state_id,
                                               ignore_stream,
                                               ignore_stream,
                                               skip_entry_node=False)

            # Launch the kernel from the host code
            rtl_name = self.rtl_tasklet_name(rtl_tasklet, state, sdfg)
            host_stream.write(
                f"  auto kernel_{rtl_name} = program.MakeKernel(\"{rtl_name}_top\"{', '.join([''] + [name for _, name, p, _ in parameters if not isinstance(p, dt.Stream)])}).ExecuteTaskFork();",
                sdfg, state_id, rtl_tasklet)
            if state.instrument == dtypes.InstrumentationType.FPGA:
                self.instrument_opencl_kernel(rtl_name, state_id, sdfg.sdfg_id,
                                              instrumentation_stream)

            return

        # create a unique module name to prevent name clashes
        module_function_name = f"module_{name}_{sdfg.sdfg_id}"

        # Unrolling processing elements: if the first scope of the subgraph
        # is an unrolled map, generate a processing element for each iteration
        scope_children = subgraph.scope_children()
        top_scopes = [
            n for n in scope_children[None]
            if isinstance(n, dace.sdfg.nodes.EntryNode)
        ]
        unrolled_loops = 0
        if len(top_scopes) == 1:
            scope = top_scopes[0]
            if scope.unroll:
                self._unrolled_pes.add(scope.map)
                kernel_args_call += ", ".join(scope.map.params)
                kernel_args_module += ["int " + p for p in scope.params]
                for p, r in zip(scope.map.params, scope.map.range):
                    if len(r) > 3:
                        raise cgx.CodegenError("Strided unroll not supported")
                    entry_stream.write(
                        "for (size_t {param} = {begin}; {param} < {end}; "
                        "{param} += {increment}) {{\n#pragma HLS UNROLL".
                        format(param=p,
                               begin=r[0],
                               end=r[1] + 1,
                               increment=r[2]))
                    unrolled_loops += 1

        # Generate caller code in top-level function
        entry_stream.write(
            "HLSLIB_DATAFLOW_FUNCTION({}, {});".format(
                module_function_name, ", ".join(kernel_args_call)), sdfg,
            state_id)

        for _ in range(unrolled_loops):
            entry_stream.write("}")

        # ----------------------------------------------------------------------
        # Generate kernel code
        # ----------------------------------------------------------------------

        self._dispatcher.defined_vars.enter_scope(subgraph)

        module_body_stream = CodeIOStream()

        module_body_stream.write(
            "void {}({}) {{".format(module_function_name,
                                    ", ".join(kernel_args_module)), sdfg,
            state_id)

        # Register the array interface as a naked pointer for use inside the
        # FPGA kernel
        interfaces_added = set()
        for is_output, argname, arg, interface_id in parameters:
            for bank, _ in fpga.iterate_hbm_interface_ids(arg, interface_id):
                if (not (isinstance(arg, dt.Array) and arg.storage
                         == dace.dtypes.StorageType.FPGA_Global)):
                    continue
                ctype = dtypes.pointer(arg.dtype).ctype
                ptr_name = fpga.fpga_ptr(argname,
                                         arg,
                                         sdfg,
                                         bank,
                                         is_output,
                                         None,
                                         is_array_interface=True)
                if not is_output:
                    ctype = f"const {ctype}"
                self._dispatcher.defined_vars.add(ptr_name,
                                                  DefinedType.Pointer, ctype)
                if argname in interfaces_added:
                    continue
                interfaces_added.add(argname)
                self._dispatcher.defined_vars.add(argname,
                                                  DefinedType.ArrayInterface,
                                                  ctype,
                                                  allow_shadowing=True)
        module_body_stream.write("\n")

        # Allocate local transients
        data_to_allocate = (set(subgraph.top_level_transients()) -
                            set(sdfg.shared_transients()) -
                            set([p[1] for p in parameters]))
        allocated = set()
        for node in subgraph.nodes():
            if not isinstance(node, dace.sdfg.nodes.AccessNode):
                continue
            if node.data not in data_to_allocate or node.data in allocated:
                continue
            allocated.add(node.data)
            self._dispatcher.dispatch_allocate(sdfg, state, state_id, node,
                                               node.desc(sdfg), module_stream,
                                               module_body_stream)

        self._dispatcher.dispatch_subgraph(sdfg,
                                           subgraph,
                                           state_id,
                                           module_stream,
                                           module_body_stream,
                                           skip_entry_node=False)

        module_stream.write(module_body_stream.getvalue(), sdfg, state_id)
        module_stream.write("}\n\n")

        self._dispatcher.defined_vars.exit_scope(subgraph)
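
To make the unrolled-processing-element path concrete: replaying the format strings above with hypothetical values (map parameter p over range (0, 3, 1), a made-up module name and argument list) yields roughly the entry-stream code shown in the comments below.

param, begin, end, increment = "p", 0, 3, 1
module_function_name = "module_compute_0"       # hypothetical
kernel_args_call = ["A_in", "B_out", "p"]        # hypothetical

entry = []
entry.append("for (size_t {param} = {begin}; {param} < {end}; "
             "{param} += {increment}) {{\n#pragma HLS UNROLL".format(
                 param=param, begin=begin, end=end + 1, increment=increment))
entry.append("HLSLIB_DATAFLOW_FUNCTION({}, {});".format(
    module_function_name, ", ".join(kernel_args_call)))
entry.append("}")
print("\n".join(entry))
# for (size_t p = 0; p < 4; p += 1) {
# #pragma HLS UNROLL
# HLSLIB_DATAFLOW_FUNCTION(module_compute_0, A_in, B_out, p);
# }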
Example #5
    def generate_module(self, sdfg, state, name, subgraph, parameters,
                        symbol_parameters, module_stream, entry_stream,
                        host_stream):
        """Generates a module that will run as a dataflow function in the FPGA
           kernel."""

        state_id = sdfg.node_id(state)
        dfg = sdfg.nodes()[state_id]

        kernel_args_call = []
        kernel_args_module = []
        added = set()

        parameters = list(sorted(parameters, key=lambda t: t[1]))
        arrays = dtypes.deduplicate(
            [p for p in parameters if not isinstance(p[2], dace.data.Scalar)])
        scalars = [p for p in parameters if isinstance(p[2], dace.data.Scalar)]
        scalars += ((False, k, v, None) for k, v in symbol_parameters.items())
        scalars = dace.dtypes.deduplicate(sorted(scalars, key=lambda t: t[1]))
        for is_output, pname, p, interface_id in itertools.chain(
                arrays, scalars):
            if isinstance(p, dace.data.Array):
                arr_name = "{}_{}".format(pname, "out" if is_output else "in")
                # Add interface ID to called module, but not to the module
                # arguments
                argname = arr_name
                if interface_id is not None:
                    argname = arr_name + "_%d" % interface_id

                kernel_args_call.append(argname)
                dtype = p.dtype
                kernel_args_module.append("{} {}*{}".format(
                    dtype.ctype, "const " if not is_output else "", arr_name))
            else:
                # Don't make duplicate arguments for types other than arrays
                if pname in added:
                    continue
                added.add(pname)
                if isinstance(p, dace.data.Stream):
                    kernel_args_call.append(
                        p.as_arg(with_types=False, name=pname))
                    if p.is_stream_array():
                        kernel_args_module.append(
                            "dace::FIFO<{}, {}, {}> {}[{}]".format(
                                p.dtype.base_type.ctype, p.veclen,
                                p.buffer_size, pname, p.size_string()))
                    else:
                        kernel_args_module.append(
                            "dace::FIFO<{}, {}, {}> &{}".format(
                                p.dtype.base_type.ctype, p.veclen,
                                p.buffer_size, pname))
                else:
                    kernel_args_call.append(
                        p.as_arg(with_types=False, name=pname))
                    kernel_args_module.append(
                        p.as_arg(with_types=True, name=pname))

        # create a unique module name to prevent name clashes
        module_function_name = f"module_{name}_{sdfg.sdfg_id}"

        # Unrolling processing elements: if the first scope of the subgraph
        # is an unrolled map, generate a processing element for each iteration
        scope_children = subgraph.scope_children()
        top_scopes = [
            n for n in scope_children[None]
            if isinstance(n, dace.sdfg.nodes.EntryNode)
        ]
        unrolled_loops = 0
        if len(top_scopes) == 1:
            scope = top_scopes[0]
            if scope.unroll:
                self._unrolled_pes.add(scope.map)
                kernel_args_call += ", ".join(scope.map.params)
                kernel_args_module += ["int " + p for p in scope.params]
                for p, r in zip(scope.map.params, scope.map.range):
                    if len(r) > 3:
                        raise cgx.CodegenError("Strided unroll not supported")
                    entry_stream.write(
                        "for (size_t {param} = {begin}; {param} < {end}; "
                        "{param} += {increment}) {{\n#pragma HLS UNROLL".
                        format(param=p,
                               begin=r[0],
                               end=r[1] + 1,
                               increment=r[2]))
                    unrolled_loops += 1

        # Generate caller code in top-level function
        entry_stream.write(
            "HLSLIB_DATAFLOW_FUNCTION({}, {});".format(
                module_function_name, ", ".join(kernel_args_call)), sdfg,
            state_id)

        for _ in range(unrolled_loops):
            entry_stream.write("}")

        # ----------------------------------------------------------------------
        # Generate kernel code
        # ----------------------------------------------------------------------

        self._dispatcher.defined_vars.enter_scope(subgraph)

        module_body_stream = CodeIOStream()

        module_body_stream.write(
            "void {}({}) {{".format(module_function_name,
                                    ", ".join(kernel_args_module)), sdfg,
            state_id)

        # Construct ArrayInterface wrappers to pack input and output pointers
        # to the same global array
        in_args = {
            argname
            for out, argname, arg, _ in parameters
            if isinstance(arg, dace.data.Array)
            and arg.storage == dace.dtypes.StorageType.FPGA_Global and not out
        }
        out_args = {
            argname
            for out, argname, arg, _ in parameters
            if isinstance(arg, dace.data.Array)
            and arg.storage == dace.dtypes.StorageType.FPGA_Global and out
        }
        if len(in_args) > 0 or len(out_args) > 0:
            # Add ArrayInterface objects to wrap input and output pointers to
            # the same array
            module_body_stream.write("\n")
            interfaces_added = set()
            for _, argname, arg, _ in parameters:
                if argname in interfaces_added:
                    continue
                interfaces_added.add(argname)
                has_in_ptr = argname in in_args
                has_out_ptr = argname in out_args
                if not has_in_ptr and not has_out_ptr:
                    continue
                in_ptr = ("{}_in".format(argname) if has_in_ptr else "nullptr")
                out_ptr = ("{}_out".format(argname)
                           if has_out_ptr else "nullptr")
                ctype = "dace::ArrayInterface<{}>".format(arg.dtype.ctype)
                module_body_stream.write("{} {}({}, {});".format(
                    ctype, argname, in_ptr, out_ptr))
                self._dispatcher.defined_vars.add(argname,
                                                  DefinedType.ArrayInterface,
                                                  ctype,
                                                  allow_shadowing=True)
            module_body_stream.write("\n")

        # Allocate local transients
        data_to_allocate = (set(subgraph.top_level_transients()) -
                            set(sdfg.shared_transients()) -
                            set([p[1] for p in parameters]))
        allocated = set()
        for node in subgraph.nodes():
            if not isinstance(node, dace.sdfg.nodes.AccessNode):
                continue
            if node.data not in data_to_allocate or node.data in allocated:
                continue
            allocated.add(node.data)
            self._dispatcher.dispatch_allocate(sdfg, state, state_id, node,
                                               module_stream,
                                               module_body_stream)

        self._dispatcher.dispatch_subgraph(sdfg,
                                           subgraph,
                                           state_id,
                                           module_stream,
                                           module_body_stream,
                                           skip_entry_node=False)

        module_stream.write(module_body_stream.getvalue(), sdfg, state_id)
        module_stream.write("}\n\n")

        self._dispatcher.defined_vars.exit_scope(subgraph)
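
For the ArrayInterface wrapping in this variant, replaying the format string with a hypothetical argument A that has both an input and an output pointer gives a module-body line like the one in the comment:

argname, ctype = "A", "dace::ArrayInterface<float>"   # hypothetical
has_in_ptr, has_out_ptr = True, True

in_ptr = "{}_in".format(argname) if has_in_ptr else "nullptr"
out_ptr = "{}_out".format(argname) if has_out_ptr else "nullptr"
print("{} {}({}, {});".format(ctype, argname, in_ptr, out_ptr))
# dace::ArrayInterface<float> A(A_in, A_out);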