Example #1
0
 def generate_no_dependence_post(self, kernel_stream, sdfg, state_id, node,
                                 var_name):
     '''
     Emits a post-loop HLS pragma that disables loop-carried dependence
     analysis for the given variable.
     '''
     # Array interfaces are written through their output-side pointer name,
     # so resolve the interface variable before emitting the pragma.
     var_kind, _ = self._dispatcher.defined_vars.get(var_name)
     if var_kind == DefinedType.ArrayInterface:
         var_name = cpp.array_interface_variable(var_name, True,
                                                 self._dispatcher)
     pragma = "#pragma HLS DEPENDENCE variable={} false".format(var_name)
     kernel_stream.write(pragma, sdfg, state_id, node)
Example #2
0
    def generate_host_header(self, sdfg, kernel_function_name, parameters,
                             host_code_stream):

        kernel_args = []
        for is_output, name, arg, if_id in parameters:
            if isinstance(arg, dt.Array):
                argname = cpp.array_interface_variable(name, is_output, None)
                if if_id is not None:
                    argname = f"{argname}_{if_id}"

                kernel_args.append(arg.as_arg(with_types=True, name=argname))
            else:
                kernel_args.append(arg.as_arg(with_types=True, name=name))

        host_code_stream.write(
            """\
// Signature of kernel function (with raw pointers) for argument matching
DACE_EXPORTED void {kernel_function_name}({kernel_args});\n\n""".format(
                kernel_function_name=kernel_function_name,
                kernel_args=", ".join(kernel_args)), sdfg)
Example #3
0
 def make_kernel_argument(data,
                          var_name,
                          is_output,
                          with_vectorization,
                          interface_id=None):
     """Return the C++ parameter declaration for a kernel argument.

     :param data: Data descriptor (array, stream, or scalar-like).
     :param var_name: Base name of the argument.
     :param is_output: Whether this is the output side of the interface.
     :param with_vectorization: If True, use the (possibly vectorized) dtype
         for arrays; otherwise use the scalar base type.
     :param interface_id: Optional interface ID appended to array names.
     """
     if isinstance(data, dt.Array):
         # Global arrays are passed as raw pointers; the interface variable
         # name encodes the access direction.
         var_name = cpp.array_interface_variable(var_name, is_output, None)
         if interface_id is not None:
             # Fixed: original had a redundant double assignment here
             # (``var_name = var_name = f"..."``).
             var_name = f"{var_name}_{interface_id}"
         if with_vectorization:
             dtype = data.dtype
         else:
             dtype = data.dtype.base_type
         return "{} *{}".format(dtype.ctype, var_name)
     if isinstance(data, dt.Stream):
         # Streams are passed by reference as hlslib FIFO objects.
         ctype = "dace::FIFO<{}, {}, {}>".format(data.dtype.base_type.ctype,
                                                 data.dtype.veclen,
                                                 data.buffer_size)
         return "{} &{}".format(ctype, var_name)
     # Scalars and other descriptors know how to print themselves.
     return data.as_arg(with_types=True, name=var_name)
Example #4
0
    def generate_nsdfg_arguments(self, sdfg, dfg, state, node):
        """Build the list of memlet reference strings used to pass arguments
        into a nested SDFG call.

        For data that is defined as an ``ArrayInterface`` (global-memory
        pointers in device code), a separate raw-pointer reference is emitted
        for the input and for the output side; otherwise a single reference
        per connector is used.
        """
        # Connectors that are both input and output share the same name, unless
        # they are pointers to global memory in device code, in which case they
        # are split into explicit input and output interfaces
        inout = set(node.in_connectors.keys() & node.out_connectors.keys())

        memlet_references = []
        # Sort by connector name so the generated argument order is
        # deterministic across runs.
        for _, _, _, vconn, in_memlet in sorted(
                state.in_edges(node), key=lambda e: e.dst_conn or ""):
            if in_memlet.data is None:
                continue
            is_memory_interface = (self._dispatcher.defined_vars.get(
                in_memlet.data, 1)[0] == DefinedType.ArrayInterface)
            if is_memory_interface:
                # Input side of the interface: a read-only raw pointer.
                interface_name = cpp.array_interface_variable(
                    vconn, False, None)
                # Register the raw pointer as a defined variable
                self._dispatcher.defined_vars.add(
                    interface_name, DefinedType.Pointer,
                    node.in_connectors[vconn].ctype)
                interface_ref = cpp.emit_memlet_reference(
                    self._dispatcher,
                    sdfg,
                    in_memlet,
                    interface_name,
                    conntype=node.in_connectors[vconn],
                    is_write=False)
                memlet_references.append(interface_ref)
            if vconn in inout:
                # In/out connectors are handled by the output loop below
                # (except for the interface reference added above).
                continue
            # NOTE(review): this call runs even when the reference is
            # discarded below — it may have registration side effects on the
            # dispatcher; preserve the call order.
            ref = cpp.emit_memlet_reference(self._dispatcher,
                                            sdfg,
                                            in_memlet,
                                            vconn,
                                            conntype=node.in_connectors[vconn],
                                            is_write=False)
            if not is_memory_interface:
                memlet_references.append(ref)

        for _, uconn, _, _, out_memlet in sorted(
                state.out_edges(node), key=lambda e: e.src_conn or ""):
            if out_memlet.data is None:
                continue
            ref = cpp.emit_memlet_reference(
                self._dispatcher,
                sdfg,
                out_memlet,
                uconn,
                conntype=node.out_connectors[uconn],
                is_write=True)
            is_memory_interface = (self._dispatcher.defined_vars.get(
                out_memlet.data, 1)[0] == DefinedType.ArrayInterface)
            if is_memory_interface:
                # Output side of the interface: a writable raw pointer.
                interface_name = cpp.array_interface_variable(
                    uconn, True, None)
                # Register the raw pointer as a defined variable
                self._dispatcher.defined_vars.add(
                    interface_name, DefinedType.Pointer,
                    node.out_connectors[uconn].ctype)
                memlet_references.append(
                    cpp.emit_memlet_reference(
                        self._dispatcher,
                        sdfg,
                        out_memlet,
                        interface_name,
                        conntype=node.out_connectors[uconn],
                        is_write=True))
            else:
                memlet_references.append(ref)

        return memlet_references
Example #5
0
    def generate_module(self, sdfg, state, name, subgraph, parameters,
                        module_stream, entry_stream, host_stream):
        """Generates a module that will run as a dataflow function in the FPGA
           kernel.

        :param parameters: Iterable of ``(is_output, name, descriptor,
            interface_id)`` tuples describing the module's arguments.
        :param module_stream: Receives the module function definition.
        :param entry_stream: Receives the call site in the top-level function.
        :param host_stream: Receives host-side launch code (RTL path only).
        """

        state_id = sdfg.node_id(state)
        dfg = sdfg.nodes()[state_id]

        # Build two parallel argument lists: how the module is *called* from
        # the top-level function, and how its own signature is declared.
        kernel_args_call = []
        kernel_args_module = []
        for is_output, pname, p, interface_id in parameters:
            if isinstance(p, dt.Array):
                arr_name = cpp.array_interface_variable(pname, is_output, None)
                # Add interface ID to called module, but not to the module
                # arguments
                argname = arr_name
                if interface_id is not None:
                    argname = f"{arr_name}_{interface_id}"

                kernel_args_call.append(argname)
                dtype = p.dtype
                kernel_args_module.append("{} {}*{}".format(
                    dtype.ctype, "const " if not is_output else "", arr_name))
            else:
                if isinstance(p, dt.Stream):
                    kernel_args_call.append(
                        p.as_arg(with_types=False, name=pname))
                    if p.is_stream_array():
                        kernel_args_module.append(
                            "dace::FIFO<{}, {}, {}> {}[{}]".format(
                                p.dtype.base_type.ctype, p.veclen,
                                p.buffer_size, pname, p.size_string()))
                    else:
                        kernel_args_module.append(
                            "dace::FIFO<{}, {}, {}> &{}".format(
                                p.dtype.base_type.ctype, p.veclen,
                                p.buffer_size, pname))
                else:
                    kernel_args_call.append(
                        p.as_arg(with_types=False, name=pname))
                    kernel_args_module.append(
                        p.as_arg(with_types=True, name=pname))

        # Check if we are generating an RTL module, in which case only the
        # accesses to the streams should be handled
        rtl_tasklet = None
        for n in subgraph.nodes():
            if (isinstance(n, dace.nodes.Tasklet)
                    and n.language == dace.dtypes.Language.SystemVerilog):
                rtl_tasklet = n
                break
        if rtl_tasklet:
            entry_stream.write(
                f'// [RTL] HLSLIB_DATAFLOW_FUNCTION({name}, {", ".join(kernel_args_call)});'
            )
            module_stream.write(
                f'// [RTL] void {name}({", ".join(kernel_args_module)});\n\n')

            # _1 in names are due to vitis
            for node in subgraph.source_nodes():
                if isinstance(sdfg.arrays[node.data], dt.Stream):
                    if node.data not in self._stream_connections:
                        self._stream_connections[node.data] = [None, None]
                    for edge in state.out_edges(node):
                        rtl_name = "{}_{}_{}_{}".format(
                            edge.dst, sdfg.sdfg_id, sdfg.node_id(state),
                            state.node_id(edge.dst))
                        self._stream_connections[
                            node.data][1] = '{}_top_1.s_axis_{}'.format(
                                rtl_name, edge.dst_conn)

            for node in subgraph.sink_nodes():
                if isinstance(sdfg.arrays[node.data], dt.Stream):
                    if node.data not in self._stream_connections:
                        self._stream_connections[node.data] = [None, None]
                    for edge in state.in_edges(node):
                        rtl_name = "{}_{}_{}_{}".format(
                            edge.src, sdfg.sdfg_id, sdfg.node_id(state),
                            state.node_id(edge.src))
                        self._stream_connections[
                            node.data][0] = '{}_top_1.m_axis_{}'.format(
                                rtl_name, edge.src_conn)

            # Make the dispatcher trigger generation of the RTL module, but
            # ignore the generated code, as the RTL codegen will generate the
            # appropriate files.
            ignore_stream = CodeIOStream()
            self._dispatcher.dispatch_subgraph(sdfg,
                                               subgraph,
                                               state_id,
                                               ignore_stream,
                                               ignore_stream,
                                               skip_entry_node=False)

            # Launch the kernel from the host code
            rtl_name = self.rtl_tasklet_name(rtl_tasklet, state, sdfg)
            host_stream.write(
                f"  auto kernel_{rtl_name} = program.MakeKernel(\"{rtl_name}_top\", {', '.join([name for _, name, p, _ in parameters if not isinstance(p, dt.Stream)])}).ExecuteTaskFork();",
                sdfg, state_id, rtl_tasklet)

            return

        # create a unique module name to prevent name clashes
        module_function_name = f"module_{name}_{sdfg.sdfg_id}"

        # Unrolling processing elements: if the first scope of the subgraph
        # is an unrolled map, generate a processing element for each iteration
        scope_children = subgraph.scope_children()
        top_scopes = [
            n for n in scope_children[None]
            if isinstance(n, dace.sdfg.nodes.EntryNode)
        ]
        unrolled_loops = 0
        if len(top_scopes) == 1:
            scope = top_scopes[0]
            if scope.unroll:
                self._unrolled_pes.add(scope.map)
                # Bug fix: the original used
                # ``kernel_args_call += ", ".join(scope.map.params)``, which
                # extends the *list* with the individual characters of the
                # joined string. Append the parameter names as separate
                # arguments so the later ", ".join() produces valid code.
                kernel_args_call += scope.map.params
                # NOTE(review): ``scope.params`` presumably forwards to
                # ``scope.map.params`` — confirm on the entry node class.
                kernel_args_module += ["int " + p for p in scope.params]
                for p, r in zip(scope.map.params, scope.map.range):
                    if len(r) > 3:
                        raise cgx.CodegenError("Strided unroll not supported")
                    entry_stream.write(
                        "for (size_t {param} = {begin}; {param} < {end}; "
                        "{param} += {increment}) {{\n#pragma HLS UNROLL".
                        format(param=p,
                               begin=r[0],
                               end=r[1] + 1,
                               increment=r[2]))
                    unrolled_loops += 1

        # Generate caller code in top-level function
        entry_stream.write(
            "HLSLIB_DATAFLOW_FUNCTION({}, {});".format(
                module_function_name, ", ".join(kernel_args_call)), sdfg,
            state_id)

        # Close the unroll loops opened above.
        for _ in range(unrolled_loops):
            entry_stream.write("}")

        # ----------------------------------------------------------------------
        # Generate kernel code
        # ----------------------------------------------------------------------

        self._dispatcher.defined_vars.enter_scope(subgraph)

        module_body_stream = CodeIOStream()

        module_body_stream.write(
            "void {}({}) {{".format(module_function_name,
                                    ", ".join(kernel_args_module)), sdfg,
            state_id)

        # Register the array interface as a naked pointer for use inside the
        # FPGA kernel
        interfaces_added = set()
        for is_output, argname, arg, _ in parameters:
            if (not (isinstance(arg, dt.Array)
                     and arg.storage == dace.dtypes.StorageType.FPGA_Global)):
                continue
            ctype = dtypes.pointer(arg.dtype).ctype
            ptr_name = cpp.array_interface_variable(argname, is_output, None)
            if not is_output:
                ctype = f"const {ctype}"
            self._dispatcher.defined_vars.add(ptr_name, DefinedType.Pointer,
                                              ctype)
            if argname in interfaces_added:
                continue
            interfaces_added.add(argname)
            self._dispatcher.defined_vars.add(argname,
                                              DefinedType.ArrayInterface,
                                              ctype,
                                              allow_shadowing=True)
        module_body_stream.write("\n")

        # Allocate local transients
        data_to_allocate = (set(subgraph.top_level_transients()) -
                            set(sdfg.shared_transients()) -
                            set([p[1] for p in parameters]))
        allocated = set()
        for node in subgraph.nodes():
            if not isinstance(node, dace.sdfg.nodes.AccessNode):
                continue
            if node.data not in data_to_allocate or node.data in allocated:
                continue
            allocated.add(node.data)
            self._dispatcher.dispatch_allocate(sdfg, state, state_id, node,
                                               module_stream,
                                               module_body_stream)

        self._dispatcher.dispatch_subgraph(sdfg,
                                           subgraph,
                                           state_id,
                                           module_stream,
                                           module_body_stream,
                                           skip_entry_node=False)

        module_stream.write(module_body_stream.getvalue(), sdfg, state_id)
        module_stream.write("}\n\n")

        self._dispatcher.defined_vars.exit_scope(subgraph)