Exemple #1
0
    def on_tend(self,
                timer_name: str,
                stream: CodeIOStream,
                sdfg=None,
                state=None,
                node=None):
        idstr = self._idstr(sdfg, state, node)

        state_id = -1
        node_id = -1
        if state is not None:
            state_id = sdfg.node_id(state)
            if node is not None:
                node_id = state.node_id(node)

        stream.write(
            '''auto __dace_tend_{id} = std::chrono::high_resolution_clock::now();
unsigned long int __dace_ts_start_{id} = std::chrono::duration_cast<std::chrono::microseconds>(__dace_tbegin_{id}.time_since_epoch()).count();
unsigned long int __dace_ts_end_{id} = std::chrono::duration_cast<std::chrono::microseconds>(__dace_tend_{id}.time_since_epoch()).count();
__state->report.add_completion("{timer_name}", "Timer", __dace_ts_start_{id}, __dace_ts_end_{id}, {sdfg_id}, {state_id}, {node_id});'''
            .format(timer_name=timer_name,
                    id=idstr,
                    sdfg_id=sdfg.sdfg_id,
                    state_id=state_id,
                    node_id=node_id))
Exemple #2
0
 def __init__(self, *args, **kwargs):
     """Set up target dispatching and per-SDFG code containers."""
     # Dispatcher that routes code generation to registered targets;
     # this object also acts as the state dispatcher.
     self._dispatcher = disp.TargetDispatcher(self)
     self._dispatcher.register_state_dispatcher(self)
     # Streams accumulating global initialization / finalization code.
     self._initcode = CodeIOStream()
     self._exitcode = CodeIOStream()
     # Extra field declarations injected into the generated state struct.
     self.statestruct: List[str] = []
     # Environments in use (populated elsewhere — TODO confirm by whom).
     self.environments: List[Any] = []
Exemple #3
0
    def generate_header(self, sdfg: SDFG, global_stream: CodeIOStream,
                        callsite_stream: CodeIOStream):
        """ Generate the header of the frame-code. Code exists in a separate
            function for overriding purposes.
            :param sdfg: The input SDFG.
            :param global_stream: Stream to write to (global).
            :param callsite_stream: Stream to write to (at call site).
        """
        # Preamble shared by all generated files
        global_stream.write(
            '/* DaCe AUTO-GENERATED FILE. DO NOT MODIFY */\n'
            '#include <dace/dace.h>\n', sdfg)

        self.generate_fileheader(sdfg, callsite_stream)

        # Open the internal program function
        signature = 'void __program_%s_internal(%s)\n{\n' % (sdfg.name,
                                                             sdfg.signature())
        callsite_stream.write(signature, sdfg)

        # Let every registered instrumentation provider emit its preamble
        for provider in self._dispatcher.instrumentation.values():
            if provider is None:
                continue
            provider.on_sdfg_begin(sdfg, callsite_stream, global_stream)
Exemple #4
0
    def copy_memory(self, sdfg: SDFG, dfg: SDFGState, state_id: int, src_node: nodes.Node, dst_node: nodes.Node,
                    edge: gr.MultiConnectorEdge[mm.Memlet], function_stream: CodeIOStream,
                    callsite_stream: CodeIOStream) -> None:
        """Generate an SVE write-conflict-resolution (WCR) copy for an edge.

        :param edge: Edge whose memlet carries the WCR expression.
        :param callsite_stream: Stream receiving the generated code.
        :raises util.NotSupportedError: If the WCR reduction has no SVE
            counterpart.
        :raises NotImplementedError: For non-conflicted vector reductions
            (horizontal non-atomic reduction is not implemented).

        Fix: the original ended with an unreachable
        ``return super().copy_memory(...)`` after both branches had already
        returned or raised; that dead code is removed.
        """
        # Check whether it is a known reduction that is possible in SVE
        reduction_type = detect_reduction_type(edge.data.wcr)
        if reduction_type not in util.REDUCTION_TYPE_TO_SVE:
            raise util.NotSupportedError('Unsupported reduction in SVE')

        nc = not is_write_conflicted(dfg, edge)
        desc = edge.src.desc(sdfg)
        if nc and isinstance(desc.dtype, (dtypes.pointer, dtypes.vector)):
            ######################
            # Horizontal non-atomic reduction
            raise NotImplementedError()

        # WCR on vectors works in two steps:
        # 1. Reduce the SVE register using SVE instructions into a scalar
        # 2. WCR the scalar to memory using DaCe functionality
        wcr = self.cpu_codegen.write_and_resolve_expr(sdfg, edge.data, not nc, None, '@', dtype=desc.dtype)
        callsite_stream.write(wcr[:wcr.find('@')] + util.REDUCTION_TYPE_TO_SVE[reduction_type] +
                              f'(svptrue_{util.TYPE_TO_SVE_SUFFIX[desc.dtype]}(), ' + src_node.label +
                              wcr[wcr.find('@') + 1:] + ');')
Exemple #5
0
    def generate_header(self, sdfg: SDFG, global_stream: CodeIOStream,
                        callsite_stream: CodeIOStream):
        """ Generate the header of the frame-code. Code exists in a separate
            function for overriding purposes.
            :param sdfg: The input SDFG.
            :param global_stream: Stream to write to (global).
            :param callsite_stream: Stream to write to (at call site).
        """
        # Frame-code preamble
        global_stream.write(
            '/* DaCe AUTO-GENERATED FILE. DO NOT MODIFY */\n'
            '#include <dace/dace.h>\n', sdfg)

        # Collect state-struct fields required by the environments
        for environment in self.environments:
            self.statestruct.extend(environment.state_fields)

        # Instrumentation preamble
        if len(self._dispatcher.instrumentation) > 1:
            self.statestruct.append('dace::perf::Report report;')
            # Reset the report on every invocation, if so configured
            reset_each_run = config.Config.get_bool('instrumentation',
                                                    'report_each_invocation')
            if reset_each_run:
                callsite_stream.write('__state->report.reset();', sdfg)

        self.generate_fileheader(sdfg, global_stream, 'frame')
Exemple #6
0
    def allocate_array(self, sdfg: SDFG, dfg: SDFGState, state_id: int,
                       node: nodes.Node, nodedesc: data.Data,
                       global_stream: CodeIOStream,
                       declaration_stream: CodeIOStream,
                       allocation_stream: CodeIOStream) -> None:
        """Allocate data, mapping it onto SVE registers where applicable."""
        # Data placed directly in an SVE register only needs bookkeeping
        if nodedesc.storage == dtypes.StorageType.SVE_Register:
            self.dispatcher.defined_vars.add(node.data, DefinedType.Scalar,
                                             util.TYPE_TO_SVE[nodedesc.dtype])
            return

        in_sve_scope = util.get_sve_scope(sdfg, dfg, node) is not None
        if (in_sve_scope and isinstance(nodedesc, data.Scalar)
                and isinstance(nodedesc.dtype, dtypes.vector)):
            # Special allocation if vector Code->Code register in SVE scope
            # We prevent dace::vec<>'s and allocate SVE registers instead
            if self.dispatcher.defined_vars.has(node.data):
                sve_type = util.TYPE_TO_SVE[nodedesc.dtype.vtype]
                self.dispatcher.defined_vars.add(node.data, DefinedType.Scalar,
                                                 sve_type)
                declaration_stream.write(f'{sve_type} {node.data};')
            return

        # Everything else falls back to the regular CPU allocation
        self.cpu_codegen.allocate_array(sdfg, dfg, state_id, node, nodedesc,
                                        global_stream, declaration_stream,
                                        allocation_stream)
Exemple #7
0
 def define_out_memlet(self, sdfg: sdfg.SDFG, dfg: state.StateSubgraphView,
                       state_id: int, src_node: nodes.Node,
                       dst_node: nodes.Node, edge: graph.MultiConnectorEdge,
                       function_stream: prettycode.CodeIOStream,
                       callsite_stream: prettycode.CodeIOStream):
     """
         Generate output copy code (handled within the rtl tasklet code).
     """
     # Only Tasklet -> AccessNode edges are supported here
     if not (isinstance(edge.src, nodes.Tasklet)
             and isinstance(edge.dst, nodes.AccessNode)):
         raise RuntimeError(
             "Not handling define_out_memlet case of type {} -> {}.".format(
                 type(edge.src), type(edge.dst)))

     conn_type = src_node.out_connectors[edge.src_conn]
     if isinstance(conn_type, dtypes.pointer):
         # pointer accessor
         line: str = "{} {} = &{}[0];".format(conn_type.ctype, edge.src_conn,
                                              edge.dst.data)
     elif isinstance(conn_type, dtypes.vector):
         # vector accessor
         line: str = "{} {} = *({} *)(&{}[0]);".format(
             conn_type.ctype, edge.src_conn, conn_type.ctype, edge.dst.data)
     else:
         # scalar accessor
         line: str = "{}* {} = &{}[0];".format(conn_type.ctype, edge.src_conn,
                                               edge.dst.data)
     # write accessor to file
     callsite_stream.write(line)
Exemple #8
0
    def on_tbegin(self, stream: CodeIOStream, sdfg=None, state=None,
                  node=None):
        """Emit code that starts a high-resolution timer for this element."""
        idstr = self._idstr(sdfg, state, node)
        begin_code = ('auto __dace_tbegin_%s = '
                      'std::chrono::high_resolution_clock::now();' % idstr)
        stream.write(begin_code)
Exemple #9
0
 def copy_memory(self, sdfg: sdfg.SDFG, dfg: state.StateSubgraphView,
                 state_id: int, src_node: nodes.Node, dst_node: nodes.Node,
                 edge: graph.MultiConnectorEdge,
                 function_stream: prettycode.CodeIOStream,
                 callsite_stream: prettycode.CodeIOStream):
     """
         Generate input/output memory copies from the array references to local variables (i.e. for the tasklet code).
     """
     # Only AccessNode -> Tasklet edges are supported here
     if not (isinstance(edge.src, nodes.AccessNode)
             and isinstance(edge.dst, nodes.Tasklet)):
         raise RuntimeError(
             "Not handling copy_memory case of type {} -> {}.".format(
                 type(edge.src), type(edge.dst)))

     conn_type = dst_node.in_connectors[edge.dst_conn]
     if isinstance(conn_type, dtypes.pointer):
         # pointer accessor
         line: str = "{} {} = &{}[0];".format(conn_type.ctype, edge.dst_conn,
                                              edge.src.data)
     elif isinstance(conn_type, dtypes.vector):
         # vector accessor
         line: str = "{} {} = *({} *)(&{}[0]);".format(
             conn_type.ctype, edge.dst_conn, conn_type.ctype, edge.src.data)
     else:
         # scalar accessor
         line: str = "{}* {} = &{}[0];".format(conn_type.ctype, edge.dst_conn,
                                               edge.src.data)
     # write accessor to file
     callsite_stream.write(line)
Exemple #10
0
    def allocate_array(self, sdfg: dace.SDFG, dfg: StateSubgraphView,
                       state_id: int, node: nodes.AccessNode,
                       function_stream: CodeIOStream,
                       declaration_stream: CodeIOStream,
                       allocation_stream: CodeIOStream):
        """Declare a WMMA fragment for a Tensor Core access node."""
        name = node.data
        nodedesc = node.desc(sdfg)

        # Based on the hardware, the total size must be 16^2
        assert nodedesc.total_size == 16 * 16
        # Majority is detected by the strides of the data
        majority = 'row' if nodedesc.strides[-1] == 1 else 'col'

        # Write a fragment based on the storage type
        if nodedesc.storage != dace.StorageType.TensorCore_Accumulator:
            matrix = 'a' if 'A' in nodedesc.storage.name else 'b'
            declaration_stream.write(
                'wmma::fragment<wmma::matrix_{mat}, '
                '16, 16, 16, half, wmma::{maj}_major> '
                '{name};'.format(mat=matrix, maj=majority, name=name), sdfg,
                state_id, node)
        else:
            declaration_stream.write(
                'wmma::fragment<wmma::accumulator, '
                '16, 16, 16, float> {};'.format(name), sdfg, state_id, node)
Exemple #11
0
 def define_out_memlet(self, sdfg: dace.SDFG, dfg: StateSubgraphView,
                       state_id: int, src_node: nodes.Node,
                       dst_node: nodes.Node, edge: MultiConnectorEdge,
                       function_stream: CodeIOStream,
                       callsite_stream: CodeIOStream):
     """Emit the accessor for an output memlet targeting a WMMA fragment."""
     # Output memlets that are directed at WMMA fragments can use the "auto"
     # keyword for simplicity.
     connector = edge.src_conn
     target = edge.data.data
     callsite_stream.write(f'auto& {connector} = {target};')
Exemple #12
0
    def add_header(self, function_stream: CodeIOStream):
        """Write the SVE include and register-size define, once per file."""
        if not self.has_generated_header:
            self.has_generated_header = True

            function_stream.write('#include <arm_sve.h>\n')

            # TODO: Find this automatically at compile time
            function_stream.write(f'#define {util.REGISTER_BYTE_SIZE} 64\n')
Exemple #13
0
 def __init__(self, *args, **kwargs):
     """Set up target dispatching and per-SDFG code-generation state."""
     # Dispatcher that routes code generation to registered targets;
     # this object also acts as the state dispatcher.
     self._dispatcher = disp.TargetDispatcher(self)
     self._dispatcher.register_state_dispatcher(self)
     # Streams accumulating global initialization / finalization code.
     self._initcode = CodeIOStream()
     self._exitcode = CodeIOStream()
     # Extra field declarations injected into the generated state struct.
     self.statestruct: List[str] = []
     self.environments: List[Any] = []
     # Maps an SDFG element to the access nodes allocated at its scope;
     # tuple is presumably (state id, node id, access node) — TODO confirm.
     self.to_allocate: DefaultDict[
         Union[SDFG, SDFGState, nodes.EntryNode],
         List[Tuple[int, int,
                    nodes.AccessNode]]] = collections.defaultdict(list)
Exemple #14
0
    def generate_kernel_internal(self, sdfg, state, kernel_name, subgraphs,
                                 kernel_stream, function_stream,
                                 callsite_stream):
        """Main entry function for generating a Xilinx kernel.

        :param sdfg: The SDFG to generate code for.
        :param state: The state containing the kernel.
        :param kernel_name: Name of the kernel.
        :param subgraphs: Subgraphs of ``state`` turned into kernel modules.
        :param kernel_stream: Stream receiving the generated device code.
        :param function_stream: Stream for host-side declarations.
        :param callsite_stream: Stream at the host call site.
        """

        # Partition the state's data into the various parameter classes
        (global_data_parameters, top_level_local_data, subgraph_parameters,
         scalar_parameters, symbol_parameters,
         nested_global_transients) = self.make_parameters(
             sdfg, state, subgraphs)

        # Scalar parameters are never output
        sc_parameters = [(False, pname, param)
                         for pname, param in scalar_parameters]

        host_code_stream = CodeIOStream()

        # Generate host code
        self.generate_host_header(sdfg, kernel_name,
                                  global_data_parameters + sc_parameters,
                                  symbol_parameters, host_code_stream)
        self.generate_host_function_boilerplate(
            sdfg, state, kernel_name, global_data_parameters + sc_parameters,
            symbol_parameters, nested_global_transients, host_code_stream,
            function_stream, callsite_stream)
        self.generate_host_function_body(
            sdfg, state, kernel_name, global_data_parameters + sc_parameters,
            symbol_parameters, host_code_stream)
        # Store code to be passed to compilation phase
        self._host_codes.append((kernel_name, host_code_stream.getvalue()))

        # Now we write the device code
        module_stream = CodeIOStream()
        entry_stream = CodeIOStream()

        state_id = sdfg.node_id(state)

        self.generate_kernel_boilerplate_pre(sdfg, state_id, kernel_name,
                                             global_data_parameters,
                                             scalar_parameters,
                                             symbol_parameters, module_stream,
                                             entry_stream)

        # Emit allocations
        for node in top_level_local_data:
            self._dispatcher.dispatch_allocate(sdfg, state, state_id, node,
                                               module_stream, entry_stream)
            self._dispatcher.dispatch_initialize(sdfg, state, state_id, node,
                                                 module_stream, entry_stream)

        self.generate_modules(sdfg, state, kernel_name, subgraphs,
                              subgraph_parameters, sc_parameters,
                              symbol_parameters, module_stream, entry_stream,
                              host_code_stream)

        # Concatenate modules and kernel entry into the final device stream
        kernel_stream.write(module_stream.getvalue())
        kernel_stream.write(entry_stream.getvalue())

        self.generate_kernel_boilerplate_post(kernel_stream, sdfg, state_id)
Exemple #15
0
    def on_sdfg_begin(self, sdfg: SDFG, local_stream: CodeIOStream,
                      global_stream: CodeIOStream,
                      codegen: 'DaCeCodeGenerator'):
        """On top-level SDFG entry, set up the data serializer and its setter."""
        # Only the top-level SDFG owns the serializer
        if sdfg.parent is not None:
            return

        self.codegen = codegen
        codegen.statestruct.append('dace::DataSerializer *serializer;')
        sdfg.append_init_code(
            '__state->serializer = new dace::DataSerializer("");\n')

        # Add method that controls serializer input
        global_stream.write(self._generate_report_setter(sdfg))
Exemple #16
0
    def __init__(self, frame_codegen, sdfg):
        """Build the standalone MPI code object and register for MPI maps.

        :param frame_codegen: Frame-code generator providing the dispatcher
            and the shared file header.
        :param sdfg: SDFG to generate MPI init/exit code for.
        """
        self._frame = frame_codegen
        self._dispatcher = frame_codegen.dispatcher
        dispatcher = self._dispatcher

        # File header (custom types/constants) shared with the main program
        fileheader = CodeIOStream()
        self._frame.generate_fileheader(sdfg, fileheader)

        # Standalone code object holding MPI initialization/finalization
        self._codeobj = CodeObject(
            sdfg.name + '_mpi', """
#include <dace/dace.h>
#include <mpi.h>

MPI_Comm __dace_mpi_comm;
int __dace_comm_size = 1;
int __dace_comm_rank = 0;

{file_header}

DACE_EXPORTED int __dace_init_mpi({params});
DACE_EXPORTED void __dace_exit_mpi({params});

int __dace_init_mpi({params}) {{
    int isinit = 0;
    if (MPI_Initialized(&isinit) != MPI_SUCCESS)
        return 1;
    if (!isinit) {{
        if (MPI_Init(NULL, NULL) != MPI_SUCCESS)
            return 1;
    }}

    MPI_Comm_dup(MPI_COMM_WORLD, &__dace_mpi_comm);
    MPI_Comm_rank(__dace_mpi_comm, &__dace_comm_rank);
    MPI_Comm_size(__dace_mpi_comm, &__dace_comm_size);

    printf(\"MPI was initialized on proc %i of %i\\n\", __dace_comm_rank,
           __dace_comm_size);
    return 0;
}}

void __dace_exit_mpi({params}) {{
    MPI_Comm_free(&__dace_mpi_comm);
    MPI_Finalize();

    printf(\"MPI was finalized on proc %i of %i\\n\", __dace_comm_rank,
           __dace_comm_size);
}}
""".format(params=sdfg.signature(), file_header=fileheader.getvalue()), 'cpp',
            MPICodeGen, 'MPI')

        # Register dispatchers
        dispatcher.register_map_dispatcher(dtypes.ScheduleType.MPI, self)
Exemple #17
0
    def get_generated_codeobjects(self):
        """Return the MPI support code object (init/exit functions).

        The generated C++ file duplicates ``MPI_COMM_WORLD`` and records
        rank/size; it is compiled separately from the main program code.
        """
        fileheader = CodeIOStream()
        sdfg = self._global_sdfg
        self._frame.generate_fileheader(sdfg, fileheader)

        # Additional init parameters for the SDFG's free symbols
        params_comma = self._global_sdfg.init_signature(
            free_symbols=self._frame.free_symbols(self._global_sdfg))
        if params_comma:
            params_comma = ', ' + params_comma

        codeobj = CodeObject(
            sdfg.name + '_mpi', """
#include <dace/dace.h>
#include <mpi.h>

MPI_Comm __dace_mpi_comm;
int __dace_comm_size = 1;
int __dace_comm_rank = 0;

{file_header}

DACE_EXPORTED int __dace_init_mpi({sdfg.name}_t *__state{params});
DACE_EXPORTED void __dace_exit_mpi({sdfg.name}_t *__state);

int __dace_init_mpi({sdfg.name}_t *__state{params}) {{
    int isinit = 0;
    if (MPI_Initialized(&isinit) != MPI_SUCCESS)
        return 1;
    if (!isinit) {{
        if (MPI_Init(NULL, NULL) != MPI_SUCCESS)
            return 1;
    }}

    MPI_Comm_dup(MPI_COMM_WORLD, &__dace_mpi_comm);
    MPI_Comm_rank(__dace_mpi_comm, &__dace_comm_rank);
    MPI_Comm_size(__dace_mpi_comm, &__dace_comm_size);

    printf(\"MPI was initialized on proc %i of %i\\n\", __dace_comm_rank,
           __dace_comm_size);
    return 0;
}}

void __dace_exit_mpi({sdfg.name}_t *__state) {{
    MPI_Comm_free(&__dace_mpi_comm);
    MPI_Finalize();

    printf(\"MPI was finalized on proc %i of %i\\n\", __dace_comm_rank,
           __dace_comm_size);
}}
""".format(params=params_comma, sdfg=sdfg, file_header=fileheader.getvalue()),
            'cpp', MPICodeGen, 'MPI')
        return [codeobj]
Exemple #18
0
    def on_tend(self,
                timer_name: str,
                stream: CodeIOStream,
                sdfg=None,
                state=None,
                node=None):
        idstr = self._idstr(sdfg, state, node)

        stream.write(
            '''auto __dace_tend_{id} = std::chrono::high_resolution_clock::now();
std::chrono::duration<double, std::milli> __dace_tdiff_{id} = __dace_tend_{id} - __dace_tbegin_{id};
dace::perf::report.add("timer_{timer_name}", __dace_tdiff_{id}.count());'''.
            format(timer_name=timer_name, id=idstr))
Exemple #19
0
    def generate_fileheader(self, sdfg: SDFG, global_stream: CodeIOStream):
        """ Generate a header in every output file that includes custom types
            and constants.
            :param sdfg: The input SDFG.
            :param global_stream: Stream to write to (global).
        """
        #########################################################
        # Custom types: unique dtypes of all arrays, including nested SDFGs
        datatypes = {
            arr.dtype
            for _, _, arr in sdfg.arrays_recursive() if arr is not None
        }

        # Emit unique definitions, padded with blank writes when any exist
        wrote_any = False
        for typ in datatypes:
            if not hasattr(typ, 'emit_definition'):
                continue
            if not wrote_any:
                global_stream.write("", sdfg)
                wrote_any = True
            global_stream.write(typ.emit_definition(), sdfg)
        if wrote_any:
            global_stream.write("", sdfg)

        #########################################################
        # Write constants
        self.generate_constants(sdfg, global_stream)

        # Append global code of this SDFG and all nested ones
        for sd in sdfg.all_sdfgs_recursive():
            global_stream.write(sd.global_code, sd)
Exemple #20
0
    def dispatch_allocate(self,
                          sdfg: SDFG,
                          dfg: ScopeSubgraphView,
                          state_id: int,
                          node: nodes.AccessNode,
                          datadesc: dt.Data,
                          function_stream: prettycode.CodeIOStream,
                          callsite_stream: prettycode.CodeIOStream,
                          declare: bool = True,
                          allocate: bool = True):
        """ Dispatches a code generator for data allocation.

            :param sdfg: The SDFG the data belongs to.
            :param dfg: Scope subgraph in which the allocation occurs.
            :param state_id: State index within the SDFG.
            :param node: Access node of the data to allocate.
            :param datadesc: Data descriptor; its storage type selects the
                target code generator, its lifetime the target stream.
            :param function_stream: Global (function-level) stream.
            :param callsite_stream: Stream at the allocation site.
            :param declare: If True, emit the declaration.
            :param allocate: If True, emit the allocation.
        """
        # Record which target generator handles this storage type
        self._used_targets.add(self._array_dispatchers[datadesc.storage])

        if datadesc.lifetime is dtypes.AllocationLifetime.Persistent:
            # Persistent data: redirect allocation to the frame's init code.
            # NOTE(review): the fresh declaration stream created here is never
            # read back in this method — presumably consumed elsewhere or
            # intentionally discarded; verify.
            declaration_stream = CodeIOStream()
            callsite_stream = self.frame._initcode
        else:
            declaration_stream = callsite_stream

        if declare and not allocate:
            # Declaration only
            self._array_dispatchers[datadesc.storage].declare_array(
                sdfg, dfg, state_id, node, datadesc, function_stream,
                declaration_stream)
        elif allocate:
            # Declaration and allocation
            self._array_dispatchers[datadesc.storage].allocate_array(
                sdfg, dfg, state_id, node, datadesc, function_stream,
                declaration_stream, callsite_stream)
Exemple #21
0
 def generate_constants(self, sdfg: SDFG, callsite_stream: CodeIOStream):
     """Write SDFG constants as ``constexpr`` definitions.

     :param sdfg: SDFG whose ``constants_prop`` entries are emitted.
     :param callsite_stream: Stream receiving the definitions.
     """
     for cstname, (csttype, cstval) in sdfg.constants_prop.items():
         if isinstance(csttype, data.Array):
             # Emit array constants as C initializer lists. Joining the
             # elements replaces the original quadratic string
             # concatenation and also handles empty arrays.
             elements = ", ".join(
                 str(v) for v in np.nditer(cstval, order='C'))
             const_str = ("constexpr " + csttype.dtype.ctype + " " +
                          cstname + "[" + str(cstval.size) + "] = {" +
                          elements + "};\n")
             callsite_stream.write(const_str, sdfg)
         else:
             callsite_stream.write(
                 "constexpr %s %s = %s;\n" %
                 (csttype.dtype.ctype, cstname, sym2cpp(cstval)), sdfg)
Exemple #22
0
 def generate_constants(self, sdfg: SDFG, callsite_stream: CodeIOStream):
     """Write SDFG constants as ``constexpr`` definitions.

     Array constants become C initializer lists; any other value is
     emitted with ``constexpr auto``.

     :param sdfg: SDFG whose ``constants`` entries are emitted.
     :param callsite_stream: Stream receiving the definitions.
     """
     for cstname, cstval in sdfg.constants.items():
         if isinstance(cstval, np.ndarray):
             dtype = dtypes.typeclass(cstval.dtype.type)
             # Joining the elements replaces the original quadratic string
             # concatenation and also handles empty arrays.
             elements = ", ".join(
                 str(v) for v in np.nditer(cstval, order='C'))
             const_str = ("constexpr " + dtype.ctype + " " + cstname + "[" +
                          str(cstval.size) + "] = {" + elements + "};\n")
             callsite_stream.write(const_str, sdfg)
         else:
             callsite_stream.write(
                 "constexpr auto %s = %s;\n" % (cstname, str(cstval)), sdfg)
Exemple #23
0
    def _setup_gpu_runtime(self, sdfg: SDFG, global_stream: CodeIOStream):
        """Include the GPU runtime header once, for the configured backend."""
        if self.gpu_runtime_init:
            return
        self.gpu_runtime_init = True

        # Backend-to-header lookup; unknown backends are rejected
        headers = {
            'cuda': 'cuda_runtime.h',
            'hip': 'hip/hip_runtime.h',
        }
        self.backend = config.Config.get('compiler', 'cuda', 'backend')
        if self.backend not in headers:
            raise NameError('GPU backend "%s" not recognized' % self.backend)
        header_name = headers[self.backend]

        global_stream.write('#include <%s>' % header_name)

        # For other file headers
        sdfg.append_global_code('\n#include <%s>' % header_name, None)
Exemple #24
0
    def generate_host_function_body(self, sdfg: dace.SDFG,
                                    state: dace.SDFGState, kernel_name: str,
                                    predecessors: list, parameters: list,
                                    rtl_tasklet_names: list,
                                    kernel_stream: CodeIOStream,
                                    instrumentation_stream: CodeIOStream):
        '''
        Generate the host-specific code for spawning and synchronizing the given kernel.
        :param sdfg: The SDFG the kernel belongs to.
        :param state: The state in which the kernel is launched.
        :param kernel_name: Name of the kernel function and its .xclbin file.
        :param predecessors: list containing all the name of kernels that must be finished before starting this one
        :param parameters: list containing the kernel parameters (of all kernels in this state)
        :param rtl_tasklet_names: list of RTL tasklet kernels to join after launch.
        :param kernel_stream: Device-specific code stream
        :param instrumentation_stream: Code for profiling kernel execution time.
        '''

        # Build the kernel argument list, expanding arrays over their
        # HBM bank interfaces
        kernel_args = []
        for _, name, p, interface_ids in parameters:
            if isinstance(p, dt.Array):
                for bank, _ in fpga.iterate_hbm_interface_ids(
                        p, interface_ids):
                    kernel_args.append(
                        p.as_arg(False,
                                 name=fpga.fpga_ptr(name, p, sdfg, bank)))
            else:
                kernel_args.append(p.as_arg(False, name=name))

        kernel_function_name = kernel_name
        kernel_file_name = "{}.xclbin".format(kernel_name)

        # Check if this kernel depends from other kernels
        needs_synch = len(predecessors) > 0

        if needs_synch:
            # Build a vector containing all the events associated with the kernels from which this one depends
            kernel_deps_name = f"deps_{kernel_name}"
            kernel_stream.write(f"std::vector<cl::Event> {kernel_deps_name};")
            for pred in predecessors:
                # concatenate events from predecessor kernel
                kernel_stream.write(
                    f"{kernel_deps_name}.push_back({pred}_event);")

        # Launch HLS kernel, passing synchronization events (if any)
        kernel_stream.write(
            f"""\
  auto {kernel_name}_kernel = program.MakeKernel({kernel_function_name}, "{kernel_function_name}", {", ".join(kernel_args)});
  cl::Event {kernel_name}_event = {kernel_name}_kernel.ExecuteTaskFork({f'{kernel_deps_name}.begin(), {kernel_deps_name}.end()' if needs_synch else ''});
  all_events.push_back({kernel_name}_event);""", sdfg, sdfg.node_id(state))
        if state.instrument == dtypes.InstrumentationType.FPGA:
            self.instrument_opencl_kernel(kernel_name, sdfg.node_id(state),
                                          sdfg.sdfg_id, instrumentation_stream)

        # Join RTL tasklets
        for name in rtl_tasklet_names:
            kernel_stream.write(f"kernel_{name}.wait();\n", sdfg,
                                sdfg.node_id(state))
Exemple #25
0
    def generate_out_register(self,
                              sdfg: SDFG,
                              state: SDFGState,
                              edge: graph.MultiConnectorEdge[mm.Memlet],
                              code: CodeIOStream,
                              use_data_name: bool = False) -> bool:
        """
            Responsible for generating temporary out registers in a Tasklet, given an outgoing edge.
            Returns `True` if a writeback of this register is needed.

            :param sdfg: The SDFG the edge belongs to.
            :param state: The state containing the Tasklet.
            :param edge: Outgoing edge of the Tasklet.
            :param code: Stream the register definition is written to.
            :param use_data_name: If True, name the register after the memlet
                data instead of the source connector.
        """
        if edge.src_conn is None:
            # No source connector: nothing to define, hence no writeback.
            # (Fix: previously a bare `return` yielded None, breaking the
            # documented bool contract; None was falsy, so this stays
            # backward-compatible.)
            return False

        dst_node = state.memlet_path(edge)[-1].dst

        src_type = edge.src.out_connectors[edge.src_conn]
        src_name = edge.src_conn

        if use_data_name:
            src_name = edge.data.data

        if isinstance(dst_node, nodes.AccessNode) and isinstance(
                dst_node.desc(sdfg), data.Stream):
            # Streams don't need writeback and are treated differently
            self.stream_associations[edge.src_conn] = (edge.data.data,
                                                       src_type.base_type)
            return False
        elif edge.data.wcr is not None:
            # WCR is addressed within the unparser to capture conditionals
            self.wcr_associations[edge.src_conn] = (dst_node, edge,
                                                    src_type.base_type)
            return False

        # Create temporary registers
        ctype = None
        if util.is_vector(src_type):
            ctype = util.TYPE_TO_SVE[src_type.type]
        elif util.is_scalar(src_type):
            ctype = src_type.ctype
        else:
            raise util.NotSupportedError(
                'Unsupported Code->Code edge (pointer)')

        self.dispatcher.defined_vars.add(src_name, DefinedType.Scalar, ctype)
        code.write(f'{ctype} {src_name};')

        return True
Exemple #26
0
    def create_empty_definition(self,
                                conn: dace.typeclass,
                                edge: gr.MultiConnectorEdge[mm.Memlet],
                                callsite_stream: CodeIOStream,
                                output: bool = False,
                                is_code_code: bool = False):
        """ Creates a simple variable definition `type name;`, which works for both vectors and regular data types. """
        # Pick the connector that names the variable
        var_name = edge.dst_conn if output else edge.src_conn
        if is_code_code:
            # For edges between Tasklets (Code->Code), we use the data as name because these registers are temporary and shared
            var_name = edge.data.data

        if not isinstance(conn, dtypes.vector):
            raise NotImplementedError(
                f'Output into scalar or pointer is not supported ({var_name})')

        # Creates an SVE register
        if conn.type not in util.TYPE_TO_SVE:
            raise util.NotSupportedError('Data type not supported')

        # In case of a WCR, we must initialize it with the identity value.
        # This is to prevent cases in a conditional WCR, where we don't write and it is filled with garbage.
        # Currently, the initial is 0, because product reduction is not supported in SVE.
        init_str = ''
        if edge.data.wcr:
            init_str = ' = {}(0)'.format(util.instr('dup', type=conn.type))

        var_type = conn.type
        var_ctype = util.TYPE_TO_SVE[var_type]

        callsite_stream.write('{} {}{};'.format(var_ctype, var_name,
                                                init_str))

        self.dispatcher.defined_vars.add(var_name, var_type, var_ctype)
Exemple #27
0
 def copy_memory(self, sdfg: sdfg.SDFG, dfg: state.StateSubgraphView,
                 state_id: int, src_node: nodes.Node, dst_node: nodes.Node,
                 edge: graph.MultiConnectorEdge,
                 function_stream: prettycode.CodeIOStream,
                 callsite_stream: prettycode.CodeIOStream):
     """
         Generate input/output memory copies from the array references to local variables (i.e. for the tasklet code).

         :param sdfg: The SDFG being generated.
         :param dfg: The state subgraph containing the copy edge.
         :param state_id: Index of the state within the SDFG.
         :param src_node: Source node of the copy.
         :param dst_node: Destination node of the copy.
         :param edge: The memlet edge describing the copy.
         :param function_stream: Stream for global (function-level) code.
         :param callsite_stream: Stream the accessor line is written to.
         :raises RuntimeError: If the edge endpoints (or the data container
             type) are not one of the handled cases.
     """
     if isinstance(edge.src, nodes.AccessNode) and isinstance(
             edge.dst, nodes.Tasklet):  # handle AccessNode->Tasklet
         if isinstance(dst_node.in_connectors[edge.dst_conn],
                       dtypes.pointer):  # pointer accessor
             line: str = "{} {} = &{}[0];".format(
                 dst_node.in_connectors[edge.dst_conn].ctype, edge.dst_conn,
                 edge.src.data)
         elif isinstance(dst_node.in_connectors[edge.dst_conn],
                         dtypes.vector):  # vector accessor
             line: str = "{} {} = *({} *)(&{}[0]);".format(
                 dst_node.in_connectors[edge.dst_conn].ctype, edge.dst_conn,
                 dst_node.in_connectors[edge.dst_conn].ctype, edge.src.data)
         else:  # scalar accessor
             arr = sdfg.arrays[edge.data.data]
             if isinstance(arr, data.Array):
                 line: str = "{}* {} = &{}[0];".format(
                     dst_node.in_connectors[edge.dst_conn].ctype,
                     edge.dst_conn, edge.src.data)
             elif isinstance(arr, data.Scalar):
                 line: str = "{} {} = {};".format(
                     dst_node.in_connectors[edge.dst_conn].ctype,
                     edge.dst_conn, edge.src.data)
             else:
                 # Previously `line` was left unbound here, which caused an
                 # opaque NameError at the write below; fail explicitly.
                 raise RuntimeError(
                     "Not handling copy_memory for data container of type "
                     "{}.".format(type(arr)))
     elif isinstance(edge.src, nodes.MapEntry) and isinstance(
             edge.dst, nodes.Tasklet):
         rtl_name = self.unique_name(edge.dst, sdfg.nodes()[state_id], sdfg)
         self.n_unrolled[rtl_name] = symbolic.evaluate(
             edge.src.map.range[0][1] + 1, sdfg.constants)
         line: str = f'{dst_node.in_connectors[edge.dst_conn]} {edge.dst_conn} = &{edge.data.data}[{edge.src.map.params[0]}*{edge.data.volume}];'
     else:
         raise RuntimeError(
             "Not handling copy_memory case of type {} -> {}.".format(
                 type(edge.src), type(edge.dst)))
     # write accessor to file
     callsite_stream.write(line)
Exemple #28
0
    def generate_node(self, sdfg: SDFG, state: SDFGState, state_id: int,
                      node: nodes.Node, function_stream: CodeIOStream,
                      callsite_stream: CodeIOStream):
        """ Generates code for a node inside an SVE scope. Only Tasklets are
            handled here; any other node type is ignored. """
        self.add_header(function_stream)

        if not isinstance(node, nodes.Tasklet):
            return

        sve_scope = util.get_sve_scope(sdfg, state, node)

        # Reset the stream/WCR variable mappings for this tasklet
        self.stream_associations = {}
        self.wcr_associations = {}

        callsite_stream.write('{')
        self.dispatcher.defined_vars.enter_scope(node)

        ##################
        # Generate tasklet

        # Emit reads for every incoming edge
        for in_edge in state.in_edges(node):
            self.generate_read(sdfg, state, sve_scope.map, in_edge,
                               callsite_stream)

        # Allocate temporary output registers; keep the edges that will
        # need a writeback to memory afterwards
        writeback_edges = [
            out_edge for out_edge in state.out_edges(node)
            if self.generate_out_register(sdfg, state, out_edge,
                                          callsite_stream)
        ]

        # The tasklet body itself
        self.unparse_tasklet(sdfg, state, state_id, node, function_stream,
                             callsite_stream)

        # Flush the temporary registers back to memory
        for out_edge in writeback_edges:
            self.generate_writeback(sdfg, state, sve_scope, out_edge,
                                    callsite_stream)

        self.dispatcher.defined_vars.exit_scope(node)
        callsite_stream.write('}')
Exemple #29
0
    def generate_scope(self, sdfg: dace.SDFG, scope: ScopeSubgraphView,
                       state_id: int, function_stream: CodeIOStream,
                       callsite_stream: CodeIOStream):
        """ Fully unrolls a map scope: emits one brace-delimited code block
            per combination of map indices, binding each map parameter to a
            concrete value via an `auto` declaration. """
        map_entry = scope.source_nodes()[0]

        # Expand every map dimension into the explicit list of index values
        # it covers (begin, begin+stride, ... up to and including end).
        dim_values = []
        for begin, end, stride in map_entry.map.range:
            current = begin
            values = []
            while current <= end:
                values.append(current)
                current += stride
            dim_values.append(values)

        # One unrolled block for each point of the Cartesian index space
        for point in product(*dim_values):
            callsite_stream.write('{')
            for param, value in zip(map_entry.map.params, point):
                callsite_stream.write(f'auto {param} = {sym2cpp(value)};')
            self._dispatcher.dispatch_subgraph(sdfg,
                                               scope,
                                               state_id,
                                               function_stream,
                                               callsite_stream,
                                               skip_entry_node=True,
                                               skip_exit_node=True)
            callsite_stream.write('}')
Exemple #30
0
    def generate_scope(self, sdfg: dace.SDFG, scope: ScopeSubgraphView,
                       state_id: int, function_stream: CodeIOStream,
                       callsite_stream: CodeIOStream):
        """ Fully unrolls a map scope, emitting one code block per index
            combination. Each map parameter is declared as a `constexpr`
            and temporarily registered as an SDFG constant so that nested
            code generation sees concrete values.

            :param sdfg: The SDFG being generated.
            :param scope: The subgraph view of the map scope to unroll.
            :param state_id: Index of the state within the SDFG.
            :param function_stream: Stream for global (function-level) code.
            :param callsite_stream: Stream the unrolled blocks are written to.
        """

        entry_node: nd.MapEntry = scope.source_nodes()[0]
        index_list = []

        # Materialize the concrete index values of every map dimension.
        for begin, end, stride in entry_node.map.range:
            l = []
            while begin <= end:
                l.append(begin)
                begin += stride
            index_list.append(l)

        # Work on a deep copy of the constants so the unroll parameters can
        # be added without mutating the caller's constant properties.
        sdfgconsts = sdfg.constants_prop
        sdfg.constants_prop = copy.deepcopy(sdfg.constants_prop)

        mapsymboltypes = entry_node.new_symbols(sdfg, scope,
                                                [entry_node.map.params])
        try:
            for indices in product(*index_list):
                callsite_stream.write('{')
                nsdfg_unroll_info = None
                for param, index in zip(entry_node.map.params, indices):
                    # Prepare nested SDFGs once per unrolled iteration
                    if nsdfg_unroll_info is None:
                        nsdfg_unroll_info = self.nsdfg_prepare_unroll(
                            scope, str(param), str(index))
                    else:
                        self.nsdfg_prepare_unroll(scope, str(param), str(index))
                    callsite_stream.write(
                        f"constexpr {mapsymboltypes[param]} {param} = "
                        f"{dace.codegen.targets.common.sym2cpp(index)};\n", sdfg)
                    sdfg.add_constant(param, int(index))

                callsite_stream.write('{')
                self._dispatcher.dispatch_subgraph(
                    sdfg,
                    scope,
                    state_id,
                    function_stream,
                    callsite_stream,
                    skip_entry_node=True,
                    skip_exit_node=True,
                )
                callsite_stream.write('}')
                callsite_stream.write('}')
                self.nsdfg_after_unroll(nsdfg_unroll_info)
        finally:
            # Restore the original constants even if code generation fails
            # mid-unroll (previously the temporary constants leaked on error).
            sdfg.constants_prop = sdfgconsts