Exemple #1
0
    def generate_scope(self, sdfg: dace.SDFG, scope: ScopeSubgraphView,
                       state_id: int, function_stream: CodeIOStream,
                       callsite_stream: CodeIOStream):
        """Emit a map scope as fully unrolled code.

        Every combination of map indices gets its own braced block in which
        each map parameter is declared as an ``auto`` variable holding that
        index, followed by whatever the dispatcher generates for the scope
        body (the map entry and exit nodes themselves are skipped).

        :param sdfg: SDFG forwarded to the dispatcher.
        :param scope: Scope subgraph; its first source node is used as the
                      map entry.
        :param state_id: State index forwarded to the dispatcher.
        :param function_stream: Stream forwarded to the dispatcher.
        :param callsite_stream: Stream that receives the unrolled code.
        :raises ValueError: If a non-empty range has a stride that is not
                            strictly positive.
        """
        entry_node = scope.source_nodes()[0]
        map_params = entry_node.map.params

        # Enumerate the concrete indices of every dimension; the end of each
        # range is treated as inclusive.
        index_list = []
        for begin, end, stride in entry_node.map.range:
            # A zero or negative stride would never move past ``end``, so the
            # enumeration below would spin forever; fail loudly instead.
            if begin <= end and stride <= 0:
                raise ValueError(
                    f'Cannot unroll map range ({begin}, {end}, {stride}): '
                    'stride must be positive')
            dim_indices = []
            while begin <= end:
                dim_indices.append(begin)
                begin += stride
            index_list.append(dim_indices)

        # One braced block per point of the iteration space.
        for indices in product(*index_list):
            callsite_stream.write('{')
            for param, index in zip(map_params, indices):
                callsite_stream.write(f'auto {param} = {sym2cpp(index)};')
            self._dispatcher.dispatch_subgraph(sdfg,
                                               scope,
                                               state_id,
                                               function_stream,
                                               callsite_stream,
                                               skip_entry_node=True,
                                               skip_exit_node=True)
            callsite_stream.write('}')
Exemple #2
0
    def generate_scope(self, sdfg: dace.SDFG, scope: ScopeSubgraphView,
                       state_id: int, function_stream: CodeIOStream,
                       callsite_stream: CodeIOStream):
        """Emit a map scope as fully unrolled code with constant indices.

        For every combination of map indices, a braced block is written in
        which each map parameter is declared as a ``constexpr`` of the type
        reported by ``entry_node.new_symbols`` and is also registered as an
        SDFG constant, followed by the dispatcher output for the scope body
        (map entry and exit nodes are skipped). The SDFG's ``constants_prop``
        is replaced by a deep copy for the duration of the generation and the
        original object is put back afterwards, even if generation fails.

        :param sdfg: SDFG being generated; temporarily receives the map
                     parameters as constants.
        :param scope: Scope subgraph; its first source node is used as the
                      map entry.
        :param state_id: State index forwarded to the dispatcher.
        :param function_stream: Stream forwarded to the dispatcher.
        :param callsite_stream: Stream that receives the unrolled code.
        :raises ValueError: If a non-empty range has a stride that is not
                            strictly positive.
        """
        entry_node: nd.MapEntry = scope.source_nodes()[0]
        index_list = []

        # Enumerate the concrete indices of every dimension; the end of each
        # range is treated as inclusive.
        for begin, end, stride in entry_node.map.range:
            # A zero or negative stride would never move past ``end``, so the
            # enumeration below would spin forever; fail loudly instead.
            if begin <= end and stride <= 0:
                raise ValueError(
                    f'Cannot unroll map range ({begin}, {end}, {stride}): '
                    'stride must be positive')
            dim_indices = []
            while begin <= end:
                dim_indices.append(begin)
                begin += stride
            index_list.append(dim_indices)

        # Work on a copy of the constants so the per-iteration additions
        # below never reach the original mapping.
        sdfgconsts = sdfg.constants_prop
        sdfg.constants_prop = copy.deepcopy(sdfg.constants_prop)

        try:
            mapsymboltypes = entry_node.new_symbols(sdfg, scope,
                                                    [entry_node.map.params])
            for indices in product(*index_list):
                callsite_stream.write('{')
                nsdfg_unroll_info = None
                for param, index in zip(entry_node.map.params, indices):
                    if nsdfg_unroll_info is None:
                        nsdfg_unroll_info = self.nsdfg_prepare_unroll(
                            scope, str(param), str(index))
                    else:
                        # NOTE(review): the result for every parameter after
                        # the first is discarded, so only the first one is
                        # handed to nsdfg_after_unroll -- confirm against
                        # nsdfg_prepare_unroll whether these results should
                        # be accumulated instead.
                        self.nsdfg_prepare_unroll(scope, str(param),
                                                  str(index))
                    callsite_stream.write(
                        f"constexpr {mapsymboltypes[param]} {param} = "
                        f"{dace.codegen.targets.common.sym2cpp(index)};\n",
                        sdfg)
                    sdfg.add_constant(param, int(index))

                # Inner block holds the scope body; the outer one holds the
                # constexpr declarations written above.
                callsite_stream.write('{')
                self._dispatcher.dispatch_subgraph(
                    sdfg,
                    scope,
                    state_id,
                    function_stream,
                    callsite_stream,
                    skip_entry_node=True,
                    skip_exit_node=True,
                )
                callsite_stream.write('}')
                callsite_stream.write('}')
                self.nsdfg_after_unroll(nsdfg_unroll_info)
        finally:
            # Always hand the original constants back, even when dispatching
            # the scope body raised.
            sdfg.constants_prop = sdfgconsts
Exemple #3
0
    def generate_scope(self, sdfg: dace.SDFG, scope: ScopeSubgraphView,
                       state_id: int, function_stream: CodeIOStream,
                       callsite_stream: CodeIOStream):
        """Emit a map scope whose innermost dimension is an SVE do-while loop.

        All map dimensions except the last are written as plain C++ ``for``
        loops. The last one becomes a ``do { ... } while`` loop governed by a
        predicate named ``__pg_<param>``. The element width used for the
        predicate and the counter type (stored in ``self.counter_type``) is
        taken from one of the dtypes of the SDFG's arrays; if the SDFG has no
        arrays, ``dace.int64`` is used.

        :param sdfg: SDFG being generated.
        :param scope: Scope subgraph; its first source node is used as the
                      map entry.
        :param state_id: Index of the state that contains the scope.
        :param function_stream: Stream forwarded to the dispatcher.
        :param callsite_stream: Stream that receives the generated loops.
        :raises KeyError: If the chosen dtype is not 1, 2, 4 or 8 bytes wide.
        """
        entry_node = scope.source_nodes()[0]

        # With several distinct dtypes the pick depends on set ordering.
        # An SDFG without arrays used to fail with IndexError here; fall back
        # to a 64-bit counter in that case.
        array_types = list(set([sdfg.arrays[a].dtype for a in sdfg.arrays]))
        loop_type = array_types[0] if array_types else dace.int64
        ltype_size = loop_type.bytes

        # Copy of int64 whose C type is spelled 'int64_t'
        long_type = copy.copy(dace.int64)
        long_type.ctype = 'int64_t'

        self.counter_type = {
            1: dace.int8,
            2: dace.int16,
            4: dace.int32,
            8: long_type
        }[ltype_size]

        callsite_stream.write('{')

        # Define all input connectors of the map entry
        state_dfg = sdfg.node(state_id)
        for e in dace.sdfg.dynamic_map_inputs(state_dfg, entry_node):
            if e.data.data != e.dst_conn:
                callsite_stream.write(
                    self.cpu_codegen.memlet_definition(
                        sdfg, e.data, False, e.dst_conn,
                        e.dst.in_connectors[e.dst_conn]), sdfg, state_id,
                    entry_node)

        # We only create an SVE do-while in the innermost loop
        for param, rng in zip(entry_node.map.params, entry_node.map.range):
            begin, end, stride = (sym2cpp(r) for r in rng)

            # One variable scope per loop level; closed again in the footer
            # loop below.
            self.dispatcher.defined_vars.enter_scope(sdfg)

            # Check whether we are in the innermost loop
            if param != entry_node.map.params[-1]:
                # Default C++ for-loop
                callsite_stream.write(
                    f'for(auto {param} = {begin}; {param} <= {end}; {param} += {stride}) {{'
                )
            else:
                # Generate the SVE loop header

                # The name of our loop predicate is always __pg_{param}
                self.dispatcher.defined_vars.add('__pg_' + param,
                                                 DefinedType.Scalar, 'svbool_t')

                # Declare our counting variable (e.g. i) and precompute the loop predicate for our range
                callsite_stream.write(
                    f'''{self.counter_type} {param} = {begin};
                    svbool_t __pg_{param} = svwhilele_b{ltype_size * 8}({param}, ({self.counter_type}) {end});
                    do {{''', sdfg, state_id, entry_node)

        # Dispatch the subgraph generation
        self.dispatcher.dispatch_subgraph(sdfg,
                                          scope,
                                          state_id,
                                          function_stream,
                                          callsite_stream,
                                          skip_entry_node=True,
                                          skip_exit_node=True)

        # Close the loops from above (in reverse)
        for param, rng in zip(reversed(entry_node.map.params),
                              reversed(entry_node.map.range)):
            # The innermost loop is SVE and needs a special while-footer, otherwise we just add the closing bracket
            if param != entry_node.map.params[-1]:
                # Close the default C++ for-loop
                callsite_stream.write('}')
            else:
                # Generate the SVE loop footer

                _, end, stride = (sym2cpp(r) for r in rng)

                # Increase the counting variable (according to the number of processed elements)
                # Then recompute the loop predicate and test for it
                callsite_stream.write(
                    f'''{param} += svcntp_b{ltype_size * 8}(__pg_{param}, __pg_{param}) * {stride};
                    __pg_{param} = svwhilele_b{ltype_size * 8}({param}, ({self.counter_type}) {end});
                    }} while(svptest_any(svptrue_b{ltype_size * 8}(), __pg_{param}));''',
                    sdfg, state_id, entry_node)

            self.dispatcher.defined_vars.exit_scope(sdfg)

        callsite_stream.write('}')
Exemple #4
0
    def generate_scope(self, sdfg: dace.SDFG, scope: ScopeSubgraphView, state_id: int, function_stream: CodeIOStream,
                       callsite_stream: CodeIOStream):
        """Emit a one-dimensional map scope as a predicated SVE while-loop.

        The map is remembered in ``self.current_map`` and the counter type
        chosen for the loop in ``self.counter_type``. The generated code
        declares the loop counter, an end bound named ``__<param>_to`` and a
        predicate named ``__pg_<param>``, then loops while any lane of the
        predicate is active.

        :param sdfg: SDFG being generated.
        :param scope: Scope subgraph; its first source node is used as the map entry.
        :param state_id: Index of the state that contains the scope.
        :param function_stream: Stream forwarded to the dispatcher.
        :param callsite_stream: Stream that receives the generated loop.
        :raises util.NotSupportedError: If the map has more than one parameter.
        """
        map_entry = scope.source_nodes()[0]
        sve_map = map_entry.map
        self.current_map = sve_map

        if len(sve_map.params) > 1:
            raise util.NotSupportedError('SVE map must be one dimensional')

        # Distinct base types of all arrays in the SDFG
        base_types = [util.get_base_type(sdfg.arrays[name].dtype) for name in sdfg.arrays]
        distinct_types = list(set(base_types))

        # Without any arrays there is nothing to pick from, so use int64
        if len(distinct_types) > 0:
            elem_type = distinct_types[0]
        else:
            elem_type = dace.int64

        elem_bytes = elem_type.bytes
        elem_bits = elem_bytes * 8

        # Copy of int64 whose C type is spelled 'int64_t'
        int64_counter = copy.copy(dace.int64)
        int64_counter.ctype = 'int64_t'

        counter_by_width = {1: dace.int8, 2: dace.int16, 4: dace.int32, 8: int64_counter}
        self.counter_type = counter_by_width[elem_bytes]

        callsite_stream.write('{')
        self.dispatcher.defined_vars.enter_scope(scope)

        # Emit definitions for dynamic map inputs whose data name differs from the connector name
        state = sdfg.node(state_id)
        for dyn_edge in dace.sdfg.dynamic_map_inputs(state, map_entry):
            if e_data_matches := (dyn_edge.data.data == dyn_edge.dst_conn):
                continue
            definition = self.cpu_codegen.memlet_definition(sdfg, dyn_edge.data, False, dyn_edge.dst_conn,
                                                            dyn_edge.dst.in_connectors[dyn_edge.dst_conn])
            callsite_stream.write(definition, sdfg, state_id, map_entry)

        param = sve_map.params[0]
        begin, end, stride = map(sym2cpp, sve_map.range[0])

        # SVE loop header: the loop predicate is always called __pg_{param}
        self.dispatcher.defined_vars.add('__pg_' + param, DefinedType.Scalar, 'svbool_t')

        # Counter variable, end bound and the initial predicate for the range
        end_param = f'__{param}_to'
        callsite_stream.write(f'{self.counter_type} {param} = {begin};')
        callsite_stream.write(f'{self.counter_type} {end_param} = {end};')
        callsite_stream.write(f'svbool_t __pg_{param} = svwhilele_b{elem_bits}({param}, {end_param});')

        # Keep looping while any lane of the predicate is active
        callsite_stream.write(f'while(svptest_any(svptrue_b{elem_bits}(), __pg_{param})) {{')

        # Create empty shared registers for tasklet outputs that feed other tasklets
        for inner_node, _ in scope.all_nodes_recursive():
            if not isinstance(inner_node, nodes.Tasklet):
                continue
            for out_edge in state.out_edges(inner_node):
                if isinstance(out_edge.dst, dace.nodes.Tasklet):
                    self.generate_out_register(sdfg, state, out_edge, callsite_stream, True)

        # Generate the body of the scope
        self.dispatcher.dispatch_subgraph(sdfg,
                                          scope,
                                          state_id,
                                          function_stream,
                                          callsite_stream,
                                          skip_entry_node=True,
                                          skip_exit_node=True)

        # Advance the counter by the element count for this width times the stride
        width_suffix = {1: 'b', 2: 'h', 4: 'w', 8: 'd'}
        size_letter = width_suffix[elem_bytes]
        callsite_stream.write(f'{param} += svcnt{size_letter}() * {stride};')

        # Refresh the predicate for the next iteration
        callsite_stream.write(f'__pg_{param} = svwhilele_b{elem_bits}({param}, {end_param});')

        callsite_stream.write('}')

        self.dispatcher.defined_vars.exit_scope(scope)
        callsite_stream.write('}')