예제 #1
0
    def generate_scope(self, sdfg: dace.SDFG, scope: ScopeSubgraphView,
                       state_id: int, function_stream: CodeIOStream,
                       callsite_stream: CodeIOStream):
        entry_node = scope.source_nodes()[0]

        loop_type = list(set([sdfg.arrays[a].dtype for a in sdfg.arrays]))[0]
        ltype_size = loop_type.bytes

        long_type = copy.copy(dace.int64)
        long_type.ctype = 'int64_t'

        self.counter_type = {
            1: dace.int8,
            2: dace.int16,
            4: dace.int32,
            8: long_type
        }[ltype_size]

        callsite_stream.write('{')

        # Define all input connectors of the map entry
        state_dfg = sdfg.node(state_id)
        for e in dace.sdfg.dynamic_map_inputs(state_dfg, entry_node):
            if e.data.data != e.dst_conn:
                callsite_stream.write(
                    self.cpu_codegen.memlet_definition(
                        sdfg, e.data, False, e.dst_conn,
                        e.dst.in_connectors[e.dst_conn]), sdfg, state_id,
                    entry_node)

        # We only create an SVE do-while in the innermost loop
        for param, rng in zip(entry_node.map.params, entry_node.map.range):
            begin, end, stride = (sym2cpp(r) for r in rng)

            self.dispatcher.defined_vars.enter_scope(sdfg)

            # Check whether we are in the innermost loop
            if param != entry_node.map.params[-1]:
                # Default C++ for-loop
                callsite_stream.write(
                    f'for(auto {param} = {begin}; {param} <= {end}; {param} += {stride}) {{'
                )
            else:
                # Generate the SVE loop header

                # The name of our loop predicate is always __pg_{param}
                self.dispatcher.defined_vars.add('__pg_' + param,
                                                 DefinedType.Scalar, 'svbool_t')

                # Declare our counting variable (e.g. i) and precompute the loop predicate for our range
                callsite_stream.write(
                    f'''{self.counter_type} {param} = {begin};
                    svbool_t __pg_{param} = svwhilele_b{ltype_size * 8}({param}, ({self.counter_type}) {end});
                    do {{''', sdfg, state_id, entry_node)

        # Dispatch the subgraph generation
        self.dispatcher.dispatch_subgraph(sdfg,
                                          scope,
                                          state_id,
                                          function_stream,
                                          callsite_stream,
                                          skip_entry_node=True,
                                          skip_exit_node=True)

        # Close the loops from above (in reverse)
        for param, rng in zip(reversed(entry_node.map.params),
                              reversed(entry_node.map.range)):
            # The innermost loop is SVE and needs a special while-footer, otherwise we just add the closing bracket
            if param != entry_node.map.params[-1]:
                # Close the default C++ for-loop
                callsite_stream.write('}')
            else:
                # Generate the SVE loop footer

                _, end, stride = (sym2cpp(r) for r in rng)

                # Increase the counting variable (according to the number of processed elements)
                # Then recompute the loop predicate and test for it
                callsite_stream.write(
                    f'''{param} += svcntp_b{ltype_size * 8}(__pg_{param}, __pg_{param}) * {stride};
                    __pg_{param} = svwhilele_b{ltype_size * 8}({param}, ({self.counter_type}) {end});
                    }} while(svptest_any(svptrue_b{ltype_size * 8}(), __pg_{param}));''',
                    sdfg, state_id, entry_node)

            self.dispatcher.defined_vars.exit_scope(sdfg)

        callsite_stream.write('}')
예제 #2
0
def concurrent_subgraphs(graph):
    """ Finds subgraphs of an SDFGState or ScopeSubgraphView that can
        run concurrently. """
    from dace.sdfg.scope import ScopeSubgraphView

    if not isinstance(graph, (SDFGState, ScopeSubgraphView)):
        raise TypeError(
            "Expected SDFGState or ScopeSubgraphView, got: {}".format(
                type(graph).__name__))
    candidates = graph.source_nodes()
    components = collections.OrderedDict()  # {start node: nodes in component}
    for cand in candidates:
        if isinstance(cand, nd.AccessNode):
            # AccessNodes can be read from multiple concurrent components, so
            # check all out edges
            start_nodes = [e.dst for e in graph.out_edges(cand)]
            for n in start_nodes:
                if n not in components:
                    components[n] = {cand, n}
                else:
                    # Components can read from multiple start arrays
                    components[n].add(cand)
        else:
            # The source node == the first control or compute node
            components[cand] = {cand}
    subgraphs = []  # [{nodes in subgraph}]
    for i, start_node in enumerate(components):
        # Do BFS and find all nodes reachable from this start node
        seen = set()
        to_search = [start_node]
        while len(to_search) > 0:
            node = to_search.pop()
            if node in seen:
                continue
            seen.add(node)
            for e in graph.out_edges(node):
                if e.dst not in seen:
                    to_search.append(e.dst)
        # If this component overlaps with any previously determined components,
        # fuse them
        to_delete = []
        for i, other in enumerate(subgraphs):
            if len(other & seen) > 0:
                to_delete.append(i)
        if len(to_delete) == 0:
            # If there was no overlap, this is a concurrent subgraph
            subgraphs.append(seen | components[start_node])
        else:
            # Merge overlapping subgraphs
            new_subgraph = seen | components[start_node]

            for index in reversed(to_delete):
                new_subgraph |= subgraphs.pop(index)

            subgraphs.append(new_subgraph)

    # Now stick each of the found components in a ScopeSubgraphView and return
    # them. Sort according to original order of nodes
    all_nodes = graph.nodes()
    return [
        ScopeSubgraphView(graph, [n for n in all_nodes if n in sg], None)
        for sg in subgraphs
    ]
예제 #3
0
파일: codegen.py 프로젝트: am-ivanov/dace
    def generate_scope(self, sdfg: dace.SDFG, scope: ScopeSubgraphView, state_id: int, function_stream: CodeIOStream,
                       callsite_stream: CodeIOStream):
        entry_node = scope.source_nodes()[0]
        current_map = entry_node.map
        self.current_map = current_map

        if len(current_map.params) > 1:
            raise util.NotSupportedError('SVE map must be one dimensional')

        loop_types = list(set([util.get_base_type(sdfg.arrays[a].dtype) for a in sdfg.arrays]))

        # Edge case if no arrays are used
        loop_type = loop_types[0] if len(loop_types) > 0 else dace.int64

        ltype_size = loop_type.bytes

        long_type = copy.copy(dace.int64)
        long_type.ctype = 'int64_t'

        self.counter_type = {1: dace.int8, 2: dace.int16, 4: dace.int32, 8: long_type}[ltype_size]

        callsite_stream.write('{')
        self.dispatcher.defined_vars.enter_scope(scope)

        # Define all dynamic input connectors of the map entry
        state_dfg = sdfg.node(state_id)
        for e in dace.sdfg.dynamic_map_inputs(state_dfg, entry_node):
            if e.data.data != e.dst_conn:
                callsite_stream.write(
                    self.cpu_codegen.memlet_definition(sdfg, e.data, False, e.dst_conn,
                                                       e.dst.in_connectors[e.dst_conn]), sdfg, state_id, entry_node)

        param = current_map.params[0]
        rng = current_map.range[0]
        begin, end, stride = (sym2cpp(r) for r in rng)

        # Generate the SVE loop header
        # The name of our loop predicate is always __pg_{param}
        self.dispatcher.defined_vars.add('__pg_' + param, DefinedType.Scalar, 'svbool_t')

        # Declare our counting variable (e.g. i) and precompute the loop predicate for our range
        callsite_stream.write(f'{self.counter_type} {param} = {begin};')

        end_param = f'__{param}_to'
        callsite_stream.write(f'{self.counter_type} {end_param} = {end};')

        callsite_stream.write(f'svbool_t __pg_{param} = svwhilele_b{ltype_size * 8}({param}, {end_param});')

        # Test for the predicate
        callsite_stream.write(f'while(svptest_any(svptrue_b{ltype_size * 8}(), __pg_{param})) {{')

        # Allocate scope related memory
        for node, _ in scope.all_nodes_recursive():
            if isinstance(node, nodes.Tasklet):
                # Create empty shared registers for outputs into other tasklets
                for edge in state_dfg.out_edges(node):
                    if isinstance(edge.dst, dace.nodes.Tasklet):
                        self.generate_out_register(sdfg, state_dfg, edge, callsite_stream, True)

        # Dispatch the subgraph generation
        self.dispatcher.dispatch_subgraph(sdfg,
                                          scope,
                                          state_id,
                                          function_stream,
                                          callsite_stream,
                                          skip_entry_node=True,
                                          skip_exit_node=True)

        # Increase the counting variable (according to the number of processed elements)
        size_letter = {1: 'b', 2: 'h', 4: 'w', 8: 'd'}[ltype_size]
        callsite_stream.write(f'{param} += svcnt{size_letter}() * {stride};')

        # Then recompute the loop predicate
        callsite_stream.write(f'__pg_{param} = svwhilele_b{ltype_size * 8}({param}, {end_param});')

        callsite_stream.write('}')

        self.dispatcher.defined_vars.exit_scope(scope)
        callsite_stream.write('}')