Beispiel #1
0
class RedundantArrayCopying2(pm.Transformation):
    """ Redundant array removal for duplicated copies.

        Matches the pattern A -> B. If A additionally feeds other access
        nodes that refer to the same data as B, those duplicates are folded
        into B and removed.
    """

    _in_array = nodes.AccessNode('_')
    _out_array = nodes.AccessNode('_')

    @staticmethod
    def expressions():
        """ Returns the single pattern graph ``in_array -> out_array``. """
        pattern = nxutil.node_path_graph(RedundantArrayCopying2._in_array,
                                         RedundantArrayCopying2._out_array)
        return [pattern]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Returns True if in_array has at least one successor, other than
            out_array, that is an access node referring to the same data as
            out_array.
        """
        source = graph.nodes()[candidate[RedundantArrayCopying2._in_array]]
        target = graph.nodes()[candidate[RedundantArrayCopying2._out_array]]

        # One duplicate of the matched out_array is enough for a match
        for _, _, successor, _, _ in graph.out_edges(source):
            if not isinstance(successor, nodes.AccessNode):
                continue
            if successor != target and successor.data == target.data:
                return True

        return False

    @staticmethod
    def match_to_str(graph, candidate):
        """ Returns a short description naming the matched out_array. """
        target = graph.nodes()[candidate[RedundantArrayCopying2._out_array]]
        return 'Remove ' + str(target)

    def apply(self, sdfg):
        """ Re-attaches the outgoing edges of every duplicate of out_array
            to out_array itself, then removes the duplicate node and the
            edge that fed it.
        """
        state = sdfg.nodes()[self.state_id]
        source = state.nodes()[self.subgraph[
            RedundantArrayCopying2._in_array]]
        target = state.nodes()[self.subgraph[
            RedundantArrayCopying2._out_array]]

        for feed in state.out_edges(source):
            duplicate = feed.dst
            if not isinstance(duplicate, nodes.AccessNode):
                continue
            if not (duplicate != target and duplicate.data == target.data):
                continue

            # Move the consumers of the duplicate over to the kept node
            for consumer in state.out_edges(duplicate):
                state.add_edge(target, None, consumer.dst,
                               consumer.dst_conn, consumer.data)
                state.remove_edge(consumer)
            state.remove_edge(feed)
            state.remove_node(duplicate)

    def modifies_graph(self):
        """ This transformation changes the graph it is applied to. """
        return True
Beispiel #2
0
def output_node_for_array(state, data: str):
    """ Returns the first sink node of ``state`` that is an access node
        referring to ``data``. If there is none, a freshly constructed
        access node is returned (this function does not add it to the
        state).
    """
    existing = next((sink for sink in state.sink_nodes()
                     if isinstance(sink, nd.AccessNode)
                     and sink.data == data), None)
    if existing is not None:
        return existing

    return nd.AccessNode(data)
Beispiel #3
0
class RedundantArrayCopying3(pm.Transformation):
    """ Redundant array removal for duplicated map inputs.

        Matches the pattern MapEntry -> B. If the map entry additionally
        feeds other access nodes that refer to the same data as B, those
        duplicates are folded into B and removed.
    """

    # Number of removed nodes; only counted when "debugprint" is enabled
    _arrays_removed = 0
    _map_entry = nodes.MapEntry(nodes.Map("", [], []))
    _out_array = nodes.AccessNode("_")

    @staticmethod
    def expressions():
        """ Returns the single pattern graph ``map_entry -> out_array``. """
        pattern = nxutil.node_path_graph(RedundantArrayCopying3._map_entry,
                                         RedundantArrayCopying3._out_array)
        return [pattern]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Returns True if the map entry has at least one successor, other
            than out_array, that is an access node referring to the same
            data as out_array.
        """
        entry = graph.nodes()[candidate[RedundantArrayCopying3._map_entry]]
        target = graph.nodes()[candidate[RedundantArrayCopying3._out_array]]

        # One duplicate of the matched out_array is enough for a match
        for _, _, successor, _, _ in graph.out_edges(entry):
            if not isinstance(successor, nodes.AccessNode):
                continue
            if successor != target and successor.data == target.data:
                return True

        return False

    @staticmethod
    def match_to_str(graph, candidate):
        """ Returns a short description naming the matched out_array. """
        target = graph.nodes()[candidate[RedundantArrayCopying3._out_array]]
        return "Remove " + str(target)

    def apply(self, sdfg):
        """ Re-attaches the outgoing edges of every duplicate of out_array
            to out_array itself, then removes the duplicate node and the
            edge that fed it.
        """
        state = sdfg.nodes()[self.state_id]
        entry = state.nodes()[self.subgraph[
            RedundantArrayCopying3._map_entry]]
        target = state.nodes()[self.subgraph[
            RedundantArrayCopying3._out_array]]

        for feed in state.out_edges(entry):
            duplicate = feed.dst
            if not isinstance(duplicate, nodes.AccessNode):
                continue
            if not (duplicate != target and duplicate.data == target.data):
                continue

            # Move the consumers of the duplicate over to the kept node
            for consumer in state.out_edges(duplicate):
                state.add_edge(target, None, consumer.dst,
                               consumer.dst_conn, consumer.data)
                state.remove_edge(consumer)
            state.remove_edge(feed)
            state.remove_node(duplicate)
            if Config.get_bool("debugprint"):
                RedundantArrayCopying3._arrays_removed += 1
Beispiel #4
0
def input_node_for_array(state, data: str):
    """ Returns an access node of ``state`` that refers to ``data``.

        Source nodes are searched first, then all nodes of the state. If no
        access node matches, a freshly constructed one is returned (this
        function does not add it to the state).
    """
    def first_access(candidates):
        # First access node among the candidates that refers to `data`
        return next((node for node in candidates
                     if isinstance(node, nd.AccessNode)
                     and node.data == data), None)

    # Prefer a node that appears among the source nodes
    found = first_access(state.source_nodes())
    if found is not None:
        return found

    # Otherwise accept a matching node located anywhere in the state
    found = first_access(state.nodes())
    if found is not None:
        return found

    return nd.AccessNode(data)
Beispiel #5
0
    def apply(self, sdfg):
        """ Routes a stream output through a new transient stream placed
            between the inner map exit and the outer map exit.

            The edge that is replaced is the first outgoing edge of the
            inner map exit that leads to the outer map exit and whose data
            is a stream. The new transient is named ``'tile_' + <data>``.
        """
        state = sdfg.nodes()[self.state_id]
        tasklet = state.nodes()[self.subgraph[StreamTransient._tasklet]]
        inner_exit = state.nodes()[self.subgraph[StreamTransient._map_exit]]
        outer_exit = state.nodes()[self.subgraph[
            StreamTransient._outer_map_exit]]

        # Find the stream edge between the two map exits. If nothing
        # matches, `memlet` keeps the data of the last edge visited and
        # `stream_edge` stays None.
        memlet = None
        stream_edge = None
        for candidate in state.out_edges(inner_exit):
            memlet = candidate.data
            # TODO: What if there's more than one?
            reaches_outer = candidate.dst == outer_exit
            if reaches_outer and isinstance(sdfg.arrays[memlet.data],
                                            data.Stream):
                stream_edge = candidate
                break

        # Find the tasklet output memlet that refers to the same data
        inner_memlet = None
        for candidate in state.out_edges(tasklet):
            inner_memlet = candidate.data
            if inner_memlet.data == memlet.data:
                break

        extents = [
            symbolic.overapproximate(extent)
            for extent in inner_exit.map.range.bounding_box_size()
        ]
        dataname = memlet.data
        stream_name = 'tile_' + dataname

        # Create the new node: Temporary stream and an access node
        sdfg.add_stream(
            stream_name,
            sdfg.arrays[memlet.data].dtype,
            1,
            extents[0],
            [1],
            transient=True,
        )
        stream_node = nodes.AccessNode(stream_name)

        # The copy must be taken before any memlet is renamed
        to_stream = copy.deepcopy(memlet)
        to_stream.data = stream_node.data
        inner_memlet.data = stream_node.data

        # Reconnect, assuming one edge to the stream
        state.remove_edge(stream_edge)
        state.add_edge(inner_exit, None, stream_node, None, to_stream)
        state.add_edge(stream_node, None, outer_exit, None, memlet)
Beispiel #6
0
    def apply(self, sdfg):
        """ Routes a tasklet output through a new transient array placed
            after the inner map exit.

            The transient is named ``'trans_' + <data>`` and takes the
            shape and dtype of the original data descriptor. The tasklet
            memlet is renamed to point to the transient.
        """
        state = sdfg.nodes()[self.state_id]
        tasklet = state.nodes()[self.subgraph[StreamTransient._tasklet]]
        inner_exit = state.nodes()[self.subgraph[StreamTransient._map_exit]]
        outer_exit = state.nodes()[self.subgraph[
            StreamTransient._outer_map_exit]]

        # Pick the tasklet output with write-conflict resolution that goes
        # to the map exit. If nothing matches, `memlet` keeps the data of
        # the last edge visited.
        memlet = None
        for out_edge in state.out_edges(tasklet):
            memlet = out_edge.data
            # TODO: What if there's more than one?
            if out_edge.dst == inner_exit and out_edge.data.wcr is not None:
                break

        # Find the map exit's outgoing edge that carries the same data
        exit_edge = None
        exit_memlet = None
        for out_edge in state.out_edges(inner_exit):
            exit_memlet = out_edge.data
            if exit_memlet.data == memlet.data:
                exit_edge = out_edge
                break

        dataname = memlet.data
        transient_name = 'trans_' + dataname

        # Create a new node with the same size as the output
        original_desc = sdfg.arrays[memlet.data]
        sdfg.add_array(transient_name,
                       original_desc.shape,
                       original_desc.dtype,
                       transient=True)
        transient_node = nodes.AccessNode(transient_name)

        # Both copies are taken before the tasklet memlet is renamed
        to_transient = copy.deepcopy(memlet)
        to_transient.data = transient_node.data
        to_transient.num_accesses = memlet.num_elements()

        to_outer = copy.deepcopy(exit_memlet)
        to_outer.num_accesses = exit_memlet.num_elements()
        memlet.data = transient_node.data

        # Reconnect through the transient, assuming a single matching edge
        state.remove_edge(exit_edge)
        state.add_edge(inner_exit, exit_edge.src_conn, transient_node, None,
                       to_transient)
        state.add_edge(transient_node, None, outer_exit, exit_edge.dst_conn,
                       to_outer)
Beispiel #7
0
class MapReduceFusion(pm.Transformation):
    """ Implements the map-reduce-fusion transformation.
        Fuses a map with an immediately following reduction, where the array
        between the map and the reduction is not used anywhere else.

        The matched pattern is
        tasklet -> map exit -> in_array -> reduce -> out_array.
    """

    _tasklet = nodes.Tasklet('_')
    _tmap_exit = nodes.MapExit(nodes.Map("", [], []))
    _in_array = nodes.AccessNode('_')
    _reduce = nodes.Reduce('lambda: None', None)
    _out_array = nodes.AccessNode('_')

    @staticmethod
    def expressions():
        """ Returns the single pattern graph described in the class
            docstring. """
        return [
            nxutil.node_path_graph(MapReduceFusion._tasklet,
                                   MapReduceFusion._tmap_exit,
                                   MapReduceFusion._in_array,
                                   MapReduceFusion._reduce,
                                   MapReduceFusion._out_array)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Checks whether the matched map and reduction can be fused.

            Returns False if the intermediate array has a producer other
            than the map exit or a consumer other than the reduce node, if
            no tasklet output memlet refers to the intermediate array, if
            (in strict mode) the array is accessed elsewhere, if the
            tasklet memlet already has a different WCR, or if the subsets
            written by the map and read by the reduction differ.
        """
        tmap_exit = graph.nodes()[candidate[MapReduceFusion._tmap_exit]]
        in_array = graph.nodes()[candidate[MapReduceFusion._in_array]]
        reduce_node = graph.nodes()[candidate[MapReduceFusion._reduce]]
        tasklet = graph.nodes()[candidate[MapReduceFusion._tasklet]]

        # Make sure that the array is only accessed by the map and the reduce
        if any(src != tmap_exit
               for src, _, _, _, _ in graph.in_edges(in_array)):
            return False
        if any(dest != reduce_node
               for _, _, dest, _, _ in graph.out_edges(in_array)):
            return False

        # Tasklet output that writes the intermediate array. Without one
        # there is nothing to fuse, so report "no match" instead of letting
        # StopIteration escape from next().
        tmem_edge = next((e for e in graph.edges_between(tasklet, tmap_exit)
                          if e.data.data == in_array.data), None)
        if tmem_edge is None:
            return False
        tmem = tmem_edge.data

        # (strict) Make sure that the transient is not accessed anywhere else
        # in this state or other states
        if strict and (len([
                n for n in graph.nodes()
                if isinstance(n, nodes.AccessNode) and n.data == in_array.data
        ]) > 1 or in_array.data in sdfg.shared_transients()):
            return False

        # If memlet already has WCR and it is different from reduce node,
        # do not match
        if tmem.wcr is not None and tmem.wcr != reduce_node.wcr:
            return False

        # Verify that reduction ranges match tasklet map
        tout_memlet = graph.in_edges(in_array)[0].data
        rin_memlet = graph.out_edges(in_array)[0].data
        if tout_memlet.subset != rin_memlet.subset:
            return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        """ Returns ``"<tasklet> -> <map exit> -> <reduce>"`` for a match.

            The candidate maps pattern nodes to node indices, so the actual
            nodes are looked up in ``graph`` before they are stringified.
        """
        state_nodes = graph.nodes()
        tasklet = state_nodes[candidate[MapReduceFusion._tasklet]]
        map_exit = state_nodes[candidate[MapReduceFusion._tmap_exit]]
        reduce = state_nodes[candidate[MapReduceFusion._reduce]]

        return ' -> '.join(str(node) for node in [tasklet, map_exit, reduce])

    def apply(self, sdfg):
        """ Fuses the reduction into the map.

            Removes the intermediate array and the reduce node, turns the
            tasklet output memlet into a WCR memlet on the output array
            (with the reduced dimensions dropped from its subset), and
            connects the map exit to the consumer of the reduce node.

            Raises:
                RuntimeError: if no incoming edge of the map exit refers to
                              the intermediate array.
        """
        graph = sdfg.nodes()[self.state_id]
        tmap_exit = graph.nodes()[self.subgraph[MapReduceFusion._tmap_exit]]
        in_array = graph.nodes()[self.subgraph[MapReduceFusion._in_array]]
        reduce_node = graph.nodes()[self.subgraph[MapReduceFusion._reduce]]
        out_array = graph.nodes()[self.subgraph[MapReduceFusion._out_array]]

        # The intermediate array and the reduce node disappear
        nodes_to_remove = [in_array, reduce_node]

        memlet_edge = None
        for edge in graph.in_edges(tmap_exit):
            if edge.data.data == in_array.data:
                memlet_edge = edge
                break
        if memlet_edge is None:
            raise RuntimeError('Reduction memlet cannot be None')

        # Find which indices should be removed from new memlet. When the
        # reduce node specifies no axes, every dimension of its input subset
        # is treated as reduced.
        input_edge = graph.in_edges(reduce_node)[0]
        axes = reduce_node.axes or list(range(len(input_edge.data.subset)))
        # Must be captured before the reduce node is removed
        array_edge = graph.out_edges(reduce_node)[0]

        # Delete relevant edges and nodes
        graph.remove_nodes_from(nodes_to_remove)

        # Filter out reduced dimensions from subset
        filtered_subset = [
            dim for i, dim in enumerate(memlet_edge.data.subset)
            if i not in axes
        ]
        if len(filtered_subset) == 0:  # Output is a scalar
            filtered_subset = [0]

        # Modify edge from tasklet to map exit
        memlet_edge.data.data = out_array.data
        memlet_edge.data.wcr = reduce_node.wcr
        memlet_edge.data.wcr_identity = reduce_node.identity
        memlet_edge.data.subset = type(
            memlet_edge.data.subset)(filtered_subset)

        # Add edge from map exit to output array. The output connector name
        # is the input connector name with its 'IN_' prefix swapped.
        graph.add_edge(
            memlet_edge.dst, 'OUT_' + memlet_edge.dst_conn[3:], array_edge.dst,
            array_edge.dst_conn,
            Memlet(array_edge.data.data, array_edge.data.num_accesses,
                   array_edge.data.subset, array_edge.data.veclen,
                   reduce_node.wcr, reduce_node.identity))
Beispiel #8
0
def add_indirection_subgraph(sdfg, graph, src, dst, memlet):
    """ Adds to the specified graph a subgraph that implements indirection
        without nested AST memlet objects.

        The subgraph consists of an "Indirection" tasklet, fed from `src`
        with one index memlet per distinct indirect access plus a memlet
        over the full looked-up array, and a transient access node that
        carries the looked-up value on to `dst`.

        NOTE(review): this function only adds edges; it never removes an
        existing edge between `src` and `dst`. If the original edge has to
        go, that presumably happens in the caller -- confirm.

        :param sdfg: SDFG in which the transient holding the looked-up value
                     is registered.
        :param graph: Graph to which the new edges are added.
        :param src: Node from which the index and array memlets originate.
        :param dst: Node that receives the looked-up value on the connector
                    named `memlet.local_name`.
        :param memlet: The indirect memlet; must be an `astnodes._Memlet`.
        :raises TypeError: If `memlet` is not an `astnodes._Memlet`.
    """
    if not isinstance(memlet, astnodes._Memlet):
        raise TypeError("Expected memlet to be astnodes._Memlet")

    # NOTE(review): these two sets are never read or modified below
    indirect_inputs = set()
    indirect_outputs = set()

    # Scheme for multi-array indirection:
    # 1. look for all arrays and accesses, create set of arrays+indices
    #    from which the index memlets will be constructed from
    # 2. each separate array creates a memlet, of which num_accesses = len(set)
    # 3. one indirection tasklet receives them all + original array and
    #    produces the right output index/range memlet
    #########################
    # Step 1
    # Maps each indirectly accessed array name to the list of distinct
    # argument tuples it is accessed with (insertion order is kept)
    accesses = OrderedDict()
    newsubset = dcpy(memlet.subset)
    for dimidx, dim in enumerate(memlet.subset):
        # Range/Index disambiguation
        direct_assignment = False
        if not isinstance(dim, tuple):
            dim = [dim]
            direct_assignment = True

        for i, r in enumerate(dim):
            for expr in sympy.preorder_traversal(r):
                if symbolic.is_sympy_userfunction(expr):
                    fname = expr.func.__name__
                    if fname not in accesses:
                        accesses[fname] = []

                    # Replace function with symbol (memlet local name to-be)
                    # Identical accesses to the same array share one symbol
                    if expr.args in accesses[fname]:
                        aindex = accesses[fname].index(expr.args)
                        toreplace = 'index_' + fname + '_' + str(aindex)
                    else:
                        accesses[fname].append(expr.args)
                        toreplace = 'index_' + fname + '_' + str(
                            len(accesses[fname]) - 1)

                    # NOTE(review): the substitution is applied to the
                    # original expression `r`, not to the entry already
                    # stored in newsubset, so a later substitution in the
                    # same expression overwrites an earlier one -- confirm
                    # whether several indirections per expression can occur
                    if direct_assignment:
                        newsubset[dimidx] = r.subs(expr, toreplace)
                    else:
                        newsubset[dimidx][i] = r.subs(expr, toreplace)
    #########################
    # Step 2
    # Connector names of the indirection tasklet: the looked-up array plus
    # one input per distinct index access, and a single 'lookup' output
    ind_inputs = {'__ind_' + memlet.local_name}
    ind_outputs = {'lookup'}
    # Add accesses to inputs
    for arrname, arr_accesses in accesses.items():
        for i in range(len(arr_accesses)):
            ind_inputs.add('index_%s_%d' % (arrname, i))

    tasklet = nd.Tasklet("Indirection", ind_inputs, ind_outputs)

    # NOTE(review): this list is filled below but not used afterwards
    input_index_memlets = []
    for arrname, arr_accesses in accesses.items():
        # The result is unused; the lookup itself still runs, so it fails
        # here if `arrname` is not present in memlet.otherdeps
        arr = memlet.otherdeps[arrname]
        for i, access in enumerate(arr_accesses):
            # Memlet to load the indirection index
            indexMemlet = Memlet(arrname, 1, sbs.Indices(list(access)), 1)
            input_index_memlets.append(indexMemlet)
            graph.add_edge(src, None, tasklet, "index_%s_%d" % (arrname, i),
                           indexMemlet)

    #########################
    # Step 3
    # Create new tasklet that will perform the indirection
    indirection_ast = ast.parse("lookup = {arr}[{index}]".format(
        arr='__ind_' + memlet.local_name,
        index=', '.join([symbolic.symstr(s) for s in newsubset])))
    # Conserve line number of original indirection code
    tasklet.code = ast.copy_location(indirection_ast.body[0], memlet.ast)

    # Create transient variable to trigger the indirected load
    # (a scalar for a single access, an array otherwise)
    if memlet.num_accesses == 1:
        storage = sdfg.add_scalar('__' + memlet.local_name + '_value',
                                  memlet.data.dtype,
                                  transient=True)
    else:
        storage = sdfg.add_array('__' + memlet.local_name + '_value',
                                 memlet.data.dtype,
                                 storage=types.StorageType.Default,
                                 transient=True,
                                 shape=memlet.bounding_box_size())
    # Range covering the whole transient
    indirectRange = sbs.Range([(0, s - 1, 1) for s in storage.shape])
    dataNode = nd.AccessNode('__' + memlet.local_name + '_value')

    # Create memlet that depends on the full array that we look up in
    fullRange = sbs.Range([(0, s - 1, 1) for s in memlet.data.shape])
    fullMemlet = Memlet(memlet.dataname, memlet.num_accesses, fullRange,
                        memlet.veclen)
    graph.add_edge(src, None, tasklet, '__ind_' + memlet.local_name,
                   fullMemlet)

    # Memlet to store the final value into the transient, and to load it into
    # the tasklet that needs it
    indirectMemlet = Memlet('__' + memlet.local_name + '_value',
                            memlet.num_accesses, indirectRange, memlet.veclen)
    graph.add_edge(tasklet, 'lookup', dataNode, None, indirectMemlet)

    valueMemlet = Memlet('__' + memlet.local_name + '_value',
                         memlet.num_accesses, indirectRange, memlet.veclen)
    graph.add_edge(dataNode, None, dst, memlet.local_name, valueMemlet)
Beispiel #9
0
    def apply(self, sdfg: sd.SDFG):
        """ Transforms the given SDFG in place to run on the GPU.

            The transformation proceeds in the numbered steps below:
            collect non-transient inputs/outputs and free code nodes (0),
            clone them as transient GPU_Global arrays and rename all uses
            (1), add copy-in (2) and copy-out (3) states, move transients
            to GPU_Global or Register storage (4), wrap free code nodes in
            a one-iteration GPU map (5), set GPU schedules (6), add interim
            copy-out states for arrays used in interstate edge conditions
            (7), and finally apply strict transformations (8).

            Options read from ``self``: ``exclude_copyin``,
            ``exclude_copyout`` and ``exclude_tasklets`` (comma-separated
            strings), ``toplevel_trans``, ``register_trans``,
            ``sequential_innermaps`` and ``strict_transform``.

            :param sdfg: The SDFG to transform.
        """

        #######################################################
        # Step 0: SDFG metadata

        # Find all input and output data descriptors
        # Both lists hold (data name, data descriptor) tuples
        input_nodes = []
        output_nodes = []
        # One list of top-level code nodes per state, in state order
        global_code_nodes = [[] for _ in sdfg.nodes()]

        for i, state in enumerate(sdfg.nodes()):
            sdict = state.scope_dict()
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and node.desc(sdfg).transient == False):
                    # NOTE(review): input_nodes/output_nodes contain tuples,
                    # so the `node.data not in ...` tests here and below
                    # compare a name against (name, desc) tuples and look
                    # like they never filter anything. Duplicates are
                    # dropped later through set()/deduplicate -- confirm
                    # that this is intended.
                    if (state.out_degree(node) > 0
                            and node.data not in input_nodes):
                        # Special case: nodes that lead to dynamic map ranges
                        # must stay on host
                        for e in state.out_edges(node):
                            last_edge = state.memlet_path(e)[-1]
                            if (isinstance(last_edge.dst, nodes.EntryNode)
                                    and last_edge.dst_conn and
                                    not last_edge.dst_conn.startswith('IN_')):
                                break
                        else:
                            # No outgoing path ended in such a connector
                            input_nodes.append((node.data, node.desc(sdfg)))
                    if (state.in_degree(node) > 0
                            and node.data not in output_nodes):
                        output_nodes.append((node.data, node.desc(sdfg)))
                elif isinstance(node, nodes.CodeNode) and sdict[node] is None:
                    # Code node outside of any scope
                    if not isinstance(node, nodes.EmptyTasklet):
                        global_code_nodes[i].append(node)

            # Input nodes may also be nodes with WCR memlets and no identity
            for e in state.edges():
                if e.data.wcr is not None and e.data.wcr_identity is None:
                    if (e.data.data not in input_nodes
                            and sdfg.arrays[e.data.data].transient == False):
                        input_nodes.append(
                            (e.data.data, sdfg.arrays[e.data.data]))

        # Captured before the copy-in/copy-out states are added
        start_state = sdfg.start_state
        end_states = sdfg.sink_nodes()

        #######################################################
        # Step 1: Create cloned GPU arrays and replace originals

        # Maps original data name -> name of its GPU clone
        cloned_arrays = {}
        for inodename, inode in set(input_nodes):
            if isinstance(inode, data.Scalar):  # Scalars can remain on host
                continue
            newdesc = inode.clone()
            newdesc.storage = dtypes.StorageType.GPU_Global
            newdesc.transient = True
            name = sdfg.add_datadesc('gpu_' + inodename,
                                     newdesc,
                                     find_new_name=True)
            cloned_arrays[inodename] = name

        for onodename, onode in set(output_nodes):
            # Data that is both input and output is cloned only once
            if onodename in cloned_arrays:
                continue
            newdesc = onode.clone()
            newdesc.storage = dtypes.StorageType.GPU_Global
            newdesc.transient = True
            name = sdfg.add_datadesc('gpu_' + onodename,
                                     newdesc,
                                     find_new_name=True)
            cloned_arrays[onodename] = name

        # Replace nodes
        for state in sdfg.nodes():
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and node.data in cloned_arrays):
                    node.data = cloned_arrays[node.data]

        # Replace memlets
        for state in sdfg.nodes():
            for edge in state.edges():
                if edge.data.data in cloned_arrays:
                    edge.data.data = cloned_arrays[edge.data.data]

        #######################################################
        # Step 2: Create copy-in state
        excluded_copyin = self.exclude_copyin.split(',')

        copyin_state = sdfg.add_state(sdfg.label + '_copyin')
        sdfg.add_edge(copyin_state, start_state, ed.InterstateEdge())

        for nname, desc in dtypes.deduplicate(input_nodes):
            # Skip excluded data and data without a GPU clone (scalars)
            if nname in excluded_copyin or nname not in cloned_arrays:
                continue
            src_array = nodes.AccessNode(nname, debuginfo=desc.debuginfo)
            dst_array = nodes.AccessNode(cloned_arrays[nname],
                                         debuginfo=desc.debuginfo)
            copyin_state.add_node(src_array)
            copyin_state.add_node(dst_array)
            copyin_state.add_nedge(
                src_array, dst_array,
                memlet.Memlet.from_array(src_array.data, src_array.desc(sdfg)))

        #######################################################
        # Step 3: Create copy-out state
        excluded_copyout = self.exclude_copyout.split(',')

        copyout_state = sdfg.add_state(sdfg.label + '_copyout')
        for state in end_states:
            sdfg.add_edge(state, copyout_state, ed.InterstateEdge())

        for nname, desc in dtypes.deduplicate(output_nodes):
            if nname in excluded_copyout or nname not in cloned_arrays:
                continue
            src_array = nodes.AccessNode(cloned_arrays[nname],
                                         debuginfo=desc.debuginfo)
            dst_array = nodes.AccessNode(nname, debuginfo=desc.debuginfo)
            copyout_state.add_node(src_array)
            copyout_state.add_node(dst_array)
            copyout_state.add_nedge(
                src_array, dst_array,
                memlet.Memlet.from_array(dst_array.data, dst_array.desc(sdfg)))

        #######################################################
        # Step 4: Modify transient data storage

        for state in sdfg.nodes():
            sdict = state.scope_dict()
            for node in state.nodes():
                if isinstance(node,
                              nodes.AccessNode) and node.desc(sdfg).transient:
                    nodedesc = node.desc(sdfg)

                    # Special case: nodes that lead to dynamic map ranges must
                    # stay on host
                    if any(
                            isinstance(
                                state.memlet_path(e)[-1].dst, nodes.EntryNode)
                            for e in state.out_edges(node)):
                        continue

                    if sdict[node] is None:
                        # NOTE: the cloned arrays match too but it's the same
                        # storage so we don't care
                        nodedesc.storage = dtypes.StorageType.GPU_Global

                        # Try to move allocation/deallocation out of loops
                        if (self.toplevel_trans
                                and not isinstance(nodedesc, data.Stream)):
                            nodedesc.toplevel = True
                    else:
                        # Make internal transients registers
                        if self.register_trans:
                            nodedesc.storage = dtypes.StorageType.Register

        #######################################################
        # Step 5: Wrap free tasklets and nested SDFGs with a GPU map

        for state, gcodes in zip(sdfg.nodes(), global_code_nodes):
            for gcode in gcodes:
                if gcode.label in self.exclude_tasklets.split(','):
                    continue
                # Create map and connectors
                # (a single-iteration map, range '0:1')
                me, mx = state.add_map(gcode.label + '_gmap',
                                       {gcode.label + '__gmapi': '0:1'},
                                       schedule=dtypes.ScheduleType.GPU_Device)
                # Store in/out edges in lists so that they don't get corrupted
                # when they are removed from the graph
                in_edges = list(state.in_edges(gcode))
                out_edges = list(state.out_edges(gcode))
                # Connector names are derived from the code node's own
                # connectors: inputs pass through the entry, outputs
                # through the exit
                me.in_connectors = set('IN_' + e.dst_conn for e in in_edges)
                me.out_connectors = set('OUT_' + e.dst_conn for e in in_edges)
                mx.in_connectors = set('IN_' + e.src_conn for e in out_edges)
                mx.out_connectors = set('OUT_' + e.src_conn for e in out_edges)

                # Create memlets through map
                for e in in_edges:
                    state.remove_edge(e)
                    state.add_edge(e.src, e.src_conn, me, 'IN_' + e.dst_conn,
                                   e.data)
                    state.add_edge(me, 'OUT_' + e.dst_conn, e.dst, e.dst_conn,
                                   e.data)
                for e in out_edges:
                    state.remove_edge(e)
                    state.add_edge(e.src, e.src_conn, mx, 'IN_' + e.src_conn,
                                   e.data)
                    state.add_edge(mx, 'OUT_' + e.src_conn, e.dst, e.dst_conn,
                                   e.data)

                # Map without inputs
                # (an empty memlet still connects the code node to the entry)
                if len(in_edges) == 0:
                    state.add_nedge(me, gcode, memlet.EmptyMemlet())
        #######################################################
        # Step 6: Change all top-level maps and Reduce nodes to GPU schedule

        for i, state in enumerate(sdfg.nodes()):
            sdict = state.scope_dict()
            for node in state.nodes():
                if isinstance(node, (nodes.EntryNode, nodes.Reduce)):
                    if sdict[node] is None:
                        node.schedule = dtypes.ScheduleType.GPU_Device
                    elif (isinstance(node, nodes.EntryNode)
                          and self.sequential_innermaps):
                        node.schedule = dtypes.ScheduleType.Sequential

        #######################################################
        # Step 7: Introduce copy-out if data used in outgoing interstate edges

        # Iterate over a copy, since states are added inside the loop
        for state in list(sdfg.nodes()):
            arrays_used = set()
            for e in sdfg.out_edges(state):
                # Used arrays = intersection between symbols and cloned arrays
                arrays_used.update(
                    set(e.data.condition_symbols())
                    & set(cloned_arrays.keys()))

            # Create a state and copy out used arrays
            if len(arrays_used) > 0:
                co_state = sdfg.add_state(state.label + '_icopyout')

                # Reconnect outgoing edges to after interim copyout state
                # NOTE(review): `e` is not used in the loop body; the same
                # call is repeated once per outgoing edge. Presumably
                # change_edge_src already moves all edges of `state` in one
                # call -- confirm.
                for e in sdfg.out_edges(state):
                    nxutil.change_edge_src(sdfg, state, co_state)
                # Add unconditional edge to interim state
                sdfg.add_edge(state, co_state, ed.InterstateEdge())

                # Add copy-out nodes
                for nname in arrays_used:
                    desc = sdfg.arrays[nname]
                    src_array = nodes.AccessNode(cloned_arrays[nname],
                                                 debuginfo=desc.debuginfo)
                    dst_array = nodes.AccessNode(nname,
                                                 debuginfo=desc.debuginfo)
                    co_state.add_node(src_array)
                    co_state.add_node(dst_array)
                    co_state.add_nedge(
                        src_array, dst_array,
                        memlet.Memlet.from_array(dst_array.data,
                                                 dst_array.desc(sdfg)))

        #######################################################
        # Step 8: Strict transformations
        if not self.strict_transform:
            return

        # Apply strict state fusions greedily.
        sdfg.apply_strict_transformations()
Beispiel #10
0
class TensorflowRedundantArray(pm.Transformation):
    """ Implements the redundant array removal transformation, applied
        to remove ReadVariableOps and control dependencies.

        Matches the pattern in_array -> out_array and removes out_array,
        redirecting its consumers to in_array.
    """

    # Number of removed nodes; only counted when "debugprint" is enabled
    _arrays_removed = 0
    _in_array = nodes.AccessNode("_")
    _out_array = nodes.AccessNode("_")

    @staticmethod
    def expressions():
        """ Returns the single pattern graph ``in_array -> out_array``. """
        return [
            nxutil.node_path_graph(TensorflowRedundantArray._in_array,
                                   TensorflowRedundantArray._out_array)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Returns True if out_array is named after a ReadVariable or
            control_dependency op, in_array is transient, and both arrays
            have the same storage and the same shape.
        """
        in_array = graph.nodes()[candidate[TensorflowRedundantArray._in_array]]
        out_array = graph.nodes()[candidate[
            TensorflowRedundantArray._out_array]]

        # Just to be sure, check for the OP name in the out array
        if not ("ReadVariable" in out_array.data
                or "control_dependency" in out_array.data):
            return False

        in_desc = in_array.desc(sdfg)

        # Make sure that the candidate is a transient variable
        if not in_desc.transient:
            return False

        out_desc = out_array.desc(sdfg)

        # Make sure that both arrays are using the same storage location
        if in_desc.storage != out_desc.storage:
            return False

        # Only apply if arrays are of same shape (no need to modify subset)
        if len(in_desc.shape) != len(out_desc.shape) or any(
                i != o for i, o in zip(in_desc.shape, out_desc.shape)):
            return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        """ Returns a short description naming the matched out_array. """
        out_array = graph.nodes()[candidate[
            TensorflowRedundantArray._out_array]]

        return "Remove " + str(out_array)

    def apply(self, sdfg):
        """ Redirects all consumers of out_array to in_array, then removes
            out_array together with its first incoming edge.

            A message is printed if out_array does not have exactly one
            incoming edge; the transformation still proceeds.
        """
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        graph = sdfg.nodes()[self.state_id]
        in_array = gnode(TensorflowRedundantArray._in_array)
        out_array = gnode(TensorflowRedundantArray._out_array)

        # Snapshot the edge list, since edges are removed inside the loop
        for e in list(graph.out_edges(out_array)):
            # Modify all outgoing edges to point to in_array
            path = graph.memlet_tree(e)
            for pe in path:
                if pe.data.data == out_array.data:
                    pe.data.data = in_array.data

            # Preemptively add edge from in_array to out_array's adjacent
            # nodes.
            new_memlet = e.data
            new_memlet.data = in_array.data
            graph.add_edge(in_array, e.src_conn, e.dst, e.dst_conn, new_memlet)
            graph.remove_edge(e)

        # Explicit check instead of `assert`, which is skipped under
        # `python -O` and would silence this diagnostic
        in_edges = graph.in_edges(out_array)
        if len(in_edges) != 1:
            print("Multiple in-edges for ", str(out_array))
        graph.remove_edge(in_edges[0])

        # Finally, remove out_array node
        graph.remove_node(out_array)
        if Config.get_bool("debugprint"):
            TensorflowRedundantArray._arrays_removed += 1
Beispiel #11
0
class MapFusion(pattern_matching.Transformation):
    """ Implements the MapFusion transformation.
        It will check for all patterns MapExit -> AccessNode -> MapEntry, and
        based on the following rules, fuse them and remove the transient in
        between. There are several possibilities of what it does to this
        transient in between. 

        Essentially, if there is some other place in the
        sdfg where it is required, or if it is not a transient, then it will
        not be removed. In such a case, it will be linked to the MapExit node
        of the new fused map.

        Rules for fusing maps:
          0. The map range of the second map should be a permutation of the
             first map range.
          1. Each of the access nodes that are adjacent to the first map exit
             should have an edge to the second map entry. If it doesn't, then the
             second map entry should not be reachable from this access node.
          2. Any node that has a wcr from the first map exit should not be
             adjacent to the second map entry.
          3. Access pattern for the access nodes in the second map should be
             the same permutation of the map parameters as the map ranges of the
             two maps. Alternatively, this access node should not be adjacent to
             the first map entry.
    """

    _first_map_exit = nodes.ExitNode()
    _some_array = nodes.AccessNode("_")
    _second_map_entry = nodes.EntryNode()

    @staticmethod
    def annotates_memlets():
        """ Always returns False for this transformation.
            NOTE(review): presumably this tells the transformation framework
            that MapFusion does not annotate memlets itself -- confirm against
            the Transformation base class.
        """
        return False

    @staticmethod
    def expressions():
        """ Returns the list of pattern graphs this transformation matches:
            a single path first map exit -> access node -> second map entry.
        """
        fusion_path = nxutil.node_path_graph(
            MapFusion._first_map_exit,
            MapFusion._some_array,
            MapFusion._second_map_entry,
        )
        return [fusion_path]

    @staticmethod
    def find_permutation(first_map: nodes.Map,
                         second_map: nodes.Map) -> Union[List[int], None]:
        """ Find a permutation that maps the ranges of one map onto another.
            @param first_map: First map.
            @param second_map: Second map.
            @return: None if the maps have a different number of ranges or
                     some range of the first map has no unused equal range
                     in the second map. Otherwise a list of indices L such
                     that the L[x]'th range of the second map equals the
                     x'th range of the first map.
        """
        first_ranges = first_map.range
        second_ranges = second_map.range
        if len(first_ranges) != len(second_ranges):
            return None

        permutation = []
        for wanted in first_ranges:
            # Take the first not-yet-used range of the second map that equals
            # the current range of the first map
            for position, available in enumerate(second_ranges):
                if wanted == available and position not in permutation:
                    permutation.append(position)
                    break
            else:
                # No partner found: the ranges are not a permutation
                return None

        return permutation

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Decides whether the two matched maps can be fused.
            @param graph: The state graph in which the match was found.
            @param candidate: Mapping from pattern nodes to node indices in
                              `graph`.
            @param expr_index: Index of the matched pattern (unused here).
            @param sdfg: The SDFG containing the state (unused here).
            @param strict: Strict-mode flag (unused here).
            @return: True if all fusion checks pass, False otherwise.
        """
        first_exit = graph.nodes()[candidate[MapFusion._first_map_exit]]
        first_entry = graph.entry_node(first_exit)
        second_entry = graph.nodes()[candidate[MapFusion._second_map_entry]]

        # Data written with a write-conflict resolution in the first map must
        # not be read inside the second map
        for produced in graph.in_edges(first_exit):
            if produced.data.wcr is None:
                continue
            if any(consumed.data.data == produced.data.data
                   for consumed in graph.out_edges(second_entry)):
                return False

        # Everything leaving the first map exit must be an access node
        intermediate_nodes = set()
        intermediate_data = set()
        for _, _, target, _, _ in graph.out_edges(first_exit):
            if not isinstance(target, nodes.AccessNode):
                return False
            intermediate_nodes.add(target)
            intermediate_data.add(target.data)

        # The second map range has to be a permutation of the first one
        perm = MapFusion.find_permutation(first_entry.map, second_entry.map)
        if perm is None:
            return False

        # Parameter of the first map -> matching parameter of the second map
        params_dict = {
            param: second_entry.map.params[perm[index]]
            for index, param in enumerate(first_entry.map.params)
        }

        def to_second_map(sym):
            # Rename a first-map symbol to its second-map counterpart
            return symbolic.symbol(params_dict[str(sym)])

        def translate(dim):
            # Express one subset dimension in terms of second-map parameters
            if isinstance(dim, symbolic.symbol):
                return to_second_map(dim)
            if isinstance(dim, (list, tuple)):
                return tuple(
                    to_second_map(part)
                    if isinstance(part, symbolic.symbol) else part
                    for part in dim)
            return dim

        first_outputs = [edge.data for edge in graph.in_edges(first_exit)]

        # Every input of the second map must either be unrelated to the
        # intermediate arrays, or be produced with an equivalent subset by
        # the first map
        for _, _, _, _, needed in graph.out_edges(second_entry):
            if needed.data not in intermediate_data:
                # Unrelated data is fine unless it is computed (transitively)
                # from one of the intermediate nodes.
                # NOTE: Assumes graph has networkx version
                if any(
                        graph.find_node(needed.data) in nx.descendants(
                            graph._nx, mid) for mid in intermediate_nodes):
                    return False
                continue

            for produced in first_outputs:
                if produced.data != needed.data:
                    continue
                expected = [translate(dim) for dim in produced.subset]
                if expected == list(needed.subset):
                    break
            else:
                # No output memlet of the first map provides this input
                return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        """ Returns a description of a match of the form
            "<label>: <params> -> <label>: <params>", built from the maps of
            the first map exit and the second map entry.
        """
        scope_nodes = [
            graph.nodes()[candidate[pattern_node]] for pattern_node in (
                MapFusion._first_map_exit, MapFusion._second_map_entry)
        ]
        descriptions = []
        for scope_node in scope_nodes:
            fused_map = scope_node.map
            descriptions.append(fused_map.label + ": " +
                                str(fused_map.params))
        return " -> ".join(descriptions)

    def apply(self, sdfg):
        """
            This method applies the mapfusion transformation.
            Other than the removal of the second map entry node (SME), and the
            first map exit (FME) node, it has the following side effects:

            1.  Any transient adjacent to both FME and SME with degree = 2
                will be removed. The tasklets that use/produce it shall be
                connected directly with a scalar/new transient (if the
                dataflow is more than a single scalar)

            2.  If this transient is adjacent to FME and SME and has other
                uses, it will be adjacent to the new map exit post fusion.
                Tasklet-> Tasklet edges will ALSO be added as mentioned above.

            3.  If an access node is adjacent to FME but not SME, it will be
                adjacent to new map exit post fusion.

            4.  If an access node is adjacent to SME but not FME, it will be
                adjacent to the new map entry node post fusion.

            The method works in three phases: (a) classify the intermediate
            access nodes into erasable and non-erasable ones, (b) reroute all
            edges entering FME and remove FME, (c) reroute all edges entering
            SME and remove SME. Finally the second map exit is re-bound to the
            first map and scope connectors are regenerated.

            @param sdfg: The SDFG containing the matched state.
            @return: False if an intermediate transient occurs more than once
                     in the state (see NOTE below), otherwise None.
        """
        graph = sdfg.nodes()[self.state_id]
        first_exit = graph.nodes()[self.subgraph[MapFusion._first_map_exit]]
        first_entry = graph.entry_node(first_exit)
        second_entry = graph.nodes()[self.subgraph[
            MapFusion._second_map_entry]]
        second_exit = graph.exit_nodes(second_entry)[0]

        # All direct successors of the first map exit; each is expected to be
        # an access node (can_be_applied rejects anything else).
        intermediate_nodes = set()
        for _, _, dst, _, _ in graph.out_edges(first_exit):
            intermediate_nodes.add(dst)
            assert isinstance(dst, nodes.AccessNode)

        # Check if an access node refers to non transient memory, or transient
        # is used at another location (cannot erase)
        do_not_erase = set()
        for node in intermediate_nodes:
            if sdfg.arrays[node.data].transient is False:
                do_not_erase.add(node)
            else:
                # If array is used anywhere else in this state.
                num_occurrences = len([
                    n for n in graph.nodes()
                    if isinstance(n, nodes.AccessNode) and n.data == node.data
                ])
                # NOTE(review): this aborts the transformation from inside
                # apply() without having modified the graph yet; it looks like
                # a check that belongs in can_be_applied -- confirm callers
                # handle a False return value.
                if num_occurrences > 1:
                    return False

                # Keep the node if it has a producer other than the first map
                # exit, or (only checked when it has none) a consumer other
                # than the second map entry.
                for edge in graph.in_edges(node):
                    if edge.src != first_exit:
                        do_not_erase.add(node)
                        break
                else:
                    for edge in graph.out_edges(node):
                        if edge.dst != second_entry:
                            do_not_erase.add(node)
                            break

        # Find permutation between first and second scopes
        if first_entry.map.params != second_entry.map.params:
            perm = MapFusion.find_permutation(first_entry.map,
                                              second_entry.map)
            # Maps each first-map parameter to the second-map parameter with
            # the same range
            params_dict = {}
            for _index, _param in enumerate(first_entry.map.params):
                params_dict[_param] = second_entry.map.params[perm[_index]]

            # Hopefully replaces (in memlets and tasklet) the second scope map
            # indices with the permuted first map indices
            second_scope = graph.scope_subgraph(second_entry)
            for _firstp, _secondp in params_dict.items():
                replace(second_scope, _secondp, _firstp)

        ########Isolate First MapExit node###########
        for _edge in graph.in_edges(first_exit):
            __some_str = _edge.data.data
            # Look up the access node by the data name carried on the memlet
            _access_node = graph.find_node(__some_str)
            # all outputs of first_exit are in intermediate_nodes set, so all inputs to
            # first_exit should also be!
            if _access_node not in do_not_erase:
                # Erasable intermediate: connect the producer inside the first
                # map directly to the consumer inside the second map.
                _new_dst = None
                _new_dst_conn = None
                # look at the second map entry out-edges to get the new destination
                for _e in graph.out_edges(second_entry):
                    if _e.data.data == _access_node.data:
                        _new_dst = _e.dst
                        _new_dst_conn = _e.dst_conn
                        break
                if _new_dst is None:
                    # Access node is not even used in the second map
                    # (_edge itself is not removed here; first_exit is removed
                    # after this loop)
                    graph.remove_node(_access_node)
                    continue
                if _edge.data.data == _access_node.data and isinstance(
                        _edge._src, nodes.AccessNode):
                    # The producer is itself an access node: reuse its data
                    # name on the memlet and link it straight to the consumer
                    _edge.data.data = _edge._src.data
                    _edge.data.subset = "0"
                    graph.add_edge(
                        _edge._src,
                        _edge.src_conn,
                        _new_dst,
                        _new_dst_conn,
                        dcpy(_edge.data),
                    )
                else:
                    if _edge.data.subset.num_elements() == 1:
                        # We will add a scalar
                        # Name is built from state id, node ids and connectors
                        local_name = "__s%d_n%d%s_n%d%s" % (
                            self.state_id,
                            graph.node_id(_edge._src),
                            _edge.src_conn,
                            graph.node_id(_edge._dst),
                            _edge.dst_conn,
                        )
                        # local_node is not used afterwards in this branch;
                        # producer and consumer are connected directly
                        local_node = sdfg.add_scalar(
                            local_name,
                            dtype=_access_node.desc(graph).dtype,
                            toplevel=False,
                            transient=True,
                            storage=dtypes.StorageType.Register,
                        )
                        _edge.data.data = (
                            local_name)  # graph.add_access(local_name).data
                        _edge.data.subset = "0"
                        graph.add_edge(
                            _edge._src,
                            _edge.src_conn,
                            _new_dst,
                            _new_dst_conn,
                            dcpy(_edge.data),
                        )
                    else:
                        # We will add a transient of size = memlet subset
                        # size
                        local_name = "__s%d_n%d%s_n%d%s" % (
                            self.state_id,
                            graph.node_id(_edge._src),
                            _edge.src_conn,
                            graph.node_id(_edge._dst),
                            _edge.dst_conn,
                        )
                        # Here local_node is used as a graph node below
                        # (producer -> local_node -> consumer)
                        local_node = graph.add_transient(
                            local_name,
                            _edge.data.subset.size(),
                            dtype=_access_node.desc(graph).dtype,
                            toplevel=False,
                        )
                        _edge.data.data = (
                            local_name)  # graph.add_access(local_name).data
                        # Zero-based subset covering the whole new transient
                        _edge.data.subset = ",".join([
                            "0:" + str(_s) for _s in _edge.data.subset.size()
                        ])
                        graph.add_edge(
                            _edge._src,
                            _edge.src_conn,
                            local_node,
                            None,
                            dcpy(_edge.data),
                        )
                        graph.add_edge(local_node, None, _new_dst,
                                       _new_dst_conn, dcpy(_edge.data))
                graph.remove_edge(_edge)
                ####Isolate this node#####
                for _in_e in graph.in_edges(_access_node):
                    graph.remove_edge(_in_e)
                for _out_e in graph.out_edges(_access_node):
                    graph.remove_edge(_out_e)
                graph.remove_node(_access_node)
            else:
                # _access_node will become an output of the second map exit
                for _out_e in graph.out_edges(first_exit):
                    if _out_e.data.data == _access_node.data:
                        graph.add_edge(
                            second_exit,
                            None,
                            _out_e._dst,
                            _out_e.dst_conn,
                            dcpy(_out_e.data),
                        )

                        graph.remove_edge(_out_e)
                        break
                else:
                    raise AssertionError(
                        "No out-edge was found that leads to {}".format(
                            _access_node))
                # Route the producer inside the first map to the second exit
                graph.add_edge(_edge._src, _edge.src_conn, second_exit, None,
                               dcpy(_edge.data))
                ### If the second map needs this node then link the connector
                # that generated this to the place where it is needed, with a
                # temp transient/scalar for memlet to be generated
                for _out_e in graph.out_edges(second_entry):
                    if _out_e.data.data == _access_node.data:
                        if _edge.data.subset.num_elements() == 1:
                            # We will add a scalar
                            local_name = "__s%d_n%d%s_n%d%s" % (
                                self.state_id,
                                graph.node_id(_edge._src),
                                _edge.src_conn,
                                graph.node_id(_edge._dst),
                                _edge.dst_conn,
                            )
                            # local_node is not used afterwards in this branch
                            local_node = sdfg.add_scalar(
                                local_name,
                                dtype=_access_node.desc(graph).dtype,
                                storage=dtypes.StorageType.Register,
                                toplevel=False,
                                transient=True,
                            )
                            _edge.data.data = (
                                local_name
                            )  # graph.add_access(local_name).data
                            _edge.data.subset = "0"
                            graph.add_edge(
                                _edge._src,
                                _edge.src_conn,
                                _out_e._dst,
                                _out_e.dst_conn,
                                dcpy(_edge.data),
                            )
                        else:
                            # We will add a transient of size = memlet subset
                            # size
                            local_name = "__s%d_n%d%s_n%d%s" % (
                                self.state_id,
                                graph.node_id(_edge._src),
                                _edge.src_conn,
                                graph.node_id(_edge._dst),
                                _edge.dst_conn,
                            )
                            # NOTE(review): the equivalent branch above calls
                            # graph.add_transient; here sdfg.add_transient is
                            # used and its result is passed to graph.add_edge
                            # as a node -- confirm that it returns an access
                            # node rather than a data descriptor.
                            local_node = sdfg.add_transient(
                                local_name,
                                _edge.data.subset.size(),
                                dtype=_access_node.desc(graph).dtype,
                                toplevel=False,
                            )
                            _edge.data.data = (
                                local_name
                            )  # graph.add_access(local_name).data
                            _edge.data.subset = ",".join([
                                "0:" + str(_s)
                                for _s in _edge.data.subset.size()
                            ])
                            graph.add_edge(
                                _edge._src,
                                _edge.src_conn,
                                local_node,
                                None,
                                dcpy(_edge.data),
                            )
                            graph.add_edge(
                                local_node,
                                None,
                                _out_e._dst,
                                _out_e.dst_conn,
                                dcpy(_edge.data),
                            )
                        # Only the first matching consumer edge is handled
                        break
                graph.remove_edge(_edge)
        graph.remove_node(first_exit)  # Take a leap of faith

        #############Isolate second_entry node################
        for _edge in graph.in_edges(second_entry):
            _access_node = graph.find_node(_edge.data.data)
            if _access_node in intermediate_nodes:
                # Already handled above, just remove this
                graph.remove_edge(_edge)
                continue
            else:
                # This is an external input to the second map which will now go through the first
                # map.
                graph.add_edge(_edge._src, _edge.src_conn, first_entry, None,
                               dcpy(_edge.data))
                graph.remove_edge(_edge)
                # Re-source the first matching inner edge from first_entry
                for _out_e in graph.out_edges(second_entry):
                    if _out_e.data.data == _access_node.data:
                        graph.add_edge(
                            first_entry,
                            None,
                            _out_e._dst,
                            _out_e.dst_conn,
                            dcpy(_out_e.data),
                        )
                        graph.remove_edge(_out_e)
                        break
                else:
                    raise AssertionError(
                        "No out-edge was found that leads to {}".format(
                            _access_node))

        graph.remove_node(second_entry)

        # Fix scope exit
        # The surviving exit node now closes the first map's scope
        second_exit.map = first_entry.map
        graph.fill_scope_connectors()
Beispiel #12
0
    def apply(self, sdfg):
        """ Inserts a transient array between the outer and the inner map
            entry and redirects the selected memlet through it.

            The edge between the two map entries that carries `self.array`
            (or, if `self.array` is None, the data of the first edge between
            them) is replaced by outer entry -> transient -> inner entry.
            All memlets reached from the inner map entry that refer to the
            array are renamed to the transient and offset accordingly.

            @param sdfg: The SDFG containing the matched state.
            @raise KeyError: If no edge from the outer to the inner map entry
                             exists.
        """
        state = sdfg.nodes()[self.state_id]
        outer_entry = state.nodes()[self.subgraph[
            InLocalStorage._outer_map_entry]]
        inner_entry = state.nodes()[self.subgraph[
            InLocalStorage._inner_map_entry]]

        target = self.array
        if target is None:
            target = state.edges_between(outer_entry,
                                         inner_entry)[0].data.data

        # Pick the edge outer -> inner that carries the requested array. If
        # there is none, fall back to the first edge outer -> inner.
        for edge in state.in_edges(inner_entry):
            if edge.src != outer_entry:
                continue
            if target == edge.data.data:
                chosen_edge = edge
                break
        else:
            for edge in state.in_edges(inner_entry):
                if edge.src != outer_entry:
                    continue
                chosen_edge = edge
                print('WARNING: Array %s not found! Using array %s instead.' %
                      (target, edge.data.data))
                target = edge.data.data
                break
            else:
                raise KeyError('Array %s not found!' % target)
        source_memlet = chosen_edge.data

        # Register the transient and create its access node
        local_name = 'trans_' + source_memlet.data
        local_shape = [
            symbolic.overapproximate(extent)
            for extent in source_memlet.bounding_box_size()
        ]
        sdfg.add_array(local_name,
                       local_shape,
                       sdfg.arrays[source_memlet.data].dtype,
                       transient=True)
        local_access = nodes.AccessNode(local_name)

        # Memlet into the transient keeps the original subset; the memlet out
        # of it is shifted to start at zero
        into_local = copy.deepcopy(source_memlet)
        from_local = copy.deepcopy(source_memlet)
        from_local.data = local_access.data
        origin = []
        for dim, rng in enumerate(source_memlet.subset):
            start = rng[0]
            origin.append(start)
            current = source_memlet.subset[dim]
            if isinstance(current, tuple):
                from_local.subset[dim] = (current[0] - start,
                                          current[1] - start, current[2])
            else:
                from_local.subset[dim] -= start
        into_local.other_subset = copy.deepcopy(from_local.subset)

        # Reconnect, assuming one edge to the stream
        state.remove_edge(chosen_edge)
        state.add_edge(outer_entry, chosen_edge.src_conn, local_access, None,
                       into_local)
        state.add_edge(local_access, None, inner_entry, chosen_edge.dst_conn,
                       from_local)

        # Rename and offset every memlet below the inner map entry that still
        # refers to the original array
        for _, _, _, _, inner_memlet in state.bfs_edges(inner_entry,
                                                        reverse=False):
            if inner_memlet.data != target:
                continue
            for dim, rng in enumerate(inner_memlet.subset):
                shift = origin[dim]
                if isinstance(inner_memlet.subset[dim], tuple):
                    inner_memlet.subset[dim] = (rng[0] - shift,
                                                rng[1] - shift, rng[2])
                else:
                    inner_memlet.subset[dim] -= shift
            inner_memlet.data = 'trans_' + source_memlet.data
Beispiel #13
0
    def apply(self, sdfg):
        """ Replaces the matched map by a nested SDFG that iterates over the
            map's first dimension with a begin/guard/body/end state loop.

            The map contents are moved into the `body` state, one new data
            container and access node is created per edge entering the map
            entry ('_in_' prefix) and per edge leaving a map exit ('_out_'
            prefix), and the nested SDFG node is connected in place of the
            map scope, which is then removed from the state.

            @param sdfg: The SDFG containing the matched state.
            @raise NotImplementedError: If a boundary memlet refers to data
                                        that is neither an Array nor a Scalar.
        """
        # Retrieve map entry and exit nodes.
        graph = sdfg.nodes()[self.state_id]
        map_entry = graph.nodes()[self.subgraph[MapToForLoop._map_entry]]
        map_exits = graph.exit_nodes(map_entry)
        loop_idx = map_entry.map.params[0]
        loop_from, loop_to, loop_step = map_entry.map.range[0]

        nested_sdfg = dace.SDFG(graph.label + '_' + map_entry.map.label)

        # Construct nested SDFG
        begin = nested_sdfg.add_state('begin')
        guard = nested_sdfg.add_state('guard')
        body = nested_sdfg.add_state('body')
        end = nested_sdfg.add_state('end')

        init_edge = edges.InterstateEdge(
            assignments={str(loop_idx): str(loop_from)})
        nested_sdfg.add_edge(begin, guard, init_edge)

        enter_edge = edges.InterstateEdge(
            condition=str(loop_idx) + ' <= ' + str(loop_to))
        nested_sdfg.add_edge(guard, body, enter_edge)

        leave_edge = edges.InterstateEdge(
            condition=str(loop_idx) + ' > ' + str(loop_to))
        nested_sdfg.add_edge(guard, end, leave_edge)

        step_edge = edges.InterstateEdge(assignments={
            str(loop_idx): str(loop_idx) + ' + ' + str(loop_step)
        })
        nested_sdfg.add_edge(body, guard, step_edge)

        # Add map contents
        map_subgraph = graph.scope_subgraph(map_entry)
        for inner_node in map_subgraph.nodes():
            if inner_node is not map_entry and inner_node not in map_exits:
                body.add_node(inner_node)
        for e_src, e_src_conn, e_dst, e_dst_conn, e_memlet in (
                map_subgraph.edges()):
            if e_src is not map_entry and e_dst not in map_exits:
                body.add_edge(e_src, e_src_conn, e_dst, e_dst_conn, e_memlet)

        def declare_boundary(prefix, boundary_memlet):
            # Creates the data container and body access node for one edge
            # crossing the map boundary and renames matching body memlets.
            label = prefix + boundary_memlet.data
            descriptor = sdfg.arrays[boundary_memlet.data]
            if isinstance(descriptor, data.Array):
                sdfg.add_array(label, descriptor.dtype, [
                    symbolic.overapproximate(r)
                    for r in boundary_memlet.bounding_box_size()
                ])
            elif isinstance(descriptor, data.Scalar):
                sdfg.add_scalar(label, descriptor.dtype)
            else:
                raise NotImplementedError()
            access = nodes.AccessNode(label)
            body.add_node(access)
            for _, _, _, _, body_memlet in body.edges():
                if body_memlet.data == boundary_memlet.data:
                    body_memlet.data = label
            return label, access

        # Reconnect inputs: index -> (connector name, access node, memlet)
        inputs = {}
        for idx, (_, _, _, _, in_memlet) in enumerate(
                graph.in_edges(map_entry)):
            in_label, in_access = declare_boundary('_in_', in_memlet)
            inputs[idx] = (in_label, in_access, in_memlet)

        # Reconnect outputs: index -> (connector name, access node, memlet).
        # The index restarts at zero for every map exit.
        outputs = {}
        for map_exit in map_exits:
            for idx, (_, _, _, _, out_memlet) in enumerate(
                    graph.out_edges(map_exit)):
                out_label, out_access = declare_boundary('_out_', out_memlet)
                outputs[idx] = (out_label, out_access, out_memlet)

        # Add nested SDFG and reconnect it
        nested_node = graph.add_nested_sdfg(
            nested_sdfg, sdfg, set(record[0] for record in inputs.values()),
            set(record[0] for record in outputs.values()))

        for idx, (o_src, o_src_conn, _, _, _) in enumerate(
                graph.in_edges(map_entry)):
            connector, _, outer_memlet = inputs[idx]
            graph.add_edge(o_src, o_src_conn, nested_node, connector,
                           outer_memlet)

        for map_exit in map_exits:
            for idx, (_, _, o_dst, o_dst_conn, _) in enumerate(
                    graph.out_edges(map_exit)):
                connector, _, outer_memlet = outputs[idx]
                graph.add_edge(nested_node, connector, o_dst, o_dst_conn,
                               outer_memlet)

        # Inside the body, feed former map-entry consumers from the new input
        # access nodes; the index is parsed from the connector name
        for _, i_src_conn, i_dst, i_dst_conn, i_memlet in graph.out_edges(
                map_entry):
            idx = int(i_src_conn[4:]) - 1
            feed_memlet = dcpy(i_memlet)
            feed_memlet.data = inputs[idx][1].data
            body.add_edge(inputs[idx][1], None, i_dst, i_dst_conn,
                          feed_memlet)

        # Likewise, route former map-exit producers to the new output nodes
        for map_exit in map_exits:
            for i_src, i_src_conn, _, i_dst_conn, i_memlet in graph.in_edges(
                    map_exit):
                idx = int(i_dst_conn[3:]) - 1
                sink_memlet = dcpy(i_memlet)
                sink_memlet.data = outputs[idx][1].data
                body.add_edge(i_src, i_src_conn, outputs[idx][1], None,
                              sink_memlet)

        # Drop the original map scope from the outer state
        for scope_node in map_subgraph:
            graph.remove_node(scope_node)
0
    def apply(self, sdfg):
        """ Stages every memlet entering or leaving the matched nested SDFG
            through a new transient data container placed in ``self.storage``.

            For each edge, a transient named ``device_<data>_in`` (incoming
            edges) or ``device_<data>_out`` (outgoing edges) is registered in
            ``sdfg``. Arrays are sized by the over-approximated bounding box
            of the memlet. The edge is then replaced by two edges that pass
            through an access node of the new transient. Finally,
            ``change_storage`` is invoked on the nested SDFG with the same
            storage type.

            :param sdfg: The SDFG that contains the matched state.
            :raise NotImplementedError: If an edge carries data that is
                                        neither an array nor a scalar.
        """
        state = sdfg.nodes()[self.state_id]
        nested_sdfg = state.nodes()[self.subgraph[CopyToDevice._nested_sdfg]]
        storage = self.storage

        def stage_edge(edge, suffix, is_input):
            """ Replaces ``edge`` with a path through a new device transient.
                The memlet adjacent to the nested SDFG is the one that is
                renamed and re-based onto the transient. """
            src, src_conn, dst, dst_conn, memlet = edge
            dataname = memlet.data
            memdata = sdfg.arrays[dataname]
            device_name = 'device_' + dataname + suffix

            # Always register the transient in the SDFG: the access node
            # created below refers to it by name only.
            if isinstance(memdata, data.Array):
                sdfg.add_array(device_name,
                               shape=[
                                   symbolic.overapproximate(r)
                                   for r in memlet.bounding_box_size()
                               ],
                               dtype=memdata.dtype,
                               transient=True,
                               storage=storage)
            elif isinstance(memdata, data.Scalar):
                sdfg.add_scalar(device_name,
                                memdata.dtype,
                                transient=True,
                                storage=storage)
            else:
                raise NotImplementedError

            data_node = nodes.AccessNode(device_name)

            # Memlet between the original data and the device transient
            host_mm = dcpy(memlet)
            # Memlet between the device transient and the nested SDFG
            device_mm = dcpy(memlet)
            device_mm.data = device_name

            # Shift each dimension by its first element so that the subset
            # is expressed relative to the (smaller) device transient
            for ind, r in enumerate(memlet.subset):
                if isinstance(memlet.subset[ind], tuple):
                    begin = memlet.subset[ind][0] - r[0]
                    end = memlet.subset[ind][1] - r[0]
                    step = memlet.subset[ind][2]
                    device_mm.subset[ind] = (begin, end, step)
                else:
                    device_mm.subset[ind] -= r[0]

            state.remove_edge(edge)
            if is_input:
                state.add_edge(src, src_conn, data_node, None, host_mm)
                state.add_edge(data_node, None, dst, dst_conn, device_mm)
            else:
                state.add_edge(src, src_conn, data_node, None, device_mm)
                state.add_edge(data_node, None, dst, dst_conn, host_mm)

        # Snapshot the edge lists, since staging removes and adds edges
        for edge in list(state.in_edges(nested_sdfg)):
            stage_edge(edge, '_in', True)

        for edge in list(state.out_edges(nested_sdfg)):
            stage_edge(edge, '_out', False)

        # Change storage for all data inside nested SDFG to device.
        change_storage(nested_sdfg.sdfg, storage)
Beispiel #15
0
    def apply(self, sdfg):
        """ Schedules the matched map as an FPGA device map and routes data
            that is not held in an FPGA-compatible storage type through
            ``fpga_``-prefixed transient clones.

            The steps are: collect the access nodes feeding the map entry and
            fed by the map exit(s) whose storage is not FPGA-compatible,
            register an ``FPGA_Global`` clone for each (unless one of that
            name already exists), splice clone access nodes into the boundary
            edges, and rename memlets inside the map scope to the clones.

            :param sdfg: The SDFG that contains the matched state.
        """
        state = sdfg.nodes()[self.state_id]
        entry = state.nodes()[self.subgraph[FPGATransformMap._map_entry]]
        entry.map._schedule = dtypes.ScheduleType.FPGA_Device

        # Exit node(s) that close the scope opened by the map entry
        scope_exits = state.exit_nodes(entry)

        # Storage types for which no clone is created
        storage_ok = [
            dtypes.StorageType.FPGA_Global, dtypes.StorageType.FPGA_Local,
            dtypes.StorageType.CPU_Pinned
        ]

        #######################################################
        # Add FPGA copies of CPU arrays (i.e., not already on FPGA)

        # Step 1: decide which access nodes have to be cloned
        outgoing = [e for xnode in scope_exits for e in state.out_edges(xnode)]

        sources_to_clone = set()
        for in_edge in state.in_edges(entry):
            anode = sd.find_input_arraynode(state, in_edge)
            if anode.desc(sdfg).storage not in storage_ok:
                sources_to_clone.add(anode)

        sinks_to_clone = set()
        for out_edge in outgoing:
            anode = sd.find_output_arraynode(state, out_edge)
            if anode.desc(sdfg).storage not in storage_ok:
                sinks_to_clone.add(anode)

        # Step 2: register one FPGA clone per array and create access nodes.
        # Maps original array name -> clone name, only for clones that were
        # registered by this call.
        cloned_arrays = {}

        def make_clone_nodes(array_nodes):
            """ Returns {original name: new access node of the clone}. """
            result = {}
            for anode in array_nodes:
                desc = anode.desc(sdfg)
                known = (anode.data in cloned_arrays
                         or 'fpga_' + anode.data in sdfg.arrays)
                if not known:
                    sdfg.add_array('fpga_' + anode.data,
                                   dtype=desc.dtype,
                                   shape=desc.shape,
                                   materialize_func=desc.materialize_func,
                                   transient=True,
                                   storage=dtypes.StorageType.FPGA_Global,
                                   allow_conflicts=desc.allow_conflicts,
                                   access_order=desc.access_order,
                                   strides=desc.strides,
                                   offset=desc.offset)
                    cloned_arrays[anode.data] = 'fpga_' + anode.data
                result[anode.data] = nodes.AccessNode('fpga_' + anode.data)
            return result

        source_clones = make_clone_nodes(sources_to_clone)
        sink_clones = make_clone_nodes(sinks_to_clone)

        # Step 3: splice the clone nodes into the boundary edges
        # TODO(later): Shift indices and create only the necessary sub-arrays
        for name, clone in source_clones.items():
            state.add_node(clone)
            for edge in state.in_edges(entry):
                if edge.data.data != name:
                    continue
                state.remove_edge(edge)
                # original source -> clone keeps the original memlet
                state.add_edge(edge.src, None, clone, None, edge.data)
                # clone -> map entry uses a copy that names the clone
                renamed = copy.copy(edge.data)
                renamed.data = clone.data
                state.add_edge(clone, edge.src_conn, edge.dst, edge.dst_conn,
                               renamed)

        for name, clone in sink_clones.items():
            state.add_node(clone)
            for edge in outgoing:
                if edge.data.data != name:
                    continue
                state.remove_edge(edge)
                # clone -> original destination keeps the original memlet
                state.add_edge(clone, None, edge.dst, None, edge.data)
                # map exit -> clone uses a copy that names the clone
                renamed = copy.copy(edge.data)
                renamed.data = clone.data
                state.add_edge(edge.src, edge.src_conn, clone, edge.dst_conn,
                               renamed)

        # Step 4: point memlets inside the scope at the clones
        for edge in state.scope_subgraph(entry).edges():
            name = edge.data.data
            if name is not None and name in cloned_arrays:
                edge.data.data = cloned_arrays[name]
Beispiel #16
0
    def apply(self, sdfg: sd.SDFG):
        """ Moves the whole SDFG to the GPU.

            Non-transient data is cloned into ``gpu_``-prefixed transients in
            GPU global storage, with a copy-in state prepended and a copy-out
            state appended. Top-level transients are moved to GPU global
            storage, free code nodes are wrapped in single-iteration GPU
            maps, and top-level maps are scheduled on the GPU device.

            Behavior is controlled by the following attributes of ``self``:
            ``toplevel_trans`` (mark top-level transients as toplevel),
            ``register_trans`` (turn scoped transients into registers),
            ``sequential_innermaps`` (schedule nested maps sequentially) and
            ``strict_transform`` (greedily apply strict state fusion and
            redundant array removal afterwards).

            :param sdfg: The SDFG to transform in-place.
        """

        #######################################################
        # Step 0: SDFG metadata

        # Find all input and output data descriptors. The lists hold
        # (name, descriptor) pairs; the name sets guarantee that each
        # container is recorded (and later cloned/copied) exactly once.
        input_nodes = []
        output_nodes = []
        input_names = set()
        output_names = set()
        global_code_nodes = [[] for _ in sdfg.nodes()]

        for i, state in enumerate(sdfg.nodes()):
            sdict = state.scope_dict()
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and not node.desc(sdfg).transient):
                    if (state.out_degree(node) > 0
                            and node.data not in input_names):
                        input_names.add(node.data)
                        input_nodes.append((node.data, node.desc(sdfg)))
                    if (state.in_degree(node) > 0
                            and node.data not in output_names):
                        output_names.add(node.data)
                        output_nodes.append((node.data, node.desc(sdfg)))
                elif isinstance(node, nodes.CodeNode) and sdict[node] is None:
                    if not isinstance(node, nodes.EmptyTasklet):
                        global_code_nodes[i].append(node)

            # Input nodes may also be nodes with WCR memlets and no identity
            for e in state.edges():
                if e.data.wcr is not None and e.data.wcr_identity is None:
                    dataname = e.data.data
                    if (dataname not in input_names
                            and not sdfg.arrays[dataname].transient):
                        input_names.add(dataname)
                        input_nodes.append((dataname, sdfg.arrays[dataname]))

        start_state = sdfg.start_state
        end_states = sdfg.sink_nodes()

        #######################################################
        # Step 1: Create cloned GPU arrays and replace originals

        cloned_arrays = {}
        for inodename, inode in input_nodes:
            newdesc = inode.clone()
            newdesc.storage = types.StorageType.GPU_Global
            newdesc.transient = True
            sdfg.add_datadesc('gpu_' + inodename, newdesc)
            cloned_arrays[inodename] = 'gpu_' + inodename

        for onodename, onode in output_nodes:
            # Data that is both read and written shares a single clone
            if onodename in cloned_arrays:
                continue
            newdesc = onode.clone()
            newdesc.storage = types.StorageType.GPU_Global
            newdesc.transient = True
            sdfg.add_datadesc('gpu_' + onodename, newdesc)
            cloned_arrays[onodename] = 'gpu_' + onodename

        # Replace nodes
        for state in sdfg.nodes():
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and node.data in cloned_arrays):
                    node.data = cloned_arrays[node.data]

        # Replace memlets
        for state in sdfg.nodes():
            for edge in state.edges():
                if edge.data.data in cloned_arrays:
                    edge.data.data = cloned_arrays[edge.data.data]

        #######################################################
        # Step 2: Create copy-in state

        copyin_state = sdfg.add_state(sdfg.label + '_copyin')
        sdfg.add_edge(copyin_state, start_state, ed.InterstateEdge())

        for nname, desc in input_nodes:
            src_array = nodes.AccessNode(nname, debuginfo=desc.debuginfo)
            dst_array = nodes.AccessNode(cloned_arrays[nname],
                                         debuginfo=desc.debuginfo)
            copyin_state.add_node(src_array)
            copyin_state.add_node(dst_array)
            copyin_state.add_nedge(
                src_array, dst_array,
                memlet.Memlet.from_array(src_array.data, src_array.desc(sdfg)))

        #######################################################
        # Step 3: Create copy-out state

        copyout_state = sdfg.add_state(sdfg.label + '_copyout')
        for state in end_states:
            sdfg.add_edge(state, copyout_state, ed.InterstateEdge())

        for nname, desc in output_nodes:
            src_array = nodes.AccessNode(cloned_arrays[nname],
                                         debuginfo=desc.debuginfo)
            dst_array = nodes.AccessNode(nname, debuginfo=desc.debuginfo)
            copyout_state.add_node(src_array)
            copyout_state.add_node(dst_array)
            copyout_state.add_nedge(
                src_array, dst_array,
                memlet.Memlet.from_array(dst_array.data, dst_array.desc(sdfg)))

        #######################################################
        # Step 4: Modify transient data storage

        for state in sdfg.nodes():
            sdict = state.scope_dict()
            for node in state.nodes():
                if isinstance(node,
                              nodes.AccessNode) and node.desc(sdfg).transient:
                    nodedesc = node.desc(sdfg)
                    if sdict[node] is None:
                        # NOTE: the cloned arrays match too but it's the same
                        # storage so we don't care
                        nodedesc.storage = types.StorageType.GPU_Global

                        # Try to move allocation/deallocation out of loops
                        if self.toplevel_trans:
                            nodedesc.toplevel = True
                    else:
                        # Make internal transients registers
                        if self.register_trans:
                            nodedesc.storage = types.StorageType.Register

        #######################################################
        # Step 5: Wrap free tasklets and nested SDFGs with a GPU map

        for state, gcodes in zip(sdfg.nodes(), global_code_nodes):
            for gcode in gcodes:
                # Create map and connectors
                me, mx = state.add_map(gcode.label + '_gmap',
                                       {gcode.label + '__gmapi': '0:1'},
                                       schedule=types.ScheduleType.GPU_Device)
                # Store in/out edges in lists so that they don't get corrupted
                # when they are removed from the graph
                in_edges = list(state.in_edges(gcode))
                out_edges = list(state.out_edges(gcode))
                me.in_connectors = set('IN_' + e.dst_conn for e in in_edges)
                me.out_connectors = set('OUT_' + e.dst_conn for e in in_edges)
                mx.in_connectors = set('IN_' + e.src_conn for e in out_edges)
                mx.out_connectors = set('OUT_' + e.src_conn for e in out_edges)

                # Create memlets through map
                for e in in_edges:
                    state.remove_edge(e)
                    state.add_edge(e.src, e.src_conn, me, 'IN_' + e.dst_conn,
                                   e.data)
                    state.add_edge(me, 'OUT_' + e.dst_conn, e.dst, e.dst_conn,
                                   e.data)
                for e in out_edges:
                    state.remove_edge(e)
                    state.add_edge(e.src, e.src_conn, mx, 'IN_' + e.src_conn,
                                   e.data)
                    state.add_edge(mx, 'OUT_' + e.src_conn, e.dst, e.dst_conn,
                                   e.data)

                # Map without inputs
                # NOTE(review): no matching edge to the map exit is added
                # when the node has no outputs - confirm this is intended.
                if len(in_edges) == 0:
                    state.add_nedge(me, gcode, memlet.EmptyMemlet())

        #######################################################
        # Step 6: Change all top-level maps to GPU maps

        for state in sdfg.nodes():
            sdict = state.scope_dict()
            for node in state.nodes():
                if isinstance(node, nodes.EntryNode):
                    if sdict[node] is None:
                        node.schedule = types.ScheduleType.GPU_Device
                    elif self.sequential_innermaps:
                        node.schedule = types.ScheduleType.Sequential

        #######################################################
        # Step 7: Strict transformations
        if not self.strict_transform:
            return

        # Apply strict state fusions greedily.
        opt = optimizer.SDFGOptimizer(sdfg, inplace=True)

        def strict_matches():
            """ Returns the currently applicable strict matches of interest. """
            return [
                match for match in opt.get_pattern_matches(strict=True)
                if isinstance(match, (StateFusion, RedundantArray))
            ]

        fusions = 0
        arrays = 0
        options = strict_matches()
        while options:
            match = options[0]
            ssdfg = sdfg.sdfg_list[match.sdfg_id]
            match.apply(ssdfg)
            ssdfg.validate()
            if isinstance(match, StateFusion):
                fusions += 1
            if isinstance(match, RedundantArray):
                arrays += 1

            # The graph changed, so the matches must be recomputed
            options = strict_matches()

        if Config.get_bool('debugprint') and (fusions > 0 or arrays > 0):
            print('Automatically applied {} strict state fusions and removed'
                  ' {} redundant arrays.'.format(fusions, arrays))
Beispiel #17
0
class DoubleBuffering(pattern_matching.Transformation):
    """ Implements the double buffering pattern, which pipelines reading
        and processing data by creating a second copy of the memory.
        In particular, the transformation takes a 1D map and all internal
        (directly connected) transients, adds an additional dimension of size 2,
        and turns the map into a for loop that processes and reads the data in a
        double-buffered manner. Other memlets will not be transformed.
    """

    # Pattern nodes: a map entry directly followed by an access node
    _map_entry = nodes.MapEntry(nodes.Map('_', [], []))
    _transient = nodes.AccessNode('_')

    @staticmethod
    def expressions():
        """ Returns the list of pattern graphs to match: a single path from
            the map entry to the access node. """
        return [
            nxutil.node_path_graph(DoubleBuffering._map_entry,
                                   DoubleBuffering._transient)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Returns True if the matched map has exactly one parameter, can be
            converted by ``MapToForLoop``, and every access node directly
            connected to the map entry refers to a transient array, the first
            of which is the matched access node.

            :param graph: The state in which the pattern was matched.
            :param candidate: Mapping from pattern nodes to node IDs in
                              ``graph``.
            :param expr_index: Index of the matched pattern expression.
            :param sdfg: The SDFG that contains ``graph``.
            :param strict: Forwarded to ``MapToForLoop.can_be_applied``.
        """
        map_entry = graph.nodes()[candidate[DoubleBuffering._map_entry]]
        transient = graph.nodes()[candidate[DoubleBuffering._transient]]

        # Only one dimensional maps are allowed
        if len(map_entry.map.params) != 1:
            return False

        # Verify the map can be transformed to a for-loop
        if not MapToForLoop.can_be_applied(
                graph,
            {MapToForLoop._map_entry: candidate[DoubleBuffering._map_entry]},
                expr_index, sdfg, strict):
            return False

        # Verify that all directly-connected internal access nodes point to
        # transient arrays
        first = True
        for edge in graph.out_edges(map_entry):
            if isinstance(edge.dst, nodes.AccessNode):
                desc = sdfg.arrays[edge.dst.data]
                if not isinstance(desc, data.Array) or not desc.transient:
                    return False
                else:
                    # To avoid duplicate matches, only match the first transient
                    if first and edge.dst != transient:
                        return False
                    first = False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        """ Returns the string form of the matched map entry node. """
        return str(graph.node(candidate[DoubleBuffering._map_entry]))

    def apply(self, sdfg: sd.SDFG):
        """ Applies double buffering to the matched map.

            The statements below depend on each other's order: the map range
            is shortened by one stride, the directly-connected transients get
            an extra leading dimension of size 2, memlets are indexed with
            the placeholder ``__dace_db_param``, the map is converted with
            ``MapToForLoop``, and the placeholder is then replaced state by
            state with the buffer index to use.

            :param sdfg: The SDFG that contains the matched state.
        """
        graph: sd.SDFGState = sdfg.nodes()[self.state_id]
        map_entry = graph.node(self.subgraph[DoubleBuffering._map_entry])

        map_param = map_entry.map.params[0]  # Assuming one dimensional

        ##############################
        # Change condition of loop to one fewer iteration (so that the
        # final one reads from the last buffer)
        map_rstart, map_rend, map_rstride = map_entry.map.range[0]
        map_rend = symbolic.pystr_to_symbolic('(%s) - (%s)' %
                                              (map_rend, map_rstride))
        map_entry.map.range = subsets.Range([(map_rstart, map_rend,
                                              map_rstride)])

        ##############################
        # Gather transients to modify
        transients_to_modify = set(edge.dst.data
                                   for edge in graph.out_edges(map_entry)
                                   if isinstance(edge.dst, nodes.AccessNode))

        # Add dimension to transients and modify memlets
        for transient in transients_to_modify:
            desc: data.Array = sdfg.arrays[transient]
            # Using non-python syntax to ensure properties change
            # (the new leading dimension has size 2, stride equal to the old
            # total size and offset 0; the total size is doubled)
            desc.strides = [desc.total_size] + list(desc.strides)
            desc.shape = [2] + list(desc.shape)
            desc.offset = [0] + list(desc.offset)
            desc.total_size = desc.total_size * 2

        ##############################
        # Modify memlets to use map parameter as buffer index
        modified_subsets = []  # Store modified memlets for final state
        for edge in graph.scope_subgraph(map_entry).edges():
            if edge.data.data in transients_to_modify:
                edge.data.subset = self._modify_memlet(sdfg, edge.data.subset,
                                                       edge.data.data)
                modified_subsets.append(edge.data.subset)
            else:  # Could be other_subset
                # The memlet names other data; check whether either end of
                # its memlet path is one of the modified transients
                path = graph.memlet_path(edge)
                src_node = path[0].src
                dst_node = path[-1].dst

                # other_subset could be None. In that case, recreate from array
                dataname = None
                if (isinstance(src_node, nodes.AccessNode)
                        and src_node.data in transients_to_modify):
                    dataname = src_node.data
                elif (isinstance(dst_node, nodes.AccessNode)
                      and dst_node.data in transients_to_modify):
                    dataname = dst_node.data
                if dataname is not None:
                    subset = (edge.data.other_subset or
                              subsets.Range.from_array(sdfg.arrays[dataname]))
                    edge.data.other_subset = self._modify_memlet(
                        sdfg, subset, dataname)
                    modified_subsets.append(edge.data.other_subset)

        ##############################
        # Turn map into for loop
        map_to_for = MapToForLoop(self.sdfg_id, self.state_id, {
            MapToForLoop._map_entry:
            self.subgraph[DoubleBuffering._map_entry]
        }, self.expr_index)
        # nsdfg_node is the resulting nested SDFG node and nstate the state
        # holding the former map contents (as returned by MapToForLoop.apply)
        nsdfg_node, nstate = map_to_for.apply(sdfg)

        ##############################
        # Gather node copies and remove memlets
        edges_to_replace = []
        for node in nstate.source_nodes():
            for edge in nstate.out_edges(node):
                if (isinstance(edge.dst, nodes.AccessNode)
                        and edge.dst.data in transients_to_modify):
                    edges_to_replace.append(edge)
                    nstate.remove_edge(edge)
            # Drop source nodes that are left without any outgoing edge
            if nstate.out_degree(node) == 0:
                nstate.remove_node(node)

        ##############################
        # Add initial reads to initial nested state
        initial_state: sd.SDFGState = nsdfg_node.sdfg.start_state
        initial_state.set_label('%s_init' % map_entry.map.label)
        for edge in edges_to_replace:
            initial_state.add_node(edge.src)
            rnode = edge.src
            wnode = initial_state.add_write(edge.dst.data)
            initial_state.add_edge(rnode, edge.src_conn, wnode, edge.dst_conn,
                                   copy.deepcopy(edge.data))

        # All instances of the map parameter in this state become the loop start
        sd.replace(initial_state, map_param, map_rstart)
        # Initial writes go to the first buffer
        sd.replace(initial_state, '__dace_db_param', '0')

        ##############################
        # Modify main state's memlets

        # Divide by loop stride
        # (the buffer index alternates between 0 and 1 on each iteration)
        new_expr = symbolic.pystr_to_symbolic('(%s / %s) %% 2' %
                                              (map_param, map_rstride))
        sd.replace(nstate, '__dace_db_param', new_expr)

        ##############################
        # Add the main state's contents to the last state, modifying
        # memlets appropriately.
        final_state: sd.SDFGState = nsdfg_node.sdfg.sink_nodes()[0]
        final_state.set_label('%s_final_computation' % map_entry.map.label)
        # Copied before the next-buffer reads are added below, so the final
        # state only contains the computation
        dup_nstate = copy.deepcopy(nstate)
        final_state.add_nodes_from(dup_nstate.nodes())
        for e in dup_nstate.edges():
            final_state.add_edge(e.src, e.src_conn, e.dst, e.dst_conn, e.data)

        ##############################
        # Add reads into next buffers to main state
        for edge in edges_to_replace:
            rnode = copy.deepcopy(edge.src)
            nstate.add_node(rnode)
            wnode = nstate.add_write(edge.dst.data)
            new_memlet = copy.deepcopy(edge.data)
            # Advance the subset that refers to the data being read (not the
            # transient) by one loop stride
            if new_memlet.data in transients_to_modify:
                new_memlet.other_subset = self._replace_in_subset(
                    new_memlet.other_subset, map_param,
                    '(%s + %s)' % (map_param, map_rstride))
            else:
                new_memlet.subset = self._replace_in_subset(
                    new_memlet.subset, map_param,
                    '(%s + %s)' % (map_param, map_rstride))

            nstate.add_edge(rnode, edge.src_conn, wnode, edge.dst_conn,
                            new_memlet)

        nstate.set_label('%s_double_buffered' % map_entry.map.label)
        # Divide by loop stride
        # (placeholders remaining in the newly added reads select the other
        # buffer index)
        new_expr = symbolic.pystr_to_symbolic('((%s / %s) + 1) %% 2' %
                                              (map_param, map_rstride))
        sd.replace(nstate, '__dace_db_param', new_expr)

    @staticmethod
    def _modify_memlet(sdfg, subset, data_name):
        """ Returns a new range consisting of a leading ``__dace_db_param``
            index followed by the dimensions of ``subset``.

            :param sdfg: The SDFG in which ``data_name`` is defined.
            :param subset: The subset to extend.
            :param data_name: Name of the data container the subset refers to.
            :return: The new ``subsets.Range`` object.
        """
        desc = sdfg.arrays[data_name]
        if len(subset) == len(desc.shape):
            # Already in the right shape, modify new dimension
            subset = list(subset)[1:]

        new_subset = subsets.Range([('__dace_db_param', '__dace_db_param',
                                     1)] + list(subset))
        return new_subset

    @staticmethod
    def _replace_in_subset(subset, string_or_symbol, new_string_or_symbol):
        """ Returns a deep copy of ``subset`` in which ``string_or_symbol``
            is substituted by ``new_string_or_symbol`` in every dimension.
            The given subset is left unmodified.
        """
        new_subset = copy.deepcopy(subset)

        repldict = {
            symbolic.pystr_to_symbolic(string_or_symbol):
            symbolic.pystr_to_symbolic(new_string_or_symbol)
        }

        for i, dim in enumerate(new_subset):
            try:
                new_subset[i] = tuple(d.subs(repldict) for d in dim)
            except TypeError:
                # Presumably a dimension that is a single index rather than
                # an iterable range tuple; substitute it directly if symbolic
                new_subset[i] = (dim.subs(repldict)
                                 if symbolic.issymbolic(dim) else dim)

        return new_subset
Beispiel #18
0
class RedundantArray(pm.Transformation):
    """ Redundant array removal: matches an access node that is copied
        directly into another access node and, when the checks in
        ``can_be_applied`` pass, redirects its incoming edges to the second
        node and removes the first one from the state. """

    # Counter that is incremented on application when the "debugprint"
    # configuration entry is set
    _arrays_removed = 0
    _in_array = nodes.AccessNode("_")
    _out_array = nodes.AccessNode("_")

    @staticmethod
    def expressions():
        """ Returns the pattern list: a single path in_array -> out_array. """
        pattern = nxutil.node_path_graph(RedundantArray._in_array,
                                         RedundantArray._out_array)
        return [pattern]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Returns True if the matched ``in_array`` may be removed.

            :param graph: The state in which the pattern was matched.
            :param candidate: Mapping from pattern nodes to node IDs in
                              ``graph``.
            :param expr_index: Index of the matched pattern expression.
            :param sdfg: The SDFG that contains ``graph``.
            :param strict: If True, additionally require that the copy memlet
                           has the same size as the removed array.
        """
        state_nodes = graph.nodes()
        in_array = state_nodes[candidate[RedundantArray._in_array]]
        out_array = state_nodes[candidate[RedundantArray._out_array]]

        # The only consumer of in_array must be out_array
        if graph.out_degree(in_array) != 1:
            return False

        # Only transient data may be removed
        in_desc = in_array.desc(sdfg)
        if not in_desc.transient:
            return False

        # Both arrays must reside in the same storage location
        out_desc = out_array.desc(sdfg)
        if in_desc.storage != out_desc.storage:
            return False

        # Count access nodes, in any state, whose descriptor equals the
        # descriptor of in_array
        occurrences = 0
        for state in sdfg.nodes():
            for n in state.nodes():
                if isinstance(n, nodes.AccessNode) and n.desc(sdfg) == in_desc:
                    occurrences += 1
        if occurrences > 1:
            return False

        # Shapes must agree, so that subsets need no modification
        in_shape = in_desc.shape
        out_shape = out_desc.shape
        if len(in_shape) != len(out_shape):
            return False
        if any(i != o for i, o in zip(in_shape, out_shape)):
            return False

        if strict:
            # Strict mode: the memlet has to cover the array being removed
            copy_edge = graph.edges_between(in_array, out_array)[0]
            for m, a in zip(copy_edge.data.subset.size(), in_shape):
                if m != a:
                    return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        """ Returns a short description naming the node to be removed. """
        removed = graph.nodes()[candidate[RedundantArray._in_array]]
        return "Remove " + str(removed)

    def apply(self, sdfg):
        """ Redirects all edges entering ``in_array`` to ``out_array``,
            renaming the memlets along their paths, and removes ``in_array``
            from the state.

            :param sdfg: The SDFG that contains the matched state.
        """
        graph = sdfg.nodes()[self.state_id]
        in_array = graph.nodes()[self.subgraph[RedundantArray._in_array]]
        out_array = graph.nodes()[self.subgraph[RedundantArray._out_array]]

        for incoming in graph.in_edges(in_array):
            # Rename every memlet on the path that refers to the removed data
            for path_edge in graph.memlet_path(incoming):
                if path_edge.data.data == in_array.data:
                    path_edge.data.data = out_array.data

            # Re-attach the edge to out_array
            graph.remove_edge(incoming)
            graph.add_edge(incoming.src, incoming.src_conn, out_array,
                           incoming.dst_conn, incoming.data)

        # The node itself is no longer needed in the state
        graph.remove_node(in_array)
        # TODO: Should the array be removed from the SDFG?
        if Config.get_bool("debugprint"):
            RedundantArray._arrays_removed += 1
Beispiel #19
0
class MapWCRFusion(pm.Transformation):
    """ Map / expanded-reduce fusion.
        Fuses a map with an immediately following reduction that is expressed
        as two nested maps with a write-conflict resolution (WCR) memlet,
        i.e., a partial reduction, where the array between the map and the
        reduction is not used anywhere else.
    """

    # Pattern nodes, in the order in which they appear on the matched path
    _tasklet = nodes.Tasklet('_')
    _tmap_exit = nodes.MapExit(nodes.Map("", [], []))
    _in_array = nodes.AccessNode('_')
    _rmap_in_entry = nodes.MapEntry(nodes.Map("", [], []))
    _rmap_in_tasklet = nodes.Tasklet('_')
    _rmap_in_cr = nodes.MapExit(nodes.Map("", [], []))
    _rmap_out_entry = nodes.MapEntry(nodes.Map("", [], []))
    _rmap_out_exit = nodes.MapExit(nodes.Map("", [], []))
    _out_array = nodes.AccessNode('_')

    @staticmethod
    def expressions():
        """ Returns the pattern list: a map followed by a partial reduction
            of axes, expressed as one path graph. """
        pattern = nxutil.node_path_graph(
            MapWCRFusion._tasklet, MapWCRFusion._tmap_exit,
            MapWCRFusion._in_array, MapWCRFusion._rmap_out_entry,
            MapWCRFusion._rmap_in_entry, MapWCRFusion._rmap_in_tasklet,
            MapWCRFusion._rmap_in_cr, MapWCRFusion._rmap_out_exit,
            MapWCRFusion._out_array)
        return [pattern]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Returns True if the matched map and reduction can be fused.

            :param graph: The state in which the pattern was matched.
            :param candidate: Mapping from pattern nodes to node IDs in
                              ``graph``.
            :param expr_index: Index of the matched pattern expression.
            :param sdfg: The SDFG that contains ``graph``.
            :param strict: If True, also require that the intermediate data
                           appears in no other access node of this state and
                           is not among ``sdfg.shared_transients()``.
        """
        tmap_exit = graph.nodes()[candidate[MapWCRFusion._tmap_exit]]
        in_array = graph.nodes()[candidate[MapWCRFusion._in_array]]
        rmap_entry = graph.nodes()[candidate[MapWCRFusion._rmap_out_entry]]

        # The intermediate array may only be written by the first map...
        if any(e.src != tmap_exit for e in graph.in_edges(in_array)):
            return False
        # ...and only be read by the outer reduction map
        if any(e.dst != rmap_entry for e in graph.out_edges(in_array)):
            return False

        # The second map must perform a reduction (WCR on its inner exit)
        rmap_cr = graph.nodes()[candidate[MapWCRFusion._rmap_in_cr]]
        if graph.in_edges(rmap_cr)[0].data.wcr is None:
            return False

        # (strict) The transient must not be accessed anywhere else in this
        # state or in other states
        if strict:
            same_data_nodes = sum(
                1 for n in graph.nodes()
                if isinstance(n, nodes.AccessNode) and n.data == in_array.data)
            if (same_data_nodes > 1
                    or in_array.data in sdfg.shared_transients()):
                return False

        # The range written by the map must match the range that is reduced
        written = graph.in_edges(in_array)[0].data
        reduced = graph.out_edges(in_array)[0].data
        return not (written.subset != reduced.subset)

    @staticmethod
    def match_to_str(graph, candidate):
        """ Returns the ``candidate`` entries of the tasklet, the first map
            exit and the reduction exit, joined by arrows. """
        parts = (candidate[MapWCRFusion._tasklet],
                 candidate[MapWCRFusion._tmap_exit],
                 candidate[MapWCRFusion._rmap_in_cr])
        return ' -> '.join(map(str, parts))

    def apply(self, sdfg):
        """ Collapses the two reduction maps into one (``MapCollapse``) and
            then fuses the first map with the collapsed map (``MapFusion``).

            :param sdfg: The SDFG that contains the matched state.
        """
        graph = sdfg.node(self.state_id)

        # First, merge the outer and inner reduction maps
        collapse_match = {
            MapCollapse._outer_map_entry:
            self.subgraph[MapWCRFusion._rmap_out_entry],
            MapCollapse._inner_map_entry:
            self.subgraph[MapWCRFusion._rmap_in_entry]
        }
        collapse = MapCollapse(self.sdfg_id, self.state_id, collapse_match, 0)
        collapsed_entry, _ = collapse.apply(sdfg)

        # Then, fuse the tasklet map with the collapsed reduction map
        fusion_match = {
            MapFusion._first_map_exit: self.subgraph[MapWCRFusion._tmap_exit],
            MapFusion._second_map_entry: graph.node_id(collapsed_entry)
        }
        fusion = MapFusion(self.sdfg_id, self.state_id, fusion_match, 0)
        fusion.apply(sdfg)
Beispiel #20
0
class MergeArrays(pattern_matching.Transformation):
    """ Merge duplicate arrays connected to the same scope entry.

        Matches two access nodes that refer to the same data and are both
        connected to the same scope entry node. On application, the first
        access node is kept and the other source access nodes of the same
        data are removed, with the scope's inner edges redirected to the
        connector of the first one.
    """

    _array1 = nodes.AccessNode("_")
    _array2 = nodes.AccessNode("_")
    _map_entry = nodes.EntryNode()

    @staticmethod
    def expressions():
        """ Returns the pattern graph: two access nodes, each with an edge
            to the same scope entry node.
        """
        # Matching
        #   o  o
        #   |  |
        # /======\

        g = SDFGState()
        g.add_node(MergeArrays._array1)
        g.add_node(MergeArrays._array2)
        g.add_node(MergeArrays._map_entry)
        g.add_edge(MergeArrays._array1, None, MergeArrays._map_entry, None,
                   memlet.EmptyMemlet())
        g.add_edge(MergeArrays._array2, None, MergeArrays._map_entry, None,
                   memlet.EmptyMemlet())
        return [g]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Checks whether the two matched access nodes can be merged.

            :param graph: The state in which the match was found.
            :param candidate: Mapping from pattern nodes to node IDs.
            :param expr_index: Index of the matched pattern expression
                               (unused).
            :param sdfg: The SDFG containing the state (unused).
            :param strict: Strict-mode flag (unused).
            :return: True if the transformation can be applied.
        """
        arr1_id = candidate[MergeArrays._array1]
        arr2_id = candidate[MergeArrays._array2]

        # Ensure both arrays contain the same data
        arr1 = graph.node(arr1_id)
        arr2 = graph.node(arr2_id)
        if arr1.data != arr2.data:
            return False

        # Ensure only arr1's node ID contains incoming edges
        if graph.in_degree(arr1) == 0 and graph.in_degree(arr2) > 0:
            return False

        # Ensure arr1 and arr2's node IDs are ordered (avoid duplicates)
        if (graph.in_degree(arr1) == 0 and graph.in_degree(arr2) == 0
                and arr1_id >= arr2_id):
            return False

        map_entry = graph.node(candidate[MergeArrays._map_entry])

        # arr1 must reach the scope through at least one pass-through
        # ('IN_') connector: ``apply`` derives the surviving connector name
        # from that edge. Edges that lead directly into the map (other
        # connector names) or that carry no connector do not qualify.
        if not any(
                e.dst_conn and e.dst_conn.startswith('IN_')
                for e in graph.edges_between(arr1, map_entry)):
            return False

        # Both arrays may only be used by this scope entry
        if (any(e.dst != map_entry for e in graph.out_edges(arr1))
                or any(e.dst != map_entry for e in graph.out_edges(arr2))):
            return False

        # Ensure arr1 and arr2 are the first two incoming nodes (avoid further
        # duplicates)
        all_source_nodes = {
            graph.node_id(e.src)
            for e in graph.in_edges(map_entry)
            if e.src != arr1 and e.src != arr2 and e.dst_conn
            and e.dst_conn.startswith('IN_') and graph.in_degree(e.src) == 0
        }
        if any(nid < arr1_id or nid < arr2_id for nid in all_source_nodes):
            return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        """ Returns a description of the match in the form
            ``<data> (<array1 id>, <array2 id>) -> <scope label>``.
        """
        arr = graph.node(candidate[MergeArrays._array1])
        map_entry = graph.node(candidate[MergeArrays._map_entry])
        return '%s (%d, %d) -> %s' % (arr.data, candidate[MergeArrays._array1],
                                      candidate[MergeArrays._array2],
                                      map_entry.label)

    def apply(self, sdfg):
        """ Keeps the first matched access node and removes every other
            source access node (no incoming edges) of the same data that
            enters the scope through an 'IN_' connector.

            :param sdfg: The SDFG that contains the matched state.
        """
        graph = sdfg.node(self.state_id)
        array = graph.node(self.subgraph[MergeArrays._array1])
        map_entry = graph.node(self.subgraph[MergeArrays._map_entry])

        # Use the pass-through edge of the surviving array; an edge without
        # an 'IN_' connector cannot provide a connector name.
        map_edge = next(
            e for e in graph.out_edges(array) if e.dst == map_entry
            and e.dst_conn and e.dst_conn.startswith('IN_'))
        # Strip the 'IN_' prefix
        result_connector = map_edge.dst_conn[3:]

        # Find all other incoming access nodes without incoming edges
        source_edges = [
            e for e in graph.in_edges(map_entry)
            if isinstance(e.src, nodes.AccessNode) and e.src.data == array.data
            and e.src != array and e.dst_conn and e.dst_conn.startswith('IN_')
            and graph.in_degree(e.src) == 0
        ]

        # Modify connectors to point to first array
        connectors_to_remove = set()
        for e in source_edges:
            connector = e.dst_conn[3:]
            connectors_to_remove.add(connector)
            for inner_edge in graph.out_edges(map_entry):
                # Compare the full connector name so that edges without a
                # source connector (None) are skipped instead of sliced.
                if inner_edge.src_conn == 'OUT_' + connector:
                    inner_edge._src_conn = 'OUT_' + result_connector

        # Remove other nodes from state
        graph.remove_nodes_from(set(e.src for e in source_edges))

        # Remove connectors from scope entry
        map_entry.in_connectors -= set('IN_' + c for c in connectors_to_remove)
        map_entry.out_connectors -= set(
            'OUT_' + c for c in connectors_to_remove)
Beispiel #21
0
class MapFusion(pattern_matching.Transformation):
    """ Implements the MapFusion transformation.
        It will check for all patterns MapExit -> AccessNode -> MapEntry, and
        based on the following rules, fuse them and remove the transient in
        between. There are several possibilities of what it does to this
        transient in between. 

        Essentially, if there is some other place in the
        sdfg where it is required, or if it is not a transient, then it will
        not be removed. In such a case, it will be linked to the MapExit node
        of the new fused map.

        Rules for fusing maps:
          0. The map range of the second map should be a permutation of the
             first map range.
          1. Each of the access nodes that are adjacent to the first map exit
             should have an edge to the second map entry. If it doesn't, then the
             second map entry should not be reachable from this access node.
          2. Any node that has a wcr from the first map exit should not be
             adjacent to the second map entry.
          3. Access pattern for the access nodes in the second map should be
             the same permutation of the map parameters as the map ranges of the
             two maps. Alternatively, this access node should not be adjacent to
             the first map entry.
    """
    _first_map_exit = nodes.ExitNode()
    _some_array = nodes.AccessNode("_")
    _second_map_entry = nodes.EntryNode()

    @staticmethod
    def annotates_memlets():
        """ Always returns False for this transformation.

            NOTE(review): presumably consulted by the transformation
            framework to learn whether ``apply`` annotates memlets itself;
            confirm against the base class.
        """
        return False

    @staticmethod
    def expressions():
        """ Returns the list of pattern graphs for this transformation: a
            single graph built from the first map exit, an access node and
            the second map entry, in that order.
        """
        pattern = nxutil.node_path_graph(
            MapFusion._first_map_exit,
            MapFusion._some_array,
            MapFusion._second_map_entry,
        )
        return [pattern]

    @staticmethod
    def find_permutation(first_map: nodes.Map,
                         second_map: nodes.Map) -> Union[List[int], None]:
        """ Computes how the ranges of the second map are permuted with
            respect to the ranges of the first map.

            :param first_map: First map.
            :param second_map: Second map.
            :return: None if the ranges of the two maps cannot be matched
                     one-to-one; otherwise a list of indices L such that the
                     L[x]'th range of the second map equals the x'th range
                     of the first map.
        """
        if len(first_map.range) != len(second_map.range):
            return None

        permutation = []
        for first_rng in first_map.range:
            # First not-yet-used range of the second map that is equal to
            # the current range of the first map
            match = next(
                (j for j, second_rng in enumerate(second_map.range)
                 if first_rng == second_rng and j not in permutation), None)
            if match is None:
                # A range of the first map has no counterpart
                return None
            permutation.append(match)

        return permutation

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Checks whether the two matched maps can be fused.

            :param graph: The state in which the match was found.
            :param candidate: Mapping from pattern nodes to node IDs.
            :param expr_index: Index of the matched pattern expression
                               (unused).
            :param sdfg: The SDFG containing the state (unused).
            :param strict: Strict-mode flag (unused).
            :return: True if the maps can be fused, False otherwise.
        """
        first_map_exit = graph.nodes()[candidate[MapFusion._first_map_exit]]
        first_map_entry = graph.entry_node(first_map_exit)
        second_map_entry = graph.nodes()[candidate[
            MapFusion._second_map_entry]]

        for _in_e in graph.in_edges(first_map_exit):
            if _in_e.data.wcr is not None:
                for _out_e in graph.out_edges(second_map_entry):
                    if _out_e.data.data == _in_e.data.data:
                        # wcr is on a node that is used in the second map, quit
                        return False
        # Check whether there is a pattern map -> access -> map.
        intermediate_nodes = set()
        intermediate_data = set()
        for _, _, dst, _, _ in graph.out_edges(first_map_exit):
            if isinstance(dst, nodes.AccessNode):
                intermediate_nodes.add(dst)
                intermediate_data.add(dst.data)

                # If array is used anywhere else in this state.
                num_occurrences = len([
                    n for n in graph.nodes()
                    if isinstance(n, nodes.AccessNode) and n.data == dst.data
                ])
                if num_occurrences > 1:
                    return False
            else:
                # Every output of the first map must be an access node
                return False
        # Check map ranges
        perm = MapFusion.find_permutation(first_map_entry.map,
                                          second_map_entry.map)
        if perm is None:
            return False

        # Create a dict that maps parameters of the first map to those of the
        # second map.
        params_dict = {}
        for _index, _param in enumerate(first_map_entry.map.params):
            params_dict[_param] = second_map_entry.map.params[perm[_index]]

        out_memlets = [e.data for e in graph.in_edges(first_map_exit)]

        # Check that input set of second map is provided by the output set
        # of the first map, or other unrelated maps
        for _, _, _, _, second_memlet in graph.out_edges(second_map_entry):
            # Memlets that do not come from one of the intermediate arrays
            if second_memlet.data not in intermediate_data:
                # however, if intermediate_data eventually leads to
                # second_memlet.data, need to fail.
                for _n in intermediate_nodes:
                    source_node = _n  # graph.find_node(_n.data)
                    destination_node = graph.find_node(second_memlet.data)
                    # NOTE: Assumes graph has networkx version
                    if destination_node in nx.descendants(
                            graph._nx, source_node):
                        return False
                continue

            provided = False
            for first_memlet in out_memlets:
                if first_memlet.data != second_memlet.data:
                    continue
                # If there is an equivalent subset, it is provided.
                # Rewrite the first map's subset in terms of the second
                # map's parameters and compare it with the second memlet.
                expected_second_subset = []
                for _tup in first_memlet.subset:
                    if isinstance(_tup, symbolic.symbol):
                        # Only parameters of the first map are renamed; any
                        # other symbol is kept unchanged (same rule as for
                        # symbols inside tuples below), instead of failing
                        # with a KeyError.
                        if str(_tup) in params_dict:
                            new_tuple = symbolic.symbol(
                                params_dict[str(_tup)])
                        else:
                            new_tuple = _tup
                    elif isinstance(_tup, (list, tuple)):
                        new_tuple = []
                        for _sym in _tup:
                            if (isinstance(_sym, symbolic.symbol)
                                    and str(_sym) in params_dict):
                                new_tuple.append(
                                    symbolic.symbol(params_dict[str(_sym)]))
                            else:
                                new_tuple.append(_sym)
                        new_tuple = tuple(new_tuple)
                    else:
                        new_tuple = _tup
                    expected_second_subset.append(new_tuple)
                if expected_second_subset == list(second_memlet.subset):
                    provided = True
                    break

            # If none of the output memlets of the first map provide the info,
            # fail.
            if not provided:
                return False

        # Success
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        """ Describes the match as ``<label>: <params>`` of the first map
            exit and of the second map entry, joined by ' -> '.
        """
        descriptions = []
        for pattern_node in (MapFusion._first_map_exit,
                             MapFusion._second_map_entry):
            scope_node = graph.nodes()[candidate[pattern_node]]
            scope_map = scope_node.map
            descriptions.append(scope_map.label + ": " +
                                str(scope_map.params))

        return " -> ".join(descriptions)

    def apply(self, sdfg):
        """
            This method applies the mapfusion transformation.
            Other than the removal of the second map entry node (SME), and the
            first map exit (FME) node, it has the following side effects:

            1.  Any transient adjacent to both FME and SME with degree = 2
                will be removed. The tasklets that use/produce it shall be
                connected directly with a scalar/new transient (if the
                dataflow is more than a single scalar)

            2.  If this transient is adjacent to FME and SME and has other
                uses, it will be adjacent to the new map exit post fusion.
                Tasklet-> Tasklet edges will ALSO be added as mentioned above.

            3.  If an access node is adjacent to FME but not SME, it will be
                adjacent to new map exit post fusion.

            4.  If an access node is adjacent to SME but not FME, it will be
                adjacent to the new map entry node post fusion.

            :param sdfg: The SDFG that contains the matched state.
        """
        graph = sdfg.nodes()[self.state_id]
        first_exit = graph.nodes()[self.subgraph[MapFusion._first_map_exit]]
        first_entry = graph.entry_node(first_exit)
        second_entry = graph.nodes()[self.subgraph[
            MapFusion._second_map_entry]]
        second_exit = graph.exit_nodes(second_entry)[0]

        # All nodes written by the first map exit; they are expected to be
        # access nodes (asserted below).
        intermediate_nodes = set()
        for _, _, dst, _, _ in graph.out_edges(first_exit):
            intermediate_nodes.add(dst)
            assert isinstance(dst, nodes.AccessNode)

        # Check if an access node refers to non transient memory, or transient
        # is used at another location (cannot erase)
        do_not_erase = set()
        for node in intermediate_nodes:
            if sdfg.arrays[node.data].transient is False:
                do_not_erase.add(node)
            else:
                # Written by something other than the first map exit
                for edge in graph.in_edges(node):
                    if edge.src != first_exit:
                        do_not_erase.add(node)
                        break
                else:
                    # (for-else: only reached if no other writer was found)
                    # Read by something other than the second map entry
                    for edge in graph.out_edges(node):
                        if edge.dst != second_entry:
                            do_not_erase.add(node)
                            break

        # Find permutation between first and second scopes
        perm = MapFusion.find_permutation(first_entry.map, second_entry.map)
        # Maps each parameter of the first map to the matching parameter of
        # the second map
        params_dict = {}
        for index, param in enumerate(first_entry.map.params):
            params_dict[param] = second_entry.map.params[perm[index]]

        # Replaces (in memlets and tasklet) the second scope map
        # indices with the permuted first map indices.
        # This works in two passes to avoid problems when e.g., exchanging two
        # parameters (instead of replacing (j,i) and (i,j) to (j,j) and then
        # i,i).
        second_scope = graph.scope_subgraph(second_entry)
        # Pass 1: rename second-map parameters to temporary names
        for firstp, secondp in params_dict.items():
            if firstp != secondp:
                replace(second_scope, secondp, '__' + secondp + '_fused')
        # Pass 2: rename the temporary names to the first-map parameters
        for firstp, secondp in params_dict.items():
            if firstp != secondp:
                replace(second_scope, '__' + secondp + '_fused', firstp)

        # Isolate First exit node
        ############################
        # Edges and nodes are only collected here and removed after the loop
        edges_to_remove = set()
        nodes_to_remove = set()
        for edge in graph.in_edges(first_exit):
            memlet_path = graph.memlet_path(edge)
            # Position of this edge within its memlet path
            edge_index = next(i for i, e in enumerate(memlet_path)
                              if e == edge)
            # The memlet path ends at the access node written by this edge
            access_node = memlet_path[-1].dst
            if access_node not in do_not_erase:
                out_edges = [
                    e for e in graph.out_edges(access_node)
                    if e.dst == second_entry
                ]
                # In this transformation, there can only be one edge to the
                # second map
                assert len(out_edges) == 1
                # Get source connector to the second map
                # ([3:] strips the 'IN_' prefix)
                connector = out_edges[0].dst_conn[3:]

                new_dst = None
                new_dst_conn = None
                # Look at the second map entry out-edges to get the new
                # destination
                for _e in graph.out_edges(second_entry):
                    # ([4:] strips the 'OUT_' prefix)
                    if _e.src_conn[4:] == connector:
                        new_dst = _e.dst
                        new_dst_conn = _e.dst_conn
                        break
                if new_dst is None:
                    # Access node is not used in the second map
                    nodes_to_remove.add(access_node)
                    continue
                # If the source is an access node, modify the memlet to point
                # to it
                if (isinstance(edge.src, nodes.AccessNode)
                        and edge.data.data != edge.src.data):
                    edge.data.data = edge.src.data
                    edge.data.subset = ("0" if edge.data.other_subset is None
                                        else edge.data.other_subset)
                    edge.data.other_subset = None

                else:
                    # Add a transient scalar/array
                    self.fuse_nodes(sdfg, graph, edge, new_dst, new_dst_conn)

                edges_to_remove.add(edge)

                # Remove transient node between the two maps
                nodes_to_remove.add(access_node)
            else:  # The case where intermediate array node cannot be removed
                # Node will become an output of the second map exit
                out_e = memlet_path[edge_index + 1]
                conn = second_exit.next_connector()
                # Outer edge: second map exit -> original destination
                graph.add_edge(
                    second_exit,
                    'OUT_' + conn,
                    out_e.dst,
                    out_e.dst_conn,
                    dcpy(out_e.data),
                )
                second_exit.add_out_connector('OUT_' + conn)

                # Inner edge: original producer -> second map exit
                graph.add_edge(edge.src, edge.src_conn, second_exit,
                               'IN_' + conn, dcpy(edge.data))
                second_exit.add_in_connector('IN_' + conn)

                edges_to_remove.add(out_e)

                # If the second map needs this node, link the connector
                # that generated this to the place where it is needed, with a
                # temp transient/scalar for memlet to be generated
                for out_e in graph.out_edges(second_entry):
                    second_memlet_path = graph.memlet_path(out_e)
                    # The memlet path starts at the node that is read
                    source_node = second_memlet_path[0].src
                    if source_node == access_node:
                        self.fuse_nodes(sdfg, graph, edge, out_e.dst,
                                        out_e.dst_conn)

                edges_to_remove.add(edge)
        ###
        # First scope exit is isolated and can now be safely removed
        for e in edges_to_remove:
            graph.remove_edge(e)
        graph.remove_nodes_from(nodes_to_remove)
        graph.remove_node(first_exit)

        # Isolate second_entry node
        ###########################
        for edge in graph.in_edges(second_entry):
            memlet_path = graph.memlet_path(edge)
            # Position of this edge within its memlet path
            edge_index = next(i for i, e in enumerate(memlet_path)
                              if e == edge)
            # The memlet path starts at the access node that is read
            access_node = memlet_path[0].src
            if access_node in intermediate_nodes:
                # Already handled above, can be safely removed
                graph.remove_edge(edge)
                continue

            # This is an external input to the second map which will now go
            # through the first map.
            conn = first_entry.next_connector()
            # Outer edge: external source -> first map entry
            graph.add_edge(edge.src, edge.src_conn, first_entry, 'IN_' + conn,
                           dcpy(edge.data))
            first_entry.add_in_connector('IN_' + conn)
            graph.remove_edge(edge)
            # Inner edge: first map entry -> original consumer
            out_e = memlet_path[edge_index + 1]
            graph.add_edge(
                first_entry,
                'OUT_' + conn,
                out_e.dst,
                out_e.dst_conn,
                dcpy(out_e.data),
            )
            first_entry.add_out_connector('OUT_' + conn)

            graph.remove_edge(out_e)
        ###
        # Second node is isolated and can now be safely removed
        graph.remove_node(second_entry)

        # Fix scope exit to point to the right map
        second_exit.map = first_entry.map

    def fuse_nodes(self, sdfg, graph, edge, new_dst, new_dst_conn):
        """ Fuses two nodes via memlets and possibly transient arrays.

            :param sdfg: The SDFG to which the new transient is added.
            :param graph: The state that contains ``edge``.
            :param edge: Edge whose memlet is redirected to the new
                         transient.
            :param new_dst: Node that will consume the data.
            :param new_dst_conn: Connector on ``new_dst`` to connect to.
        """
        # The element type is taken from the access node at the end of the
        # memlet path of the edge
        access_node = graph.memlet_path(edge)[-1].dst

        local_name = "__s%d_n%d%s_n%d%s" % (
            self.state_id,
            graph.node_id(edge.src),
            edge.src_conn,
            graph.node_id(edge.dst),
            edge.dst_conn,
        )

        fused_memlet = edge.data
        if fused_memlet.subset.num_elements() == 1:
            # Single element: register scalar, connected directly from the
            # source of the edge
            sdfg.add_scalar(
                local_name,
                dtype=access_node.desc(graph).dtype,
                transient=True,
                storage=dtypes.StorageType.Register,
            )
            fused_memlet.data = local_name
            fused_memlet.subset = "0"
            local_node, src_connector = edge.src, edge.src_conn
        else:
            # More than one element: transient array with its own access node
            shape = fused_memlet.subset.size()
            sdfg.add_transient(local_name,
                               shape,
                               dtype=access_node.desc(graph).dtype)
            local_node = graph.add_access(local_name)
            src_connector = None
            fused_memlet.data = local_name
            fused_memlet.subset = ",".join(["0:" + str(s) for s in shape])
            # Edge from the producer into the new transient node
            graph.add_edge(
                edge.src,
                edge.src_conn,
                local_node,
                None,
                dcpy(fused_memlet),
            )

        # Edge from the local node to the consumer
        graph.add_edge(local_node, src_connector, new_dst, new_dst_conn,
                       dcpy(fused_memlet))
Beispiel #22
0
class RedundantArrayCopying(pm.Transformation):
    """ Implements the redundant array removal transformation. Removes array B
        in pattern A -> B -> A.
    """

    # Counter of removed intermediate arrays (only updated when the
    # "debugprint" configuration entry is enabled)
    _arrays_removed = 0
    _in_array = nodes.AccessNode("_")
    _med_array = nodes.AccessNode("_")
    _out_array = nodes.AccessNode("_")

    @staticmethod
    def expressions():
        """ Returns the pattern graph: in_array -> med_array -> out_array. """
        return [
            nxutil.node_path_graph(
                RedundantArrayCopying._in_array,
                RedundantArrayCopying._med_array,
                RedundantArrayCopying._out_array,
            )
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Checks whether the intermediate array of the match can be
            bypassed.

            :param graph: The state in which the match was found.
            :param candidate: Mapping from pattern nodes to node IDs.
            :param expr_index: Index of the matched pattern expression
                               (unused).
            :param sdfg: The SDFG containing the state.
            :param strict: Strict-mode flag (unused).
            :return: True if the transformation can be applied.
        """
        in_array = graph.nodes()[candidate[RedundantArrayCopying._in_array]]
        out_array = graph.nodes()[candidate[RedundantArrayCopying._out_array]]

        # Ensure out degree is one (only one target, which is med_array)
        if graph.out_degree(in_array) != 1:
            return False

        # NOTE: Transience of the candidate and other occurrences of the
        # intermediate array (in this and other states) are currently not
        # checked here.

        in_desc = in_array.desc(sdfg)
        out_desc = out_array.desc(sdfg)

        # Make sure that both arrays are using the same storage location
        if in_desc.storage != out_desc.storage:
            return False

        # Only apply if arrays are of same shape (no need to modify memlet subset)
        if len(in_desc.shape) != len(out_desc.shape) or any(
                i != o for i, o in zip(in_desc.shape, out_desc.shape)):
            return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        """ Returns ``"Remove <med_array>"`` for the matched candidate. """
        med_array = graph.nodes()[candidate[RedundantArrayCopying._med_array]]

        return "Remove " + str(med_array)

    def apply(self, sdfg):
        """ Redirects all readers of the output array copies to the input
            array, removes those copies and, if it has become unused and is
            transient, the intermediate array as well.

            :param sdfg: The SDFG that contains the matched state.
        """
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        graph = sdfg.nodes()[self.state_id]
        in_array = gnode(RedundantArrayCopying._in_array)
        med_array = gnode(RedundantArrayCopying._med_array)
        out_array = gnode(RedundantArrayCopying._out_array)

        # Total number of readers of med_array vs. number of readers that
        # were removed below
        med_edges = len(graph.out_edges(med_array))
        med_out_edges = 0
        for med_e in graph.out_edges(med_array):
            if (isinstance(med_e.dst, nodes.AccessNode)
                    and med_e.dst.data == out_array.data):
                # Modify all outgoing edges to point to in_array
                for out_e in graph.out_edges(med_e.dst):
                    path = graph.memlet_path(out_e)
                    for pe in path:
                        if pe.data.data == out_array.data:
                            pe.data.data = in_array.data
                    # Redirect edge to in_array
                    graph.remove_edge(out_e)
                    graph.add_edge(in_array, out_e.src_conn, out_e.dst,
                                   out_e.dst_conn, out_e.data)
                # Remove out_array. The edges to remove are those from
                # med_array (a node, not the edge ``med_e``) to the copy.
                for e in graph.edges_between(med_array, med_e.dst):
                    graph.remove_edge(e)
                graph.remove_node(med_e.dst)
                med_out_edges += 1

        # Finally, med_array node
        if med_array.desc(sdfg).transient and med_edges == med_out_edges:
            for e in graph.edges_between(in_array, med_array):
                graph.remove_edge(e)
            graph.remove_node(med_array)
            if Config.get_bool("debugprint"):
                RedundantArrayCopying._arrays_removed += 1
Beispiel #23
0
    def apply(self, sdfg):
        """ Inserts a new transient array between the inner and the outer
            map exit and re-offsets the memlets of the original array inside
            the inner scope so that they address the transient.

            :param sdfg: The SDFG that contains the matched state.
        """
        graph = sdfg.nodes()[self.state_id]
        inner_map_exit = graph.nodes()[self.subgraph[
            OutLocalStorage._inner_map_exit]]
        outer_map_exit = graph.nodes()[self.subgraph[
            OutLocalStorage._outer_map_exit]]

        # First edge that leads from the inner to the outer map exit
        original_edge = next((e for e in graph.in_edges(outer_map_exit)
                              if e.src == inner_map_exit), None)
        invariant_memlet = original_edge.data
        array = invariant_memlet.data

        # Register the transient and create its access node
        trans_name = graph.label + '_trans_' + invariant_memlet.data
        sdfg.add_array(trans_name, [
            symbolic.overapproximate(r)
            for r in invariant_memlet.bounding_box_size()
        ],
                       sdfg.arrays[invariant_memlet.data].dtype,
                       transient=True)
        data_node = nodes.AccessNode(trans_name)
        data_node.setzero = True

        # Memlet from the transient to the outer exit keeps the original
        # subset; the memlet into the transient is shifted by the start of
        # every dimension.
        from_data_mm = copy.deepcopy(invariant_memlet)
        to_data_mm = copy.deepcopy(invariant_memlet)
        to_data_mm.data = data_node.data
        offset = []
        for ind, rng in enumerate(invariant_memlet.subset):
            start = rng[0]
            offset.append(start)
            dim = invariant_memlet.subset[ind]
            if isinstance(dim, tuple):
                to_data_mm.subset[ind] = (dim[0] - start, dim[1] - start,
                                          dim[2])
            else:
                to_data_mm.subset[ind] -= start

        # Reconnect, assuming one edge to the stream
        graph.remove_edge(original_edge)
        graph.add_edge(inner_map_exit, original_edge.src_conn, data_node, None,
                       to_data_mm)
        graph.add_edge(data_node, None, outer_map_exit, original_edge.dst_conn,
                       from_data_mm)

        # Walk backwards from the inner map exit and retarget memlets of the
        # original array, stopping at the first code node
        for _parent, _, _child, _, memlet in graph.bfs_edges(inner_map_exit,
                                                             reverse=True):
            if isinstance(_child, nodes.CodeNode):
                break
            if memlet.data != array:
                continue
            for ind, rng in enumerate(memlet.subset):
                shift = offset[ind]
                if isinstance(memlet.subset[ind], tuple):
                    memlet.subset[ind] = (rng[0] - shift, rng[1] - shift,
                                          rng[2])
                else:
                    memlet.subset[ind] -= shift
            memlet.data = trans_name
Beispiel #24
0
class MapReduceFusion(pm.Transformation):
    """ Implements the map-reduce-fusion transformation.
        Fuses a map with an immediately following reduction, where the array
        between the map and the reduction is not used anywhere else.

        Three patterns are matched (the index is the ``expr_index`` used
        throughout this class):

          0. map, then a reduction map over all axes;
          1. map, then an outer map wrapping an inner reduction map
             (partial reduction);
          2. map, then a ``Reduce`` node.
    """

    # Pattern nodes; they only serve as keys into the match dictionaries.
    _tasklet = nodes.Tasklet('_')
    _tmap_exit = nodes.MapExit(nodes.Map("", [], []))
    _in_array = nodes.AccessNode('_')
    _rmap_in_entry = nodes.MapEntry(nodes.Map("", [], []))
    _rmap_in_tasklet = nodes.Tasklet('_')
    _rmap_in_cr = nodes.MapExit(nodes.Map("", [], []))
    _rmap_out_entry = nodes.MapEntry(nodes.Map("", [], []))
    _rmap_out_exit = nodes.MapExit(nodes.Map("", [], []))
    _out_array = nodes.AccessNode('_')
    _reduce = nodes.Reduce('lambda: None', None)

    @staticmethod
    def expressions():
        """ Returns the three path graphs this transformation matches. """
        return [
            # Map, then reduce of all axes
            nxutil.node_path_graph(
                MapReduceFusion._tasklet, MapReduceFusion._tmap_exit,
                MapReduceFusion._in_array, MapReduceFusion._rmap_in_entry,
                MapReduceFusion._rmap_in_tasklet, MapReduceFusion._rmap_in_cr,
                MapReduceFusion._out_array),
            # Map, then partial reduction of axes
            nxutil.node_path_graph(
                MapReduceFusion._tasklet, MapReduceFusion._tmap_exit,
                MapReduceFusion._in_array, MapReduceFusion._rmap_out_entry,
                MapReduceFusion._rmap_in_entry,
                MapReduceFusion._rmap_in_tasklet, MapReduceFusion._rmap_in_cr,
                MapReduceFusion._rmap_out_exit, MapReduceFusion._out_array),
            # Map, then reduce node
            nxutil.node_path_graph(
                MapReduceFusion._tasklet, MapReduceFusion._tmap_exit,
                MapReduceFusion._in_array, MapReduceFusion._reduce,
                MapReduceFusion._out_array)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Checks whether a matched subgraph can be fused.

            The match is accepted when the intermediate array is written
            only by the tasklet map exit and read only by the reduction,
            when (for the map-based patterns) the first edge into the
            reduction map exit carries a write-conflict resolution, and when
            the memlet written to the intermediate array covers the same
            subset as the one read from it.

            :param graph: The state in which the pattern was matched.
            :param candidate: Mapping from pattern nodes to node indices in
                              ``graph.nodes()``.
            :param expr_index: Index of the matched pattern (0, 1 or 2).
            :param sdfg: The SDFG containing the state (unused here).
            :param strict: Unused here.
            :return: True if the transformation can be applied.
        """
        tmap_exit = graph.nodes()[candidate[MapReduceFusion._tmap_exit]]
        in_array = graph.nodes()[candidate[MapReduceFusion._in_array]]
        if expr_index == 0:  # Reduce without outer map
            rmap_entry = graph.nodes()[candidate[
                MapReduceFusion._rmap_in_entry]]
        elif expr_index == 1:  # Reduce with outer map
            rmap_entry = graph.nodes()[candidate[
                MapReduceFusion._rmap_out_entry]]
        else:  # Reduce node
            rmap_entry = graph.nodes()[candidate[MapReduceFusion._reduce]]

        # Make sure that the array is only accessed by the map and the reduce
        if any(src != tmap_exit
               for src, _, _, _, _ in graph.in_edges(in_array)):
            return False
        if any(dest != rmap_entry
               for _, _, dest, _, _ in graph.out_edges(in_array)):
            return False

        # Make sure that there is a reduction in the second map
        if expr_index < 2:
            rmap_cr = graph.nodes()[candidate[MapReduceFusion._rmap_in_cr]]
            reduce_edge = graph.in_edges(rmap_cr)[0]
            if reduce_edge.data.wcr is None:
                return False

        # TODO(later): The following checks existed in an earlier version and
        # are currently not performed:
        #   * the transient is not accessed by other states;
        #   * only full-range input/output slices are reduced (partial
        #     reductions need slice/subarray handling to be refactored);
        #   * only one value is reduced at a time through the map exit;
        #   * every memlet index is a map parameter whose range starts at 0,
        #     has unit stride and matches the corresponding reduce range.

        # Verify that reduction ranges match tasklet map
        tout_memlet = graph.in_edges(in_array)[0].data
        rin_memlet = graph.out_edges(in_array)[0].data
        if tout_memlet.subset != rin_memlet.subset:
            return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        """ Returns a short description of a match: the candidate entries of
            the tasklet, the tasklet map exit and the reduction, joined by
            ``' -> '``.
        """
        tasklet = candidate[MapReduceFusion._tasklet]
        map_exit = candidate[MapReduceFusion._tmap_exit]
        if len(candidate) == 5:  # Expression 2
            reduce = candidate[MapReduceFusion._reduce]
        else:
            reduce = candidate[MapReduceFusion._rmap_in_cr]

        return ' -> '.join(str(node) for node in [tasklet, map_exit, reduce])

    @staticmethod
    def find_memlet_map_permutation(memlet: Memlet, map: nodes.Map):
        """ Matches each dimension of a memlet subset with a map parameter.

            :param memlet: Memlet whose subset dimensions are looked up.
            :param map: Map whose parameters are candidates.
            :return: A list with one entry per memlet dimension: the index
                     of the first not-yet-used map parameter equal to that
                     dimension, or None if there is none.
        """
        # Convert the parameters once instead of once per memlet dimension.
        param_symbols = [symbolic.pystr_to_symbolic(p) for p in map.params]

        perm = [None] * len(memlet.subset)
        indices = set()
        for i, dim in enumerate(memlet.subset):
            for j, mapsym in enumerate(param_symbols):
                if mapsym == dim and j not in indices:
                    perm[i] = j
                    indices.add(j)
                    break
        return perm

    @staticmethod
    def find_permutation(tasklet_map: nodes.Map, red_outer_map: nodes.Map,
                         red_inner_map: nodes.Map, tmem: Memlet):
        """ Splits the tasklet map dimensions between the outer and inner
            reduction maps by comparing ranges.

            :param tasklet_map: Map surrounding the tasklet.
            :param red_outer_map: Outer (non-reducing) map of the reduction.
            :param red_inner_map: Inner (reducing) map of the reduction.
            :param tmem: Tasklet-exit memlet (currently unused).
            :return: A tuple ``(outer, inner)`` of lists with the tasklet
                     map dimension indices that matched a range of the outer
                     and of the inner map, respectively.
            :raise AssertionError: If the number of dimensions does not add
                                   up, or a tasklet map range has no match.
        """
        outer_dims, inner_dims = [], []

        assert len(tasklet_map.range) == len(red_inner_map.range) + len(
            red_outer_map.range)

        # Match map ranges with reduce ranges; every reduce range may be
        # claimed at most once
        unavailable_ranges_out = set()
        unavailable_ranges_in = set()
        for i, tmap_rng in enumerate(tasklet_map.range):
            found = False
            for j, rng in enumerate(red_outer_map.range):
                if tmap_rng == rng and j not in unavailable_ranges_out:
                    outer_dims.append(i)
                    unavailable_ranges_out.add(j)
                    found = True
                    break
            if found:
                continue
            for j, rng in enumerate(red_inner_map.range):
                if tmap_rng == rng and j not in unavailable_ranges_in:
                    inner_dims.append(i)
                    unavailable_ranges_in.add(j)
                    found = True
                    break
            if not found:
                break

        # Ensure all map variables matched with reduce variables
        assert len(outer_dims) + len(inner_dims) == len(tasklet_map.range)

        # Returns ([outer map indices], [inner (CR) map indices])
        return outer_dims, inner_dims

    @staticmethod
    def find_permutation_reduce(tasklet_map: nodes.Map,
                                reduce_node: nodes.Reduce, graph: SDFGState,
                                tmem: Memlet):
        """ Finds the tasklet map dimensions that index the output of a
            ``Reduce`` node.

            :param tasklet_map: Map surrounding the tasklet.
            :param reduce_node: The reduce node following the map.
            :param graph: State containing both nodes.
            :param tmem: Tasklet-exit memlet.
            :return: A list of tasklet map dimension indices, one per
                     position listed in ``reduce_node.axes``. The list is
                     empty when ``reduce_node.axes`` is None.
            :raise AssertionError: If the map and the reduce input do not
                                   have matching dimensions/ranges.
        """
        in_memlet = graph.in_edges(reduce_node)[0].data
        assert len(tasklet_map.range) == in_memlet.subset.dims()

        # Find permutation between tasklet-exit memlet and tasklet map
        tmem_perm = MapReduceFusion.find_memlet_map_permutation(
            tmem, tasklet_map)
        mapred_perm = []

        # Match map ranges with reduce ranges
        unavailable_ranges = set()
        for i, tmap_rng in enumerate(tasklet_map.range):
            found = False

            for j, in_rng in enumerate(in_memlet.subset):
                if tmap_rng == in_rng and j not in unavailable_ranges:
                    mapred_perm.append(i)
                    unavailable_ranges.add(j)
                    found = True
                    break
            if not found:
                break

        # Ensure all map variables matched with reduce variables
        assert len(tmem_perm) == len(tmem.subset)
        assert len(mapred_perm) == len(in_memlet.subset)

        # Prepare result from the two permutations and the reduction axes.
        # NOTE(review): positions that ARE in reduce_node.axes are the ones
        # kept here -- confirm this is the intended selection.
        if reduce_node.axes is None:
            return []
        return [
            mapred_perm[tmem_perm[i]] for i in range(len(mapred_perm))
            if i not in reduce_node.axes
        ]

    def apply(self, sdfg):
        """ Applies the fusion to the matched subgraph of state
            ``self.state_id``, according to ``self.expr_index``.

            For the ``Reduce`` node pattern, the tasklet output is redirected
            through the map exit to the output array, with the write-conflict
            resolution and identity taken from the reduce node.

            For the map-based patterns, the intermediate array is replaced
            by a transient ``tmp`` array placed between the tasklet and the
            reduction tasklet, and the reduction map entry is removed.

            :param sdfg: The SDFG to modify in place.
            :raise RuntimeError: If no memlet to the intermediate array
                                 enters the tasklet map exit, or if the
                                 reduction tasklet does not have exactly one
                                 incoming edge.
        """
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        expr_index = self.expr_index
        graph = sdfg.nodes()[self.state_id]

        # Resolve every matched node before the graph is modified: gnode()
        # looks nodes up by their position in graph.nodes(), and positions
        # are not guaranteed to stay the same once nodes are removed/added.
        tmap_exit = gnode(MapReduceFusion._tmap_exit)
        in_array = gnode(MapReduceFusion._in_array)
        out_array = gnode(MapReduceFusion._out_array)
        if expr_index == 2:  # Reduce node
            rmap_cr = gnode(MapReduceFusion._reduce)
        else:  # Map-based reduction (with or without outer map)
            rmap_cr = gnode(MapReduceFusion._rmap_in_cr)
            rmap_in_entry = gnode(MapReduceFusion._rmap_in_entry)
            # Needed by both map-based patterns further below
            rmap_tasklet = gnode(MapReduceFusion._rmap_in_tasklet)
        if expr_index == 1:  # Reduce with outer map
            rmap_out_entry = gnode(MapReduceFusion._rmap_out_entry)
            rmap_out_exit = gnode(MapReduceFusion._rmap_out_exit)

        # Set nodes to remove according to the expression index
        nodes_to_remove = [in_array]
        if expr_index == 0:
            nodes_to_remove.append(rmap_in_entry)
        elif expr_index == 1:
            nodes_to_remove.append(rmap_out_entry)
            nodes_to_remove.append(rmap_in_entry)
            nodes_to_remove.append(rmap_out_exit)
        else:
            nodes_to_remove.append(rmap_cr)

        # If no other edges lead to mapexit, remove it. Otherwise, keep
        # it and remove reduction incoming/outgoing edges
        if expr_index != 2 and len(graph.in_edges(tmap_exit)) == 1:
            nodes_to_remove.append(tmap_exit)

        # Edge that carries the tasklet result into the intermediate array
        memlet_edge = next((edge for edge in graph.in_edges(tmap_exit)
                            if edge.data.data == in_array.data), None)
        if memlet_edge is None:
            raise RuntimeError('Reduction memlet cannot be None')

        if expr_index == 0:  # Reduce without outer map
            # Index order does not matter, merge as-is
            pass
        elif expr_index == 1:  # Reduce with outer map
            tmap = tmap_exit.map
            perm_outer, perm_inner = MapReduceFusion.find_permutation(
                tmap, rmap_out_entry.map, rmap_in_entry.map, memlet_edge.data)

            # Split tasklet map into tmap_out -> tmap_in (according to
            # reduction)
            omap = nodes.Map(
                tmap.label + '_nonreduce',
                [p for i, p in enumerate(tmap.params) if i in perm_outer],
                [r for i, r in enumerate(tmap.range) if i in perm_outer],
                tmap.schedule, tmap.unroll, tmap.is_async)
            tmap.params = [
                p for i, p in enumerate(tmap.params) if i in perm_inner
            ]
            tmap.range = [
                r for i, r in enumerate(tmap.range) if i in perm_inner
            ]
            omap_entry = nodes.MapEntry(omap)
            # NOTE(review): omap_exit aliases rmap_out_exit, which is also
            # listed in nodes_to_remove -- confirm this is intended.
            omap_exit = rmap_out_exit
            rmap_out_exit.map = omap

            # Reconnect graph to new map
            tmap_entry = graph.entry_node(tmap_exit)
            tmap_in_edges = list(graph.in_edges(tmap_entry))
            # NOTE(review): invoked once per edge although the arguments do
            # not depend on the edge; presumably one call would suffice.
            for _ in tmap_in_edges:
                nxutil.change_edge_dest(graph, tmap_entry, omap_entry)
            for e in tmap_in_edges:
                graph.add_edge(omap_entry, e.src_conn, tmap_entry, e.dst_conn,
                               copy.copy(e.data))
        elif expr_index == 2:  # Reduce node
            # Find correspondence between map indices and array outputs
            tmap = tmap_exit.map
            perm = MapReduceFusion.find_permutation_reduce(
                tmap, rmap_cr, graph, memlet_edge.data)

            output_subset = [tmap.params[d] for d in perm]
            if len(output_subset) == 0:  # Output is a scalar
                output_subset = [0]

            array_edge = graph.out_edges(rmap_cr)[0]

            # Delete relevant edges and nodes
            graph.remove_edge(memlet_edge)
            graph.remove_nodes_from(nodes_to_remove)

            # Add new edges and nodes
            #   From tasklet to map exit
            graph.add_edge(
                memlet_edge.src, memlet_edge.src_conn, memlet_edge.dst,
                memlet_edge.dst_conn,
                Memlet(out_array.data, memlet_edge.data.num_accesses,
                       subsets.Indices(output_subset), memlet_edge.data.veclen,
                       rmap_cr.wcr, rmap_cr.identity))

            #   From map exit to output array
            graph.add_edge(
                memlet_edge.dst, 'OUT_' + memlet_edge.dst_conn[3:],
                array_edge.dst, array_edge.dst_conn,
                Memlet(array_edge.data.data, array_edge.data.num_accesses,
                       array_edge.data.subset, array_edge.data.veclen,
                       rmap_cr.wcr, rmap_cr.identity))

            return

        # Remove tmp array node prior to the others, so that a new one
        # can be created in its stead (see below)
        graph.remove_node(nodes_to_remove[0])
        nodes_to_remove = nodes_to_remove[1:]

        # Create tasklet -> tmp -> tasklet connection
        tmp = graph.add_array(
            'tmp',
            memlet_edge.data.subset.bounding_box_size(),
            sdfg.arrays[memlet_edge.data.data].dtype,
            transient=True)
        tasklet_tmp_memlet = copy.deepcopy(memlet_edge.data)
        tasklet_tmp_memlet.data = tmp.data
        tasklet_tmp_memlet.subset = ShapeProperty.to_string(tmp.shape)

        # Modify memlet to point to output array
        memlet_edge.data.data = out_array.data

        # Recover reduction axes from CR reduce subset: an index whose
        # string form contains '__i' is treated as a reduction axis
        reduce_cr_subset = graph.in_edges(rmap_tasklet)[0].data.subset
        reduce_axes = [
            ind for ind, crvar in enumerate(reduce_cr_subset.indices)
            if '__i' in str(crvar)
        ]

        # Modify memlet access index by filtering out reduction axes
        newindices = [
            ovar for ind, ovar in enumerate(memlet_edge.data.subset.indices)
            if ind not in reduce_axes
        ]
        if not newindices:
            newindices = [0]

        memlet_edge.data.subset = subsets.Indices(newindices)

        graph.remove_edge(memlet_edge)

        graph.add_edge(memlet_edge.src, memlet_edge.src_conn, tmp,
                       memlet_edge.dst_conn, tasklet_tmp_memlet)

        red_edges = list(graph.in_edges(rmap_tasklet))
        if len(red_edges) != 1:
            raise RuntimeError('CR edge must be unique')

        tmp_tasklet_memlet = copy.deepcopy(tasklet_tmp_memlet)
        graph.add_edge(tmp, None, rmap_tasklet, red_edges[0].dst_conn,
                       tmp_tasklet_memlet)

        for e in graph.edges_between(rmap_tasklet, rmap_cr):
            e.data.subset = memlet_edge.data.subset

        # Move output edges to point directly to CR node
        if expr_index == 1:
            # Set output memlet between CR node and outer reduction map to
            # contain the same subset as the one pointing to the CR node
            for e in graph.out_edges(rmap_cr):
                e.data.subset = memlet_edge.data.subset

            # Use the reference resolved before the graph was modified
            # rather than a fresh positional lookup.
            nxutil.change_edge_src(graph, rmap_out_exit, omap_exit)

        # Remove nodes
        graph.remove_nodes_from(nodes_to_remove)

        # For unrelated outputs, connect original output to rmap_out
        if expr_index == 1 and tmap_exit not in nodes_to_remove:
            other_out_edges = list(graph.out_edges(tmap_exit))
            for e in other_out_edges:
                graph.remove_edge(e)
                graph.add_edge(e.src, e.src_conn, omap_exit, None, e.data)
                graph.add_edge(omap_exit, None, e.dst, e.dst_conn,
                               copy.copy(e.data))

    def modifies_graph(self):
        """ Returns True: applying this transformation changes the graph. """
        return True
Beispiel #25
0
class RedundantSecondArray(pm.Transformation):
    """ Redundant array removal for the pattern ``in_array -> out_array``.

        Matches a transient array that is filled by a copy from another
        array and is not referenced by any other access node. The second
        (transient) array is removed and its readers are re-attached to the
        first array.
    """

    # Incremented by apply() only when the "debugprint" option is enabled
    _arrays_removed = 0
    _in_array = nodes.AccessNode("_")
    _out_array = nodes.AccessNode("_")

    @staticmethod
    def expressions():
        """ Returns the single matched pattern: two connected access nodes. """
        pattern = nxutil.node_path_graph(RedundantSecondArray._in_array,
                                         RedundantSecondArray._out_array)
        return [pattern]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Decides whether the matched second array may be removed.

            :param graph: The state in which the pattern was matched.
            :param candidate: Mapping from pattern nodes to node indices in
                              ``graph.nodes()``.
            :param expr_index: Index of the matched pattern (unused).
            :param sdfg: The SDFG containing the state.
            :param strict: Unused here.
            :return: True if ``out_array`` has exactly one incoming edge, is
                     transient, shares its storage with ``in_array`` and is
                     the only access node in the SDFG with an equal
                     descriptor.
        """
        state_nodes = graph.nodes()
        in_array = state_nodes[candidate[RedundantSecondArray._in_array]]
        out_array = state_nodes[candidate[RedundantSecondArray._out_array]]

        # The only producer of out_array must be in_array
        if graph.in_degree(out_array) != 1:
            return False

        out_desc = out_array.desc(sdfg)

        # Only transients may be removed
        if not out_desc.transient:
            return False

        # Source and destination must live in the same storage location
        if in_array.desc(sdfg).storage != out_desc.storage:
            return False

        # Count access nodes, over all states, whose descriptor equals the
        # candidate's descriptor
        occurrences = sum(
            1 for state in sdfg.nodes() for n in state.nodes()
            if isinstance(n, nodes.AccessNode) and n.desc(sdfg) == out_desc)
        if occurrences > 1:
            return False

        # Shapes are deliberately not compared; apply() rewrites the memlet
        # subsets instead.
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        """ Returns a short description naming the array to be removed. """
        removed = graph.nodes()[candidate[RedundantSecondArray._out_array]]
        return "Remove " + str(removed)

    def apply(self, sdfg):
        """ Removes the matched ``out_array`` node from state
            ``self.state_id`` and re-attaches its outgoing edges to
            ``in_array``, rewriting the memlets along each outgoing memlet
            tree to refer to ``in_array``.

            :param sdfg: The SDFG to modify in place.
        """
        graph = sdfg.nodes()[self.state_id]
        state_nodes = graph.nodes()
        in_array = state_nodes[self.subgraph[RedundantSecondArray._in_array]]
        out_array = state_nodes[self.subgraph[RedundantSecondArray._out_array]]

        # Subset of in_array that was copied into out_array
        copy_memlet = graph.edges_between(in_array, out_array)[0].data
        if copy_memlet.data == in_array.data:
            subset = copy_memlet.subset
        else:
            subset = copy_memlet.other_subset
        # Index subsets are applied as an offset, all others are composed
        use_offset = isinstance(subset, subsets.Indices)

        for out_edge in graph.out_edges(out_array):
            # Rewrite every memlet on the tree of this outgoing edge
            for tree_edge in graph.memlet_tree(out_edge):
                mm = tree_edge.data
                if mm.data == out_array.data:
                    mm.data = in_array.data
                    if use_offset:
                        mm.subset.offset(subset, False)
                    else:
                        mm.subset = subset.compose(mm.subset)
                elif mm.other_subset:
                    if use_offset:
                        mm.other_subset.offset(subset, False)
                    else:
                        mm.other_subset = subset.compose(mm.other_subset)

            # Re-attach the edge so that it starts at in_array
            graph.remove_edge(out_edge)
            graph.add_edge(in_array, out_edge.src_conn, out_edge.dst,
                           out_edge.dst_conn, out_edge.data)

        # The node is gone from the state; its descriptor stays in the SDFG.
        # TODO: Should the array be removed from the SDFG?
        graph.remove_node(out_array)
        if Config.get_bool("debugprint"):
            RedundantSecondArray._arrays_removed += 1
Beispiel #26
0
    def apply(self, sdfg):
        """ Inserts a new transient array, with its access node, on the edge
            between the two matched nodes and re-bases the affected memlets
            onto that transient.

            The name of the new array and its access node are stored in
            ``self._local_name`` and ``self._data_node``.

            :param sdfg: The SDFG to modify; the state is selected through
                         ``self.state_id``.
            :raise NameError: If no edge between the two matched nodes could
                              be selected.
        """
        graph = sdfg.nodes()[self.state_id]
        node_a = graph.nodes()[self.subgraph[LocalStorage._node_a]]
        node_b = graph.nodes()[self.subgraph[LocalStorage._node_b]]

        # Determine direction of new memlet
        # (presumably True when node_a's scope contains node_b -- TODO
        # confirm against sd.scope_contains_scope)
        scope_dict = graph.scope_dict()
        propagate_forward = sd.scope_contains_scope(scope_dict, node_a, node_b)

        # Default to the array of the first edge if none was configured
        array = self.array
        if array is None or len(array) == 0:
            array = graph.edges_between(node_a, node_b)[0].data.data

        # Pick the first edge between the nodes that carries the array
        original_edge = None
        invariant_memlet = None
        for edge in graph.edges_between(node_a, node_b):
            if array == edge.data.data:
                original_edge = edge
                invariant_memlet = edge.data
                break
        if invariant_memlet is None:
            # No edge carries the requested array: fall back to the first
            # edge between the nodes and warn about the substitution
            for edge in graph.edges_between(node_a, node_b):
                original_edge = edge
                invariant_memlet = edge.data
                warnings.warn('Array %s not found! Using array %s instead.' %
                              (array, invariant_memlet.data))
                array = invariant_memlet.data
                break
        if invariant_memlet is None:
            raise NameError('Array %s not found!' % array)

        # Add transient array, sized by the over-approximated bounding box
        # of the memlet and with the element type of the original array
        new_data, _ = sdfg.add_array(
            'trans_' + invariant_memlet.data, [
                symbolic.overapproximate(r)
                for r in invariant_memlet.bounding_box_size()
            ],
            sdfg.arrays[invariant_memlet.data].dtype,
            transient=True,
            find_new_name=True)
        data_node = nodes.AccessNode(new_data)

        # Store as fields so that other transformations can use them
        self._local_name = new_data
        self._data_node = data_node

        # Both new edges start out as copies of the original memlet
        to_data_mm = copy.deepcopy(invariant_memlet)
        from_data_mm = copy.deepcopy(invariant_memlet)
        # First element of every range of the original subset
        offset = subsets.Indices([r[0] for r in invariant_memlet.subset])

        # Reconnect, assuming one edge to the access node
        # new_edge is the edge whose memlet tree is re-based below: the one
        # leaving the new access node when propagating forward, the one
        # entering it otherwise
        graph.remove_edge(original_edge)
        if propagate_forward:
            graph.add_edge(node_a, original_edge.src_conn, data_node, None,
                           to_data_mm)
            new_edge = graph.add_edge(data_node, None, node_b,
                                      original_edge.dst_conn, from_data_mm)
        else:
            new_edge = graph.add_edge(node_a, original_edge.src_conn,
                                      data_node, None, to_data_mm)
            graph.add_edge(data_node, None, node_b, original_edge.dst_conn,
                           from_data_mm)

        # Offset all edges in the memlet tree (including the new edge)
        for edge in graph.memlet_tree(new_edge):
            edge.data.subset.offset(offset, True)
            edge.data.data = new_data