Exemple #1
0
def test_gpu_dma():
    sdfg: dace.SDFG = gpu_dma.to_sdfg(strict=True)
    sdfg.name = 'gpu_dma'
    sdfg.apply_transformations(GPUTransformSDFG,
                               options={'strict_transform': False})

    map_ = next(n for n, _ in sdfg.all_nodes_recursive()
                if isinstance(n, nodes.MapEntry))

    add_gpu_location(sdfg, map_, 0)

    sdfg.arrays['gpu_X'].location = {'gpu': 1}

    # clone GPU scalar
    inodename = 'alpha'
    inode = sdfg.arrays['alpha']
    newdesc = inode.clone()
    newdesc.location = {'gpu': 0}
    newdesc.storage = StorageType.GPU_Global
    newdesc.transient = True
    name = sdfg.add_datadesc('gpu_' + inodename, newdesc, find_new_name=True)
    # Replace original scalar
    for state in sdfg.nodes():
        for node in state.nodes():
            if (isinstance(node, nodes.AccessNode) and node.data == inodename):
                node.data = name
    # Replace memlets
    for state in sdfg.nodes():
        for edge in state.edges():
            if edge.data.data == inodename:
                edge.data.data = name

    # add GPU scalar to the copyin state
    copyin_state = sdfg.start_state

    src_array = nodes.AccessNode(inodename, debuginfo=inode.debuginfo)
    dst_array = nodes.AccessNode(name, debuginfo=inode.debuginfo)
    copyin_state.add_node(src_array)
    copyin_state.add_node(dst_array)
    copyin_state.add_nedge(
        src_array, dst_array,
        Memlet.from_array(src_array.data, src_array.desc(sdfg)))

    sdfg.apply_strict_transformations()

    np.random.seed(0)
    n = 16
    X = np.ndarray(shape=n, dtype=np_dtype)
    alpha = np.ndarray(shape=1, dtype=np_dtype)
    alpha.fill(np.random.rand())

    a_times_X = sdfg(X=X, alpha=alpha[0], N=n)
    res = X * alpha
    idx = zip(*np.where(~np.isclose(res, a_times_X, atol=0, rtol=1e-7)))
    for i in idx:
        print(i, res[i], a_times_X, X[i] * alpha, X[i], alpha)
    assert np.allclose(res, a_times_X)
    print('PASS')
class RedundantArrayCopying2(pm.Transformation):
    """ Implements the redundant array removal transformation. Removes
        multiples of array B in pattern A -> B.
    """
    _arrays_removed = 0
    _in_array = nodes.AccessNode("_")
    _out_array = nodes.AccessNode("_")

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(RedundantArrayCopying2._in_array,
                                   RedundantArrayCopying2._out_array)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        in_array = graph.nodes()[candidate[RedundantArrayCopying2._in_array]]
        out_array = graph.nodes()[candidate[RedundantArrayCopying2._out_array]]

        # Ensure out degree is one (only one target, which is out_array)
        found = 0
        for _, _, dst, _, _ in graph.out_edges(in_array):
            if (isinstance(dst, nodes.AccessNode) and dst != out_array
                    and dst.data == out_array.data):
                found += 1

        return found > 0

    @staticmethod
    def match_to_str(graph, candidate):
        out_array = graph.nodes()[candidate[RedundantArrayCopying2._out_array]]

        return "Remove " + str(out_array)

    def apply(self, sdfg):
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        graph = sdfg.nodes()[self.state_id]
        in_array = gnode(RedundantArrayCopying2._in_array)
        out_array = gnode(RedundantArrayCopying2._out_array)

        for e1 in graph.out_edges(in_array):
            dst = e1.dst
            if (isinstance(dst, nodes.AccessNode) and dst != out_array
                    and dst.data == out_array.data):
                for e2 in graph.out_edges(dst):
                    graph.add_edge(out_array, None, e2.dst, e2.dst_conn,
                                   e2.data)
                    graph.remove_edge(e2)
                graph.remove_edge(e1)
                graph.remove_node(dst)
                if Config.get_bool("debugprint"):
                    RedundantArrayCopying2._arrays_removed += 1
Exemple #3
0
class MapWCRFusion(pm.Transformation):
    """ Implements the map expanded-reduce fusion transformation.
        Fuses a map with an immediately following reduction, where the array
        between the map and the reduction is not used anywhere else, and the
        reduction is divided to two maps with a WCR, denoting partial reduction.
    """

    _tasklet = nodes.Tasklet('_')
    _tmap_exit = nodes.MapExit(nodes.Map("", [], []))
    _in_array = nodes.AccessNode('_')
    _rmap_in_entry = nodes.MapEntry(nodes.Map("", [], []))
    _rmap_in_tasklet = nodes.Tasklet('_')
    _rmap_in_cr = nodes.MapExit(nodes.Map("", [], []))
    _rmap_out_entry = nodes.MapEntry(nodes.Map("", [], []))
    _rmap_out_exit = nodes.MapExit(nodes.Map("", [], []))
    _out_array = nodes.AccessNode('_')

    @staticmethod
    def expressions():
        return [
            # Map, then partial reduction of axes
            sdutil.node_path_graph(
                MapWCRFusion._tasklet, MapWCRFusion._tmap_exit,
                MapWCRFusion._in_array, MapWCRFusion._rmap_out_entry,
                MapWCRFusion._rmap_in_entry, MapWCRFusion._rmap_in_tasklet,
                MapWCRFusion._rmap_in_cr, MapWCRFusion._rmap_out_exit,
                MapWCRFusion._out_array)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        tmap_exit = graph.nodes()[candidate[MapWCRFusion._tmap_exit]]
        in_array = graph.nodes()[candidate[MapWCRFusion._in_array]]
        rmap_entry = graph.nodes()[candidate[MapWCRFusion._rmap_out_entry]]

        # Make sure that the array is only accessed by the map and the reduce
        if any([
                src != tmap_exit
                for src, _, _, _, memlet in graph.in_edges(in_array)
        ]):
            return False
        if any([
                dest != rmap_entry
                for _, _, dest, _, memlet in graph.out_edges(in_array)
        ]):
            return False

        # Make sure that there is a reduction in the second map
        rmap_cr = graph.nodes()[candidate[MapWCRFusion._rmap_in_cr]]
        reduce_edge = graph.in_edges(rmap_cr)[0]
        if reduce_edge.data.wcr is None:
            return False

        # (strict) Make sure that the transient is not accessed anywhere else
        # in this state or other states
        if strict and (len([
                n for n in graph.nodes()
                if isinstance(n, nodes.AccessNode) and n.data == in_array.data
        ]) > 1 or in_array.data in sdfg.shared_transients()):
            return False

        # Verify that reduction ranges match tasklet map
        tout_memlet = graph.in_edges(in_array)[0].data
        rin_memlet = graph.out_edges(in_array)[0].data
        if tout_memlet.subset != rin_memlet.subset:
            return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        tasklet = candidate[MapWCRFusion._tasklet]
        map_exit = candidate[MapWCRFusion._tmap_exit]
        reduce = candidate[MapWCRFusion._rmap_in_cr]

        return ' -> '.join(str(node) for node in [tasklet, map_exit, reduce])

    def apply(self, sdfg):
        graph = sdfg.node(self.state_id)

        # To apply, collapse the second map and then fuse the two resulting maps
        map_collapse = MapCollapse(
            self.sdfg_id, self.state_id, {
                MapCollapse._outer_map_entry:
                self.subgraph[MapWCRFusion._rmap_out_entry],
                MapCollapse._inner_map_entry:
                self.subgraph[MapWCRFusion._rmap_in_entry]
            }, 0)
        map_entry, _ = map_collapse.apply(sdfg)

        map_fusion = MapFusion(
            self.sdfg_id, self.state_id, {
                MapFusion._first_map_exit:
                self.subgraph[MapWCRFusion._tmap_exit],
                MapFusion._second_map_entry: graph.node_id(map_entry)
            }, 0)
        map_fusion.apply(sdfg)
class RedundantArrayCopying(pm.Transformation):
    """ Implements the redundant array removal transformation. Removes the last access node
        in pattern A -> B -> A, and the second (if possible)
    """

    _arrays_removed = 0
    _in_array = nodes.AccessNode("_")
    _med_array = nodes.AccessNode("_")
    _out_array = nodes.AccessNode("_")

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(
                RedundantArrayCopying._in_array,
                RedundantArrayCopying._med_array,
                RedundantArrayCopying._out_array,
            )
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        in_array = graph.nodes()[candidate[RedundantArrayCopying._in_array]]
        med_array = graph.nodes()[candidate[RedundantArrayCopying._med_array]]
        out_array = graph.nodes()[candidate[RedundantArrayCopying._out_array]]

        # Ensure out degree is one (only one target, which is out_array)
        if graph.out_degree(in_array) != 1:
            return False

        # Make sure that the removal candidate is a transient variable
        if strict and not out_array.desc(sdfg).transient:
            return False

        # Make sure that the middle access node is not transient. We do this to ensure that everything copied from
        # B -> A is either copied in from A, or uninitialized memory.
        if strict and not med_array.desc(sdfg).transient:
            return False

        # Make sure that both arrays are using the same storage location
        if in_array.desc(sdfg).storage != out_array.desc(sdfg).storage:
            return False

        # Find occurrences in this and other states
        # (This could be relaxed)
        # occurrences = []
        # for state in sdfg.nodes():
        #     occurrences.extend([
        #         n for n in state.nodes()
        #         if isinstance(n, nodes.AccessNode) and n.desc == med_array.desc
        #     ])

        # if len(occurrences) > 1:
        #     return False

        # Only apply if arrays are of same shape (no need to modify memlet subset)
        if len(in_array.desc(sdfg).shape) != len(
                out_array.desc(sdfg).shape) or any(i != o for i, o in zip(
                    in_array.desc(sdfg).shape,
                    out_array.desc(sdfg).shape)):
            return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        med_array = graph.nodes()[candidate[RedundantArrayCopying._med_array]]
        out_array = graph.nodes()[candidate[RedundantArrayCopying._out_array]]

        return "Remove " + str(out_array) + " and (maybe) " + str(med_array)

    def apply(self, sdfg):
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        graph = sdfg.nodes()[self.state_id]
        in_array = gnode(RedundantArrayCopying._in_array)
        med_array = gnode(RedundantArrayCopying._med_array)
        out_array = gnode(RedundantArrayCopying._out_array)

        med_edges = len(graph.out_edges(med_array))
        med_out_edges = 0
        for med_e in graph.out_edges(med_array):
            if med_e.dst == out_array:
                # Modify all outcoming edges to point to in_array
                for out_e in graph.out_edges(med_e.dst):
                    path = graph.memlet_path(out_e)
                    for pe in path:
                        if pe.data.data == out_array.data or pe.data.data == med_array.data:
                            pe.data.data = in_array.data
                    # Redirect edge to in_array
                    graph.remove_edge(out_e)
                    graph.add_edge(in_array, out_e.src_conn, out_e.dst,
                                   out_e.dst_conn, out_e.data)
                # Remove out_array
                for e in graph.edges_between(med_e, med_e.dst):
                    graph.remove_edge(e)
                graph.remove_node(med_e.dst)
                med_out_edges += 1

        # Finally, med_array node
        if med_array.desc(sdfg).transient and med_edges == med_out_edges:
            for e in graph.edges_between(in_array, med_array):
                graph.remove_edge(e)
            graph.remove_node(med_array)
            if Config.get_bool("debugprint"):
                RedundantArrayCopying._arrays_removed += 1
Exemple #5
0
class DoubleBuffering(transformation.Transformation):
    """ Implements the double buffering pattern, which pipelines reading
        and processing data by creating a second copy of the memory.
        In particular, the transformation takes a 1D map and all internal
        (directly connected) transients, adds an additional dimension of size 2,
        and turns the map into a for loop that processes and reads the data in a
        double-buffered manner. Other memlets will not be transformed.
    """

    _map_entry = nodes.MapEntry(nodes.Map('_', [], []))
    _transient = nodes.AccessNode('_')

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(DoubleBuffering._map_entry,
                                   DoubleBuffering._transient)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        map_entry = graph.nodes()[candidate[DoubleBuffering._map_entry]]
        transient = graph.nodes()[candidate[DoubleBuffering._transient]]

        # Only one dimensional maps are allowed
        if len(map_entry.map.params) != 1:
            return False

        # Verify the map can be transformed to a for-loop
        if not MapToForLoop.can_be_applied(
                graph,
            {MapToForLoop._map_entry: candidate[DoubleBuffering._map_entry]},
                expr_index, sdfg, strict):
            return False

        # Verify that all directly-connected internal access nodes point to
        # transient arrays
        first = True
        for edge in graph.out_edges(map_entry):
            if isinstance(edge.dst, nodes.AccessNode):
                desc = sdfg.arrays[edge.dst.data]
                if not isinstance(desc, data.Array) or not desc.transient:
                    return False
                else:
                    # To avoid duplicate matches, only match the first transient
                    if first and edge.dst != transient:
                        return False
                    first = False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        return str(graph.node(candidate[DoubleBuffering._map_entry]))

    def apply(self, sdfg: sd.SDFG):
        graph: sd.SDFGState = sdfg.nodes()[self.state_id]
        map_entry = graph.node(self.subgraph[DoubleBuffering._map_entry])

        map_param = map_entry.map.params[0]  # Assuming one dimensional

        ##############################
        # Change condition of loop to one fewer iteration (so that the
        # final one reads from the last buffer)
        map_rstart, map_rend, map_rstride = map_entry.map.range[0]
        map_rend = symbolic.pystr_to_symbolic('(%s) - (%s)' %
                                              (map_rend, map_rstride))
        map_entry.map.range = subsets.Range([(map_rstart, map_rend,
                                              map_rstride)])

        ##############################
        # Gather transients to modify
        transients_to_modify = set(edge.dst.data
                                   for edge in graph.out_edges(map_entry)
                                   if isinstance(edge.dst, nodes.AccessNode))

        # Add dimension to transients and modify memlets
        for transient in transients_to_modify:
            desc: data.Array = sdfg.arrays[transient]
            # Using non-python syntax to ensure properties change
            desc.strides = [desc.total_size] + list(desc.strides)
            desc.shape = [2] + list(desc.shape)
            desc.offset = [0] + list(desc.offset)
            desc.total_size = desc.total_size * 2

        ##############################
        # Modify memlets to use map parameter as buffer index
        modified_subsets = []  # Store modified memlets for final state
        for edge in graph.scope_subgraph(map_entry).edges():
            if edge.data.data in transients_to_modify:
                edge.data.subset = self._modify_memlet(sdfg, edge.data.subset,
                                                       edge.data.data)
                modified_subsets.append(edge.data.subset)
            else:  # Could be other_subset
                path = graph.memlet_path(edge)
                src_node = path[0].src
                dst_node = path[-1].dst

                # other_subset could be None. In that case, recreate from array
                dataname = None
                if (isinstance(src_node, nodes.AccessNode)
                        and src_node.data in transients_to_modify):
                    dataname = src_node.data
                elif (isinstance(dst_node, nodes.AccessNode)
                      and dst_node.data in transients_to_modify):
                    dataname = dst_node.data
                if dataname is not None:
                    subset = (edge.data.other_subset or
                              subsets.Range.from_array(sdfg.arrays[dataname]))
                    edge.data.other_subset = self._modify_memlet(
                        sdfg, subset, dataname)
                    modified_subsets.append(edge.data.other_subset)

        ##############################
        # Turn map into for loop
        map_to_for = MapToForLoop(self.sdfg_id, self.state_id, {
            MapToForLoop._map_entry:
            self.subgraph[DoubleBuffering._map_entry]
        }, self.expr_index)
        nsdfg_node, nstate = map_to_for.apply(sdfg)

        ##############################
        # Gather node copies and remove memlets
        edges_to_replace = []
        for node in nstate.source_nodes():
            for edge in nstate.out_edges(node):
                if (isinstance(edge.dst, nodes.AccessNode)
                        and edge.dst.data in transients_to_modify):
                    edges_to_replace.append(edge)
                    nstate.remove_edge(edge)
            if nstate.out_degree(node) == 0:
                nstate.remove_node(node)

        ##############################
        # Add initial reads to initial nested state
        initial_state: sd.SDFGState = nsdfg_node.sdfg.start_state
        initial_state.set_label('%s_init' % map_entry.map.label)
        for edge in edges_to_replace:
            initial_state.add_node(edge.src)
            rnode = edge.src
            wnode = initial_state.add_write(edge.dst.data)
            initial_state.add_edge(rnode, edge.src_conn, wnode, edge.dst_conn,
                                   copy.deepcopy(edge.data))

        # All instances of the map parameter in this state become the loop start
        sd.replace(initial_state, map_param, map_rstart)
        # Initial writes go to the first buffer
        sd.replace(initial_state, '__dace_db_param', 0)

        ##############################
        # Modify main state's memlets

        # Divide by loop stride
        new_expr = symbolic.pystr_to_symbolic('(%s / %s) %% 2' %
                                              (map_param, map_rstride))
        sd.replace(nstate, '__dace_db_param', new_expr)

        ##############################
        # Add the main state's contents to the last state, modifying
        # memlets appropriately.
        final_state: sd.SDFGState = nsdfg_node.sdfg.sink_nodes()[0]
        final_state.set_label('%s_final_computation' % map_entry.map.label)
        dup_nstate = copy.deepcopy(nstate)
        final_state.add_nodes_from(dup_nstate.nodes())
        for e in dup_nstate.edges():
            final_state.add_edge(e.src, e.src_conn, e.dst, e.dst_conn, e.data)

        # If there is a WCR output with transient, only output in last state
        nstate: sd.SDFGState
        for node in nstate.sink_nodes():
            for e in list(nstate.in_edges(node)):
                if e.data.wcr is not None:
                    path = nstate.memlet_path(e)
                    if isinstance(path[0].src, nodes.AccessNode):
                        nstate.remove_memlet_path(e)

        ##############################
        # Add reads into next buffers to main state
        for edge in edges_to_replace:
            rnode = copy.deepcopy(edge.src)
            nstate.add_node(rnode)
            wnode = nstate.add_write(edge.dst.data)
            new_memlet = copy.deepcopy(edge.data)
            if new_memlet.data in transients_to_modify:
                new_memlet.other_subset = self._replace_in_subset(
                    new_memlet.other_subset, map_param,
                    '(%s + %s)' % (map_param, map_rstride))
            else:
                new_memlet.subset = self._replace_in_subset(
                    new_memlet.subset, map_param,
                    '(%s + %s)' % (map_param, map_rstride))

            nstate.add_edge(rnode, edge.src_conn, wnode, edge.dst_conn,
                            new_memlet)

        nstate.set_label('%s_double_buffered' % map_entry.map.label)
        # Divide by loop stride
        new_expr = symbolic.pystr_to_symbolic('((%s / %s) + 1) %% 2' %
                                              (map_param, map_rstride))
        sd.replace(nstate, '__dace_db_param', new_expr)

        # Remove symbol once done
        del nsdfg_node.sdfg.symbols['__dace_db_param']
        del nsdfg_node.symbol_mapping['__dace_db_param']

        return nsdfg_node

    @staticmethod
    def _modify_memlet(sdfg, subset, data_name):
        desc = sdfg.arrays[data_name]
        if len(subset) == len(desc.shape):
            # Already in the right shape, modify new dimension
            subset = list(subset)[1:]

        new_subset = subsets.Range([('__dace_db_param', '__dace_db_param',
                                     1)] + list(subset))
        return new_subset

    @staticmethod
    def _replace_in_subset(subset, string_or_symbol, new_string_or_symbol):
        new_subset = copy.deepcopy(subset)

        repldict = {
            symbolic.pystr_to_symbolic(string_or_symbol):
            symbolic.pystr_to_symbolic(new_string_or_symbol)
        }

        for i, dim in enumerate(new_subset):
            try:
                new_subset[i] = tuple(d.subs(repldict) for d in dim)
            except TypeError:
                new_subset[i] = (dim.subs(repldict)
                                 if symbolic.issymbolic(dim) else dim)

        return new_subset
Exemple #6
0
class MapFusion(pattern_matching.Transformation):
    """ Implements the MapFusion transformation.
        It wil check for all patterns MapExit -> AccessNode -> MapEntry, and
        based on the following rules, fuse them and remove the transient in
        between. There are several possibilities of what it does to this
        transient in between.

        Essentially, if there is some other place in the
        sdfg where it is required, or if it is not a transient, then it will
        not be removed. In such a case, it will be linked to the MapExit node
        of the new fused map.

        Rules for fusing maps:
          0. The map range of the second map should be a permutation of the
             first map range.
          1. Each of the access nodes that are adjacent to the first map exit
             should have an edge to the second map entry. If it doesn't, then the
             second map entry should not be reachable from this access node.
          2. Any node that has a wcr from the first map exit should not be
             adjacent to the second map entry.
          3. Access pattern for the access nodes in the second map should be
             the same permutation of the map parameters as the map ranges of the
             two maps. Alternatively, this access node should not be adjacent to
             the first map entry.
    """
    _first_map_exit = nodes.ExitNode()
    _some_array = nodes.AccessNode("_")
    _second_map_entry = nodes.EntryNode()

    @staticmethod
    def annotates_memlets():
        return False

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(
                MapFusion._first_map_exit,
                MapFusion._some_array,
                MapFusion._second_map_entry,
            )
        ]

    @staticmethod
    def find_permutation(first_map: nodes.Map,
                         second_map: nodes.Map) -> Union[List[int], None]:
        """ Find permutation between two map ranges.
            :param first_map: First map.
            :param second_map: Second map.
            :return: None if no such permutation exists, otherwise a list of
                     indices L such that L[x]'th parameter of second map has the same range as x'th
                     parameter of the first map.
            """
        result = []

        if len(first_map.range) != len(second_map.range):
            return None

        # Match map ranges with reduce ranges
        for i, tmap_rng in enumerate(first_map.range):
            found = False
            for j, rng in enumerate(second_map.range):
                if tmap_rng == rng and j not in result:
                    result.append(j)
                    found = True
                    break
            if not found:
                break

        # Ensure all map ranges matched
        if len(result) != len(first_map.range):
            return None

        return result

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        first_map_exit = graph.nodes()[candidate[MapFusion._first_map_exit]]
        first_map_entry = graph.entry_node(first_map_exit)
        second_map_entry = graph.nodes()[candidate[
            MapFusion._second_map_entry]]

        for _in_e in graph.in_edges(first_map_exit):
            if _in_e.data.wcr is not None:
                for _out_e in graph.out_edges(second_map_entry):
                    if _out_e.data.data == _in_e.data.data:
                        # wcr is on a node that is used in the second map, quit
                        return False
        # Check whether there is a pattern map -> access -> map.
        intermediate_nodes = set()
        intermediate_data = set()
        for _, _, dst, _, _ in graph.out_edges(first_map_exit):
            if isinstance(dst, nodes.AccessNode):
                intermediate_nodes.add(dst)
                intermediate_data.add(dst.data)

                # If array is used anywhere else in this state.
                num_occurrences = len([
                    n for n in graph.nodes()
                    if isinstance(n, nodes.AccessNode) and n.data == dst.data
                ])
                if num_occurrences > 1:
                    return False
            else:
                return False
        # Check map ranges
        perm = MapFusion.find_permutation(first_map_entry.map,
                                          second_map_entry.map)
        if perm is None:
            return False

        # Check if any intermediate transient is also going to another location
        second_inodes = set(e.src for e in graph.in_edges(second_map_entry)
                            if isinstance(e.src, nodes.AccessNode))
        transients_to_remove = intermediate_nodes & second_inodes
        # if any(e.dst != second_map_entry for n in transients_to_remove
        #        for e in graph.out_edges(n)):
        if any(graph.out_degree(n) > 1 for n in transients_to_remove):
            return False

        # Create a dict that maps parameters of the first map to those of the
        # second map.
        params_dict = {}
        for _index, _param in enumerate(first_map_entry.map.params):
            params_dict[_param] = second_map_entry.map.params[perm[_index]]

        out_memlets = [e.data for e in graph.in_edges(first_map_exit)]

        # Check that input set of second map is provided by the output set
        # of the first map, or other unrelated maps
        for second_edge in graph.out_edges(second_map_entry):
            # Memlets that do not come from one of the intermediate arrays
            if second_edge.data.data not in intermediate_data:
                # however, if intermediate_data eventually leads to
                # second_memlet.data, need to fail.
                for _n in intermediate_nodes:
                    source_node = _n
                    destination_node = graph.memlet_path(second_edge)[0].src
                    # NOTE: Assumes graph has networkx version
                    if destination_node in nx.descendants(
                            graph._nx, source_node):
                        return False
                continue

            provided = False

            # Compute second subset with respect to first subset's symbols
            sbs_permuted = dcpy(second_edge.data.subset)
            sbs_permuted.replace({
                symbolic.pystr_to_symbolic(k): symbolic.pystr_to_symbolic(v)
                for k, v in params_dict.items()
            })

            for first_memlet in out_memlets:
                if first_memlet.data != second_edge.data.data:
                    continue

                # If there is a covered subset, it is provided
                if first_memlet.subset.covers(sbs_permuted):
                    provided = True
                    break

            # If none of the output memlets of the first map provide the info,
            # fail.
            if provided is False:
                return False

        # Success
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        first_exit = graph.nodes()[candidate[MapFusion._first_map_exit]]
        second_entry = graph.nodes()[candidate[MapFusion._second_map_entry]]

        return " -> ".join(entry.map.label + ": " + str(entry.map.params)
                           for entry in [first_exit, second_entry])

    def apply(self, sdfg):
        """
            This method applies the mapfusion transformation.
            Other than the removal of the second map entry node (SME), and the first
            map exit (FME) node, it has the following side effects:

            1.  Any transient adjacent to both FME and SME with degree = 2 will be removed.
                The tasklets that use/produce it shall be connected directly with a
                scalar/new transient (if the dataflow is more than a single scalar)

            2.  If this transient is adjacent to FME and SME and has other
                uses, it will be adjacent to the new map exit post fusion.
                Tasklet-> Tasklet edges will ALSO be added as mentioned above.

            3.  If an access node is adjacent to FME but not SME, it will be
                adjacent to new map exit post fusion.

            4.  If an access node is adjacent to SME but not FME, it will be
                adjacent to the new map entry node post fusion.

        """
        graph = sdfg.nodes()[self.state_id]
        first_exit = graph.nodes()[self.subgraph[MapFusion._first_map_exit]]
        first_entry = graph.entry_node(first_exit)
        second_entry = graph.nodes()[self.subgraph[
            MapFusion._second_map_entry]]
        second_exit = graph.exit_node(second_entry)

        intermediate_nodes = set()
        for _, _, dst, _, _ in graph.out_edges(first_exit):
            intermediate_nodes.add(dst)
            assert isinstance(dst, nodes.AccessNode)

        # Check if an access node refers to non transient memory, or transient
        # is used at another location (cannot erase)
        do_not_erase = set()
        for node in intermediate_nodes:
            if sdfg.arrays[node.data].transient is False:
                do_not_erase.add(node)
            else:
                for edge in graph.in_edges(node):
                    if edge.src != first_exit:
                        do_not_erase.add(node)
                        break
                else:
                    for edge in graph.out_edges(node):
                        if edge.dst != second_entry:
                            do_not_erase.add(node)
                            break

        # Find permutation between first and second scopes
        perm = MapFusion.find_permutation(first_entry.map, second_entry.map)
        params_dict = {}
        for index, param in enumerate(first_entry.map.params):
            params_dict[param] = second_entry.map.params[perm[index]]

        # Replaces (in memlets and tasklet) the second scope map
        # indices with the permuted first map indices.
        # This works in two passes to avoid problems when e.g., exchanging two
        # parameters (instead of replacing (j,i) and (i,j) to (j,j) and then
        # i,i).
        second_scope = graph.scope_subgraph(second_entry)
        for firstp, secondp in params_dict.items():
            if firstp != secondp:
                replace(second_scope, secondp, '__' + secondp + '_fused')
        for firstp, secondp in params_dict.items():
            if firstp != secondp:
                replace(second_scope, '__' + secondp + '_fused', firstp)

        # Isolate First exit node
        ############################
        edges_to_remove = set()
        nodes_to_remove = set()
        for edge in graph.in_edges(first_exit):
            tree = graph.memlet_tree(edge)
            access_node = tree.root().edge.dst
            if access_node not in do_not_erase:
                out_edges = [
                    e for e in graph.out_edges(access_node)
                    if e.dst == second_entry
                ]
                # In this transformation, there can only be one edge to the
                # second map
                assert len(out_edges) == 1

                # Get source connector to the second map
                connector = out_edges[0].dst_conn[3:]

                new_dsts = []
                # Look at the second map entry out-edges to get the new
                # destinations
                for e in graph.out_edges(second_entry):
                    if e.src_conn[4:] == connector:
                        new_dsts.append(e)
                if not new_dsts:  # Access node is not used in the second map
                    nodes_to_remove.add(access_node)
                    continue

                # If the source is an access node, modify the memlet to point
                # to it
                if (isinstance(edge.src, nodes.AccessNode)
                        and edge.data.data != edge.src.data):
                    edge.data.data = edge.src.data
                    edge.data.subset = ("0" if edge.data.other_subset is None
                                        else edge.data.other_subset)
                    edge.data.other_subset = None

                else:
                    # Add a transient scalar/array
                    self.fuse_nodes(sdfg, graph, edge, new_dsts[0].dst,
                                    new_dsts[0].dst_conn, new_dsts[1:])

                edges_to_remove.add(edge)

                # Remove transient node between the two maps
                nodes_to_remove.add(access_node)
            else:  # The case where intermediate array node cannot be removed
                # Node will become an output of the second map exit
                out_e = tree.parent.edge
                conn = second_exit.next_connector()
                graph.add_edge(
                    second_exit,
                    'OUT_' + conn,
                    out_e.dst,
                    out_e.dst_conn,
                    dcpy(out_e.data),
                )
                second_exit.add_out_connector('OUT_' + conn)

                graph.add_edge(edge.src, edge.src_conn, second_exit,
                               'IN_' + conn, dcpy(edge.data))
                second_exit.add_in_connector('IN_' + conn)

                edges_to_remove.add(out_e)
                edges_to_remove.add(edge)

                # If the second map needs this node, link the connector
                # that generated this to the place where it is needed, with a
                # temp transient/scalar for memlet to be generated
                for out_e in graph.out_edges(second_entry):
                    second_memlet_path = graph.memlet_path(out_e)
                    source_node = second_memlet_path[0].src
                    if source_node == access_node:
                        self.fuse_nodes(sdfg, graph, edge, out_e.dst,
                                        out_e.dst_conn)

        ###
        # First scope exit is isolated and can now be safely removed
        for e in edges_to_remove:
            graph.remove_edge(e)
        graph.remove_nodes_from(nodes_to_remove)
        graph.remove_node(first_exit)

        # Isolate second_entry node
        ###########################
        for edge in graph.in_edges(second_entry):
            tree = graph.memlet_tree(edge)
            access_node = tree.root().edge.src
            if access_node in intermediate_nodes:
                # Already handled above, can be safely removed
                graph.remove_edge(edge)
                continue

            # This is an external input to the second map which will now go
            # through the first map.
            conn = first_entry.next_connector()
            graph.add_edge(edge.src, edge.src_conn, first_entry, 'IN_' + conn,
                           dcpy(edge.data))
            first_entry.add_in_connector('IN_' + conn)
            graph.remove_edge(edge)
            for out_enode in tree.children:
                out_e = out_enode.edge
                graph.add_edge(
                    first_entry,
                    'OUT_' + conn,
                    out_e.dst,
                    out_e.dst_conn,
                    dcpy(out_e.data),
                )
                graph.remove_edge(out_e)
            first_entry.add_out_connector('OUT_' + conn)

        ###
        # Second node is isolated and can now be safely removed
        graph.remove_node(second_entry)

        # Fix scope exit to point to the right map
        second_exit.map = first_entry.map

    def fuse_nodes(self,
                   sdfg,
                   graph,
                   edge,
                   new_dst,
                   new_dst_conn,
                   other_edges=None):
        """ Fuses two nodes via memlets and possibly transient arrays. """
        other_edges = other_edges or []
        memlet_path = graph.memlet_path(edge)
        access_node = memlet_path[-1].dst

        local_name = "__s%d_n%d%s_n%d%s" % (
            self.state_id,
            graph.node_id(edge.src),
            edge.src_conn,
            graph.node_id(edge.dst),
            edge.dst_conn,
        )
        # Add intermediate memory between subgraphs. If a scalar,
        # uses direct connection. If an array, adds a transient node
        if edge.data.subset.num_elements() == 1:
            sdfg.add_scalar(
                local_name,
                dtype=access_node.desc(graph).dtype,
                transient=True,
                storage=dtypes.StorageType.Register,
            )
            edge.data.data = local_name
            edge.data.subset = "0"
            local_node = edge.src
            src_connector = edge.src_conn

            # Add edge that leads to the second node
            graph.add_edge(local_node, src_connector, new_dst, new_dst_conn,
                           dcpy(edge.data))

            for e in other_edges:
                graph.add_edge(local_node, src_connector, e.dst, e.dst_conn,
                               dcpy(edge.data))
        else:
            sdfg.add_transient(local_name,
                               edge.data.subset.size(),
                               dtype=access_node.desc(graph).dtype)
            old_edge = dcpy(edge)
            local_node = graph.add_access(local_name)
            src_connector = None
            edge.data.data = local_name
            edge.data.subset = ",".join(
                ["0:" + str(s) for s in edge.data.subset.size()])
            # Add edge that leads to transient node
            graph.add_edge(
                edge.src,
                edge.src_conn,
                local_node,
                None,
                dcpy(edge.data),
            )

            # Add edge that leads to the second node
            graph.add_edge(local_node, src_connector, new_dst, new_dst_conn,
                           dcpy(edge.data))

            for e in other_edges:
                graph.add_edge(local_node, src_connector, e.dst, e.dst_conn,
                               dcpy(edge.data))

            # Modify data and memlets on all surrounding edges to match array
            for neighbor in graph.all_edges(local_node):
                for e in graph.memlet_tree(neighbor):
                    e.data.data = local_name
                    e.data.subset.offset(old_edge.data.subset, negative=True)
Exemple #7
0
class BufferTiling(transformation.Transformation):
    """ Implements the buffer tiling transformation.

        BufferTiling tiles a buffer that is in between two maps, where the preceding map
        writes to the buffer and the succeeding map reads from it.
        It introduces additional computations in exchange for reduced memory footprint.
        Commonly used to make use of shared memory on GPUs.
    """

    _map1_exit = nodes.MapExit(nodes.Map('', [], []))
    _array = nodes.AccessNode('')
    _map2_entry = nodes.MapEntry(nodes.Map('', [], []))

    tile_sizes = ShapeProperty(dtype=tuple,
                               default=(128, 128, 128),
                               desc="Tile size per dimension")

    # Returns a list of graphs that represent the pattern
    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(
                BufferTiling._map1_exit,
                BufferTiling._array,
                BufferTiling._map2_entry,
            )
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        map1_exit = graph.nodes()[candidate[BufferTiling._map1_exit]]
        map2_entry = graph.nodes()[candidate[BufferTiling._map2_entry]]

        for buf in graph.all_nodes_between(map1_exit, map2_entry):
            # Check that buffers are AccessNodes.
            if not isinstance(buf, nodes.AccessNode):
                return False

            # Check that buffers are transient.
            if not sdfg.arrays[buf.data].transient:
                return False

            # Check that buffers have exactly 1 input and 1 output edge.
            if graph.in_degree(buf) != 1:
                return False
            if graph.out_degree(buf) != 1:
                return False

            # Check that buffers are next to the maps.
            if graph.in_edges(buf)[0].src != map1_exit:
                return False
            if graph.out_edges(buf)[0].dst != map2_entry:
                return False

            # Check that the data consumed is provided.
            provided = graph.in_edges(buf)[0].data.subset
            consumed = graph.out_edges(buf)[0].data.subset
            if not provided.covers(consumed):
                return False

            # Check that buffers occur only once in this state.
            num_occurrences = len([
                n for n in graph.nodes()
                if isinstance(n, nodes.AccessNode) and n.data == buf
            ])
            if num_occurrences > 1:
                return False
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        map1_exit = graph.nodes()[candidate[BufferTiling._map1_exit]]
        map2_entry = graph.nodes()[candidate[BufferTiling._map2_entry]]

        return " -> ".join(entry.map.label + ": " + str(entry.map.params)
                           for entry in [map1_exit, map2_entry])

    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]
        map1_exit = graph.nodes()[self.subgraph[self._map1_exit]]
        map1_entry = graph.entry_node(map1_exit)
        map2_entry = graph.nodes()[self.subgraph[self._map2_entry]]
        buffers = graph.all_nodes_between(map1_exit, map2_entry)
        # Situation:
        # -> map1_entry -> ... -> map1_exit -> buffers -> map2_entry -> ...

        lower_extents = tuple(b - a for a, b in zip(
            map1_entry.range.min_element(), map2_entry.range.min_element()))
        upper_extents = tuple(a - b for a, b in zip(
            map1_entry.range.max_element(), map2_entry.range.max_element()))

        # Tile the first map with overlap
        MapTilingWithOverlap.apply_to(sdfg,
                                      map_entry=map1_entry,
                                      options={
                                          'tile_sizes': self.tile_sizes,
                                          'lower_overlap': lower_extents,
                                          'upper_overlap': upper_extents
                                      })
        tile_map1_exit = graph.out_edges(map1_exit)[0].dst
        tile_map1_entry = graph.entry_node(tile_map1_exit)
        tile_map1_entry.label = 'BufferTiling'

        # Tile the second map
        MapTiling.apply_to(sdfg,
                           map_entry=map2_entry,
                           options={
                               'tile_sizes': self.tile_sizes,
                               'tile_trivial': True
                           })
        tile_map2_entry = graph.in_edges(map2_entry)[0].src

        # Fuse maps
        some_buffer = next(
            iter(buffers))  # some dummy to pass to MapFusion.apply_to()
        MapFusion.apply_to(sdfg,
                           first_map_exit=tile_map1_exit,
                           array=some_buffer,
                           second_map_entry=tile_map2_entry)

        # Optimize the simple cases
        map1_entry.range.ranges = [
            (r[0], r[0], r[2]) if l_ext == 0 and u_ext == 0 and ts == 1 else r
            for r, l_ext, u_ext, ts in zip(map1_entry.range.ranges,
                                           lower_extents, upper_extents,
                                           self.tile_sizes)
        ]

        map2_entry.range.ranges = [
            (r[0], r[0], r[2]) if ts == 1 else r
            for r, ts in zip(map2_entry.range.ranges, self.tile_sizes)
        ]

        if any(ts == 1 for ts in self.tile_sizes):
            if any(r[0] == r[1] for r in map1_entry.map.range):
                TrivialMapElimination.apply_to(sdfg, _map_entry=map1_entry)
            if any(r[0] == r[1] for r in map2_entry.map.range):
                TrivialMapElimination.apply_to(sdfg, _map_entry=map2_entry)
Exemple #8
0
class TensorflowRedundantArray(pm.Transformation):
    """ Implements the redundant array removal transformation, applied
        to remove ReadVariableOps and control dependencies. """

    _arrays_removed = 0
    _in_array = nodes.AccessNode("_")
    _out_array = nodes.AccessNode("_")

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(TensorflowRedundantArray._in_array,
                                   TensorflowRedundantArray._out_array)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        in_array = graph.nodes()[candidate[TensorflowRedundantArray._in_array]]
        out_array = graph.nodes()[candidate[
            TensorflowRedundantArray._out_array]]

        # Just to be sure, check for the OP name in the out array
        if not ("ReadVariable" in out_array.data
                or "control_dependency" in out_array.data):
            return False

        # Make sure that the candidate is a transient variable
        if not in_array.desc(sdfg).transient:
            return False

        # Make sure that both arrays are using the same storage location
        if in_array.desc(sdfg).storage != out_array.desc(sdfg).storage:
            return False

        # Only apply if arrays are of same shape (no need to modify subset)
        if len(in_array.desc(sdfg).shape) != len(
                out_array.desc(sdfg).shape) or any(i != o for i, o in zip(
                    in_array.desc(sdfg).shape,
                    out_array.desc(sdfg).shape)):
            return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        out_array = graph.nodes()[candidate[
            TensorflowRedundantArray._out_array]]

        return "Remove " + str(out_array)

    def apply(self, sdfg):
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        graph = sdfg.nodes()[self.state_id]
        in_array = gnode(TensorflowRedundantArray._in_array)
        out_array = gnode(TensorflowRedundantArray._out_array)

        for e in graph.out_edges(out_array):
            # Modify all outgoing edges to point to in_array
            path = graph.memlet_tree(e)
            for pe in path:
                if pe.data.data == out_array.data:
                    pe.data.data = in_array.data

            # Preemptively add edge from in_array to out_array's adjacent
            # nodes.
            new_memlet = e.data
            new_memlet.data = in_array.data
            graph.add_edge(in_array, e.src_conn, e.dst, e.dst_conn, new_memlet)
            graph.remove_edge(e)

        try:
            assert len(graph.in_edges(out_array)) == 1
        except AssertionError:
            print("Multiple in-edges for ", str(out_array))
        e = graph.in_edges(out_array)[0]
        graph.remove_edge(e)

        # Finally, remove out_array node
        graph.remove_node(out_array)
        if Config.get_bool("debugprint"):
            TensorflowRedundantArray._arrays_removed += 1
Exemple #9
0
class InMergeArrays(pattern_matching.Transformation):
    """ Merge duplicate arrays connected to the same scope entry. """

    _array1 = nodes.AccessNode("_")
    _array2 = nodes.AccessNode("_")
    _map_entry = nodes.EntryNode()

    @staticmethod
    def expressions():
        # Matching
        #   o  o
        #   |  |
        # /======\

        g = SDFGState()
        g.add_node(InMergeArrays._array1)
        g.add_node(InMergeArrays._array2)
        g.add_node(InMergeArrays._map_entry)
        g.add_edge(InMergeArrays._array1, None, InMergeArrays._map_entry, None,
                   memlet.Memlet())
        g.add_edge(InMergeArrays._array2, None, InMergeArrays._map_entry, None,
                   memlet.Memlet())
        return [g]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        arr1_id = candidate[InMergeArrays._array1]
        arr2_id = candidate[InMergeArrays._array2]

        # Ensure both arrays contain the same data
        arr1 = graph.node(arr1_id)
        arr2 = graph.node(arr2_id)
        if arr1.data != arr2.data:
            return False

        # Ensure only arr1's node ID contains incoming edges
        if graph.in_degree(arr2) > 0:
            return False

        # Ensure arr1 and arr2's node IDs are ordered (avoid duplicates)
        if (graph.in_degree(arr1) == 0 and graph.in_degree(arr2) == 0
                and arr1_id >= arr2_id):
            return False

        map = graph.node(candidate[InMergeArrays._map_entry])

        # If arr1's connector leads directly to map, skip it
        if all(e.dst_conn and not e.dst_conn.startswith('IN_')
               for e in graph.edges_between(arr1, map)):
            return False

        if (any(e.dst != map for e in graph.out_edges(arr1))
                or any(e.dst != map for e in graph.out_edges(arr2))):
            return False

        # Ensure arr1 and arr2 are the first two incoming nodes (avoid further
        # duplicates)
        all_source_nodes = set(
            graph.node_id(e.src) for e in graph.in_edges(map) if e.src != arr1
            and e.src != arr2 and e.src.data == arr1.data and e.dst_conn
            and e.dst_conn.startswith('IN_') and graph.in_degree(e.src) == 0)
        if any(nid < arr1_id or nid < arr2_id for nid in all_source_nodes):
            return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        arr = graph.node(candidate[InMergeArrays._array1])
        map = graph.node(candidate[InMergeArrays._map_entry])
        return '%s (%d, %d) -> %s' % (
            arr.data, candidate[InMergeArrays._array1],
            candidate[InMergeArrays._array2], map.label)

    def apply(self, sdfg):
        graph = sdfg.node(self.state_id)
        array = graph.node(self.subgraph[InMergeArrays._array1])
        map = graph.node(self.subgraph[InMergeArrays._map_entry])
        map_edge = next(e for e in graph.out_edges(array) if e.dst == map)
        result_connector = map_edge.dst_conn[3:]

        # Find all other incoming access nodes without incoming edges
        source_edges = [
            e for e in graph.in_edges(map)
            if isinstance(e.src, nodes.AccessNode) and e.src.data == array.data
            and e.src != array and e.dst_conn and e.dst_conn.startswith('IN_')
            and graph.in_degree(e.src) == 0
        ]

        # Modify connectors to point to first array
        connectors_to_remove = set()
        for e in source_edges:
            connector = e.dst_conn[3:]
            connectors_to_remove.add(connector)
            for inner_edge in graph.out_edges(map):
                if inner_edge.src_conn[4:] == connector:
                    inner_edge._src_conn = 'OUT_' + result_connector

        # Remove other nodes from state
        graph.remove_nodes_from(set(e.src for e in source_edges))

        # Remove connectors from scope entry
        for c in connectors_to_remove:
            map.remove_in_connector('IN_' + c)
            map.remove_out_connector('OUT_' + c)

        # Re-propagate memlets
        edge_to_propagate = next(e for e in graph.out_edges(map)
                                 if e.src_conn[4:] == result_connector)
        map_edge._data = propagate_memlet(dfg_state=graph,
                                          memlet=edge_to_propagate.data,
                                          scope_node=map,
                                          union_inner_edges=True)
Exemple #10
0
class MergeSourceSinkArrays(transformation.Transformation):
    """ Merge duplicate arrays that are source/sink nodes. """

    _array1 = nodes.AccessNode("_")

    @staticmethod
    def expressions():
        # Matching
        #   o  o

        g = SDFGState()
        g.add_node(MergeSourceSinkArrays._array1)
        return [g]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, permissive=False):
        arr1_id = candidate[MergeSourceSinkArrays._array1]
        arr1 = graph.node(arr1_id)

        # Ensure array is either a source or sink node
        src_nodes = graph.source_nodes()
        sink_nodes = graph.sink_nodes()
        if arr1 in src_nodes:
            nodes_to_consider = src_nodes
        elif arr1 in sink_nodes:
            nodes_to_consider = sink_nodes
        else:
            return False

        # Ensure there are more nodes with the same data
        other_nodes = [
            graph.node_id(n) for n in nodes_to_consider
            if isinstance(n, nodes.AccessNode) and n.data == arr1.data
            and n != arr1
        ]
        if len(other_nodes) == 0:
            return False

        # Ensure arr1 is the first node to avoid further duplicates
        nid = min(other_nodes)
        if nid < arr1_id:
            return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        arr = graph.node(candidate[MergeSourceSinkArrays._array1])
        if arr in graph.source_nodes():
            place = 'source'
        else:
            place = 'sink'
        return '%s array %s' % (place, arr.data)

    def apply(self, sdfg):
        graph = sdfg.node(self.state_id)
        array = graph.node(self.subgraph[MergeSourceSinkArrays._array1])
        if array in graph.source_nodes():
            src_node = True
            nodes_to_consider = graph.source_nodes()
            edges_to_consider = lambda n: graph.out_edges(n)
        else:
            src_node = False
            nodes_to_consider = graph.sink_nodes()
            edges_to_consider = lambda n: graph.in_edges(n)

        for node in nodes_to_consider:
            if node == array:
                continue
            if not isinstance(node, nodes.AccessNode):
                continue
            if node.data != array.data:
                continue
            for edge in list(edges_to_consider(node)):
                if src_node:
                    graph.add_edge(array, edge.src_conn, edge.dst,
                                   edge.dst_conn, edge.data)
                else:
                    graph.add_edge(edge.src, edge.src_conn, array,
                                   edge.dst_conn, edge.data)
                graph.remove_edge(edge)
            graph.remove_node(node)
Exemple #11
0
class RedundantSecondArray(pm.Transformation):
    """ Implements the redundant array removal transformation, applied
        when a transient array is copied from and to (from another array),
        but never used anywhere else. This transformation removes the second
        array. """

    _arrays_removed = 0
    _in_array = nodes.AccessNode("_")
    _out_array = nodes.AccessNode("_")

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(RedundantSecondArray._in_array,
                                   RedundantSecondArray._out_array)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        in_array = graph.nodes()[candidate[RedundantSecondArray._in_array]]
        out_array = graph.nodes()[candidate[RedundantSecondArray._out_array]]

        in_desc = in_array.desc(sdfg)
        out_desc = out_array.desc(sdfg)

        # Ensure in degree is one (only one source, which is in_array)
        if graph.in_degree(out_array) != 1:
            return False

        # Make sure that the candidate is a transient variable
        if not out_desc.transient:
            return False

        # Dimensionality must be the same in strict mode
        if strict and len(in_desc.shape) != len(out_desc.shape):
            return False

        # Make sure that both arrays are using the same storage location
        # and are of the same type (e.g., Stream->Stream)
        if in_desc.storage != out_desc.storage:
            return False
        if type(in_desc) != type(out_desc):
            return False

        # Find occurrences in this and other states
        occurrences = []
        for state in sdfg.nodes():
            occurrences.extend([
                n for n in state.nodes()
                if isinstance(n, nodes.AccessNode) and n.desc(sdfg) == out_desc
            ])
        for isedge in sdfg.edges():
            if out_array.data in isedge.data.free_symbols:
                occurrences.append(isedge)

        if len(occurrences) > 1:
            return False

        # Check whether the data copied from the first datanode cover
        # the subsets of all the output edges of the second datanode.
        # We assume the following pattern: A -- e1 --> B -- e2 --> others

        # 1. Get edge e1 and extract/validate subsets for arrays A and B
        e1 = graph.edges_between(in_array, out_array)[0]
        try:
            _, b1_subset = _validate_subsets(e1, sdfg.arrays)
        except NotImplementedError:
            return False
        # 2. Iterate over the e2 edges
        for e2 in graph.out_edges(out_array):
            # 2-a. Extract/validate subsets for array B and others
            try:
                b2_subset, _ = _validate_subsets(e2, sdfg.arrays)
            except NotImplementedError:
                return False
            # 2-b. Check where b1_subset covers b2_subset
            if not b1_subset.covers(b2_subset):
                return False
            # 2-c. Validate subsets in memlet tree
            # (should not be needed for valid SDGs)
            path = graph.memlet_tree(e2)
            for e3 in path:
                if e3 is not e2:
                    try:
                        _validate_subsets(e3,
                                          sdfg.arrays,
                                          src_name=out_array.data)
                    except NotImplementedError:
                        return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        out_array = graph.nodes()[candidate[RedundantSecondArray._out_array]]

        return "Remove " + str(out_array)

    def apply(self, sdfg):
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        graph = sdfg.nodes()[self.state_id]
        in_array = gnode(RedundantSecondArray._in_array)
        out_array = gnode(RedundantSecondArray._out_array)

        # We assume the following pattern: A -- e1 --> B -- e2 --> others

        # 1. Get edge e1 and extract subsets for arrays A and B
        e1 = graph.edges_between(in_array, out_array)[0]
        a_subset, b1_subset = _validate_subsets(e1, sdfg.arrays)
        # 2. Iterate over the e2 edges and traverse the memlet tree
        for e2 in graph.out_edges(out_array):
            path = graph.memlet_tree(e2)
            for e3 in path:
                # 2-a. Extract subsets for array B and others
                b3_subset, other_subset = _validate_subsets(
                    e3, sdfg.arrays, src_name=out_array.data)
                # 2-b. Modify memlet to match array A. Example:
                # A -- (0, a:b)/(c:c+b) --> B -- (c+d)/None --> others
                # A -- (0, a+d)/None --> others
                e3.data.data = in_array.data
                # (c+d) - (c:c+b) = (d)
                b3_subset.offset(b1_subset, negative=True)
                # (0, a:b)(d) = (0, a+d) (or offset for indices)
                if isinstance(a_subset, subsets.Indices):
                    tmp = copy.deepcopy(a_subset)
                    tmp.offset(b3_subset, negative=False)
                    e3.data.subset = tmp
                else:
                    e3.data.subset = a_subset.compose(b3_subset)
                e3.data.other_subset = other_subset
            # 2-c. Remove edge and add new one
            graph.remove_edge(e2)
            graph.add_edge(in_array, e2.src_conn, e2.dst, e2.dst_conn, e2.data)

        # Finally, remove out_array node
        graph.remove_node(out_array)
        # TODO: Should the array be removed from the SDFG?
        # del sdfg.arrays[out_array]
        if Config.get_bool("debugprint"):
            RedundantSecondArray._arrays_removed += 1
Exemple #12
0
    def __init__(self,
                 name: str,
                 model: onnx.ModelProto,
                 infer_shapes: bool = True,
                 cuda: bool = False,
                 apply_strict: bool = False,
                 auto_optimize: bool = True,
                 parent_pytorch_module: Optional[torch.nn.Module] = None):
        """
        :param name: the name for the SDFG.
        :param model: the model to import.
        :param infer_shapes: whether to infer shapes for the model. If this is ``False``, the model must have
                             value infos (with shapes) for all arrays, including intermediate values.
        :param cuda: if ``True``, the model will be executed on the GPU.
        :param apply_strict: if ``True``, apply strict transformations after all nodes have
                             been expanded calling (warning: this can be very slow!)
        :param auto_optimize: if ``True``, apply automatic optimizations before calling.
        :param parent_pytorch_module: when not None, the weight tensors are loaded from the parameters of this model
                                      rather than the ONNX graph.
        """

        self.do_auto_optimize = auto_optimize
        if infer_shapes:
            model = shape_inference.infer_shapes(model)

        graph: onnx.GraphProto = model.graph

        self.sdfg: SDFG = SDFG(name)  #: the generated SDFG.
        self.sdfg._parent_onnx_model = self
        self.cuda = cuda
        self.apply_strict = apply_strict
        self.state: SDFGState = self.sdfg.add_state(
        )  #: the state containing the model computation.

        # Add all values to the SDFG, check for unsupported ops
        ##########################################

        self.value_infos = {}

        self.inputs: List[str] = []  #: the inputs to the model
        self.outputs: List[str] = []  #: the outputs of the model

        #: hooks that are executed after the sdfg is compiled
        self.post_compile_hooks: Dict[str,
                                      Callable[[compiled_sdfg.CompiledSDFG],
                                               None]] = {}

        for value, is_input in chain(zip(graph.input, repeat(True)),
                                     zip(graph.output, repeat(False))):
            if not value.HasField("name"):
                raise ValueError("Got input or output without name")
            if is_input:
                self.inputs.append(value.name)
            else:
                self.outputs.append(value.name)

            self.value_infos[value.name] = value
            self._add_value_info(value)

        for value in graph.value_info:
            if not value.HasField("name"):
                raise ValueError("Got input or output without name")
            if value.name not in self.value_infos:
                self.value_infos[value.name] = value

        # add weights
        self.weights: Dict[str, torch.Tensor] = {
        }  #: mapping from weight name to array
        for init in graph.initializer:
            self._add_constant_tensor(init, parent_pytorch_module)

        access_nodes = {}
        self._idx_to_node = []
        for i, node in enumerate(graph.node):
            if not has_onnx_node(node.op_type):
                raise ValueError("Unsupported ONNX operator: '{}'".format(
                    node.op_type))

            # extract the op attributes

            op_attributes = {
                attribute_proto.name: convert_attribute_proto(attribute_proto)
                for attribute_proto in node.attribute
            }

            if node.HasField("name"):
                node_name = clean_onnx_name(node.name)
            else:
                node_name = node.op_type + "_" + str(i)

            # construct the dace node
            op_node = get_onnx_node(node.op_type)(node_name, **op_attributes)
            self.state.add_node(op_node)
            self._idx_to_node.append(op_node)

            for param_idx, (name, is_input) in chain(
                    enumerate(zip(node.input, repeat(True))),
                    enumerate(zip(node.output, repeat(False)))):
                if clean_onnx_name(name) not in self.sdfg.arrays:
                    if name not in self.value_infos:
                        raise ValueError(
                            "Could not find array with name '{}'".format(name))
                    self._add_value_info(self.value_infos[name])

                # get the access node
                if name in access_nodes:
                    access = access_nodes[name]
                    self._update_access_type(access, is_input)
                else:
                    access = nd.AccessNode(
                        clean_onnx_name(name), dtypes.AccessType.ReadOnly
                        if is_input else dtypes.AccessType.WriteOnly)
                    self.state.add_node(access)
                    access_nodes[name] = access

                # get the connector name
                params = op_node.schema.inputs if is_input else op_node.schema.outputs
                params_len = len(params)
                if param_idx >= params_len:
                    # this is a variadic parameter. Then the last parameter of the parameter must be variadic.
                    if params[-1].param_type != ONNXParameterType.Variadic:
                        raise ValueError(
                            "Expected the last {i_or_o} parameter to be variadic,"
                            " since the {i_or_o} with idx {param_idx} has more parameters than the schema ({params_len})"
                            .format(i_or_o="input" if is_input else "output",
                                    param_idx=param_idx,
                                    params_len=params_len))
                    conn_name = params[-1].name + "__" + str(param_idx -
                                                             params_len + 1)
                elif params[
                        param_idx].param_type == ONNXParameterType.Variadic:
                    # this is a variadic parameter, and it is within the range of params, so it must be the first
                    # instance of a variadic parameter
                    conn_name = params[param_idx].name + "__0"
                else:
                    conn_name = params[param_idx].name

                data_desc = self.sdfg.arrays[clean_onnx_name(name)]

                # add the connector if required, and add an edge
                if is_input:
                    if conn_name not in op_node.in_connectors:
                        assert op_node.add_in_connector(conn_name)
                    self.state.add_edge(
                        access, None, op_node, conn_name,
                        dace.Memlet.from_array(clean_onnx_name(name),
                                               data_desc))
                else:
                    if conn_name not in op_node.out_connectors:
                        assert op_node.add_out_connector(conn_name)

                    self.state.add_edge(
                        op_node, conn_name, access, None,
                        dace.Memlet.from_array(clean_onnx_name(name),
                                               data_desc))

        if self.cuda:
            self.sdfg.apply_gpu_transformations()
Exemple #13
0
class RedundantSecondArray(pm.Transformation):
    """ Implements the redundant array removal transformation, applied
        when a transient array is copied from and to (from another array),
        but never used anywhere else. This transformation removes the second
        array. """

    _arrays_removed = 0
    _in_array = nodes.AccessNode("_")
    _out_array = nodes.AccessNode("_")

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(RedundantSecondArray._in_array,
                                   RedundantSecondArray._out_array)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        in_array = graph.nodes()[candidate[RedundantSecondArray._in_array]]
        out_array = graph.nodes()[candidate[RedundantSecondArray._out_array]]

        in_desc = in_array.desc(sdfg)
        out_desc = out_array.desc(sdfg)

        # Ensure in degree is one (only one source, which is in_array)
        if graph.in_degree(out_array) != 1:
            return False

        # Make sure that the candidate is a transient variable
        if not out_desc.transient:
            return False

        # 1. Get edge e1 and extract/validate subsets for arrays A and B
        e1 = graph.edges_between(in_array, out_array)[0]
        a_subset, b1_subset = _validate_subsets(e1, sdfg.arrays)

        if strict:
            # In strict mode, make sure the memlet covers the removed array
            if not b1_subset:
                return False
            subset = copy.deepcopy(b1_subset)
            subset.squeeze()
            shape = [sz for sz in out_desc.shape if sz != 1]
            if any(m != a for m, a in zip(subset.size(), shape)):
                return False

            # NOTE: Library node check
            # The transformation must not apply in strict mode if out_array is
            # not a view, is input to a library node, and an access or a view
            # of in_desc is also output to the same library node.
            # The reason is that the application of the transformation will lead
            # to in_desc being both input and output of the library node.
            # We do not know if this is safe.

            # First find the true in_desc (in case in_array is a view).
            true_in_desc = in_desc
            if isinstance(in_desc, data.View):
                e = sdutil.get_view_edge(graph, in_array)
                if not e:
                    return False
                true_in_desc = sdfg.arrays[e.dst.data]

            if not isinstance(out_desc, data.View):

                edges_to_check = []
                for a in graph.out_edges(out_array):
                    if isinstance(a.dst, nodes.LibraryNode):
                        edges_to_check.append(a)
                    elif (isinstance(a.dst, nodes.AccessNode)
                          and isinstance(sdfg.arrays[a.dst.data], data.View)):
                        for b in graph.out_edges(a.dst):
                            edges_to_check.append(graph.memlet_path(b)[-1])

                for a in edges_to_check:
                    if isinstance(a.dst, nodes.LibraryNode):
                        for b in graph.out_edges(a.dst):
                            if isinstance(b.dst, nodes.AccessNode):
                                desc = sdfg.arrays[b.dst.data]
                                if isinstance(desc, data.View):
                                    e = sdutil.get_view_edge(graph, b.dst)
                                    if not e:
                                        return False
                                    desc = sdfg.arrays[e.dst.data]
                                    if desc is true_in_desc:
                                        return False

            # In strict mode, check if the state has two or more access nodes
            # for in_array and at least one of them is a write access. There
            # might be a RW, WR, or WW dependency.
            accesses = [
                n for n in graph.nodes() if isinstance(n, nodes.AccessNode)
                and n.desc(sdfg) == in_desc and n is not in_array
            ]
            if len(accesses) > 0:
                if (graph.in_degree(in_array) > 0
                        or any(graph.in_degree(a) > 0 for a in accesses)):
                    # We need to ensure that a data race will not happen if we
                    # remove in_array.
                    # First, we simplify the graph
                    G = helpers.simplify_state(graph)
                    # Loop over the accesses
                    for a in accesses:
                        subsets_intersect = False
                        for e in graph.in_edges(a):
                            _, subset = _validate_subsets(e,
                                                          sdfg.arrays,
                                                          dst_name=a.data)
                            res = subsets.intersects(a_subset, subset)
                            if res == True or res is None:
                                subsets_intersect = True
                                break
                        if not subsets_intersect:
                            continue
                        try:
                            has_bward_path = nx.has_path(G, a, in_array)
                        except NodeNotFound:
                            has_bward_path = nx.has_path(graph.nx, a, in_array)
                        try:
                            has_fward_path = nx.has_path(G, in_array, a)
                        except NodeNotFound:
                            has_fward_path = nx.has_path(graph.nx, in_array, a)
                        # If there is no path between the access nodes
                        # (disconnected components), then it is definitely
                        # possible to have data races. Abort.
                        if not (has_bward_path or has_fward_path):
                            return False
                        # If there is a forward path then a must not be a direct
                        # successor of in_array.
                        if has_fward_path and a in G.successors(in_array):
                            for src, _ in G.in_edges(a):
                                if src is in_array:
                                    continue
                                if (nx.has_path(G, in_array, src)
                                        and src != out_array):
                                    continue
                                return False

        # Make sure that both arrays are using the same storage location
        # and are of the same type (e.g., Stream->Stream)
        if in_desc.storage != out_desc.storage:
            return False
        if in_desc.location != out_desc.location:
            return False
        if type(in_desc) != type(out_desc):
            if isinstance(in_desc, data.View):
                # Case View -> Access
                # If the View points to the Access (and has a different shape?)
                # then we should (probably) not remove the Access.
                e = sdutil.get_view_edge(graph, in_array)
                if e and e.dst is out_array and in_desc.shape != out_desc.shape:
                    return False
                # Check that the View's immediate ancestors are Accesses.
                # Otherwise, the application of the transformation will result
                # in an ambiguous View.
                view_ancestors_desc = [
                    e.src.desc(sdfg)
                    if isinstance(e.src, nodes.AccessNode) else None
                    for e in graph.in_edges(in_array)
                ]
                if any([
                        not desc or isinstance(desc, data.View)
                        for desc in view_ancestors_desc
                ]):
                    return False
            elif isinstance(out_desc, data.View):
                # Case Access -> View
                # If the View points to the Access and has the same shape,
                # it can be removed
                e = sdutil.get_view_edge(graph, out_array)
                if e and e.src is in_array and in_desc.shape == out_desc.shape:
                    return True
                return False
            else:
                # Something else, for example, Stream
                return False
        else:
            # Two views connected to each other
            if isinstance(in_desc, data.View):
                return False

        # Find occurrences in this and other states
        occurrences = []
        for state in sdfg.nodes():
            occurrences.extend([
                n for n in state.nodes()
                if isinstance(n, nodes.AccessNode) and n.desc(sdfg) == out_desc
            ])
        for isedge in sdfg.edges():
            if out_array.data in isedge.data.free_symbols:
                occurrences.append(isedge)

        if len(occurrences) > 1:
            return False

        # Check whether the data copied from the first datanode cover
        # the subsets of all the output edges of the second datanode.
        # We assume the following pattern: A -- e1 --> B -- e2 --> others

        # 2. Iterate over the e2 edges
        for e2 in graph.out_edges(out_array):
            # 2-a. Extract/validate subsets for array B and others
            try:
                b2_subset, _ = _validate_subsets(e2, sdfg.arrays)
            except NotImplementedError:
                return False
            # 2-b. Check where b1_subset covers b2_subset
            if not b1_subset.covers(b2_subset):
                return False
            # 2-c. Validate subsets in memlet tree
            # (should not be needed for valid SDGs)
            path = graph.memlet_tree(e2)
            for e3 in path:
                if e3 is not e2:
                    try:
                        _validate_subsets(e3,
                                          sdfg.arrays,
                                          src_name=out_array.data)
                    except NotImplementedError:
                        return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        out_array = graph.nodes()[candidate[RedundantSecondArray._out_array]]

        return "Remove " + str(out_array)

    def apply(self, sdfg):
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        graph = sdfg.nodes()[self.state_id]
        in_array = gnode(RedundantSecondArray._in_array)
        out_array = gnode(RedundantSecondArray._out_array)
        in_desc = sdfg.arrays[in_array.data]
        out_desc = sdfg.arrays[out_array.data]

        # We assume the following pattern: A -- e1 --> B -- e2 --> others

        # 1. Get edge e1 and extract subsets for arrays A and B
        e1 = graph.edges_between(in_array, out_array)[0]
        a_subset, b1_subset = _validate_subsets(e1, sdfg.arrays)

        # Find extraneous A or B subset dimensions
        a_dims_to_pop = []
        b_dims_to_pop = []
        aset = a_subset
        popped = []
        if a_subset and b1_subset and a_subset.dims() != b1_subset.dims():
            a_size = a_subset.size_exact()
            b_size = b1_subset.size_exact()
            if a_subset.dims() > b1_subset.dims():
                a_dims_to_pop = find_dims_to_pop(a_size, b_size)
                aset, popped = pop_dims(a_subset, a_dims_to_pop)
            else:
                b_dims_to_pop = find_dims_to_pop(b_size, a_size)

        # If the src subset does not cover the removed array, create a view.
        if a_subset and any(m != a
                            for m, a in zip(a_subset.size(), out_desc.shape)):
            # NOTE: We do not want to create another view, if the immediate
            # successors of out_array are views as well. We just remove it.
            out_successors_desc = [
                e.dst.desc(sdfg)
                if isinstance(e.dst, nodes.AccessNode) else None
                for e in graph.out_edges(out_array)
            ]
            if all([
                    desc and isinstance(desc, data.View)
                    for desc in out_successors_desc
            ]):
                for e in graph.out_edges(out_array):
                    _, b_subset = _validate_subsets(e, sdfg.arrays)
                    graph.add_edge(
                        in_array, None, e.dst, e.dst_conn,
                        mm.Memlet(in_array.data,
                                  subset=a_subset,
                                  other_subset=b_subset,
                                  wcr=e1.data.wcr,
                                  wcr_nonatomic=e1.data.wcr_nonatomic))
                    graph.remove_edge(e)
                graph.remove_edge(e1)
                graph.remove_node(out_array)
                if out_array.data in sdfg.arrays:
                    del sdfg.arrays[out_array.data]
                return
            view_strides = out_desc.strides
            if (a_dims_to_pop and len(a_dims_to_pop)
                    == len(in_desc.shape) - len(out_desc.shape)):
                view_strides = [
                    s for i, s in enumerate(in_desc.strides)
                    if i not in a_dims_to_pop
                ]
            sdfg.arrays[out_array.data] = data.View(
                out_desc.dtype, out_desc.shape, True, out_desc.allow_conflicts,
                in_desc.storage, in_desc.location, view_strides,
                out_desc.offset, in_desc.may_alias,
                dtypes.AllocationLifetime.Scope, out_desc.alignment,
                out_desc.debuginfo, out_desc.total_size)
            return

        # 2. Iterate over the e2 edges and traverse the memlet tree
        for e2 in graph.out_edges(out_array):
            path = graph.memlet_tree(e2)
            wcr = e1.data.wcr
            wcr_nonatomic = e1.data.wcr_nonatomic
            for e3 in path:
                # 2-a. Extract subsets for array B and others
                b3_subset, other_subset = _validate_subsets(
                    e3, sdfg.arrays, src_name=out_array.data)
                # 2-b. Modify memlet to match array A. Example:
                # A -- (0, a:b)/(c:c+b) --> B -- (c+d)/None --> others
                # A -- (0, a+d)/None --> others
                e3.data.data = in_array.data
                # (c+d) - (c:c+b) = (d)
                b3_subset.offset(b1_subset, negative=True)
                # (0, a:b)(d) = (0, a+d) (or offset for indices)

                if b3_subset and b_dims_to_pop:
                    bset, _ = pop_dims(b3_subset, b_dims_to_pop)
                else:
                    bset = b3_subset

                e3.data.subset = compose_and_push_back(aset, bset,
                                                       a_dims_to_pop, popped)
                # NOTE: This fixes the following case:
                # A ----> A[subset] ----> ... -----> Tasklet
                # Tasklet is not data, so it doesn't have an other subset.
                if isinstance(e3.dst, nodes.AccessNode):
                    e3.data.other_subset = other_subset
                else:
                    e3.data.other_subset = None
                wcr = wcr or e3.data.wcr
                wcr_nonatomic = wcr_nonatomic or e3.data.wcr_nonatomic
                e3.data.wcr = wcr
                e3.data.wcr_nonatomic = wcr_nonatomic

            # 2-c. Remove edge and add new one
            graph.remove_edge(e2)
            e2.data.wcr = wcr
            e2.data.wcr_nonatomic = wcr_nonatomic
            graph.add_edge(in_array, e2.src_conn, e2.dst, e2.dst_conn, e2.data)

        # Finally, remove out_array node
        graph.remove_node(out_array)
        if out_array.data in sdfg.arrays:
            try:
                sdfg.remove_data(out_array.data)
            except ValueError:  # Already in use (e.g., with Views)
                pass
Exemple #14
0
class StencilFusion(Transformation):
    """ Transformation that nests a one-dimensional map into a stencil,
        including it in the computational domain. """

    _stencil_a = Stencil('')
    _stencil_b = Stencil('')
    _tmp_array = nodes.AccessNode('_')

    @staticmethod
    def expressions():
        return [
            utils.node_path_graph(StencilFusion._stencil_a,
                                  StencilFusion._tmp_array,
                                  StencilFusion._stencil_b)
        ]

    @staticmethod
    def match_to_str(graph, candidate):
        stencil_a: Stencil = graph.node(candidate[StencilFusion._stencil_a])
        stencil_b: Stencil = graph.node(candidate[StencilFusion._stencil_b])
        return '%s -> %s' % (stencil_a.label, stencil_b.label)

    @staticmethod
    def can_be_applied(graph: dace.SDFGState,
                       candidate: Dict[Any, int],
                       expr_index: int,
                       sdfg: dace.SDFG,
                       strict=False):
        stencil_a: Stencil = graph.node(candidate[StencilFusion._stencil_a])
        stencil_b: Stencil = graph.node(candidate[StencilFusion._stencil_b])
        array: nodes.AccessNode = graph.node(
            candidate[StencilFusion._tmp_array])

        # Ensure the stencil shapes match
        if len(stencil_a.shape) != len(stencil_b.shape):
            return False
        if any(sa != sb for sa, sb in zip(stencil_a.shape, stencil_b.shape)):
            return False

        # Ensure that the transient is not used anywhere else and can be
        # removed
        if len(graph.all_edges(array)) != 2:
            return False
        if not sdfg.arrays[array.data].transient:
            return False
        if (len([
                n for state in sdfg.nodes() for n in state.nodes()
                if isinstance(n, nodes.AccessNode) and n.data == array.data
        ]) > 1):
            return False

        # Ensure that second stencil only has one input access of the
        # candidate transient to remove
        edge = graph.out_edges(array)[0]
        if len(stencil_b.accesses[edge.dst_conn][1]) > 1:
            return False

        # TODO: Remove check once stencils can be offset
        if any(a != 0 for a in stencil_b.accesses[edge.dst_conn][1][0]):
            return False

        # Code languages must match
        if stencil_a.code.language != stencil_b.code.language:
            return False

        # TODO: Boundary condition matching checks

        return True

    def apply(self, sdfg: dace.SDFG):
        graph: dace.SDFGState = sdfg.node(self.state_id)
        stencil_a: Stencil = graph.node(
            self.subgraph[StencilFusion._stencil_a])
        stencil_b: Stencil = graph.node(
            self.subgraph[StencilFusion._stencil_b])
        array: nodes.AccessNode = graph.node(
            self.subgraph[StencilFusion._tmp_array])

        intermediate_name = graph.in_edges(array)[0].src_conn
        intermediate_name_b = graph.out_edges(array)[0].dst_conn

        # Replace outputs of first stencil with outputs of second stencil
        # In node and in connectors, reconnect
        stencil_a.output_fields = stencil_b.output_fields
        stencil_a.boundary_conditions = stencil_b.boundary_conditions
        for edge in list(graph.out_edges(stencil_a)):
            if edge.src_conn == intermediate_name:
                graph.remove_edge(edge)
                del stencil_a._out_connectors[intermediate_name]
        for edge in graph.out_edges(stencil_b):
            stencil_a.add_out_connector(edge.src_conn)
            graph.add_edge(stencil_a, edge.src_conn, edge.dst, edge.dst_conn,
                           edge.data)

        # Add other stencil inputs of the second stencil to the first
        # In node and in connectors, reconnect
        for edge in graph.in_edges(stencil_b):
            # Skip edge to remove
            if edge.dst_conn == intermediate_name_b:
                continue
            if edge.dst_conn not in stencil_a.accesses:
                stencil_a.accesses[edge.dst_conn] = stencil_b.accesses[
                    edge.dst_conn]
                stencil_a.add_in_connector(edge.dst_conn)
                graph.add_edge(edge.src, edge.src_conn, stencil_a,
                               edge.dst_conn, edge.data)
            else:
                # If same input is accessed in both stencils, only append the
                # inputs that are new to stencil_a
                for access in stencil_b.accesses[edge.dst_conn][1]:
                    if access not in stencil_a.accesses[edge.dst_conn][1]:
                        stencil_a.accesses[edge.dst_conn][1].append(access)

        # Add second stencil's statements to first stencil, replacing the input
        # to the second stencil with the name of the output access
        if stencil_a.code.language == dace.Language.Python:
            # Replace first stencil's output with connector name
            for i, stmt in enumerate(stencil_a.code.code):
                stencil_a.code.code[i] = ReplaceSubscript({
                    intermediate_name:
                    intermediate_name_b
                }).visit(stmt)

            # Append second stencil's contents, using connector name instead of
            # accessing the intermediate transient
            # TODO: Use offsetted stencil
            for i, stmt in enumerate(stencil_b.code.code):
                stencil_a.code.code.append(
                    ReplaceSubscript({
                        intermediate_name_b: intermediate_name_b
                    }).visit(stmt))

        elif stencil_a.code.language == dace.Language.CPP:
            raise NotImplementedError
        else:
            raise ValueError('Unrecognized language: %s' %
                             stencil_a.code.language)

        # Remove array from graph
        graph.remove_node(array)
        del sdfg.arrays[array.data]

        # Remove 2nd stencil
        graph.remove_node(stencil_b)
Exemple #15
0
class BankSplit(transformation.Transformation):
    """
    A transformation that allow splitting an array and distribute it on another
    array with one dimension more, or vice versa. Works with arbitrary arrays,
    but its intended use case is to distribute data on many HBM-banks.
    Matches any 2 AccessNodes connected by an edge, if the dimensionality of the two accessed
    arrays differ by exactly one. The sizes of the arrays have to be large enough with
    respect to the split executed, but this is not verified. While it is allowed to use symbolics
    for the shapes of the array, it is expected that each dimension is divisible by the number
    of splits specified.

    When appling an unrolled map is generated around the accessnodes, which copies the parts of
    the array to the target array.

    Examples:
    Distribute: Suppose for example we copy from A to B, where A has shape [100, 100] and B shape
    [10, 100, 10]. We can distribute A in that case to B using the transformation by setting
    split_array_info=[1, 10]. A will then be divided along it's second dimension into 10 parts
    of size [100, 10] and distributed on B.
    Gather: Suppose A has shape [4, 50, 50] and B has shape [100, 100]. If one sets
    split_array_info to [2, 2] and applies the transformation, it will split
    equally in all dimensions.
    Therefore A[0] will be copied to B[0:50, 0:50], A[1] to B[0:50, 50:100], A[2] to B[50:100, 0:50] and
    A[3] to B[50:100, 50:100].

    Note that simply reversing the AccessNodes for the arrays in the above examples would
    have lead to the inverse operation, i.e. the gather would become a distribute and
    the other way around.
    """

    _src_node = nd.AccessNode("")
    _dst_node = nd.AccessNode("")

    # dtype=List[int]
    split_array_info = properties.Property(
        dtype=List,
        default=None,
        allow_none=True,
        desc="Describes how many times this array is split in each dimension, "
        "where the k-th number describes how many times dimension k is split. "
        "If the k-th number is 1 this means that the array is not split in "
        "the k-th dimension at all. "
        "If None, then the transform will split the first dimension exactly shape[0] times."
    )

    default_to_storage = properties.Property(
        dtype=dtypes.StorageType,
        default=dtypes.StorageType.CPU_Heap,
        allow_none=False,
        desc=
        "The storage type of involved arrays will be set to the value of this property if "
        "they have Default storage type. ")

    def _get_split_size(self, virtual_shape: Iterable,
                        split_count: List[int]) -> List[int]:
        """
        :return: the shape of a part-array on one HBMbank
        """
        new_shape_list = []
        for d in range(len(virtual_shape)):
            if split_count[d] != 1:
                new_shape_list.append(virtual_shape[d] // split_count[d])
            else:
                new_shape_list.append(virtual_shape[d])
        return new_shape_list

    @staticmethod
    def can_be_applied(graph: Union[SDFG, SDFGState],
                       candidate: Dict['PatternNode', int], expr_index: int,
                       sdfg: SDFG, strict: bool) -> bool:
        src = graph.nodes()[candidate[BankSplit._src_node]]
        dst = graph.nodes()[candidate[BankSplit._dst_node]]
        src_array = sdfg.arrays[src.data]
        dst_array = sdfg.arrays[dst.data]

        plain_array = lambda array: isinstance(
            array, data.Array) and not isinstance(array, data.View)

        if not plain_array(src_array):
            return False
        if not plain_array(dst_array):
            return False

        # same dimensions means HBM-array needs 1 dimension more
        collect_src = len(src_array.shape) - 1 == len(dst_array.shape)
        distribute_dst = len(src_array.shape) + 1 == len(dst_array.shape)
        if collect_src and symbolic.issymbolic(src_array.shape[0],
                                               sdfg.constants):
            return False
        elif distribute_dst and symbolic.issymbolic(dst_array.shape[0],
                                                    sdfg.constants):
            return False
        return collect_src or distribute_dst

    @staticmethod
    def expressions():
        return [
            utils.node_path_graph(BankSplit._src_node, BankSplit._dst_node)
        ]

    def apply(self, sdfg: SDFG) -> Union[Any, None]:
        # Load/parse infos from the SDFG
        graph = sdfg.nodes()[self.state_id]
        src = graph.nodes()[self.subgraph[BankSplit._src_node]]
        dst = graph.nodes()[self.subgraph[BankSplit._dst_node]]
        src_array = sdfg.arrays[src.data]
        dst_array = sdfg.arrays[dst.data]
        collect_src = len(src_array.shape) - 1 == len(
            dst_array.shape
        )  # If this is not true we have to distribute to dst (checked in can_apply)
        if collect_src:
            bank_count = int(src_array.shape[0])
            true_size = dst_array.shape
        else:
            bank_count = int(dst_array.shape[0])
            true_size = src_array.shape
        ndim = len(true_size)

        # Move Default storage
        if sdfg.arrays[src.data].storage == dtypes.StorageType.Default:
            sdfg.arrays[src.data].storage = self.default_to_storage
        if sdfg.arrays[dst.data].storage == dtypes.StorageType.Default:
            sdfg.arrays[dst.data].storage = self.default_to_storage

        # Figure out how to split
        if self.split_array_info is None:
            split_info = [1] * ndim
            split_info[0] = bank_count
        else:
            split_info = self.split_array_info
            if len(split_info) != ndim:
                raise RuntimeError(
                    "Length of split_array_info must match number of "
                    "dimensions")
        if functools.reduce(lambda a, b: a * b, split_info) != bank_count:
            raise RuntimeError(
                "Splitting is not possible with the selected splits"
                "and this number of HBM-banks (required number of banks "
                "!= actual number of banks)")

        # create the copy-subgraph
        ndrange = dict()
        usable_params = []
        for i in range(ndim):
            usable_params.append(f"i{i}")
        for i in range(ndim):
            ndrange[usable_params[i]] = f"0:{split_info[i]}"
        graph.remove_edge_and_connectors(graph.edges_between(src, dst)[0])
        copy_map_enter, copy_map_exit = graph.add_map(
            "hbm_bank_split", ndrange, dtypes.ScheduleType.Unrolled)
        graph.add_edge(copy_map_enter, None, src, None, memlet.Memlet())
        graph.add_edge(dst, None, copy_map_exit, None, memlet.Memlet())

        target_size = [
            str(x) for x in self._get_split_size(true_size, split_info)
        ]
        target_hbm_bank = []
        for i in range(ndim):
            target_hbm_bank.append(usable_params[i])
            for j in range(i):
                target_hbm_bank[j] = f"{split_info[i]}*{target_hbm_bank[j]}"
        target_offset = []
        for i in range(ndim):
            target_offset.append(f"{usable_params[i]}*{target_size[i]}")

        target_size_str = ", ".join(
            [f"{x}:{y}" for x, y in zip([0] * ndim, target_size)])
        target_hbm_bank_str = "+ ".join(target_hbm_bank)
        target_offset_str = ", ".join(
            [f"({x}):({x}+{y})" for x, y in zip(target_offset, target_size)])
        if collect_src:
            copy_memlet = memlet.Memlet(
                f"{src.data}[{target_hbm_bank_str}, {target_size_str}]->"
                f"{target_offset_str}")
        else:
            copy_memlet = memlet.Memlet(
                f"{src.data}[{target_offset_str}]->{target_hbm_bank_str}, "
                f"{target_size_str}")
        graph.add_edge(src, None, dst, None, copy_memlet)
Exemple #16
0
class MapReduceFusion(pm.Transformation):
    """ Implements the map-reduce-fusion transformation.
        Fuses a map with an immediately following reduction, where the array
        between the map and the reduction is not used anywhere else.
    """

    no_init = Property(dtype=bool,
                       default=False,
                       desc='If enabled, does not create initialization states '
                       'for reduce nodes with identity')

    _tasklet = nodes.Tasklet('_')
    _tmap_exit = nodes.MapExit(nodes.Map("", [], []))
    _in_array = nodes.AccessNode('_')

    import dace.libraries.standard as stdlib  # Avoid import loop
    _reduce = stdlib.Reduce()

    _out_array = nodes.AccessNode('_')

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(MapReduceFusion._tasklet,
                                   MapReduceFusion._tmap_exit,
                                   MapReduceFusion._in_array,
                                   MapReduceFusion._reduce,
                                   MapReduceFusion._out_array)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        tmap_exit = graph.nodes()[candidate[MapReduceFusion._tmap_exit]]
        in_array = graph.nodes()[candidate[MapReduceFusion._in_array]]
        reduce_node = graph.nodes()[candidate[MapReduceFusion._reduce]]
        tasklet = graph.nodes()[candidate[MapReduceFusion._tasklet]]

        # Make sure that the array is only accessed by the map and the reduce
        if any([
                src != tmap_exit
                for src, _, _, _, memlet in graph.in_edges(in_array)
        ]):
            return False
        if any([
                dest != reduce_node
                for _, _, dest, _, memlet in graph.out_edges(in_array)
        ]):
            return False

        tmem = next(e for e in graph.edges_between(tasklet, tmap_exit)
                    if e.data.data == in_array.data).data

        # (strict) Make sure that the transient is not accessed anywhere else
        # in this state or other states
        if strict and (len([
                n for n in graph.nodes()
                if isinstance(n, nodes.AccessNode) and n.data == in_array.data
        ]) > 1 or in_array.data in sdfg.shared_transients()):
            return False

        # If memlet already has WCR and it is different from reduce node,
        # do not match
        if tmem.wcr is not None and tmem.wcr != reduce_node.wcr:
            return False

        # Verify that reduction ranges match tasklet map
        tout_memlet = graph.in_edges(in_array)[0].data
        rin_memlet = graph.out_edges(in_array)[0].data
        if tout_memlet.subset != rin_memlet.subset:
            return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        tasklet = candidate[MapReduceFusion._tasklet]
        map_exit = candidate[MapReduceFusion._tmap_exit]
        reduce = candidate[MapReduceFusion._reduce]

        return ' -> '.join(str(node) for node in [tasklet, map_exit, reduce])

    def apply(self, sdfg: SDFG):
        graph = sdfg.nodes()[self.state_id]
        tmap_exit = graph.nodes()[self.subgraph[MapReduceFusion._tmap_exit]]
        in_array = graph.nodes()[self.subgraph[MapReduceFusion._in_array]]
        reduce_node = graph.nodes()[self.subgraph[MapReduceFusion._reduce]]
        out_array = graph.nodes()[self.subgraph[MapReduceFusion._out_array]]

        # Set nodes to remove according to the expression index
        nodes_to_remove = [in_array]
        nodes_to_remove.append(reduce_node)

        memlet_edge = None
        for edge in graph.in_edges(tmap_exit):
            if edge.data.data == in_array.data:
                memlet_edge = edge
                break
        if memlet_edge is None:
            raise RuntimeError('Reduction memlet cannot be None')

        # Find which indices should be removed from new memlet
        input_edge = graph.in_edges(reduce_node)[0]
        axes = reduce_node.axes or list(range(len(input_edge.data.subset)))
        array_edge = graph.out_edges(reduce_node)[0]

        # Delete relevant edges and nodes
        graph.remove_nodes_from(nodes_to_remove)

        # Filter out reduced dimensions from subset
        filtered_subset = [
            dim for i, dim in enumerate(memlet_edge.data.subset)
            if i not in axes
        ]
        if len(filtered_subset) == 0:  # Output is a scalar
            filtered_subset = [(0, 0, 1)]

        # Modify edge from tasklet to map exit
        memlet_edge.data.data = out_array.data
        memlet_edge.data.wcr = reduce_node.wcr
        memlet_edge.data.subset = type(memlet_edge.data.subset)(filtered_subset)

        # Add edge from map exit to output array
        graph.add_edge(
            memlet_edge.dst, 'OUT_' + memlet_edge.dst_conn[3:], array_edge.dst,
            array_edge.dst_conn,
            Memlet.simple(array_edge.data.data,
                          array_edge.data.subset,
                          num_accesses=array_edge.data.num_accesses,
                          wcr_str=reduce_node.wcr))

        # Add initialization state as necessary
        if reduce_node.identity is not None:
            init_state = sdfg.add_state_before(graph)
            init_state.add_mapped_tasklet(
                'freduce_init',
                [('o%d' % i, '%s:%s:%s' % (r[0], r[1] + 1, r[2]))
                 for i, r in enumerate(array_edge.data.subset)], {},
                'out = %s' % reduce_node.identity, {
                    'out':
                    Memlet.simple(
                        array_edge.data.data, ','.join([
                            'o%d' % i
                            for i in range(len(array_edge.data.subset))
                        ]))
                },
                external_edges=True)
Exemple #17
0
    def expand(self, sdfg, graph, reduce_node):
        """ Splits the data dimension into an inner and outer dimension,
            where the inner dimension are the reduction axes and the
            outer axes the complement. Pushes the reduce inside a new
            map consisting of the complement axes.

        """

        out_storage_node = graph.out_edges(reduce_node)[0].dst
        in_storage_node = graph.in_edges(reduce_node)[0].src
        wcr = reduce_node.wcr
        identity = reduce_node.identity
        schedule = reduce_node.schedule
        implementation = reduce_node.implementation
        if implementation and 'warp' in implementation:
            raise NotImplementedError(
                "WIP: Warp Reductions are not Implemented yet.")

        # remove the reduce identity
        # we will reassign it later after expanding
        reduce_node.identity = None
        # expand the reduce node
        in_edge = graph.in_edges(reduce_node)[0]
        nsdfg = self._expand_reduce(sdfg, graph, reduce_node)
        # find the new nodes in the nested sdfg created
        nstate = nsdfg.sdfg.nodes()[0]
        for node, scope in nstate.scope_dict().items():
            if isinstance(node, nodes.MapEntry):
                if scope is None:
                    outer_entry = node
                else:
                    inner_entry = node
            if isinstance(node, nodes.Tasklet):
                tasklet_node = node

        inner_exit = nstate.exit_node(inner_entry)
        outer_exit = nstate.exit_node(outer_entry)

        # find earliest parent read-write occurrence of array onto which
        # we perform the reduction:
        # do BFS, best complexity O(V+E)

        queue = [nsdfg]
        array_closest_ancestor = None
        while len(queue) > 0:
            current = queue.pop(0)
            if isinstance(current, nodes.AccessNode):
                if current.data == out_storage_node.data:
                    # it suffices to find the first node
                    # no matter what access (ReadWrite or Read)
                    array_closest_ancestor = current
                    break
            queue.extend([in_edge.src for in_edge in graph.in_edges(current)])

        # if ancestor doesn't exist:
        #           if non-transient: create data node accessing it
        #           if transient: ancestor_node = none, set_zero on outer node

        shortcut = False
        if (not array_closest_ancestor and sdfg.data(out_storage_node.data).transient) \
                                        or identity is not None:
            if self.debug:
                print("ReduceExpansion::Expanding Reduction into Map")
            # we are lucky
            shortcut = True
            nstate.out_edges(outer_exit)[0].data.wcr = None

        else:
            if self.debug:
                print("ReduceExpansion::Expanding Reduction into Map "
                      "and introducing update Tasklet, "
                      "connecting with ancestor.")
            if not array_closest_ancestor:
                array_closest_ancestor = nodes.AccessNode(
                    out_storage_node.data, access=dtypes.AccessType.ReadOnly)
                graph.add_node(array_closest_ancestor)
                # array_closest_ancestor now points to the node we want to connect
                # to the map entry

            # always have to create out transient in this case
            self.create_out_transient = True

        if self.create_out_transient:
            # create an out transient between inner and outer map exit
            array_out = nstate.out_edges(outer_exit)[0].data.data

            from dace.transformation.dataflow.local_storage import LocalStorage
            local_storage_subgraph = {
                LocalStorage.node_a:
                nsdfg.sdfg.nodes()[0].nodes().index(inner_exit),
                LocalStorage.node_b:
                nsdfg.sdfg.nodes()[0].nodes().index(outer_exit)
            }
            nsdfg_id = nsdfg.sdfg.sdfg_list.index(nsdfg.sdfg)
            nstate_id = 0
            local_storage = LocalStorage(nsdfg_id, nstate_id,
                                         local_storage_subgraph, 0)
            local_storage.array = array_out
            local_storage.apply(nsdfg.sdfg)
            out_transient_node_inner = local_storage._data_node

            # push to register
            nsdfg.sdfg.data(out_transient_node_inner.data
                            ).storage = dtypes.StorageType.Register
            if shortcut:
                nstate.out_edges(out_transient_node_inner)[0].data.wcr = None
                nstate.out_edges(out_transient_node_inner)[0].data.volume = 1

            if shortcut:
                nstate.out_edges(out_transient_node_inner)[0].data.wcr = None
                nstate.out_edges(out_transient_node_inner)[0].data.volume = 1

        if self.create_in_transient:
            # create an in-transient between inner and outer map entry
            array_in = nstate.in_edges(outer_entry)[0].data.data

            from dace.transformation.dataflow.local_storage import LocalStorage
            local_storage_subgraph = {
                LocalStorage.node_a:
                nsdfg.sdfg.nodes()[0].nodes().index(outer_entry),
                LocalStorage.node_b:
                nsdfg.sdfg.nodes()[0].nodes().index(inner_entry)
            }

            nsdfg_id = nsdfg.sdfg.sdfg_list.index(nsdfg.sdfg)
            nstate_id = 0
            local_storage = LocalStorage(nsdfg_id, nstate_id,
                                         local_storage_subgraph, 0)
            local_storage.array = array_in
            local_storage.apply(nsdfg.sdfg)
            in_transient_node_inner = local_storage._data_node

            # push to shared memory / default
            nsdfg.sdfg.data(in_transient_node_inner.data
                            ).storage = dtypes.StorageType.Register

        # first, inline fuse back our nested SDFG
        from dace.transformation.interstate import InlineSDFG
        inline_sdfg = InlineSDFG(
            sdfg.sdfg_list.index(sdfg),
            sdfg.nodes().index(graph),
            {InlineSDFG._nested_sdfg: graph.nodes().index(nsdfg)}, 0)
        inline_sdfg.apply(sdfg)
        if not shortcut:
            reduction_type = detect_reduction_type(wcr)
            try:
                code = ReduceExpansion.reduction_type_update[reduction_type]
            except KeyError:
                raise NotImplementedError(
                    "Not yet implemented for custom reduction")

            new_tasklet = graph.add_tasklet(
                name="reduction_transient_update",
                inputs={"reduction_in", "array_in"},
                outputs={"out"},
                code=code)

            edge_to_remove = graph.out_edges(out_transient_node_inner)[0] \
                             if self.create_out_transient \
                             else graph.out_edges(inner_exit)[0]

            new_memlet_array_inner = Memlet(data=out_storage_node.data,
                                            volume=1,
                                            subset=edge_to_remove.data.subset)
            new_memlet_array_outer = Memlet(
                data=array_closest_ancestor.data,
                volume=graph.in_edges(outer_entry)[0].data.volume,
                subset=subsets.Range.from_array(
                    sdfg.data(out_storage_node.data)))

            new_memlet_reduction = Memlet(
                data=graph.out_edges(inner_exit)[0].data.data,
                volume=1,
                subset=graph.out_edges(inner_exit)[0].data.subset)
            new_memlet_out_inner = Memlet(data=edge_to_remove.data.data,
                                          volume=1,
                                          subset=edge_to_remove.data.subset)
            new_memlet_out_outer = dcpy(new_memlet_array_outer)

            # remove old edges

            outer_edge_to_remove = None
            for edge in graph.out_edges(outer_exit):
                if edge.src == edge_to_remove.dst:
                    outer_edge_to_remove = edge

            graph.remove_edge_and_connectors(edge_to_remove)
            graph.remove_edge_and_connectors(outer_edge_to_remove)


            graph.add_edge(out_transient_node_inner if self.create_out_transient \
                                                    else inner_exit,
                           None,
                           new_tasklet,
                           "reduction_in",
                           new_memlet_reduction)

            graph.add_edge(outer_entry, None, new_tasklet, "array_in",
                           new_memlet_array_inner)
            graph.add_edge(array_closest_ancestor, None, outer_entry, None,
                           new_memlet_array_outer)
            graph.add_edge(new_tasklet, "out", outer_exit, None,
                           new_memlet_out_inner)
            graph.add_edge(outer_exit, None, out_storage_node, None,
                           new_memlet_out_outer)

            # fill map scope connectors
            graph.fill_scope_connectors()
            graph._clear_scopedict_cache()
            # wcr is already removed

        # FORNOW: choose default schedule and implementation
        new_schedule = dtypes.ScheduleType.Default
        new_implementation = self.reduce_implementation \
                             if self.reduce_implementation is not None \
                             else implementation
        new_axes = dcpy(reduce_node.axes)

        reduce_node_new = graph.add_reduce(wcr=wcr,
                                           axes=new_axes,
                                           schedule=new_schedule,
                                           identity=identity)
        reduce_node_new.implementation = new_implementation
        edge_tmp = graph.in_edges(inner_entry)[0]
        memlet_src_reduce = dcpy(edge_tmp.data)
        graph.add_edge(edge_tmp.src, edge_tmp.src_conn, reduce_node_new, None,
                       memlet_src_reduce)

        edge_tmp = graph.out_edges(inner_exit)[0]
        memlet_reduce_dst = Memlet(data=edge_tmp.data.data,
                                   volume=1,
                                   subset=edge_tmp.data.subset)

        graph.add_edge(reduce_node_new, None, edge_tmp.dst, edge_tmp.dst_conn,
                       memlet_reduce_dst)
        identity_tasklet = graph.out_edges(inner_entry)[0].dst
        graph.remove_node(inner_entry)
        graph.remove_node(inner_exit)
        graph.remove_node(identity_tasklet)

        # propagate scope for correct volumes
        scope_tree = ScopeTree(outer_entry, outer_exit)
        scope_tree.parent = ScopeTree(None, None)
        propagate_memlets_scope(sdfg, graph, scope_tree)
        sdfg.validate()

        # create variables for outside access
        self._new_reduce = reduce_node_new
        self._outer_entry = outer_entry

        if identity is None and self.create_out_transient:
            # set the reduction identity accordingly so that the correct
            # blank result is written to the out_transient node
            # we use default values deducted from the reduction type
            reduction_type = detect_reduction_type(wcr)
            try:
                reduce_node_new.identity = self.reduction_type_identity[
                    reduction_type]
            except KeyError:

                if reduction_type == dtypes.ReductionType.Min:
                    reduce_node_new.identity = dtypes.max_value(
                        sdfg.arrays[out_storage_node.data].dtype)
                elif reduction_type == dtypes.ReductionType.Max:
                    reduce_node_new.identity = dtypes.min_value(
                        sdfg.arrays[out_storage_node.data].dtype)
                else:
                    raise ValueError(f"Cannot infer reduction identity."
                                     "Please specify the identity of node"
                                     "{reduce_node_new}")

        return
Exemple #18
0
class RedundantSecondArray(pm.Transformation):
    """ Implements the redundant array removal transformation, applied
        when a transient array is copied from and to (from another array),
        but never used anywhere else. This transformation removes the second
        array. """

    _arrays_removed = 0
    _in_array = nodes.AccessNode("_")
    _out_array = nodes.AccessNode("_")

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(RedundantSecondArray._in_array,
                                   RedundantSecondArray._out_array)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        in_array = graph.nodes()[candidate[RedundantSecondArray._in_array]]
        out_array = graph.nodes()[candidate[RedundantSecondArray._out_array]]

        # Ensure in degree is one (only one source, which is in_array)
        if graph.in_degree(out_array) != 1:
            return False

        # Make sure that the candidate is a transient variable
        if not out_array.desc(sdfg).transient:
            return False

        # Make sure that both arrays are using the same storage location
        if in_array.desc(sdfg).storage != out_array.desc(sdfg).storage:
            return False

        # Find occurrences in this and other states
        occurrences = []
        for state in sdfg.nodes():
            occurrences.extend([
                n for n in state.nodes() if isinstance(n, nodes.AccessNode)
                and n.desc(sdfg) == out_array.desc(sdfg)
            ])

        if len(occurrences) > 1:
            return False

        # Only apply if arrays are of same shape (no need to modify memlet subset)
        # if len(in_array.desc(sdfg).shape) != len(
        #         out_array.desc(sdfg).shape) or any(i != o for i, o in zip(
        #             in_array.desc(sdfg).shape,
        #             out_array.desc(sdfg).shape)):
        #     return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        out_array = graph.nodes()[candidate[RedundantSecondArray._out_array]]

        return "Remove " + str(out_array)

    def apply(self, sdfg):
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        graph = sdfg.nodes()[self.state_id]
        in_array = gnode(RedundantSecondArray._in_array)
        out_array = gnode(RedundantSecondArray._out_array)
        memlet = graph.edges_between(in_array, out_array)[0].data
        if memlet.data == in_array.data:
            subset = memlet.subset
        else:
            subset = memlet.other_subset

        for e in graph.out_edges(out_array):
            # Modify all outgoing edges to point to in_array
            path = graph.memlet_tree(e)
            for pe in path:
                if pe.data.data == out_array.data:
                    pe.data.data = in_array.data
                    if isinstance(subset, subsets.Indices):
                        pe.data.subset.offset(subset, False)
                    else:
                        pe.data.subset = subset.compose(pe.data.subset)
                elif pe.data.other_subset:
                    if isinstance(subset, subsets.Indices):
                        pe.data.other_subset.offset(subset, False)
                    else:
                        pe.data.other_subset = subset.compose(
                            pe.data.other_subset)

            # Redirect edge to out_array
            graph.remove_edge(e)
            graph.add_edge(in_array, e.src_conn, e.dst, e.dst_conn, e.data)

        # Finally, remove out_array node
        graph.remove_node(out_array)
        # TODO: Should the array be removed from the SDFG?
        # del sdfg.arrays[out_array]
        if Config.get_bool("debugprint"):
            RedundantSecondArray._arrays_removed += 1
Exemple #19
0
class MatrixProductTranspose(transformation.Transformation):
    """ Implements the matrix-matrix product transpose transformation.

        T(A) @ T(B) = T(B @ A)
    """

    _transpose_a = blas.Transpose("")
    _at = nodes.AccessNode("")
    _transpose_b = blas.Transpose("")
    _bt = nodes.AccessNode("")
    _a_times_b = blas.MatMul("")

    @staticmethod
    def expressions():
        graph = dace.sdfg.graph.OrderedDiGraph()
        graph.add_node(MatrixProductTranspose._transpose_a)
        graph.add_node(MatrixProductTranspose._at)
        graph.add_node(MatrixProductTranspose._transpose_b)
        graph.add_node(MatrixProductTranspose._bt)
        graph.add_node(MatrixProductTranspose._a_times_b)
        graph.add_edge(MatrixProductTranspose._transpose_a,
                       MatrixProductTranspose._at, None)
        graph.add_edge(MatrixProductTranspose._at,
                       MatrixProductTranspose._a_times_b, None)
        graph.add_edge(MatrixProductTranspose._transpose_b,
                       MatrixProductTranspose._bt, None)
        graph.add_edge(MatrixProductTranspose._bt,
                       MatrixProductTranspose._a_times_b, None)
        return [graph]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, permissive=False):
        _at = graph.nodes()[candidate[MatrixProductTranspose._at]]
        _a_times_b = graph.nodes()[candidate[
            MatrixProductTranspose._a_times_b]]
        edges = graph.edges_between(_at, _a_times_b)
        # Enforce unique match
        if len(edges) != 1:
            return False
        _, _, _, dst_conn, _ = edges[0]
        if dst_conn != '_a':
            return False
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        transpose_a = graph.nodes()[candidate[
            MatrixProductTranspose._transpose_a]]
        transpose_b = graph.nodes()[candidate[
            MatrixProductTranspose._transpose_b]]
        a_times_b = graph.nodes()[candidate[MatrixProductTranspose._a_times_b]]
        return f"{transpose_a.name} -> {a_times_b.name} <- {transpose_b.name}"

    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]
        transpose_a = graph.nodes()[self.subgraph[
            MatrixProductTranspose._transpose_a]]
        _at = graph.nodes()[self.subgraph[MatrixProductTranspose._at]]
        transpose_b = graph.nodes()[self.subgraph[
            MatrixProductTranspose._transpose_b]]
        _bt = graph.nodes()[self.subgraph[MatrixProductTranspose._bt]]
        a_times_b = graph.nodes()[self.subgraph[
            MatrixProductTranspose._a_times_b]]

        for src, src_conn, _, _, memlet in graph.in_edges(transpose_a):
            graph.add_edge(src, src_conn, a_times_b, '_b', memlet)
        graph.remove_node(transpose_a)
        for src, src_conn, _, _, memlet in graph.in_edges(transpose_b):
            graph.add_edge(src, src_conn, a_times_b, '_a', memlet)
        graph.remove_node(transpose_b)
        graph.remove_node(_at)
        graph.remove_node(_bt)

        for _, _, dst, dst_conn, memlet in graph.out_edges(a_times_b):
            subset = dcpy(memlet.subset)
            subset.squeeze()
            size = subset.size()
            shape = [size[1], size[0]]
            break
        tmp_name, tmp_arr = sdfg.add_temp_transient(shape, a_times_b.dtype)
        tmp_acc = graph.add_access(tmp_name)
        transpose_c = blas.Transpose('_Transpose_', a_times_b.dtype)
        for edge in graph.out_edges(a_times_b):
            _, _, dst, dst_conn, memlet = edge
            graph.remove_edge(edge)
            graph.add_edge(transpose_c, '_out', dst, dst_conn, memlet)
        graph.add_edge(a_times_b, '_c', tmp_acc, None,
                       dace.Memlet.from_array(tmp_name, tmp_arr))
        graph.add_edge(tmp_acc, None, transpose_c, '_inp',
                       dace.Memlet.from_array(tmp_name, tmp_arr))
Exemple #20
0
class RedundantArray(pm.Transformation):
    """ Implements the redundant array removal transformation, applied
        when a transient array is copied to and from (to another array),
        but never used anywhere else. """

    _arrays_removed = 0
    _in_array = nodes.AccessNode("_")
    _out_array = nodes.AccessNode("_")

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(RedundantArray._in_array,
                                   RedundantArray._out_array)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        in_array = graph.nodes()[candidate[RedundantArray._in_array]]
        out_array = graph.nodes()[candidate[RedundantArray._out_array]]

        # Ensure out degree is one (only one target, which is out_array)
        if graph.out_degree(in_array) != 1:
            return False

        # Make sure that the candidate is a transient variable
        if not in_array.desc(sdfg).transient:
            return False

        # Make sure that both arrays are using the same storage location
        if in_array.desc(sdfg).storage != out_array.desc(sdfg).storage:
            return False

        # Find occurrences in this and other states
        occurrences = []
        for state in sdfg.nodes():
            occurrences.extend([
                n for n in state.nodes() if isinstance(n, nodes.AccessNode)
                and n.desc(sdfg) == in_array.desc(sdfg)
            ])

        if len(occurrences) > 1:
            return False

        # Only apply if arrays are of same shape (no need to modify subset)
        if len(in_array.desc(sdfg).shape) != len(
                out_array.desc(sdfg).shape) or any(i != o for i, o in zip(
                    in_array.desc(sdfg).shape,
                    out_array.desc(sdfg).shape)):
            return False

        if strict:
            # In strict mode, make sure the memlet covers the removed array
            edge = graph.edges_between(in_array, out_array)[0]
            if any(m != a for m, a in zip(edge.data.subset.size(),
                                          in_array.desc(sdfg).shape)):
                return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        in_array = graph.nodes()[candidate[RedundantArray._in_array]]

        return "Remove " + str(in_array)

    def apply(self, sdfg):
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        graph = sdfg.nodes()[self.state_id]
        in_array = gnode(RedundantArray._in_array)
        out_array = gnode(RedundantArray._out_array)

        for e in graph.in_edges(in_array):
            # Modify all incoming edges to point to out_array
            path = graph.memlet_path(e)
            for pe in path:
                if pe.data.data == in_array.data:
                    pe.data.data = out_array.data

            # Redirect edge to out_array
            graph.remove_edge(e)
            graph.add_edge(e.src, e.src_conn, out_array, e.dst_conn, e.data)

        # Finally, remove in_array node
        graph.remove_node(in_array)
        # TODO: Should the array be removed from the SDFG?
        # del sdfg.arrays[in_array]
        if Config.get_bool("debugprint"):
            RedundantArray._arrays_removed += 1
Exemple #21
0
    def apply(self, sdfg: sd.SDFG):

        #######################################################
        # Step 0: SDFG metadata

        # Find all input and output data descriptors
        input_nodes = []
        output_nodes = []
        global_code_nodes = [[] for _ in sdfg.nodes()]

        for i, state in enumerate(sdfg.nodes()):
            sdict = state.scope_dict()
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and node.desc(sdfg).transient == False):
                    if (state.out_degree(node) > 0
                            and node.data not in input_nodes):
                        # Special case: nodes that lead to top-level dynamic
                        # map ranges must stay on host
                        for e in state.out_edges(node):
                            last_edge = state.memlet_path(e)[-1]
                            if (isinstance(last_edge.dst, nodes.EntryNode)
                                    and last_edge.dst_conn and
                                    not last_edge.dst_conn.startswith('IN_')
                                    and sdict[last_edge.dst] is None):
                                break
                        else:
                            input_nodes.append((node.data, node.desc(sdfg)))
                    if (state.in_degree(node) > 0
                            and node.data not in output_nodes):
                        output_nodes.append((node.data, node.desc(sdfg)))
                elif isinstance(node, nodes.CodeNode) and sdict[node] is None:
                    if not isinstance(node,
                                      (nodes.LibraryNode, nodes.NestedSDFG)):
                        global_code_nodes[i].append(node)

            # Input nodes may also be nodes with WCR memlets and no identity
            for e in state.edges():
                if e.data.wcr is not None:
                    if (e.data.data not in input_nodes
                            and sdfg.arrays[e.data.data].transient == False):
                        input_nodes.append(
                            (e.data.data, sdfg.arrays[e.data.data]))

        start_state = sdfg.start_state
        end_states = sdfg.sink_nodes()

        #######################################################
        # Step 1: Create cloned GPU arrays and replace originals

        cloned_arrays = {}
        for inodename, inode in set(input_nodes):
            if isinstance(inode, data.Scalar):  # Scalars can remain on host
                continue
            if inode.storage == dtypes.StorageType.GPU_Global:
                continue
            newdesc = inode.clone()
            newdesc.storage = dtypes.StorageType.GPU_Global
            newdesc.transient = True
            name = sdfg.add_datadesc('gpu_' + inodename,
                                     newdesc,
                                     find_new_name=True)
            cloned_arrays[inodename] = name

        for onodename, onode in set(output_nodes):
            if onodename in cloned_arrays:
                continue
            if onode.storage == dtypes.StorageType.GPU_Global:
                continue
            newdesc = onode.clone()
            newdesc.storage = dtypes.StorageType.GPU_Global
            newdesc.transient = True
            name = sdfg.add_datadesc('gpu_' + onodename,
                                     newdesc,
                                     find_new_name=True)
            cloned_arrays[onodename] = name

        # Replace nodes
        for state in sdfg.nodes():
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and node.data in cloned_arrays):
                    node.data = cloned_arrays[node.data]

        # Replace memlets
        for state in sdfg.nodes():
            for edge in state.edges():
                if edge.data.data in cloned_arrays:
                    edge.data.data = cloned_arrays[edge.data.data]

        #######################################################
        # Step 2: Create copy-in state
        excluded_copyin = self.exclude_copyin.split(',')

        copyin_state = sdfg.add_state(sdfg.label + '_copyin')
        sdfg.add_edge(copyin_state, start_state, sd.InterstateEdge())

        for nname, desc in dtypes.deduplicate(input_nodes):
            if nname in excluded_copyin or nname not in cloned_arrays:
                continue
            src_array = nodes.AccessNode(nname, debuginfo=desc.debuginfo)
            dst_array = nodes.AccessNode(cloned_arrays[nname],
                                         debuginfo=desc.debuginfo)
            copyin_state.add_node(src_array)
            copyin_state.add_node(dst_array)
            copyin_state.add_nedge(
                src_array, dst_array,
                memlet.Memlet.from_array(src_array.data, src_array.desc(sdfg)))

        #######################################################
        # Step 3: Create copy-out state
        excluded_copyout = self.exclude_copyout.split(',')

        copyout_state = sdfg.add_state(sdfg.label + '_copyout')
        for state in end_states:
            sdfg.add_edge(state, copyout_state, sd.InterstateEdge())

        for nname, desc in dtypes.deduplicate(output_nodes):
            if nname in excluded_copyout or nname not in cloned_arrays:
                continue
            src_array = nodes.AccessNode(cloned_arrays[nname],
                                         debuginfo=desc.debuginfo)
            dst_array = nodes.AccessNode(nname, debuginfo=desc.debuginfo)
            copyout_state.add_node(src_array)
            copyout_state.add_node(dst_array)
            copyout_state.add_nedge(
                src_array, dst_array,
                memlet.Memlet.from_array(dst_array.data, dst_array.desc(sdfg)))

        #######################################################
        # Step 4: Modify transient data storage

        for state in sdfg.nodes():
            sdict = state.scope_dict()
            for node in state.nodes():
                if isinstance(node,
                              nodes.AccessNode) and node.desc(sdfg).transient:
                    nodedesc = node.desc(sdfg)

                    # Special case: nodes that lead to dynamic map ranges must
                    # stay on host
                    if any(
                            isinstance(
                                state.memlet_path(e)[-1].dst, nodes.EntryNode)
                            for e in state.out_edges(node)):
                        continue

                    gpu_storage = [
                        dtypes.StorageType.GPU_Global,
                        dtypes.StorageType.GPU_Shared,
                        dtypes.StorageType.CPU_Pinned
                    ]
                    if sdict[
                            node] is None and nodedesc.storage not in gpu_storage:
                        # NOTE: the cloned arrays match too but it's the same
                        # storage so we don't care
                        nodedesc.storage = dtypes.StorageType.GPU_Global

                        # Try to move allocation/deallocation out of loops
                        if (self.toplevel_trans
                                and not isinstance(nodedesc, data.Stream)):
                            nodedesc.lifetime = dtypes.AllocationLifetime.SDFG
                    elif nodedesc.storage not in gpu_storage:
                        # Make internal transients registers
                        if self.register_trans:
                            nodedesc.storage = dtypes.StorageType.Register

        #######################################################
        # Step 5: Wrap free tasklets and nested SDFGs with a GPU map

        for state, gcodes in zip(sdfg.nodes(), global_code_nodes):
            for gcode in gcodes:
                if gcode.label in self.exclude_tasklets.split(','):
                    continue
                # Create map and connectors
                me, mx = state.add_map(gcode.label + '_gmap',
                                       {gcode.label + '__gmapi': '0:1'},
                                       schedule=dtypes.ScheduleType.GPU_Device)
                # Store in/out edges in lists so that they don't get corrupted
                # when they are removed from the graph
                in_edges = list(state.in_edges(gcode))
                out_edges = list(state.out_edges(gcode))
                me.in_connectors = {('IN_' + e.dst_conn): None
                                    for e in in_edges}
                me.out_connectors = {('OUT_' + e.dst_conn): None
                                     for e in in_edges}
                mx.in_connectors = {('IN_' + e.src_conn): None
                                    for e in out_edges}
                mx.out_connectors = {('OUT_' + e.src_conn): None
                                     for e in out_edges}

                # Create memlets through map
                for e in in_edges:
                    state.remove_edge(e)
                    state.add_edge(e.src, e.src_conn, me, 'IN_' + e.dst_conn,
                                   e.data)
                    state.add_edge(me, 'OUT_' + e.dst_conn, e.dst, e.dst_conn,
                                   e.data)
                for e in out_edges:
                    state.remove_edge(e)
                    state.add_edge(e.src, e.src_conn, mx, 'IN_' + e.src_conn,
                                   e.data)
                    state.add_edge(mx, 'OUT_' + e.src_conn, e.dst, e.dst_conn,
                                   e.data)

                # Map without inputs
                if len(in_edges) == 0:
                    state.add_nedge(me, gcode, memlet.Memlet())
        #######################################################
        # Step 6: Change all top-level maps and library nodes to GPU schedule

        for i, state in enumerate(sdfg.nodes()):
            sdict = state.scope_dict()
            for node in state.nodes():
                if isinstance(node, (nodes.EntryNode, nodes.LibraryNode)):
                    if sdict[node] is None:
                        node.schedule = dtypes.ScheduleType.GPU_Device
                    elif (isinstance(node,
                                     (nodes.EntryNode, nodes.LibraryNode))
                          and self.sequential_innermaps):
                        node.schedule = dtypes.ScheduleType.Sequential

        #######################################################
        # Step 7: Introduce copy-out if data used in outgoing interstate edges

        for state in list(sdfg.nodes()):
            arrays_used = set()
            for e in sdfg.out_edges(state):
                # Used arrays = intersection between symbols and cloned arrays
                arrays_used.update(
                    set(e.data.free_symbols)
                    & set(cloned_arrays.keys()))

            # Create a state and copy out used arrays
            if len(arrays_used) > 0:
                co_state = sdfg.add_state(state.label + '_icopyout')

                # Reconnect outgoing edges to after interim copyout state
                for e in sdfg.out_edges(state):
                    sdutil.change_edge_src(sdfg, state, co_state)
                # Add unconditional edge to interim state
                sdfg.add_edge(state, co_state, sd.InterstateEdge())

                # Add copy-out nodes
                for nname in arrays_used:
                    desc = sdfg.arrays[nname]
                    src_array = nodes.AccessNode(cloned_arrays[nname],
                                                 debuginfo=desc.debuginfo)
                    dst_array = nodes.AccessNode(nname,
                                                 debuginfo=desc.debuginfo)
                    co_state.add_node(src_array)
                    co_state.add_node(dst_array)
                    co_state.add_nedge(
                        src_array, dst_array,
                        memlet.Memlet.from_array(dst_array.data,
                                                 dst_array.desc(sdfg)))

        #######################################################
        # Step 8: Strict transformations
        if not self.strict_transform:
            return

        # Apply strict state fusions greedily.
        sdfg.apply_strict_transformations()
    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]
        node_a = self.node_a(sdfg)
        node_b = self.node_b(sdfg)

        # Determine direction of new memlet
        scope_dict = graph.scope_dict()
        propagate_forward = sd.scope_contains_scope(scope_dict, node_a, node_b)

        array = self.array
        if array is None or len(array) == 0:
            array = next(e.data.data
                         for e in graph.edges_between(node_a, node_b)
                         if e.data.data is not None and e.data.wcr is None)

        original_edge = None
        invariant_memlet = None
        for edge in graph.edges_between(node_a, node_b):
            if array == edge.data.data:
                original_edge = edge
                invariant_memlet = edge.data
                break
        if invariant_memlet is None:
            for edge in graph.edges_between(node_a, node_b):
                original_edge = edge
                invariant_memlet = edge.data
                warnings.warn('Array %s not found! Using array %s instead.' %
                              (array, invariant_memlet.data))
                array = invariant_memlet.data
                break
        if invariant_memlet is None:
            raise NameError('Array %s not found!' % array)

        # Add transient array
        new_data, _ = sdfg.add_array('trans_' + invariant_memlet.data, [
            symbolic.overapproximate(r)
            for r in invariant_memlet.bounding_box_size()
        ],
                                     sdfg.arrays[invariant_memlet.data].dtype,
                                     transient=True,
                                     find_new_name=True)
        data_node = nodes.AccessNode(new_data)

        # Store as fields so that other transformations can use them
        self._local_name = new_data
        self._data_node = data_node

        to_data_mm = copy.deepcopy(invariant_memlet)
        from_data_mm = copy.deepcopy(invariant_memlet)
        offset = subsets.Indices([r[0] for r in invariant_memlet.subset])

        # Reconnect, assuming one edge to the access node
        graph.remove_edge(original_edge)
        if propagate_forward:
            graph.add_edge(node_a, original_edge.src_conn, data_node, None,
                           to_data_mm)
            new_edge = graph.add_edge(data_node, None, node_b,
                                      original_edge.dst_conn, from_data_mm)
        else:
            new_edge = graph.add_edge(node_a, original_edge.src_conn,
                                      data_node, None, to_data_mm)
            graph.add_edge(data_node, None, node_b, original_edge.dst_conn,
                           from_data_mm)

        # Offset all edges in the memlet tree (including the new edge)
        for edge in graph.memlet_tree(new_edge):
            edge.data.subset.offset(offset, True)
            edge.data.data = new_data

        return data_node
Exemple #23
0
    def __init__(self, name, model: onnx.ModelProto, cuda=False):
        """
        Constructs a new ONNXImporter.
        :param name: the name for the SDFG.
        :param model: the model to import.
        :param cuda: if `True`, weights will be passed as cuda arrays.
        """

        graph: onnx.GraphProto = model.graph

        self.sdfg = SDFG(name)
        self.cuda = cuda
        self.state = self.sdfg.add_state()

        # Add all values to the SDFG, check for unsupported ops
        ##########################################

        self.value_infos = {}

        self.inputs = []
        self.outputs = []

        for value, is_input in chain(zip(graph.input, repeat(True)),
                                     zip(graph.output, repeat(False))):
            if not value.HasField("name"):
                raise ValueError("Got input or output without name")
            if is_input:
                self.inputs.append(value.name)
            else:
                self.outputs.append(value.name)

            self.value_infos[value.name] = value
            self._add_value_info(value)

        for value in graph.value_info:
            if not value.HasField("name"):
                raise ValueError("Got input or output without name")
            if value.name not in self.value_infos:
                self.value_infos[value.name] = value

        # add weights
        self.weights = {}
        for init in graph.initializer:
            self._add_constant_tensor(init)

        access_nodes = {}
        self._idx_to_node = []
        for i, node in enumerate(graph.node):
            if not has_onnx_node(node.op_type):
                raise ValueError("Unsupported ONNX operator: '{}'".format(
                    node.op_type))

            # extract the op attributes

            op_attributes = {
                attribute_proto.name: convert_attribute_proto(attribute_proto)
                for attribute_proto in node.attribute
            }

            if node.HasField("name"):
                node_name = clean_onnx_name(node.name)
            else:
                node_name = node.op_type + "_" + str(i)

            # construct the dace node
            op_node = get_onnx_node(node.op_type)(node_name, **op_attributes)
            self.state.add_node(op_node)
            self._idx_to_node.append(op_node)

            for param_idx, (name, is_input) in chain(
                    enumerate(zip(node.input, repeat(True))),
                    enumerate(zip(node.output, repeat(False)))):
                if clean_onnx_name(name) not in self.sdfg.arrays:
                    if name not in self.value_infos:
                        raise ValueError(
                            "Could not find array with name '{}'".format(name))
                    self._add_value_info(self.value_infos[name])

                # get the access node
                if name in access_nodes:
                    access = access_nodes[name]
                    self._update_access_type(access, is_input)
                else:
                    access = nd.AccessNode(
                        clean_onnx_name(name), AccessType.ReadOnly
                        if is_input else AccessType.WriteOnly)
                    self.state.add_node(access)
                    access_nodes[name] = access

                # get the connector name
                params = op_node.schema.inputs if is_input else op_node.schema.outputs
                params_len = len(params)
                if param_idx >= params_len:
                    # this is a variadic parameter. Then the last parameter of the parameter must be variadic.
                    if params[-1].param_type != ONNXParameterType.Variadic:
                        raise ValueError(
                            "Expected the last {i_or_o} parameter to be variadic,"
                            " since the {i_or_o} with idx {param_idx} has more parameters than the schema ({params_len})"
                            .format(i_or_o="input" if is_input else "output",
                                    param_idx=param_idx,
                                    params_len=params_len))
                    conn_name = params[-1].name + "__" + str(param_idx -
                                                             params_len + 1)
                elif params[
                        param_idx].param_type == ONNXParameterType.Variadic:
                    # this is a variadic parameter, and it is within the range of params, so it must be the first
                    # instance of a variadic parameter
                    conn_name = params[param_idx].name + "__0"
                else:
                    conn_name = params[param_idx].name

                data_desc = self.sdfg.arrays[clean_onnx_name(name)]

                # add the connector if required, and add an edge
                if is_input:
                    if conn_name not in op_node.in_connectors:
                        op_node.add_in_connector(conn_name)
                    self.state.add_edge(
                        access, None, op_node, conn_name,
                        dace.Memlet.from_array(clean_onnx_name(name),
                                               data_desc))
                else:
                    if conn_name not in op_node.out_connectors:
                        op_node.add_out_connector(conn_name)

                    self.state.add_edge(
                        op_node, conn_name, access, None,
                        dace.Memlet.from_array(clean_onnx_name(name),
                                               data_desc))

        if self.cuda:
            self.sdfg.apply_strict_transformations()
            self.sdfg.apply_gpu_transformations()
            self.sdfg.apply_strict_transformations()

            # set all gpu transients to be persistent
            for _, _, arr in self.sdfg.arrays_recursive():
                if arr.transient and arr.storage == StorageType.GPU_Global:
                    arr.lifetime = AllocationLifetime.Persistent
class RedundantArrayCopyingIn(pm.Transformation):
    """ Implements the redundant array removal transformation. Removes the first and second access nodeds
        in pattern A -> B -> A
    """

    _arrays_removed = 0
    _in_array = nodes.AccessNode("_")
    _med_array = nodes.AccessNode("_")
    _out_array = nodes.AccessNode("_")

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(
                RedundantArrayCopying._in_array,
                RedundantArrayCopying._med_array,
                RedundantArrayCopying._out_array,
            )
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        in_array = graph.nodes()[candidate[RedundantArrayCopying._in_array]]
        med_array = graph.nodes()[candidate[RedundantArrayCopying._med_array]]
        out_array = graph.nodes()[candidate[RedundantArrayCopying._out_array]]

        # Safety first (could be relaxed)
        if not (graph.out_degree(in_array) == 1 and graph.in_degree(med_array)
                == 1 and graph.out_degree(med_array)):
            return False

        # Make sure that the removal candidates are transient
        if not (in_array.desc(sdfg).transient
                and med_array.desc(sdfg).transient):
            return False

        # Make sure that both arrays are using the same storage location
        if in_array.desc(sdfg).storage != out_array.desc(sdfg).storage:
            return False

        # Only apply if arrays are of same shape (no need to modify memlet subset)
        if len(in_array.desc(sdfg).shape) != len(
                out_array.desc(sdfg).shape) or any(i != o for i, o in zip(
                    in_array.desc(sdfg).shape,
                    out_array.desc(sdfg).shape)):
            return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        in_array = graph.nodes()[candidate[RedundantArrayCopying._in_array]]
        med_array = graph.nodes()[candidate[RedundantArrayCopying._med_array]]

        return "Remove " + str(in_array) + " and " + str(med_array)

    def apply(self, sdfg):
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        graph = sdfg.nodes()[self.state_id]
        in_array = gnode(RedundantArrayCopying._in_array)
        med_array = gnode(RedundantArrayCopying._med_array)
        out_array = gnode(RedundantArrayCopying._out_array)

        # Modify all edges that point to in_array to point to out_array
        for in_edge in graph.in_edges(in_array):

            # Make all memlets that write to in_array write to out_array instead
            tree = graph.memlet_tree(in_edge)
            for te in tree:
                if te.data.data == in_array.data:
                    te.data.data = out_array.data

            # Redirect edge to in_array
            graph.remove_edge(in_edge)
            graph.add_edge(in_edge.src, in_edge.src_conn, out_array, None,
                           in_edge.data)

        graph.remove_node(med_array)
        graph.remove_node(in_array)
Exemple #25
0
    def apply(self, sdfg):
        state = sdfg.nodes()[self.state_id]
        nested_sdfg = state.nodes()[self.subgraph[CopyToDevice._nested_sdfg]]
        storage = self.storage
        created_arrays = set()

        for _, edge in enumerate(state.in_edges(nested_sdfg)):

            src, src_conn, dst, dst_conn, memlet = edge
            dataname = memlet.data
            if dataname is None:
                continue
            memdata = sdfg.arrays[dataname]

            name = 'device_' + dataname + '_in'
            if name not in created_arrays:
                if isinstance(memdata, data.Array):
                    name, _ = sdfg.add_array(
                        'device_' + dataname + '_in',
                        shape=[
                            symbolic.overapproximate(r)
                            for r in memlet.bounding_box_size()
                        ],
                        dtype=memdata.dtype,
                        transient=True,
                        storage=storage,
                        find_new_name=True)
                elif isinstance(memdata, data.Scalar):
                    name, _ = sdfg.add_scalar('device_' + dataname + '_in',
                                              dtype=memdata.dtype,
                                              transient=True,
                                              storage=storage,
                                              find_new_name=True)
                else:
                    raise NotImplementedError
                created_arrays.add(name)

            data_node = nodes.AccessNode(name)

            to_data_mm = dcpy(memlet)
            from_data_mm = dcpy(memlet)
            from_data_mm.data = name
            offset = []
            for ind, r in enumerate(memlet.subset):
                offset.append(r[0])
                if isinstance(memlet.subset[ind], tuple):
                    begin = memlet.subset[ind][0] - r[0]
                    end = memlet.subset[ind][1] - r[0]
                    step = memlet.subset[ind][2]
                    from_data_mm.subset[ind] = (begin, end, step)
                else:
                    from_data_mm.subset[ind] -= r[0]

            state.remove_edge(edge)
            state.add_edge(src, src_conn, data_node, None, to_data_mm)
            state.add_edge(data_node, None, dst, dst_conn, from_data_mm)

        for _, edge in enumerate(state.out_edges(nested_sdfg)):

            src, src_conn, dst, dst_conn, memlet = edge
            dataname = memlet.data
            if dataname is None:
                continue
            memdata = sdfg.arrays[dataname]

            name = 'device_' + dataname + '_out'
            if name not in created_arrays:
                if isinstance(memdata, data.Array):
                    name, _ = sdfg.add_array(
                        name,
                        shape=[
                            symbolic.overapproximate(r)
                            for r in memlet.bounding_box_size()
                        ],
                        dtype=memdata.dtype,
                        transient=True,
                        storage=storage,
                        find_new_name=True)
                elif isinstance(memdata, data.Scalar):
                    name, _ = sdfg.add_scalar(name,
                                              dtype=memdata.dtype,
                                              transient=True,
                                              storage=storage)
                else:
                    raise NotImplementedError
                created_arrays.add(name)

            data_node = nodes.AccessNode(name)

            to_data_mm = dcpy(memlet)
            from_data_mm = dcpy(memlet)
            to_data_mm.data = name
            offset = []
            for ind, r in enumerate(memlet.subset):
                offset.append(r[0])
                if isinstance(memlet.subset[ind], tuple):
                    begin = memlet.subset[ind][0] - r[0]
                    end = memlet.subset[ind][1] - r[0]
                    step = memlet.subset[ind][2]
                    to_data_mm.subset[ind] = (begin, end, step)
                else:
                    to_data_mm.subset[ind] -= r[0]

            state.remove_edge(edge)
            state.add_edge(src, src_conn, data_node, None, to_data_mm)
            state.add_edge(data_node, None, dst, dst_conn, from_data_mm)

        # Change storage for all data inside nested SDFG to device.
        change_storage(nested_sdfg.sdfg, storage)
Exemple #26
0
class OnTheFlyMapFusion(Transformation):
    _first_map_entry = nodes.MapEntry(nodes.Map('', [], []))
    _first_tasklet = nodes.Tasklet('')
    _first_map_exit = nodes.MapExit(nodes.Map('', [], []))
    _array_access = nodes.AccessNode('')
    _second_map_entry = nodes.MapEntry(nodes.Map('', [], []))
    _second_tasklet = nodes.Tasklet('')

    @staticmethod
    def expressions():
        return [
            sdutils.node_path_graph(OnTheFlyMapFusion._first_map_entry,
                                    OnTheFlyMapFusion._first_tasklet,
                                    OnTheFlyMapFusion._first_map_exit,
                                    OnTheFlyMapFusion._array_access,
                                    OnTheFlyMapFusion._second_map_entry,
                                    OnTheFlyMapFusion._second_tasklet)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        first_map_entry = graph.node(
            candidate[OnTheFlyMapFusion._first_map_entry])
        first_tasklet = graph.node(candidate[OnTheFlyMapFusion._first_tasklet])
        first_map_exit = graph.node(
            candidate[OnTheFlyMapFusion._first_map_exit])
        array_access = graph.node(candidate[OnTheFlyMapFusion._array_access])

        if len(first_map_exit.in_connectors) != 1:
            return False

        if (graph.in_degree(array_access) != 1
                or graph.out_degree(array_access) != 1):
            return False
        return True

    @staticmethod
    def _memlet_offsets(base_memlet, offset_memlet):
        """ Compute subset offset of `offset_memlet` relative to `base_memlet`.
        """
        def offset(base_range, offset_range):
            b0, e0, s0 = base_range
            b1, e1, s1 = offset_range
            assert e1 - e0 == b1 - b0 and s0 == s1
            return int(e1 - e0)

        return tuple(
            offset(b, o) for b, o in zip(base_memlet.subset.ranges,
                                         offset_memlet.subset.ranges))

    @staticmethod
    def _update_map_connectors(state, array_access, first_map_entry,
                               second_map_entry):
        """ Remove unused connector (of the to-be-replaced array) from second
            map entry, add new connectors to second map entry for the inputs
            used in the first map’s tasklets.
        """
        # Remove edges and connectors from arrays access to second map entry
        for edge in state.edges_between(array_access, second_map_entry):
            state.remove_edge_and_connectors(edge)
        state.remove_node(array_access)

        # Add new connectors to second map
        # TODO: implement for the general case with random naming
        for edge in state.in_edges(first_map_entry):
            if second_map_entry.add_in_connector(edge.dst_conn):
                state.add_edge(edge.src, edge.src_conn, second_map_entry,
                               edge.dst_conn, edge.data)

    @staticmethod
    def _read_offsets(state, array_name, first_map_exit, second_map_entry):
        """ Compute offsets of read accesses in second map.
        """
        # Get output memlet of first tasklet
        output_edges = state.in_edges(first_map_exit)
        assert len(output_edges) == 1
        write_memlet = output_edges[0].data

        # Find read offsets by looping over second map entry connectors
        offsets = defaultdict(list)
        for edge in state.out_edges(second_map_entry):
            if edge.data.data == array_name:
                second_map_entry.remove_out_connector(edge.src_conn)
                state.remove_edge(edge)
                offset = OnTheFlyMapFusion._memlet_offsets(
                    write_memlet, edge.data)
                offsets[offset].append(edge)

        return offsets

    @staticmethod
    def _copy_first_map_contents(state, first_map_entry, first_map_exit):
        nodes = list(
            state.all_nodes_between(first_map_entry, first_map_exit) -
            {first_map_entry})
        new_nodes = [copy.deepcopy(node) for node in nodes]
        for node in new_nodes:
            state.add_node(node)
        id_map = {
            state.node_id(old): state.node_id(new)
            for old, new in zip(nodes, new_nodes)
        }

        def map(node):
            return state.node(id_map[state.node_id(node)])

        for edge in state.edges():
            if edge.src in nodes or edge.dst in nodes:
                src = map(edge.src) if edge.src in nodes else edge.src
                dst = map(edge.dst) if edge.dst in nodes else edge.dst
                state.add_edge(src, edge.src_conn, dst, edge.dst_conn,
                               copy.deepcopy(edge.data))

        return new_nodes

    def _replicate_first_map(self, sdfg, array_access, first_map_entry,
                             first_map_exit, second_map_entry):
        """ Replicate tasklet of first map for reach read access in second map.
        """
        state = sdfg.node(self.state_id)
        array_name = array_access.data
        array = sdfg.arrays[array_name]

        read_offsets = self._read_offsets(state, array_name, first_map_exit,
                                          second_map_entry)

        # Replicate first map tasklets once for each read offset access and
        # connect them to other tasklets accordingly
        for offset, edges in read_offsets.items():
            nodes = self._copy_first_map_contents(state, first_map_entry,
                                                  first_map_exit)
            tmp_name = sdfg.temp_data_name()
            sdfg.add_scalar(tmp_name, array.dtype, transient=True)
            tmp_access = state.add_access(tmp_name)

            for node in nodes:
                for edge in state.edges_between(node, first_map_exit):
                    state.add_edge(edge.src, edge.src_conn, tmp_access, None,
                                   dace.Memlet(tmp_name))
                    state.remove_edge(edge)

                for edge in state.edges_between(first_map_entry, node):
                    memlet = copy.deepcopy(edge.data)
                    memlet.subset.offset(list(offset), negative=False)
                    second_map_entry.add_out_connector(edge.src_conn)
                    state.add_edge(second_map_entry, edge.src_conn, node,
                                   edge.dst_conn, memlet)
                    state.remove_edge(edge)

            for edge in edges:
                state.add_edge(tmp_access, None, edge.dst, edge.dst_conn,
                               dace.Memlet(tmp_name))

    def apply(self, sdfg: dace.SDFG):
        state = sdfg.node(self.state_id)
        first_map_entry = state.node(self.subgraph[self._first_map_entry])
        first_tasklet = state.node(self.subgraph[self._first_tasklet])
        first_map_exit = state.node(self.subgraph[self._first_map_exit])
        array_access = state.node(self.subgraph[self._array_access])
        second_map_entry = state.node(self.subgraph[self._second_map_entry])

        self._update_map_connectors(state, array_access, first_map_entry,
                                    second_map_entry)

        self._replicate_first_map(sdfg, array_access, first_map_entry,
                                  first_map_exit, second_map_entry)

        state.remove_nodes_from(
            state.all_nodes_between(first_map_entry, first_map_exit)
            | {first_map_exit})