예제 #1
0
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """Decide whether the matched input array can be removed.

        ``candidate`` maps ``RedundantArray._in_array`` and
        ``RedundantArray._out_array`` to node indices of ``graph``. The match
        is accepted only when the input array has exactly one outgoing edge,
        is transient, agrees with the output array in storage, descriptor
        type and shape, and when at most one access node with an equal
        descriptor / inter-state edge referencing its name exists in ``sdfg``.

        :param graph: Graph in which the pattern was matched.
        :param candidate: Mapping from pattern nodes to node indices.
        :param expr_index: Not used by this check.
        :param sdfg: SDFG used to resolve the data descriptors.
        :param strict: If True, the memlet between the two arrays must also
                       have the same sizes as the input array's shape.
        :return: True if the transformation may be applied, False otherwise.
        """
        in_array = graph.nodes()[candidate[RedundantArray._in_array]]
        out_array = graph.nodes()[candidate[RedundantArray._out_array]]

        src_desc = in_array.desc(sdfg)
        dst_desc = out_array.desc(sdfg)

        # The input array must feed a single target and must be a transient
        if graph.out_degree(in_array) != 1 or not src_desc.transient:
            return False

        # Both descriptors need the same storage location and the same
        # concrete type (e.g., Stream->Stream)
        if (src_desc.storage != dst_desc.storage
                or type(src_desc) != type(dst_desc)):
            return False

        # Collect every use of the input array: access nodes in all states...
        uses = [
            node for state in sdfg.nodes() for node in state.nodes()
            if isinstance(node, nodes.AccessNode)
            and node.desc(sdfg) == src_desc
        ]
        # ...and inter-state edges that mention it by name
        uses += [
            isedge for isedge in sdfg.edges()
            if in_array.data in isedge.data.free_symbols
        ]
        if len(uses) > 1:
            return False

        # Shapes have to match exactly, so that no subset needs rewriting
        src_shape, dst_shape = src_desc.shape, dst_desc.shape
        if len(src_shape) != len(dst_shape):
            return False
        if any(s != d for s, d in zip(src_shape, dst_shape)):
            return False

        if not strict:
            return True

        # Strict mode: the memlet must cover the array that gets removed
        edge = graph.edges_between(in_array, out_array)[0]
        return not any(
            extent != dim
            for extent, dim in zip(edge.data.subset.size(), src_shape))
예제 #2
0
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """Reject nested SDFG nodes connected to streams or array-sized WCR.

        Every edge attached to the matched node is inspected: the match fails
        if the memlet path of an edge starts or ends at an access node whose
        descriptor is a ``data.Stream``, or if the edge carries a
        write-conflict resolution while moving more than one element.

        :param graph: Graph in which the pattern was matched.
        :param candidate: Mapping from pattern nodes to node indices.
        :param expr_index: Not used by this check.
        :param sdfg: SDFG whose ``arrays`` hold the data descriptors.
        :param strict: Not used by this check.
        :return: True if no edge violates the constraints, False otherwise.
        """
        nested_sdfg = graph.nodes()[candidate[CopyToDevice._nested_sdfg]]

        def _is_stream_access(node):
            # True only for access nodes backed by a Stream descriptor
            return (isinstance(node, nodes.AccessNode)
                    and isinstance(sdfg.arrays[node.data], data.Stream))

        for edge in graph.all_edges(nested_sdfg):
            path = graph.memlet_path(edge)

            # Stream inputs/outputs are not allowed
            if _is_stream_access(path[0].src) or _is_stream_access(
                    path[-1].dst):
                return False

            # WCR outputs are only allowed for single elements
            memlet = edge.data
            if memlet.wcr is not None and memlet.subset.num_elements() != 1:
                return False

        return True
예제 #3
0
 def match_to_str(graph, candidate):
     """Return the label of the matched nested SDFG node.

     :param graph: Graph containing the matched node.
     :param candidate: Mapping from pattern nodes to node indices in ``graph``.
     :return: The ``label`` attribute of the matched node.
     """
     all_nodes = graph.nodes()
     matched = all_nodes[candidate[CopyToDevice._nested_sdfg]]
     return matched.label
예제 #4
0
파일: reduce.py 프로젝트: fthaler/dace
    def expansion(node: 'Reduce', state: SDFGState, sdfg: SDFG):
        """ Create a map around the BlockReduce node
            with in and out transients in registers
            and an if tasklet that redirects the output
            of thread 0 to a shared memory transient

            :param node: The reduce node to expand. Its ``axes`` are reset to
                         ``None`` and its ``implementation`` is set to
                         ``'CUDA (block)'`` before the nested expansion.
            :param state: The state containing ``node``; it is modified
                          in place.
            :param sdfg: The SDFG containing ``state``.
            :return: The result of ``ExpandReduceCUDABlock.expansion`` on the
                     rewritten node.
        """
        ### define some useful vars
        graph = state
        reduce_node = node
        in_edge = graph.in_edges(reduce_node)[0]
        out_edge = graph.out_edges(reduce_node)[0]

        axes = reduce_node.axes
        ### add a map that encloses the reduce node
        # One map parameter 'i<k>' is created per input dimension k that is
        # listed in the reduction axes, spanning that dimension's range.
        (new_entry, new_exit) = graph.add_map(
            name='inner_reduce_block',
            ndrange={
                'i' + str(i): f'{rng[0]}:{rng[1]+1}:{rng[2]}'
                for (i, rng) in enumerate(in_edge.data.subset) if i in axes
            },
            schedule=dtypes.ScheduleType.Default)

        # Route the original input/output edges through the new map scope
        ExpandReduceCUDABlockAll.redirect_edge(graph,
                                               in_edge,
                                               new_dst=new_entry)
        ExpandReduceCUDABlockAll.redirect_edge(graph,
                                               out_edge,
                                               new_src=new_exit)

        # Inside the map, reduced dimensions are indexed by the first map
        # parameter; all other dimensions keep their original range.
        subset_in = subsets.Range([
            in_edge.data.subset[i] if i not in axes else
            (new_entry.map.params[0], new_entry.map.params[0], 1)
            for i in range(len(in_edge.data.subset))
        ])
        memlet_in = dace.Memlet(data=in_edge.data.data,
                                volume=1,
                                subset=subset_in)
        memlet_out = dcpy(out_edge.data)
        graph.add_edge(u=new_entry,
                       u_connector=None,
                       v=reduce_node,
                       v_connector=None,
                       memlet=memlet_in)
        graph.add_edge(u=reduce_node,
                       u_connector=None,
                       v=new_exit,
                       v_connector=None,
                       memlet=memlet_out)

        ### add in and out local storage
        from dace.transformation.dataflow.local_storage import LocalStorage

        in_local_storage_subgraph = {
            LocalStorage._node_a: graph.nodes().index(new_entry),
            LocalStorage._node_b: graph.nodes().index(reduce_node)
        }
        out_local_storage_subgraph = {
            LocalStorage._node_a: graph.nodes().index(reduce_node),
            LocalStorage._node_b: graph.nodes().index(new_exit)
        }

        local_storage = LocalStorage(sdfg.sdfg_id,
                                     sdfg.nodes().index(state),
                                     in_local_storage_subgraph, 0)

        local_storage.array = in_edge.data.data
        local_storage.apply(sdfg)
        in_transient = local_storage._data_node
        sdfg.data(in_transient.data).storage = dtypes.StorageType.Register

        local_storage = LocalStorage(sdfg.sdfg_id,
                                     sdfg.nodes().index(state),
                                     out_local_storage_subgraph, 0)
        local_storage.array = out_edge.data.data
        local_storage.apply(sdfg)
        out_transient = local_storage._data_node
        sdfg.data(out_transient.data).storage = dtypes.StorageType.Register

        # hack: swap edges as local_storage does not work correctly here
        # as subsets and data get assigned wrongly (should be swapped)
        # NOTE: If local_storage ever changes, this will not work any more
        e1 = graph.in_edges(out_transient)[0]
        e2 = graph.out_edges(out_transient)[0]
        e1.data.data = dcpy(e2.data.data)
        e1.data.subset = dcpy(e2.data.subset)

        ### add an if tasklet and diverge
        # The guard is the conjunction of "<param>== 0" over the parameters
        # the map actually has, so the separators always match the number of
        # terms (no dangling 'and' if an axis produced no map parameter).
        guard = ' and '.join(param + '== 0' for param in new_entry.map.params)
        code = 'if ' + guard + ':\n' + '\tout=inp'

        tasklet_node = graph.add_tasklet(name='block_reduce_write',
                                         inputs=['inp'],
                                         outputs=['out'],
                                         code=code)

        edge_out_outtrans = graph.out_edges(out_transient)[0]
        edge_out_innerexit = graph.out_edges(new_exit)[0]
        ExpandReduceCUDABlockAll.redirect_edge(graph,
                                               edge_out_outtrans,
                                               new_dst=tasklet_node,
                                               new_dst_conn='inp')
        e = graph.add_edge(u=tasklet_node,
                           u_connector='out',
                           v=new_exit,
                           v_connector=None,
                           memlet=dcpy(edge_out_innerexit.data))
        # set dynamic with volume 0 FORNOW
        e.data.volume = 0
        e.data.dynamic = True

        ### set reduce_node axes to all (needed)
        reduce_node.axes = None

        # fill scope connectors, done.
        sdfg.fill_scope_connectors()

        # finally, change the implementation to cuda (block)
        # itself and expand again.
        reduce_node.implementation = 'CUDA (block)'
        sub_expansion = ExpandReduceCUDABlock(0, 0, {}, 0)
        return sub_expansion.expansion(node=node, state=state, sdfg=sdfg)
예제 #5
0
 def gnode(nname):
     """Return the node of ``graph`` indexed by ``self.subgraph[nname]``."""
     all_nodes = graph.nodes()
     node_index = self.subgraph[nname]
     return all_nodes[node_index]
예제 #6
0
    def match_to_str(graph, candidate):
        """Describe the match by naming the output array that is removed.

        :param graph: Graph containing the matched nodes.
        :param candidate: Mapping from pattern nodes to node indices.
        :return: ``"Remove "`` followed by the string form of the node.
        """
        all_nodes = graph.nodes()
        target = all_nodes[candidate[RedundantSecondArray._out_array]]
        return f"Remove {target!s}"
예제 #7
0
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """Decide whether the matched output (second) array can be removed.

        Assumed pattern: ``A -- e1 --> B -- e2 --> others``, where ``B`` is
        the removal candidate. ``B`` must have a single incoming edge, be
        transient, share storage and descriptor type with ``A``, and appear
        at most once in ``sdfg``. In addition, the subset written into ``B``
        through ``e1`` must cover the subset read by every outgoing edge.

        :param graph: Graph in which the pattern was matched.
        :param candidate: Mapping from pattern nodes to node indices.
        :param expr_index: Not used by this check.
        :param sdfg: SDFG used to resolve the data descriptors.
        :param strict: If True, both arrays must have the same number of
                       dimensions.
        :return: True if the transformation may be applied, False otherwise.
        """
        in_array = graph.nodes()[candidate[RedundantSecondArray._in_array]]
        out_array = graph.nodes()[candidate[RedundantSecondArray._out_array]]

        src_desc = in_array.desc(sdfg)
        dst_desc = out_array.desc(sdfg)

        # out_array must have in_array as its only source and be a transient
        if graph.in_degree(out_array) != 1 or not dst_desc.transient:
            return False

        # Strict mode requires identical dimensionality
        if strict and len(src_desc.shape) != len(dst_desc.shape):
            return False

        # Same storage location and same concrete descriptor type
        # (e.g., Stream->Stream)
        if (src_desc.storage != dst_desc.storage
                or type(src_desc) != type(dst_desc)):
            return False

        # Gather uses of out_array in every state and on inter-state edges
        uses = [
            node for state in sdfg.nodes() for node in state.nodes()
            if isinstance(node, nodes.AccessNode)
            and node.desc(sdfg) == dst_desc
        ]
        uses += [
            isedge for isedge in sdfg.edges()
            if out_array.data in isedge.data.free_symbols
        ]
        if len(uses) > 1:
            return False

        # Subset of B that is written through e1
        copy_edge = graph.edges_between(in_array, out_array)[0]
        try:
            _, written = _validate_subsets(copy_edge, sdfg.arrays)
        except NotImplementedError:
            return False

        for consumer in graph.out_edges(out_array):
            # Subset of B that this outgoing edge reads
            try:
                read, _ = _validate_subsets(consumer, sdfg.arrays)
            except NotImplementedError:
                return False

            # The data copied into B must cover what is read from it
            if not written.covers(read):
                return False

            # Validate the remaining edges of the memlet tree
            # (should not be needed for valid SDFGs)
            for tree_edge in graph.memlet_tree(consumer):
                if tree_edge is consumer:
                    continue
                try:
                    _validate_subsets(tree_edge,
                                      sdfg.arrays,
                                      src_name=out_array.data)
                except NotImplementedError:
                    return False

        return True
예제 #8
0
    def match_to_str(graph, candidate):
        """Describe the match by naming the input array that is removed.

        :param graph: Graph containing the matched nodes.
        :param candidate: Mapping from pattern nodes to node indices.
        :return: ``"Remove "`` followed by the string form of the node.
        """
        node_list = graph.nodes()
        removed = node_list[candidate[RedundantArray._in_array]]
        return f"Remove {removed!s}"
예제 #9
0
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """Decide whether the matched output (second) array can be removed.

        Assumed pattern: ``A -- e1 --> B -- e2 --> others``, where ``A`` is
        ``in_array`` and ``B`` (``out_array``) is the removal candidate.

        Checks performed in every mode:

        * ``B`` has exactly one incoming edge and is transient;
        * the subsets of ``e1`` can be extracted by ``_validate_subsets``;
        * both descriptors agree in storage and location, and their types
          are compatible (with special handling for ``data.View``);
        * at most one access node with ``B``'s descriptor / inter-state edge
          referencing ``B`` exists in ``sdfg``;
        * the subset of ``B`` written through ``e1`` covers the subset read
          by every outgoing edge of ``B``.

        Additional checks when ``strict`` is True: the memlet sizes match
        ``B``'s shape, removing ``B`` does not make a library node read and
        write the same data, and no data race is introduced with other
        access nodes of ``A`` in the same graph.

        Whenever ``_validate_subsets`` raises ``NotImplementedError`` the
        match is reported as not applicable (False).

        :param graph: Graph in which the pattern was matched.
        :param candidate: Mapping from pattern nodes to node indices.
        :param expr_index: Not used by this check.
        :param sdfg: SDFG used to resolve the data descriptors.
        :param strict: Enables the additional safety checks listed above.
        :return: True if the transformation may be applied, False otherwise.
        """
        in_array = graph.nodes()[candidate[RedundantSecondArray._in_array]]
        out_array = graph.nodes()[candidate[RedundantSecondArray._out_array]]

        in_desc = in_array.desc(sdfg)
        out_desc = out_array.desc(sdfg)

        # Ensure in degree is one (only one source, which is in_array)
        if graph.in_degree(out_array) != 1:
            return False

        # Make sure that the candidate is a transient variable
        if not out_desc.transient:
            return False

        # 1. Get edge e1 and extract/validate subsets for arrays A and B.
        # Subsets that cannot be validated make the match inapplicable, the
        # same way it is handled for the e2/e3 edges further below.
        e1 = graph.edges_between(in_array, out_array)[0]
        try:
            a_subset, b1_subset = _validate_subsets(e1, sdfg.arrays)
        except NotImplementedError:
            return False

        if strict:
            # In strict mode, make sure the memlet covers the removed array
            if not b1_subset:
                return False
            # Compare sizes after dropping unit-sized dimensions on both sides
            subset = copy.deepcopy(b1_subset)
            subset.squeeze()
            shape = [sz for sz in out_desc.shape if sz != 1]
            if any(m != a for m, a in zip(subset.size(), shape)):
                return False

            # NOTE: Library node check
            # The transformation must not apply in strict mode if out_array is
            # not a view, is input to a library node, and an access or a view
            # of in_desc is also output to the same library node.
            # The reason is that the application of the transformation will lead
            # to in_desc being both input and output of the library node.
            # We do not know if this is safe.

            # First find the true in_desc (in case in_array is a view).
            true_in_desc = in_desc
            if isinstance(in_desc, data.View):
                e = sdutil.get_view_edge(graph, in_array)
                if not e:
                    return False
                true_in_desc = sdfg.arrays[e.dst.data]

            if not isinstance(out_desc, data.View):

                # Collect edges that lead from out_array into a library node,
                # either directly or through a view of out_array.
                edges_to_check = []
                for a in graph.out_edges(out_array):
                    if isinstance(a.dst, nodes.LibraryNode):
                        edges_to_check.append(a)
                    elif (isinstance(a.dst, nodes.AccessNode)
                          and isinstance(sdfg.arrays[a.dst.data], data.View)):
                        for b in graph.out_edges(a.dst):
                            edges_to_check.append(graph.memlet_path(b)[-1])

                for a in edges_to_check:
                    if isinstance(a.dst, nodes.LibraryNode):
                        for b in graph.out_edges(a.dst):
                            if isinstance(b.dst, nodes.AccessNode):
                                desc = sdfg.arrays[b.dst.data]
                                # NOTE(review): the identity comparison with
                                # true_in_desc only runs for View outputs,
                                # while the note above also mentions plain
                                # accesses -- confirm this is intended.
                                if isinstance(desc, data.View):
                                    e = sdutil.get_view_edge(graph, b.dst)
                                    if not e:
                                        return False
                                    desc = sdfg.arrays[e.dst.data]
                                    if desc is true_in_desc:
                                        return False

            # In strict mode, check if the state has two or more access nodes
            # for in_array and at least one of them is a write access. There
            # might be a RW, WR, or WW dependency.
            accesses = [
                n for n in graph.nodes() if isinstance(n, nodes.AccessNode)
                and n.desc(sdfg) == in_desc and n is not in_array
            ]
            if len(accesses) > 0:
                if (graph.in_degree(in_array) > 0
                        or any(graph.in_degree(a) > 0 for a in accesses)):
                    # We need to ensure that a data race will not happen if we
                    # remove in_array.
                    # First, we simplify the graph
                    G = helpers.simplify_state(graph)
                    # Loop over the accesses
                    for a in accesses:
                        subsets_intersect = False
                        for e in graph.in_edges(a):
                            # A write whose subset cannot be validated cannot
                            # be proven race-free, so the match is rejected.
                            try:
                                _, subset = _validate_subsets(e,
                                                              sdfg.arrays,
                                                              dst_name=a.data)
                            except NotImplementedError:
                                return False
                            res = subsets.intersects(a_subset, subset)
                            # None is treated like an intersection
                            if res == True or res is None:
                                subsets_intersect = True
                                break
                        if not subsets_intersect:
                            continue
                        # Fall back to the unsimplified graph when a node is
                        # missing from the simplified one.
                        try:
                            has_bward_path = nx.has_path(G, a, in_array)
                        except NodeNotFound:
                            has_bward_path = nx.has_path(graph.nx, a, in_array)
                        try:
                            has_fward_path = nx.has_path(G, in_array, a)
                        except NodeNotFound:
                            has_fward_path = nx.has_path(graph.nx, in_array, a)
                        # If there is no path between the access nodes
                        # (disconnected components), then it is definitely
                        # possible to have data races. Abort.
                        if not (has_bward_path or has_fward_path):
                            return False
                        # If there is a forward path then a must not be a direct
                        # successor of in_array.
                        if has_fward_path and a in G.successors(in_array):
                            for src, _ in G.in_edges(a):
                                if src is in_array:
                                    continue
                                if (nx.has_path(G, in_array, src)
                                        and src != out_array):
                                    continue
                                return False

        # Make sure that both arrays are using the same storage location
        # and are of the same type (e.g., Stream->Stream)
        if in_desc.storage != out_desc.storage:
            return False
        if in_desc.location != out_desc.location:
            return False
        if type(in_desc) != type(out_desc):
            if isinstance(in_desc, data.View):
                # Case View -> Access
                # If the View points to the Access (and has a different shape?)
                # then we should (probably) not remove the Access.
                e = sdutil.get_view_edge(graph, in_array)
                if e and e.dst is out_array and in_desc.shape != out_desc.shape:
                    return False
                # Check that the View's immediate ancestors are Accesses.
                # Otherwise, the application of the transformation will result
                # in an ambiguous View.
                view_ancestors_desc = [
                    e.src.desc(sdfg)
                    if isinstance(e.src, nodes.AccessNode) else None
                    for e in graph.in_edges(in_array)
                ]
                if any([
                        not desc or isinstance(desc, data.View)
                        for desc in view_ancestors_desc
                ]):
                    return False
            elif isinstance(out_desc, data.View):
                # Case Access -> View
                # If the View points to the Access and has the same shape,
                # it can be removed
                e = sdutil.get_view_edge(graph, out_array)
                if e and e.src is in_array and in_desc.shape == out_desc.shape:
                    return True
                return False
            else:
                # Something else, for example, Stream
                return False
        else:
            # Two views connected to each other
            if isinstance(in_desc, data.View):
                return False

        # Find occurrences in this and other states
        occurrences = []
        for state in sdfg.nodes():
            occurrences.extend([
                n for n in state.nodes()
                if isinstance(n, nodes.AccessNode) and n.desc(sdfg) == out_desc
            ])
        for isedge in sdfg.edges():
            if out_array.data in isedge.data.free_symbols:
                occurrences.append(isedge)

        if len(occurrences) > 1:
            return False

        # Check whether the data copied from the first datanode cover
        # the subsets of all the output edges of the second datanode.
        # We assume the following pattern: A -- e1 --> B -- e2 --> others

        # 2. Iterate over the e2 edges
        for e2 in graph.out_edges(out_array):
            # 2-a. Extract/validate subsets for array B and others
            try:
                b2_subset, _ = _validate_subsets(e2, sdfg.arrays)
            except NotImplementedError:
                return False
            # 2-b. Check where b1_subset covers b2_subset
            if not b1_subset.covers(b2_subset):
                return False
            # 2-c. Validate subsets in memlet tree
            # (should not be needed for valid SDFGs)
            path = graph.memlet_tree(e2)
            for e3 in path:
                if e3 is not e2:
                    try:
                        _validate_subsets(e3,
                                          sdfg.arrays,
                                          src_name=out_array.data)
                    except NotImplementedError:
                        return False

        return True