Example #1
0
    def _build_dag(self, cgroups, prefix):
        """
        A DAG representing the data dependences across the ClusterGroups within
        a given scope.
        """
        prefix = {i.dim for i in as_tuple(prefix)}

        dag = DAG(nodes=cgroups)
        for n, cg0 in enumerate(cgroups):
            for cg1 in cgroups[n + 1:]:
                # A Scope to compute all cross-ClusterGroup anti-dependences
                rule = lambda i: i.is_cross
                scope = Scope(exprs=cg0.exprs + cg1.exprs, rules=rule)

                # Optimization: we exploit the following property:
                # no prefix => (edge <=> at least one (any) dependence)
                # to jump out of this potentially expensive loop as quickly as possible
                if not prefix and any(scope.d_all_gen()):
                    dag.add_edge(cg0, cg1)

                # Anti-dependences along `prefix` break the execution flow
                # (intuitively, "the loop nests are to be kept separated")
                # * All ClusterGroups between `cg0` and `cg1` must precede `cg1`
                # * All ClusterGroups after `cg1` cannot precede `cg1`
                elif any(i.cause & prefix for i in scope.d_anti_gen()):
                    for cg2 in cgroups[n:cgroups.index(cg1)]:
                        dag.add_edge(cg2, cg1)
                    for cg2 in cgroups[cgroups.index(cg1) + 1:]:
                        dag.add_edge(cg1, cg2)
                    break

                # Any anti- and iaw-dependences impose that `cg1` follows `cg0`
                # while not being its immediate successor (unless it already is),
                # to avoid they are fused together (thus breaking the dependence)
                # TODO: the "not being its immediate successor" part *seems* to be
                # a work around to the fact that any two Clusters characterized
                # by anti-dependence should have been given a different stamp,
                # and same for guarded Clusters, but that is not the case (yet)
                elif any(scope.d_anti_gen()) or\
                        any(i.is_iaw for i in scope.d_output_gen()):
                    dag.add_edge(cg0, cg1)
                    index = cgroups.index(cg1) - 1
                    if index > n and self._key(cg0) == self._key(cg1):
                        dag.add_edge(cg0, cgroups[index])
                        dag.add_edge(cgroups[index], cg1)

                # Any flow-dependences along an inner Dimension (i.e., a Dimension
                # that doesn't appear in `prefix`) impose that `cg1` follows `cg0`
                elif any(not (i.cause and i.cause & prefix)
                         for i in scope.d_flow_gen()):
                    dag.add_edge(cg0, cg1)

                # Clearly, output dependences must be honored
                elif any(scope.d_output_gen()):
                    dag.add_edge(cg0, cg1)

        return dag
Example #2
0
def _mark_overlappable(iet):
    """
    Detect the HaloSpots allowing computation/communication overlap and turn
    them into OverlappableHaloSpots.
    """
    # Analysis
    found = []
    for hs in FindNodes(HaloSpot).visit(iet):
        expressions = FindNodes(Expression).visit(hs)
        scope = Scope([i.expr for i in expressions])

        test = True

        # Comp/comm overlaps is legal only if the OWNED regions can grow
        # arbitrarly, which means all of the dependences must be carried
        # along a non-halo Dimension
        for dep in scope.d_all_gen():
            if dep.function in hs.functions:
                if not dep.cause:
                    # E.g. increments
                    # for x
                    #   for y
                    #     f[x, y] = f[x, y] + 1
                    test = False
                    break
                elif dep.cause & hs.dimensions:
                    # E.g. dependences across PARALLEL iterations
                    # for x
                    #   for y
                    #     ... = ... f[x, y-1] ...
                    #   for y
                    #     f[x, y] = ...
                    test = False
                    break

        # Heuristic: avoid comp/comm overlap for sparse Iteration nests
        test = test and all(i.is_Affine
                            for i in FindNodes(Iteration).visit(hs))

        if test:
            found.append(hs)

    # Transform the IET replacing HaloSpots with OverlappableHaloSpots
    mapper = {hs: OverlappableHaloSpot(**hs.args) for hs in found}
    iet = Transformer(mapper, nested=True).visit(iet)

    return iet
Example #3
0
    def callback(self, clusters, prefix):
        if not prefix:
            return clusters

        d = prefix[-1].dim

        for c in clusters:
            # PARALLEL* and AFFINE are necessary conditions
            if AFFINE not in c.properties[d] or \
               not ({PARALLEL, PARALLEL_IF_PVT} & c.properties[d]):
                return clusters

            # Heuristic: innermost Dimensions may be ruled out a-priori
            is_inner = d is c.itintervals[-1].dim
            if is_inner and not self.inner:
                return clusters

            # Heuristic: TILABLE not worth it if not within a SEQUENTIAL Dimension
            if not any(SEQUENTIAL in c.properties[i.dim] for i in prefix[:-1]):
                return clusters

            # Heuristic: same as above if there's a local SubDimension
            if any(i.dim.is_Sub and i.dim.local for i in c.itintervals):
                return clusters

        if len(clusters) > 1:
            # Heuristic: same as above if it induces dynamic bounds
            exprs = flatten(c.exprs for c in as_tuple(clusters))
            scope = Scope(exprs)
            if any(i.is_lex_non_stmt for i in scope.d_all_gen()):
                return clusters
        else:
            # Just avoiding potentially expensive checks
            pass

        # All good, `d` is actually TILABLE
        processed = attach_property(clusters, d, TILABLE)

        return processed
Example #4
0
File: misc.py Project: ofmla/devito
    def callback(self, clusters, prefix):
        if not prefix or len(clusters) == 1:
            return clusters

        d = prefix[-1].dim

        # Do not waste time if definitely illegal
        if any(SEQUENTIAL in c.properties[d] for c in clusters):
            return clusters

        # Do not waste time if definitely nothing to do
        if all(len(prefix) == len(c.itintervals) for c in clusters):
            return clusters

        # Analyze and abort if fissioning would break a dependence
        scope = Scope(flatten(c.exprs for c in clusters))
        if any(d._defines & dep.cause or dep.is_reduce(d)
               for dep in scope.d_all_gen()):
            return clusters

        processed = []
        for k, g in groupby(clusters, key=lambda c: self._key(c, len(prefix))):
            it, _ = k
            group = list(g)

            if any(SEQUENTIAL in c.properties[it.dim] for c in group):
                # Heuristic: no gain from fissioning if unable to ultimately
                # increase the number of collapsable iteration spaces, hence give up
                processed.extend(group)
            else:
                stamp = Stamp()
                for c in group:
                    ispace = c.ispace.lift(d, stamp)
                    dspace = c.dspace.lift(d, stamp)
                    processed.append(c.rebuild(ispace=ispace, dspace=dspace))

        return processed
Example #5
0
    def _build_dag(self, cgroups, prefix):
        """
        A DAG captures data dependences between ClusterGroups up to the iteration
        space depth dictated by ``prefix``.

        Examples
        --------
        Consider two ClusterGroups `c0` and `c1`, and ``prefix=[i]``.

        1) cg0 := b[i, j] = ...
           cg1 := ... = ... b[i, j] ...
           Non-carried flow-dependence, so `cg1` must go after `cg0`.

        2) cg0 := b[i, j] = ...
           cg1 := ... = ... b[i, j-1] ...
           Carried flow-dependence in `j`, so `cg1` must go after `cg0`.

        3) cg0 := b[i, j] = ...
           cg1 := ... = ... b[i, j+1] ...
           Carried anti-dependence in `j`, so `cg1` must go after `cg0`.

        4) cg0 := b[i, j] = ...
           cg1 := ... = ... b[i-1, j+1] ...
           Carried flow-dependence in `i`, so `cg1` can safely go before or after
           `cg0`. Note: the `j+1` in `cg1` has no impact -- the actual dependence
           betweeb `b[i, j]` and `b[i-1, j+1]` is along `i`.
        """
        prefix = {i.dim for i in as_tuple(prefix)}

        dag = DAG(nodes=cgroups)
        for n, cg0 in enumerate(cgroups):
            for cg1 in cgroups[n + 1:]:
                rule = lambda i: i.is_cross  # Only retain dep if cross-ClusterGroup
                scope = Scope(exprs=cg0.exprs + cg1.exprs, rules=rule)

                # Optimization: we exploit the following property:
                # no prefix => (edge <=> at least one (any) dependence)
                # To jump out of this potentially expensive loop as quickly as possible
                if not prefix and any(scope.d_all_gen()):
                    dag.add_edge(cg0, cg1)
                    continue

                # Handle anti-dependences
                if any(i.cause & prefix for i in scope.d_anti_gen()):
                    # Anti-dependences break the execution flow
                    # i) ClusterGroups between `cg0` and `cg1` must precede `cg1`
                    for cg2 in cgroups[n:cgroups.index(cg1)]:
                        dag.add_edge(cg2, cg1)
                    # ii) ClusterGroups after `cg1` cannot precede `cg1`
                    for cg2 in cgroups[cgroups.index(cg1) + 1:]:
                        dag.add_edge(cg1, cg2)
                    break
                elif any(scope.d_anti_gen()):
                    dag.add_edge(cg0, cg1)
                    continue

                # Flow-dependences along one of the `prefix` Dimensions can
                # be ignored; all others require sequentialization
                if any(not (i.cause and i.cause & prefix)
                       for i in scope.d_flow_gen()):
                    dag.add_edge(cg0, cg1)
                    continue

                # Handle increment-after-write dependences
                if any(i.is_iaw for i in scope.d_output_gen()):
                    dag.add_edge(cg0, cg1)
                    continue

        return dag