Example #1
def clusterize(exprs):
    """
    Group a sequence of :class:`ir.Eq`s into one or more :class:`Cluster`s.
    """
    clusters = ClusterGroup()
    flowmap = detect_flow_directions(exprs)
    prev = None
    for idx, e in enumerate(exprs):
        if e.is_Tensor:
            scalars = [i for i in exprs[prev:idx] if i.is_Scalar]
            # Iteration space
            ispace = IterationSpace.merge(e.ispace, *[i.ispace for i in scalars])
            # Enforce iteration directions
            fdirs, _ = force_directions(flowmap, lambda d: ispace.directions.get(d))
            ispace = IterationSpace(ispace.intervals, ispace.sub_iterators, fdirs)
            # Data space
            dspace = DataSpace.merge(e.dspace, *[i.dspace for i in scalars])
            # Prepare for next range
            prev = idx

            clusters.append(PartialCluster(scalars + [e], ispace, dspace))

    # Group PartialClusters together where possible
    clusters = groupby(clusters)

    # Introduce conditional PartialClusters
    clusters = guard(clusters)

    return clusters.finalize()
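Example #2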
def guard(clusters):
    """
    Return a new :class:`ClusterGroup` including new :class:`PartialCluster`s
    for each conditional expression encountered in ``clusters``.
    """
    processed = ClusterGroup()
    for c in clusters:
        # Find out what expressions in /c/ should be guarded
        mapper = {}
        for e in c.exprs:
            for k, v in e.ispace.sub_iterators.items():
                for i in v:
                    if i.dim.is_Conditional:
                        mapper.setdefault(i.dim, []).append(e)

        # Build conditional expressions to guard clusters
        conditions = {d: CondEq(d.parent % d.factor, 0) for d in mapper}
        negated = {d: CondNe(d.parent % d.factor, 0) for d in mapper}

        # Expand with guarded clusters
        combs = list(powerset(mapper))
        for dims, ndims in zip(combs, reversed(combs)):
            banned = flatten(v for k, v in mapper.items() if k not in dims)
            exprs = [
                e.xreplace({i: IntDiv(i.parent, i.factor)
                            for i in mapper}) for e in c.exprs
                if e not in banned
            ]
            guards = [(i.parent, conditions[i]) for i in dims]
            guards.extend([(i.parent, negated[i]) for i in ndims])
            cluster = PartialCluster(exprs, c.ispace, c.dspace, c.atomics,
                                     dict(guards))
            processed.append(cluster)

    return processed
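
The `guard` above relies on a `powerset` helper that the snippet does not show. A minimal sketch, assuming the standard itertools recipe: subsets are yielded smallest-first, which is what makes `zip(combs, reversed(combs))` pair each subset of `mapper` with its complement.

from itertools import chain, combinations

def powerset(iterable):
    # powerset([a, b]) -> (), (a,), (b,), (a, b)
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))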
Example #3
File: algorithms.py Project: fymenq/devito
def clusterize(exprs):
    """Group a sequence of :class:`ir.Eq`s into one or more :class:`Cluster`s."""

    # Build a graph capturing the dependencies among the input tensor expressions
    mapper = OrderedDict()
    for i, e1 in enumerate(exprs):
        trace = [e2 for e2 in exprs[:i] if Scope([e2, e1]).has_dep] + [e1]
        trace.extend([e2 for e2 in exprs[i + 1:] if Scope([e1, e2]).has_dep])
        mapper[e1] = Bunch(trace=trace, ispace=e1.ispace)

    # Derive the iteration spaces
    queue = list(mapper)
    while queue:
        target = queue.pop(0)

        ispaces = [mapper[i].ispace for i in mapper[target].trace]

        coerced_ispace = mapper[target].ispace.intersection(*ispaces)

        if coerced_ispace != mapper[target].ispace:
            # Something has changed, need to propagate the update
            mapper[target].ispace = coerced_ispace
            queue.extend([i for i in mapper[target].trace if i not in queue])

    # Build a PartialCluster for each tensor expression
    clusters = ClusterGroup()
    for k, v in mapper.items():
        if k.is_Tensor:
            scalars = [i for i in v.trace[:v.trace.index(k)] if i.is_Scalar]
            clusters.append(PartialCluster(scalars + [k], v.ispace))

    # Group PartialClusters together where possible
    clusters = groupby(clusters)

    return clusters.finalize()
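
Example #3 stores per-expression state in a `Bunch`, a plain attribute container not shown in the snippet. A minimal stand-in, assuming the classic recipe:

class Bunch(object):
    """Attribute container: Bunch(trace=[], ispace=None) exposes .trace, .ispace"""
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)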
Example #4
def guard(clusters):
    """
    Return a new :class:`ClusterGroup` with a new :class:`PartialCluster`
    for each conditional expression encountered in ``clusters``.
    """
    processed = ClusterGroup()
    for c in clusters:
        free = []
        for e in c.exprs:
            if e.conditionals:
                # Expressions that need no guarding are kept in a separate Cluster
                if free:
                    processed.append(PartialCluster(free, c.ispace, c.dspace, c.atomics))
                    free = []
                # Create a guarded PartialCluster
                guards = {}
                for d in e.conditionals:
                    condition = guards.setdefault(d.parent, [])
                    condition.append(d.condition or CondEq(d.parent % d.factor, 0))
                guards = {k: sympy.And(*v, evaluate=False) for k, v in guards.items()}
                processed.append(PartialCluster(e, c.ispace, c.dspace, c.atomics, guards))
            else:
                free.append(e)
        # Leftover
        if free:
            processed.append(PartialCluster(free, c.ispace, c.dspace, c.atomics))

    return ClusterGroup(processed)
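
For intuition on the guards built above: `CondEq(d.parent % d.factor, 0)` is presumably an unevaluated equality stating that the parent dimension is a multiple of the subsampling factor. A toy emulation with plain SymPy (names here are illustrative, not Devito's API):

import sympy

time = sympy.Symbol('time')
factor = 4
# The shape of the default guard: fire only every `factor` iterations
guard = sympy.Eq(time % factor, 0, evaluate=False)
print([t for t in range(10) if guard.subs(time, t)])  # -> [0, 4, 8]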
Example #5
File: algorithms.py Project: fymenq/devito
def groupby(clusters):
    """
    Attempt grouping :class:`PartialCluster`s together to create bigger
    :class:`PartialCluster`s (i.e., containing more expressions).

    .. note::

        This function relies on advanced data dependency analysis tools
        based upon classic Lamport theory.
    """
    clusters = clusters.unfreeze()

    processed = ClusterGroup()
    for c in clusters:
        fused = False
        for candidate in reversed(list(processed)):
            # Collect all relevant data dependences
            scope = Scope(exprs=candidate.exprs + c.exprs)
            anti = scope.d_anti.carried() - scope.d_anti.increment
            flow = scope.d_flow - (scope.d_flow.inplace() +
                                   scope.d_flow.increment)
            funcs = [i.function for i in anti]
            if candidate.ispace == c.ispace and\
                    all(is_local(i, candidate, c, clusters) for i in funcs):
                # /c/ will be fused into /candidate/. All fusion-induced anti
                # dependences are eliminated through so-called "index bumping and
                # array contraction", which transforms array accesses into scalars

                # Optimization: we also bump-and-contract the Arrays inducing
                # non-carried dependences, to avoid useless memory accesses
                funcs += [
                    i.function for i in scope.d_flow.independent()
                    if is_local(i.function, candidate, c, clusters)
                ]

                bump_and_contract(funcs, candidate, c)
                candidate.squash(c)
                fused = True
                break
            elif anti:
                # Data dependences prevent fusion with earlier clusters, so
                # must break up the search
                processed.atomics[c].update(set(anti.cause))
                break
            elif set(flow).intersection(processed.atomics[candidate]):
                # We cannot even attempt fusing with earlier clusters, as
                # otherwise the existing flow dependences wouldn't be honored
                break
        # Fallback
        if not fused:
            processed.append(c)

    return processed
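
For intuition on the "index bumping and array contraction" mentioned in the comments: once two loops are fused, a local temporary Array that carried values between them can be replaced by a scalar. A schematic illustration in plain Python (not Devito's actual transformation):

def fused(n, f):
    b = [0] * n
    for i in range(n):
        # Before fusion: a[i] = f(i) in one loop, b[i] = a[i] + 1 in the next.
        # After fusion + contraction, the Array access a[i] becomes a scalar:
        s = f(i)
        b[i] = s + 1
    return b

assert fused(3, lambda i: i) == [1, 2, 3]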
Example #6
File: algorithms.py Project: dalide/devito
def groupby(clusters):
    """
    Given an ordered collection of :class:`PartialCluster` objects, return a
    (potentially) smaller sequence in which dependence-free PartialClusters with
    identical stencil have been squashed into a single :class:`PartialCluster`.
    """
    clusters = clusters.unfreeze()

    # Attempt cluster fusion
    processed = ClusterGroup()
    for c in clusters:
        fused = False
        for candidate in reversed(list(processed)):
            # Check all data dependences relevant for cluster fusion
            scope = Scope(exprs=candidate.exprs + c.exprs)
            anti = scope.d_anti.carried() - scope.d_anti.increment
            flow = scope.d_flow - (scope.d_flow.inplace() +
                                   scope.d_flow.increment)
            funcs = [i.function for i in anti]
            if candidate.stencil == c.stencil and\
                    all(is_local(i, candidate, c, clusters) for i in funcs):
                # /c/ will be fused into /candidate/. All fusion-induced anti
                # dependences are eliminated through so-called "index bumping and
                # array contraction", which transforms array accesses into scalars

                # Optimization: we also bump-and-contract the Arrays inducing
                # non-carried dependences, to avoid useless memory accesses
                funcs += [
                    i.function for i in scope.d_flow.independent()
                    if is_local(i.function, candidate, c, clusters)
                ]

                bump_and_contract(funcs, candidate, c)
                candidate.squash(c)
                fused = True
                break
            elif anti:
                # Data dependences prevent fusion with earlier clusters, so
                # must break up the search
                processed.atomics[c].update(set(anti.cause))
                break
            elif set(flow).intersection(processed.atomics[candidate]):
                # We cannot even attempt fusing with earlier clusters, as
                # otherwise the existing flow dependences wouldn't be honored
                break
        # Fallback
        if not fused:
            processed.append(c)

    return processed.freeze()
Example #7
def guard(clusters):
    """
    Split Clusters containing conditional expressions into separate Clusters.
    """
    processed = []
    for c in clusters:
        # Group together consecutive expressions with same ConditionalDimensions
        for cds, g in groupby(c.exprs, key=lambda e: e.conditionals):
            if not cds:
                processed.append(c.rebuild(exprs=list(g)))
                continue

            # Create a guarded Cluster
            guards = {}
            for cd in cds:
                condition = guards.setdefault(cd.parent, [])
                if cd.condition is None:
                    condition.append(CondEq(cd.parent % cd.factor, 0))
                else:
                    condition.append(cd.condition)
            guards = {
                k: sympy.And(*v, evaluate=False)
                for k, v in guards.items()
            }
            processed.append(c.rebuild(exprs=list(g), guards=guards))

    return ClusterGroup(processed)
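
Note that `groupby` in Examples #7 and #8 is `itertools.groupby`, not the cluster-fusion `groupby` of the earlier examples: it only merges consecutive items with equal keys, hence "consecutive expressions" in the comment. A quick demonstration:

from itertools import groupby

items = ['a', 'a', 'b', 'a']
print([(k, len(list(g))) for k, g in groupby(items)])
# -> [('a', 2), ('b', 1), ('a', 1)]; the two runs of 'a' stay separate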
Example #8
def guard(clusters):
    """
    Split Clusters containing conditional expressions into separate Clusters.
    """
    processed = []
    for c in clusters:
        # Group together consecutive expressions with same ConditionalDimensions
        for cds, g in groupby(c.exprs, key=lambda e: tuple(e.conditionals)):
            exprs = list(g)

            if not cds:
                processed.append(c.rebuild(exprs=exprs))
                continue

            # Chain together all conditions from all expressions in `c`
            guards = {}
            for cd in cds:
                condition = guards.setdefault(cd.parent, [])
                for e in exprs:
                    try:
                        condition.append(e.conditionals[cd])
                        break
                    except KeyError:
                        pass
            guards = {
                d: sympy.And(*v, evaluate=False)
                for d, v in guards.items()
            }

            # Construct a guarded Cluster
            processed.append(c.rebuild(exprs=exprs, guards=guards))

    return ClusterGroup(processed)
Example #9
def optimize(clusters, dse_mode):
    """
    Optimize a topologically-ordered sequence of Clusters by applying the
    following transformations:

        * [cross-cluster] Fusion
        * [intra-cluster] Several flop-reduction passes via the DSE
        * [cross-cluster] Lifting
        * [cross-cluster] Scalarization
        * [cross-cluster] Arrays Elimination
    """
    # To create temporaries
    counter = generator()
    template = lambda: "r%d" % counter()

    # Fusion
    clusters = fuse(clusters)

    from devito.dse import rewrite
    clusters = rewrite(clusters, template, mode=dse_mode)

    # Lifting
    clusters = Lift().process(clusters)

    # Lifting may create fusion opportunities
    clusters = fuse(clusters)

    # Fusion may create opportunities to eliminate Arrays (thus shrinking the
    # working set) if these store identical expressions
    clusters = eliminate_arrays(clusters, template)

    # Fusion may create scalarization opportunities
    clusters = scalarize(clusters, template)

    return ClusterGroup(clusters)
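
The `generator`/`template` pair above yields the fresh temporary names r0, r1, r2, ... A minimal sketch of the counter, assuming it simply returns consecutive integers on successive calls:

def generator():
    state = {'n': 0}
    def counter():
        n = state['n']
        state['n'] += 1
        return n
    return counter

counter = generator()
template = lambda: "r%d" % counter()
assert [template() for _ in range(3)] == ['r0', 'r1', 'r2']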
Example #10
def clusterize(exprs, dse_mode=None):
    """
    Turn a sequence of LoweredEqs into a sequence of Clusters.
    """
    # Initialization
    clusters = [Cluster(e, e.ispace, e.dspace) for e in exprs]

    # Compute a topological ordering that honours flow- and anti-dependences.
    # This is necessary prior to enforcing the iteration direction (step below)
    clusters = Toposort().process(clusters)

    # Enforce iteration directions. This turns anti- into flow-dependences by
    # reversing the iteration direction (Backward instead of Forward). A new
    # topological sorting is then computed to expose more fusion opportunities,
    # which will be exploited within `optimize`
    clusters = Enforce().process(clusters)
    clusters = Toposort().process(clusters)

    # Apply optimizations
    clusters = optimize(clusters, dse_mode)

    # Introduce conditional Clusters
    clusters = guard(clusters)

    return ClusterGroup(clusters)
Example #11
File: algorithms.py Project: dalide/devito
def clusterize(exprs, stencils):
    """
    Derive :class:`Cluster` objects from an iterable of expressions; a stencil for
    each expression must be provided.
    """
    assert len(exprs) == len(stencils)

    exprs, stencils = aggregate(exprs, stencils)

    # Create a PartialCluster for each sequence of expressions computing a tensor
    mapper = OrderedDict()
    g = TemporariesGraph(exprs)
    for (k, v), j in zip(g.items(), stencils):
        if v.is_tensor:
            exprs = g.trace(k)
            exprs += tuple(i for i in g.trace(k, readby=True)
                           if i not in exprs)
            mapper[k] = PartialCluster(exprs, j)

    # Update the PartialClusters' Stencils by looking at the Stencil of the
    # surrounding PartialClusters.
    queue = list(mapper)
    while queue:
        target = queue.pop(0)

        pc = mapper[target]
        strict_trace = [i.lhs for i in pc.exprs if i.lhs != target]

        stencil = pc.stencil.copy()
        for i in strict_trace:
            if i in mapper:
                stencil = stencil.add(mapper[i].stencil)

        if stencil != pc.stencil:
            # Something has changed, need to propagate the update
            pc.stencil = stencil
            queue.extend([i for i in strict_trace if i not in queue])

    # Drop all non-output tensors, as computed by other clusters
    clusters = ClusterGroup()
    for target, pc in mapper.items():
        exprs = [i for i in pc.exprs if i.lhs.is_Symbol or i.lhs == target]
        clusters.append(PartialCluster(exprs, pc.stencil))

    # Attempt grouping as many PartialClusters as possible together
    return groupby(clusters)
Example #12
def guard(clusters):
    """
    Return a new :class:`ClusterGroup` with a new :class:`PartialCluster`
    for each conditional expression encountered in ``clusters``.
    """
    processed = ClusterGroup()
    for c in clusters:
        # Separate the expressions that should be guarded from the free ones
        mapper = OrderedDict()
        free = []
        for e in c.exprs:
            found = [d for d in e.dimensions if d.is_Conditional]
            if found:
                mapper.setdefault(tuple(filter_sorted(found)), []).append(e)
            else:
                free.append(e)

        # Some expressions may not require guards at all. We put them in their
        # own cluster straight away
        if free:
            processed.append(PartialCluster(free, c.ispace, c.dspace, c.atomics))

        # Then we add in all guarded clusters
        for k, v in mapper.items():
            guards = {d.parent: CondEq(d.parent % d.factor, 0) for d in k}
            processed.append(PartialCluster(v, c.ispace, c.dspace, c.atomics, guards))

    return ClusterGroup(processed)
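
Example #12 uses a `filter_sorted` helper; a minimal stand-in, assuming it returns the input with duplicates removed, in sorted order:

def filter_sorted(elements, key=None):
    return sorted(set(elements), key=key)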
Example #13
def clusterize(exprs):
    """Group a sequence of :class:`ir.Eq`s into one or more :class:`Cluster`s."""

    # Group expressions based on data dependences
    groups = group_expressions(exprs)

    clusters = ClusterGroup()
    for g in groups:
        # Coerce iteration space of each expression in each group
        mapper = OrderedDict([(e, e.ispace) for e in g])
        flowmap = detect_flow_directions(g)
        queue = list(g)
        while queue:
            v = queue.pop(0)

            intervals, sub_iterators, directions = mapper[v].args
            forced, clashes = force_directions(flowmap,
                                               lambda i: directions.get(i))
            for e in g:
                intervals = intervals.intersection(
                    mapper[e].intervals.drop(clashes))
            directions = {i: forced[i] for i in directions}
            coerced_ispace = IterationSpace(intervals, sub_iterators,
                                            directions)

            # Need update propagation ?
            if coerced_ispace != mapper[v]:
                mapper[v] = coerced_ispace
                queue.extend([i for i in g if i not in queue])

        # Wrap each tensor expression in a PartialCluster
        for k, v in mapper.items():
            if k.is_Tensor:
                scalars = [i for i in g[:g.index(k)] if i.is_Scalar]
                clusters.append(PartialCluster(scalars + [k], v))

    # Group PartialClusters together where possible
    clusters = groupby(clusters)

    # Introduce conditional PartialClusters
    clusters = guard(clusters)

    return clusters.finalize()
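
Examples #3, #13 and #15 all share the same worklist pattern: pop an item, recompute its property from related items, and re-enqueue whatever may be affected whenever the property changed, until a fixed point is reached. A stripped-down illustration of the pattern:

deps = {'a': [], 'b': ['a'], 'c': ['b']}   # c depends on b, which depends on a
value = {'a': 2, 'b': 1, 'c': 1}           # coerce each value up to its deps'

queue = list(value)
while queue:
    k = queue.pop(0)
    coerced = max([value[k]] + [value[d] for d in deps[k]])
    if coerced != value[k]:
        # Something has changed, need to propagate the update
        value[k] = coerced
        queue.extend(i for i in deps if k in deps[i] and i not in queue)

print(value)  # -> {'a': 2, 'b': 2, 'c': 2}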
Example #14
def guard(clusters):
    """
    Split Clusters containing conditional expressions into separate Clusters.
    """
    processed = []
    for c in clusters:
        # Group together consecutive expressions with same ConditionalDimensions
        for cds, g in groupby(c.exprs, key=lambda e: tuple(e.conditionals)):
            exprs = list(g)

            if not cds:
                processed.append(c.rebuild(exprs=exprs))
                continue

            # Separate out the indirect ConditionalDimensions, which only serve
            # the purpose of protecting from OOB accesses
            cds = [d for d in cds if not d.indirect]

            # Chain together all `cds` conditions from all expressions in `c`
            guards = {}
            for cd in cds:
                # `BOTTOM` parent implies a guard that lives outside of
                # any iteration space, which corresponds to the placeholder None
                if cd.parent is BOTTOM:
                    d = None
                else:
                    d = cd.parent

                # Pull `cd` from any expr
                condition = guards.setdefault(d, [])
                for e in exprs:
                    try:
                        condition.append(e.conditionals[cd])
                        break
                    except KeyError:
                        pass

                # Remove `cd` from all `exprs` since this will be now encoded
                # globally at the Cluster level
                for i, e in enumerate(list(exprs)):
                    conditionals = dict(e.conditionals)
                    conditionals.pop(cd, None)
                    exprs[i] = e.func(*e.args, conditionals=conditionals)

            guards = {
                d: sympy.And(*v, evaluate=False)
                for d, v in guards.items()
            }

            # Construct a guarded Cluster
            processed.append(c.rebuild(exprs=exprs, guards=guards))

    return ClusterGroup(processed)
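Example #15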
def clusterize(exprs):
    """Group a sequence of :class:`ir.Eq`s into one or more :class:`Cluster`s."""
    # Group expressions based on data dependences
    groups = group_expressions(exprs)

    clusters = ClusterGroup()

    # Coerce iteration direction of each expression in each group
    for g in groups:
        mapper = OrderedDict([(e, e.directions) for e in g])
        flowmap = detect_flow_directions(g)
        queue = list(g)
        while queue:
            k = queue.pop(0)
            directions, _ = force_directions(flowmap,
                                             lambda i: mapper[k].get(i))
            directions = {i: directions[i] for i in mapper[k]}
            # Need update propagation ?
            if directions != mapper[k]:
                mapper[k] = directions
                queue.extend([i for i in g if i not in queue])

        # Wrap each tensor expression in a PartialCluster
        for k, v in mapper.items():
            if k.is_Tensor:
                scalars = [i for i in g[:g.index(k)] if i.is_Scalar]
                intervals, sub_iterators, _ = k.ispace.args
                ispace = IterationSpace(intervals, sub_iterators, v)
                clusters.append(PartialCluster(scalars + [k], ispace,
                                               k.dspace))

    # Group PartialClusters together where possible
    clusters = groupby(clusters)

    # Introduce conditional PartialClusters
    clusters = guard(clusters)

    return clusters.finalize()
Example #16
def clusterize(exprs):
    """Group a sequence of LoweredEqs into one or more Clusters."""
    clusters = ClusterGroup()

    # Wrap each LoweredEq in `exprs` within a PartialCluster. The PartialCluster's
    # iteration direction is enforced based on the iteration direction of the
    # surrounding LoweredEqs
    flowmap = detect_flow_directions(exprs)
    for e in exprs:
        directions, _ = force_directions(flowmap,
                                         lambda d: e.ispace.directions.get(d))
        ispace = IterationSpace(e.ispace.intervals, e.ispace.sub_iterators,
                                directions)

        clusters.append(PartialCluster(e, ispace, e.dspace))

    # Group PartialClusters together where possible
    clusters = groupby(clusters)

    # Introduce conditional PartialClusters
    clusters = guard(clusters)

    return clusters.finalize()
Example #17
def clusterize(exprs):
    """
    Turn a sequence of LoweredEqs into a sequence of Clusters.
    """
    # Initialization
    clusters = [Cluster(e, e.ispace, e.dspace) for e in exprs]

    # Setup the IterationSpaces based on data dependence analysis
    clusters = Schedule().process(clusters)

    # Handle ConditionalDimensions
    clusters = guard(clusters)

    # Determine relevant computational properties (e.g., parallelism)
    clusters = analyze(clusters)

    return ClusterGroup(clusters)
Example #18
def clusterize(exprs, dse_mode=None):
    """
    Turn a sequence of LoweredEqs into a sequence of Clusters.
    """
    # Initialization
    clusters = [Cluster(e, e.ispace, e.dspace) for e in exprs]

    # Compute a topological ordering that honours flow- and anti-dependences
    clusters = Toposort().process(clusters)

    # Setup the IterationSpaces based on data dependence analysis
    clusters = Schedule().process(clusters)

    # Introduce conditional Clusters
    clusters = guard(clusters)

    # Apply optimizations
    clusters = optimize(clusters, dse_mode)

    return ClusterGroup(clusters)
Example #19
def clusterize(exprs):
    """
    Turn a sequence of LoweredEqs into a sequence of Clusters.
    """
    # Initialization
    clusters = [Cluster(e, e.ispace, e.dspace) for e in exprs]

    # Compute a topological ordering that honours flow- and anti-dependences
    clusters = Toposort().process(clusters)

    # Setup the IterationSpaces based on data dependence analysis
    clusters = Schedule().process(clusters)

    # Introduce conditional Clusters
    clusters = guard(clusters)

    # Determine relevant computational properties (e.g., parallelism)
    clusters = analyze(clusters)

    return ClusterGroup(clusters)
Example #20
def optimize(clusters, dse_mode):
    """
    Optimize a topologically-ordered sequence of Clusters by applying the
    following transformations:

        * [cross-cluster] Fusion
        * [intra-cluster] Several flop-reduction passes via the DSE
        * [cross-cluster] Lifting
        * [cross-cluster] Scalarization
        * [cross-cluster] Arrays Elimination
    """
    # To create temporaries
    counter = generator()
    template = lambda: "r%d" % counter()

    # Toposort+Fusion (the former to expose more fusion opportunities)
    clusters = Toposort().process(clusters)
    clusters = fuse(clusters)

    # Flop reduction via the DSE
    from devito.dse import rewrite
    clusters = rewrite(clusters, template, mode=dse_mode)

    # Lifting
    clusters = Lift().process(clusters)

    # Lifting may create fusion opportunities
    clusters = fuse(clusters)

    # Fusion may create opportunities to eliminate Arrays (thus shrinking the
    # working set) if these store identical expressions
    clusters = eliminate_arrays(clusters, template)

    # Fusion may create scalarization opportunities
    clusters = scalarize(clusters, template)

    # Determine computational properties (e.g., parallelism) that will be
    # necessary for the later passes
    clusters = analyze(clusters)

    return ClusterGroup(clusters)
Example #21
def _aggregate(self, cgroups, prefix):
    """
    Concatenate a sequence of ClusterGroups into a new ClusterGroup.
    """
    return [ClusterGroup(cgroups, prefix)]
Example #22
def process(self, clusters):
    cgroups = [ClusterGroup(c, c.itintervals) for c in clusters]
    cgroups = self._process_fdta(cgroups, 1)
    clusters = ClusterGroup.concatenate(*cgroups)
    return clusters
Example #23
def groupby(clusters):
    """
    Attempt grouping :class:`PartialCluster`s together to create bigger
    :class:`PartialCluster`s (i.e., containing more expressions).

    .. note::

        This function relies on advanced data dependency analysis tools
        based upon classic Lamport theory.
    """
    clusters = clusters.unfreeze()

    processed = ClusterGroup()
    for c in clusters:
        fused = False
        for candidate in reversed(list(processed)):
            # Guarded clusters cannot be grouped together
            if c.guards:
                break

            # Collect all relevant data dependences
            scope = Scope(exprs=candidate.exprs + c.exprs)

            # Collect anti-dependences preventing grouping
            anti = scope.d_anti.carried() - scope.d_anti.increment
            funcs = set(anti.functions)

            # Collect flow-dependences breaking the search
            flow = scope.d_flow - (scope.d_flow.inplace() + scope.d_flow.increment)

            # Can we group `c` with `candidate`?
            test0 = not candidate.guards  # No intervening guards
            test1 = candidate.ispace.is_compatible(c.ispace)  # Compatible ispaces
            test2 = all(is_local(i, candidate, c, clusters) for i in funcs)  # No antideps
            if test0 and test1 and test2:
                # Yes, `c` can be grouped with `candidate`. All anti-dependences
                # (if any) can be eliminated through "index bumping and array
                # contraction", which turns Array temporaries into Scalar temporaries

                # Optimization: we also bump-and-contract the Arrays inducing
                # non-carried dependences, to minimize the working set
                funcs.update({i.function for i in scope.d_flow.independent()
                              if is_local(i.function, candidate, c, clusters)})

                bump_and_contract(funcs, candidate, c)
                candidate.squash(c)
                fused = True
                break
            elif anti:
                # Data dependences prevent fusion with earlier Clusters, so
                # must break up the search
                c.atomics.update(anti.cause)
                break
            elif flow.cause & candidate.atomics:
                # We cannot even attempt fusing with earlier Clusters, as
                # otherwise the carried flow dependences wouldn't be honored
                break
            elif set(candidate.guards) & set(c.dimensions):
                # Like above, we can't attempt fusion with earlier Clusters.
                # This time because there are intervening conditionals along
                # one or more of the shared iteration dimensions
                break
        # Fallback
        if not fused:
            processed.append(c)

    return processed