Example #1
def guard(clusters):
    """
    Split Clusters containing conditional expressions into separate Clusters.
    """
    processed = []
    for c in clusters:
        # Group together consecutive expressions with the same ConditionalDimensions
        for cds, g in groupby(c.exprs, key=lambda e: e.conditionals):
            if not cds:
                processed.append(Cluster(list(g), c.ispace, c.dspace))
                continue

            # Create a guarded Cluster
            guards = {}
            for cd in cds:
                condition = guards.setdefault(cd.parent, [])
                if cd.condition is None:
                    condition.append(CondEq(cd.parent % cd.factor, 0))
                else:
                    condition.append(cd.condition)
            guards = {
                k: sympy.And(*v, evaluate=False)
                for k, v in guards.items()
            }
            processed.append(Cluster(list(g), c.ispace, c.dspace, guards))

    return processed
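
Since `groupby` only merges consecutive items with equal keys, the split preserves program order. Below is a minimal, self-contained sketch of that grouping behaviour, with plain tuples standing in for expressions (all names are illustrative, not Devito API):

from itertools import groupby

# Each expression is modelled as (name, conditionals)
exprs = [('eq0', ()), ('eq1', ('cd',)), ('eq2', ('cd',)), ('eq3', ())]

# Consecutive expressions sharing the same conditionals end up together
runs = [(conds, [name for name, _ in g])
        for conds, g in groupby(exprs, key=lambda e: e[1])]

assert runs == [((), ['eq0']), (('cd',), ['eq1', 'eq2']), ((), ['eq3'])]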
Example #2
def guard(clusters):
    """
    Split Clusters containing conditional expressions into separate Clusters.
    """
    processed = []
    for c in clusters:
        free = []
        for e in c.exprs:
            if e.conditionals:
                # Expressions that need no guarding are kept in a separate Cluster
                if free:
                    processed.append(Cluster(free, c.ispace, c.dspace))
                    free = []

                # Create a guarded Cluster
                guards = {}
                for d in e.conditionals:
                    condition = guards.setdefault(d.parent, [])
                    if d.condition is None:
                        condition.append(CondEq(d.parent % d.factor, 0))
                    else:
                        condition.append(d.condition)
                guards = {
                    k: sympy.And(*v, evaluate=False)
                    for k, v in guards.items()
                }
                processed.append(Cluster(e, c.ispace, c.dspace, guards))
            else:
                free.append(e)
        # Leftover
        if free:
            processed.append(Cluster(free, c.ispace, c.dspace))

    return processed
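
This variant implements the same split with a buffer-and-flush scan: unguarded expressions accumulate in `free` and are emitted as one Cluster whenever a guarded expression interrupts the run. A stand-in sketch of the pattern (no Devito types):

def split(items, needs_guard):
    """Emit maximal runs of unguarded items; guarded items go out singly."""
    out, free = [], []
    for i in items:
        if needs_guard(i):
            if free:
                out.append(('free', free))
                free = []
            out.append(('guarded', [i]))
        else:
            free.append(i)
    if free:
        out.append(('free', free))
    return out

assert split([1, 2, 3, 4], lambda i: i == 3) == \
    [('free', [1, 2]), ('guarded', [3]), ('free', [4])]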
Example #3
def clusterize(exprs, dse_mode=None):
    """
    Turn a sequence of LoweredEqs into a sequence of Clusters.
    """
    # Initialization
    clusters = [Cluster(e, e.ispace, e.dspace) for e in exprs]

    # Compute a topological ordering that honours flow- and anti-dependences.
    # This is necessary prior to enforcing the iteration direction (step below)
    clusters = Toposort().process(clusters)

    # Enforce iteration directions. This turns anti- into flow-dependences by
    # reversing the iteration direction (Backward instead of Forward). A new
    # topological sorting is then computed to expose more fusion opportunities,
    # which will be exploited within `optimize`
    clusters = Enforce().process(clusters)
    clusters = Toposort().process(clusters)

    # Apply optimizations
    clusters = optimize(clusters, dse_mode)

    # Introduce conditional Clusters
    clusters = guard(clusters)

    return ClusterGroup(clusters)
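
Toposort computes an ordering of the Clusters that respects the dependence graph. The generic algorithm is a standard topological sort; a self-contained sketch via Kahn's algorithm follows (this is not Devito's actual implementation, just the underlying idea):

from collections import deque

def toposort(graph):
    """graph: {node: set of nodes that must come after it}."""
    indeg = {n: 0 for n in graph}
    for succs in graph.values():
        for s in succs:
            indeg[s] += 1
    queue = deque(n for n, d in indeg.items() if d == 0)
    order = []
    while queue:
        n = queue.popleft()
        order.append(n)
        for s in graph[n]:
            indeg[s] -= 1
            if indeg[s] == 0:
                queue.append(s)
    assert len(order) == len(graph), "dependence cycle"
    return order

# `a` writes what `b` reads (a flow dependence), so `a` must precede `b`
assert toposort({'a': {'b'}, 'b': set()}) == ['a', 'b']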
Example #4
    def callback(self, clusters, prefix):
        if not prefix:
            # No iteration space to be lifted from
            return clusters

        hope_invariant = {i.dim for i in prefix}
        candidates = [
            c for c in clusters
            if any(e.is_Tensor for e in c.exprs)  # Not just scalar exprs
            and not any(e.is_Increment for e in c.exprs)  # No reductions
            and not c.used_dimensions & hope_invariant  # No invariant Dimensions used
        ]
        if not candidates:
            return clusters

        # Now check data dependences
        lifted = []
        processed = []
        for c in clusters:
            impacted = set(clusters) - {c}
            if (c in candidates and
                    not any(set(c.functions) & set(i.scope.writes)
                            for i in impacted)):
                # Perform lifting, which requires contracting the iteration space
                key = lambda d: d not in hope_invariant
                ispace = c.ispace.project(key)
                dspace = c.dspace.project(key)
                lifted.append(Cluster(c.exprs, ispace, dspace,
                                      guards=c.guards))
            else:
                processed.append(c)

        return lifted + processed
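
The dependence test reduces to: a candidate may be hoisted only if nothing it touches is written by any other Cluster. A stand-in sketch with plain sets (the `reads`/`writes` fields are illustrative, not Devito API):

def liftable(candidate, others):
    """A candidate is liftable iff nothing it touches is written elsewhere."""
    touched = candidate['reads'] | candidate['writes']
    return not any(touched & o['writes'] for o in others)

c0 = {'reads': {'u'}, 'writes': {'tmp'}}
c1 = {'reads': {'tmp', 'v'}, 'writes': {'v'}}
assert liftable(c0, [c1])      # nobody else writes `u` or `tmp`
assert not liftable(c1, [c0])  # `tmp` is written by c0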
Example #5
def clusterize(exprs, stencils, atomics=None):
    """
    Derive :class:`Cluster` objects from an iterable of expressions; a stencil
    must be provided for each expression. Optionally, a list of atomic
    dimensions (see Cluster.__doc__) may also be given.
    """
    assert len(exprs) == len(stencils)

    exprs, stencils = aggregate(exprs, stencils)

    Info = namedtuple('Info', 'trace stencil')

    # Build a dependence graph and associate each node with its Stencil
    mapper = OrderedDict()
    g = TemporariesGraph(exprs)
    for (k, v), j in zip(g.items(), stencils):
        if v.is_tensor:
            trace = g.trace(k)
            trace += tuple(i for i in g.trace(k, readby=True) if i not in trace)
            mapper[k] = Info(trace, j)

    # A cluster stencil is determined iteratively, by first calculating the
    # "local" stencil and then by looking at the stencils of all other clusters
    # depending on it. The stencil information is propagated until there are
    # no more updates.
    queue = list(mapper)
    while queue:
        target = queue.pop(0)

        info = mapper[target]
        strict_trace = [i.lhs for i in info.trace if i.lhs != target]

        stencil = Stencil(info.stencil.entries)
        for i in strict_trace:
            if i in mapper:
                stencil = stencil.add(mapper[i].stencil)

        mapper[target] = Info(info.trace, stencil)

        if stencil != info.stencil:
            # Something has changed, need to propagate the update
            queue.extend([i for i in strict_trace if i not in queue])

    clusters = []
    for target, info in mapper.items():
        # Drop all non-output tensors, as they are computed by other clusters
        exprs = [i for i in info.trace if i.lhs.is_Symbol or i.lhs == target]

        # Create and track the cluster
        clusters.append(Cluster(exprs, info.stencil.frozen, atomics))

    return merge(clusters)
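
The propagation loop above is a worklist fixed point: a node's stencil is widened with information from related nodes, and any change re-enqueues the affected nodes until nothing changes. A generic sketch of the scheme, with plain sets standing in for Stencils:

def propagate(deps, stencils):
    """deps: {node: nodes it depends on}; stencils: {node: set of points}."""
    queue = list(stencils)
    while queue:
        target = queue.pop(0)
        widened = set(stencils[target])
        for d in deps[target]:
            widened |= stencils[d]
        if widened != stencils[target]:
            stencils[target] = widened
            # Something changed: the dependents of `target` must be revisited
            queue.extend(n for n, ds in deps.items()
                         if target in ds and n not in queue)
    return stencils

deps = {'a': set(), 'b': {'a'}, 'c': {'b'}}
out = propagate(deps, {'a': {1}, 'b': {2}, 'c': {3}})
assert out == {'a': {1}, 'b': {1, 2}, 'c': {1, 2, 3}}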
Example #6
def fuse(clusters):
    """
    Fuse sub-sequences of Clusters with compatible IterationSpace.
    """
    processed = []
    for k, g in groupby(clusters, key=lambda cg: cg.itintervals):
        maybe_fusible = list(g)

        if len(maybe_fusible) == 1 or any(c.guards for c in maybe_fusible):
            processed.extend(maybe_fusible)
        else:
            # Perform fusion
            fused = Cluster.from_clusters(*maybe_fusible)
            processed.append(fused)

    return processed
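
Because the grouping keys on `itintervals` and `groupby` only merges adjacent runs, only consecutive Clusters with the same iteration structure are fused; an intervening Cluster with a different key breaks the run. A toy version with (key, exprs) pairs:

from itertools import groupby

def fuse_pairs(items):
    """items: (key, exprs) pairs; fuse adjacent pairs sharing the same key."""
    return [(k, sum((exprs for _, exprs in g), []))
            for k, g in groupby(items, key=lambda i: i[0])]

items = [('xy', ['e0']), ('xy', ['e1']), ('x', ['e2']), ('xy', ['e3'])]
assert fuse_pairs(items) == [('xy', ['e0', 'e1']), ('x', ['e2']),
                             ('xy', ['e3'])]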
Example #7
def clusterize(exprs):
    """
    Turn a sequence of LoweredEqs into a sequence of Clusters.
    """
    # Initialization
    clusters = [Cluster(e, e.ispace, e.dspace) for e in exprs]

    # Setup the IterationSpaces based on data dependence analysis
    clusters = Schedule().process(clusters)

    # Handle ConditionalDimensions
    clusters = guard(clusters)

    # Determine relevant computational properties (e.g., parallelism)
    clusters = analyze(clusters)

    return ClusterGroup(clusters)
Example #8
def clusterize(exprs, dse_mode=None):
    """
    Turn a sequence of LoweredEqs into a sequence of Clusters.
    """
    # Initialization
    clusters = [Cluster(e, e.ispace, e.dspace) for e in exprs]

    # Compute a topological ordering that honours flow- and anti-dependences
    clusters = Toposort().process(clusters)

    # Setup the IterationSpaces based on data dependence analysis
    clusters = Schedule().process(clusters)

    # Introduce conditional Clusters
    clusters = guard(clusters)

    # Apply optimizations
    clusters = optimize(clusters, dse_mode)

    return ClusterGroup(clusters)
Example #9
def clusterize(exprs):
    """
    Turn a sequence of LoweredEqs into a sequence of Clusters.
    """
    # Initialization
    clusters = [Cluster(e, e.ispace, e.dspace) for e in exprs]

    # Compute a topological ordering that honours flow- and anti-dependences
    clusters = Toposort().process(clusters)

    # Setup the IterationSpaces based on data dependence analysis
    clusters = Schedule().process(clusters)

    # Introduce conditional Clusters
    clusters = guard(clusters)

    # Determine relevant computational properties (e.g., parallelism)
    clusters = analyze(clusters)

    return ClusterGroup(clusters)
Example #10
def fuse(clusters):
    """
    Fuse sub-sequences of Clusters with compatible IterationSpace.
    """
    processed = []
    for k, g in groupby(clusters, key=lambda c: set(c.itintervals)):
        maybe_fusible = list(g)

        if len(maybe_fusible) == 1 or any(c.guards for c in maybe_fusible):
            processed.extend(maybe_fusible)
        else:
            try:
                # Perform fusion
                fused = Cluster.from_clusters(*maybe_fusible)
                processed.append(fused)
            except ValueError:
                # We end up here if, for example, some Clusters have the same
                # iteration Dimensions but different (partial) orderings
                processed.extend(maybe_fusible)

    return processed
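
Note that `set(c.itintervals)` is a legal `groupby` key even though sets are unhashable: `groupby` only compares consecutive keys with `==` and never hashes them. The `try/except` is plain EAFP; a stripped-down sketch of the fallback (the `fuse_all` helper is hypothetical):

from itertools import groupby

def fuse_all(group):
    # Hypothetical stand-in for Cluster.from_clusters: reject mixed orderings
    if len({tuple(i) for i in group}) > 1:
        raise ValueError("incompatible orderings")
    return group[0]

processed = []
for k, g in groupby([['x', 'y'], ['y', 'x']], key=set):
    group = list(g)
    try:
        processed.append(fuse_all(group))
    except ValueError:
        processed.extend(group)

# Same Dimensions, different orderings: kept separate rather than fused
assert processed == [['x', 'y'], ['y', 'x']]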
Example #11
    def callback(self, clusters, prefix):
        if not prefix:
            # No iteration space to be lifted from
            return clusters

        hope_invariant = {i.dim for i in prefix}

        lifted = []
        processed = []
        for n, c in enumerate(clusters):
            # Increments prevent lifting
            if c.has_increments:
                processed.append(c)
                continue

            # Is `c` a real candidate? It must not use any of the
            # would-be invariant Dimensions
            if c.used_dimensions & hope_invariant:
                processed.append(c)
                continue

            impacted = set(processed) | set(clusters[n + 1:])

            # None of the Functions appearing in a lifted Cluster can be written to
            if any(c.functions & set(i.scope.writes) for i in impacted):
                processed.append(c)
                continue

            # Scalars prevent lifting if they are read by another Cluster
            swrites = {f for f in c.scope.writes if f.is_Scalar}
            if any(swrites & set(i.scope.reads) for i in impacted):
                processed.append(c)
                continue

            # Perform lifting, which requires contracting the iteration space
            key = lambda d: d not in hope_invariant
            ispace = c.ispace.project(key).reset()
            dspace = c.dspace.project(key).reset()
            lifted.append(Cluster(c.exprs, ispace, dspace, c.guards))

        return lifted + processed
Example #12
def merge(clusters):
    """
    Given an ordered collection of :class:`Cluster` objects, return a
    (potentially) smaller sequence in which clusters with identical stencil
    have been merged into a single :class:`Cluster`.
    """
    mapper = OrderedDict()
    for c in clusters:
        mapper.setdefault((c.stencil.entries, c.atomics), []).append(c)

    processed = []
    for (entries, atomics), clusters in mapper.items():
        # Eliminate redundant temporaries
        temporaries = OrderedDict()
        for c in clusters:
            for k, v in c.trace.items():
                if k not in temporaries:
                    temporaries[k] = v
        # Squash the clusters together
        processed.append(Cluster(temporaries.values(), Stencil(entries), atomics))

    return processed
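
The structure here is the usual dict-of-lists grouping idiom, followed by a squash in which only the first definition of each temporary is kept. A stand-in sketch (keys and definitions are illustrative):

from collections import OrderedDict

def merge_by_key(items):
    """items: (key, {name: definition}) pairs; merge items sharing a key."""
    mapper = OrderedDict()
    for key, defs in items:
        mapper.setdefault(key, []).append(defs)

    processed = []
    for key, group in mapper.items():
        merged = OrderedDict()
        for defs in group:
            for name, value in defs.items():
                merged.setdefault(name, value)  # first definition wins
        processed.append((key, merged))
    return processed

out = merge_by_key([('s0', {'t0': 'a+b'}), ('s0', {'t0': 'a+b', 't1': 'c'})])
assert out == [('s0', OrderedDict([('t0', 'a+b'), ('t1', 'c')]))]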
Example #13
    def callback(self, clusters, prefix, backlog=None, known_break=None):
        if not prefix:
            return clusters

        known_break = known_break or set()
        backlog = backlog or []

        # Take the innermost Dimension -- no Clusters other than those in
        # `clusters` are expected to share it
        candidates = prefix[-1].dim._defines

        scope = Scope(exprs=flatten(c.exprs for c in clusters))

        # The nastiest case:
        # eq0 := u[t+1, x] = ... u[t, x]
        # eq1 := v[t+1, x] = ... v[t, x] ... u[t, x] ... u[t+1, x] ... u[t+2, x]
        # Here, `eq0` marches forward along `t`, while `eq1` has both a flow and an
        # anti dependence with `eq0`, which ultimately will require `eq1` to go in
        # a separate t-loop
        require_break = (scope.d_flow.cause & scope.d_anti.cause) & candidates
        if require_break and len(clusters) > 1:
            backlog = [clusters[-1]] + backlog
            # Try with increasingly smaller Cluster groups until the ambiguity is resolved
            return self.callback(clusters[:-1], prefix, backlog, require_break)

        # If the flow- or anti-dependences are not coupled, one or more Clusters
        # might be scheduled separately, to increase parallelism (this is basically
        # what low-level compilers call "loop fission")
        for n, _ in enumerate(clusters):
            d_cross = scope.d_from_access(scope.a_query(n, 'R')).cross()
            if any(d.is_storage_volatile(candidates) for d in d_cross):
                break
            elif d_cross.cause & candidates:
                if n > 0:
                    return self.callback(
                        clusters[:n], prefix, clusters[n:] + backlog,
                        (d_cross.cause & candidates) | known_break)
                break

        # Compute iteration direction
        direction = {
            d: Backward
            for d in candidates if d.root in scope.d_anti.cause
        }
        direction.update(
            {d: Forward
             for d in candidates if d.root in scope.d_flow.cause})
        direction.update(
            {d: Forward
             for d in candidates if d not in direction})

        # Enforce iteration direction on each Cluster
        processed = []
        for c in clusters:
            ispace = IterationSpace(c.ispace.intervals, c.ispace.sub_iterators,
                                    {
                                        **c.ispace.directions,
                                        **direction
                                    })
            processed.append(Cluster(c.exprs, ispace, c.dspace))

        if not backlog:
            return processed

        # Handle the backlog -- the Clusters characterized by flow- and anti-dependences
        # along one or more Dimensions
        direction = {d: Any for d in known_break}
        for i, c in enumerate(list(backlog)):
            ispace = IterationSpace(c.ispace.intervals.lift(known_break),
                                    c.ispace.sub_iterators, {
                                        **c.ispace.directions,
                                        **direction
                                    })
            dspace = c.dspace.lift(known_break)
            backlog[i] = Cluster(c.exprs, ispace, dspace)

        return processed + self.callback(backlog, prefix)
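
The direction assignment encodes a precedence: Dimensions causing anti-dependences get Backward, flow-dependences override that with Forward, and anything unconstrained defaults to Forward. A self-contained sketch of just that dict-building step (strings stand in for Devito's direction objects, and the `d.root` indirection is dropped for brevity):

Forward, Backward = 'Forward', 'Backward'

def iteration_direction(candidates, anti_cause, flow_cause):
    direction = {d: Backward for d in candidates if d in anti_cause}
    direction.update({d: Forward for d in candidates if d in flow_cause})
    direction.update({d: Forward for d in candidates if d not in direction})
    return direction

assert iteration_direction({'t', 'x'}, anti_cause={'t'}, flow_cause=set()) \
    == {'t': Backward, 'x': Forward}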
Example #14
    def callback(self, clusters, prefix, backlog=None, known_break=None):
        if not prefix:
            return clusters

        known_break = known_break or set()
        backlog = backlog or []

        # Take the innermost Dimension -- no Clusters other than those in
        # `clusters` are expected to share it
        candidates = prefix[-1].dim._defines

        scope = Scope(exprs=flatten(c.exprs for c in clusters))

        # Handle the nastiest case -- ambiguity due to the presence of both a
        # flow- and an anti-dependence.
        #
        # Note: in most cases, `scope.d_anti.cause == {}` -- either because
        # `scope.d_anti == {}` or because the few anti-dependences are not
        # carried along any Dimension. We exploit this observation to compute
        # `d_flow`, which may be expensive, only when strictly necessary
        maybe_break = scope.d_anti.cause & candidates
        if len(clusters) > 1 and maybe_break:
            require_break = scope.d_flow.cause & maybe_break
            if require_break:
                backlog = [clusters[-1]] + backlog
                # Try with increasingly smaller ClusterGroups until the ambiguity is gone
                return self.callback(clusters[:-1], prefix, backlog,
                                     require_break)

        # Schedule Clusters over different IterationSpaces if this increases parallelism
        for i in range(1, len(clusters)):
            if self._break_for_parallelism(scope, candidates, i):
                return self.callback(clusters[:i], prefix,
                                     clusters[i:] + backlog,
                                     candidates | known_break)

        # Compute iteration direction
        idir = {
            d: Backward
            for d in candidates if d.root in scope.d_anti.cause
        }
        if maybe_break:
            idir.update({
                d: Forward
                for d in candidates if d.root in scope.d_flow.cause
            })
        idir.update({d: Forward for d in candidates if d not in idir})

        # Enforce iteration direction on each Cluster
        processed = []
        for c in clusters:
            ispace = IterationSpace(c.ispace.intervals, c.ispace.sub_iterators,
                                    {
                                        **c.ispace.directions,
                                        **idir
                                    })
            processed.append(Cluster(c.exprs, ispace, c.dspace))

        if not backlog:
            return processed

        # Handle the backlog -- the Clusters characterized by flow- and anti-dependences
        # along one or more Dimensions
        idir = {d: Any for d in known_break}
        for i, c in enumerate(list(backlog)):
            ispace = IterationSpace(c.ispace.intervals.lift(known_break),
                                    c.ispace.sub_iterators, {
                                        **c.ispace.directions,
                                        **idir
                                    })
            dspace = c.dspace.lift(known_break)
            backlog[i] = Cluster(c.exprs, ispace, dspace)

        return processed + self.callback(backlog, prefix)
Example #15
    def callback(self, clusters, prefix, backlog=None, known_flow_break=None):
        if not prefix:
            return clusters

        # Take the innermost Dimension -- no Clusters other than those in
        # `clusters` are expected to share it
        candidates = prefix[-1].dim._defines

        scope = Scope(exprs=flatten(c.exprs for c in clusters))

        # The nastiest case:
        # eq0 := u[t+1, x] = ... u[t, x]
        # eq1 := v[t+1, x] = ... v[t, x] ... u[t, x] ... u[t+1, x] ... u[t+2, x]
        # Here, `eq0` marches forward along `t`, while `eq1` has both a flow and an
        # anti dependence with `eq0`, which ultimately will require `eq1` to go in
        # a separate t-loop
        require_flow_break = (scope.d_flow.cause
                              & scope.d_anti.cause) & candidates
        if require_flow_break and len(clusters) > 1:
            backlog = [clusters[-1]] + (backlog or [])
            # Try with increasingly smaller Cluster groups until the ambiguity is resolved
            return self.callback(clusters[:-1], prefix, backlog,
                                 require_flow_break)

        # Compute iteration direction
        direction = {
            d: Backward
            for d in candidates if d.root in scope.d_anti.cause
        }
        direction.update(
            {d: Forward
             for d in candidates if d.root in scope.d_flow.cause})
        direction.update(
            {d: Forward
             for d in candidates if d not in direction})

        # Enforce iteration direction on each Cluster
        processed = []
        for c in clusters:
            ispace = IterationSpace(c.ispace.intervals, c.ispace.sub_iterators,
                                    {
                                        **c.ispace.directions,
                                        **direction
                                    })
            processed.append(Cluster(c.exprs, ispace, c.dspace))

        if backlog is None:
            return processed

        # Handle the backlog -- the Clusters characterized by flow+anti dependences along
        # one or more Dimensions
        direction = {d: Any for d in known_flow_break}
        for i, c in enumerate(as_tuple(backlog)):
            ispace = IterationSpace(c.ispace.intervals.lift(known_flow_break),
                                    c.ispace.sub_iterators, {
                                        **c.ispace.directions,
                                        **direction
                                    })
            backlog[i] = Cluster(c.exprs, ispace, c.dspace)

        return processed + self.callback(backlog, prefix)
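
Across these variants, the backlog mechanism is the same: Clusters are peeled off the tail until the flow/anti ambiguity disappears, the surviving group is processed, and the procedure recurs on what was peeled off. A generic sketch of that shrink-then-recurse control flow (the `conflicting` predicate stands in for the dependence test):

def schedule(group, conflicting, backlog=None):
    backlog = backlog or []

    # Peel Clusters off the tail until the group is conflict-free
    if len(group) > 1 and conflicting(group):
        return schedule(group[:-1], conflicting, [group[-1]] + backlog)

    processed = list(group)  # direction enforcement would happen here
    if not backlog:
        return processed
    # Recur on the peeled-off Clusters as an independent group
    return processed + schedule(backlog, conflicting)

# `a` and `c` conflict when scheduled together, so `c` is peeled off
conflicting = lambda g: {'a', 'c'} <= set(g)
assert schedule(['a', 'b', 'c'], conflicting) == ['a', 'b', 'c']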