def callback(self, clusters, prefix):
    if not prefix or len(clusters) == 1:
        return clusters

    d = prefix[-1].dim

    # Do not waste time if definitely illegal
    if any(SEQUENTIAL in c.properties[d] for c in clusters):
        return clusters

    # Do not waste time if definitely nothing to do
    if all(len(prefix) == len(c.itintervals) for c in clusters):
        return clusters

    # Analyze and abort if fissioning would break a dependence
    scope = Scope(flatten(c.exprs for c in clusters))
    if any(d._defines & dep.cause or dep.is_reduce(d) for dep in scope.d_all_gen()):
        return clusters

    processed = []
    for k, g in groupby(clusters, key=lambda c: self._key(c, len(prefix))):
        it, _ = k
        group = list(g)

        if any(SEQUENTIAL in c.properties[it.dim] for c in group):
            # Heuristic: no gain from fissioning if unable to ultimately
            # increase the number of collapsable iteration spaces, hence give up
            processed.extend(group)
        else:
            stamp = Stamp()
            for c in group:
                ispace = c.ispace.lift(d, stamp)
                dspace = c.dspace.lift(d, stamp)
                processed.append(c.rebuild(ispace=ispace, dspace=dspace))

    return processed
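
# --- A minimal, standalone sketch of the grouping step above (hypothetical
# `ToyCluster`/`key` names, not Devito's actual types). `itertools.groupby`
# only merges *consecutive* items with equal keys, which matters here because
# cluster order encodes data dependences: only adjacent clusters sharing the
# same scheduling key may stay fused; everything else is fissioned apart.

from collections import namedtuple
from itertools import groupby

ToyCluster = namedtuple('ToyCluster', 'name key')


def fission(clusters):
    """Split a sequence of clusters into maximal runs sharing the same key."""
    return [(k, [c.name for c in g])
            for k, g in groupby(clusters, key=lambda c: c.key)]


clusters = [ToyCluster('c0', 'x'), ToyCluster('c1', 'x'),
            ToyCluster('c2', 'y'), ToyCluster('c3', 'x')]
assert fission(clusters) == [('x', ['c0', 'c1']), ('y', ['c2']), ('x', ['c3'])]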
def callback(self, clusters, prefix, backlog=None, known_break=None):
    if not prefix:
        return clusters

    known_break = known_break or set()
    backlog = backlog or []

    # Take the innermost Dimension -- no other Clusters other than those in
    # `clusters` are supposed to share it
    candidates = prefix[-1].dim._defines

    scope = self._fetch_scope(clusters)

    # Handle the nastiest case -- ambiguity due to the presence of both a
    # flow- and an anti-dependence.
    #
    # Note: in most cases, `scope.d_anti.cause == {}` -- either because
    # `scope.d_anti == {}` or because the few anti dependences are not carried
    # in any Dimension. We exploit this observation so that we only compute
    # `d_flow`, which instead may be expensive, when strictly necessary
    maybe_break = scope.d_anti.cause & candidates
    if len(clusters) > 1 and maybe_break:
        require_break = scope.d_flow.cause & maybe_break
        if require_break:
            backlog = [clusters[-1]] + backlog
            # Try with increasingly smaller ClusterGroups until the ambiguity is gone
            return self.callback(clusters[:-1], prefix, backlog, require_break)

    # Schedule Clusters over different IterationSpaces if this increases parallelism
    for i in range(1, len(clusters)):
        if self._break_for_parallelism(scope, candidates, i):
            return self.callback(clusters[:i], prefix, clusters[i:] + backlog,
                                 candidates | known_break)

    # Compute iteration direction
    idir = {d: Backward for d in candidates if d.root in scope.d_anti.cause}
    if maybe_break:
        idir.update({d: Forward for d in candidates if d.root in scope.d_flow.cause})
    idir.update({d: Forward for d in candidates if d not in idir})

    # Enforce iteration direction on each Cluster
    processed = []
    for c in clusters:
        ispace = IterationSpace(c.ispace.intervals, c.ispace.sub_iterators,
                                {**c.ispace.directions, **idir})
        processed.append(c.rebuild(ispace=ispace))

    if not backlog:
        return processed

    # Handle the backlog -- the Clusters characterized by flow- and anti-dependences
    # along one or more Dimensions
    idir = {d: Any for d in known_break}
    stamp = Stamp()
    for i, c in enumerate(list(backlog)):
        ispace = IterationSpace(c.ispace.intervals.lift(known_break, stamp),
                                c.ispace.sub_iterators,
                                {**c.ispace.directions, **idir})
        backlog[i] = c.rebuild(ispace=ispace)

    return processed + self.callback(backlog, prefix)
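
# --- A standalone sketch (plain strings, not Devito's API) of the direction
# resolution rule used above: a candidate Dimension carrying an anti-dependence
# is iterated Backward, a flow-dependence forces Forward, and anything left
# unconstrained defaults to Forward. This simplifies the real code in one way:
# there, a Dimension carrying *both* kinds of dependence is ambiguous and is
# handled by splitting the cluster group (the `backlog` path) rather than by
# picking a direction.

Forward, Backward = 'Forward', 'Backward'


def resolve_directions(candidates, anti_cause, flow_cause):
    idir = {d: Backward for d in candidates if d in anti_cause}
    # Flow dependences take precedence over anti-dependences
    idir.update({d: Forward for d in candidates if d in flow_cause})
    # Unconstrained Dimensions default to Forward
    idir.update({d: Forward for d in candidates if d not in idir})
    return idir


assert resolve_directions({'t', 'x', 'y'}, anti_cause={'x'}, flow_cause={'t'}) == \
    {'x': 'Backward', 't': 'Forward', 'y': 'Forward'}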
def lift(self, v=None):
    if v is None:
        v = Stamp()
    return Interval(self.dim, self.lower, self.upper, v)
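
# --- A toy illustration of why `lift` defaults to a fresh Stamp (plain
# dataclasses, not Devito's types): two intervals with identical bounds compare
# equal until one of them is lifted, after which they are treated as distinct
# iteration spaces -- this is how the fission and backlog passes above prevent
# loops from being fused back together.

import itertools
from dataclasses import dataclass, replace

_stamps = itertools.count(1)


@dataclass(frozen=True)
class ToyInterval:
    dim: str
    lower: int
    upper: int
    stamp: int = 0

    def lift(self, v=None):
        # No stamp given: mint a brand new, globally unique one
        if v is None:
            v = next(_stamps)
        return replace(self, stamp=v)


a = ToyInterval('x', 0, 4)
b = ToyInterval('x', 0, 4)
assert a == b          # same bounds, same stamp: indistinguishable
assert a != a.lift()   # lifted: same bounds, but a distinct iteration space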
def lower_aliases(aliases, meta, maxpar):
    """
    Create a Schedule from an AliasList.
    """
    stampcache = {}
    dmapper = {}
    processed = []
    for a in aliases:
        imapper = {**{i.dim: i for i in a.intervals},
                   **{i.dim.parent: i for i in a.intervals
                      if i.dim.is_NonlinearDerived}}

        intervals = []
        writeto = []
        sub_iterators = {}
        indicess = [[] for _ in a.distances]

        for i in meta.ispace:
            try:
                interval = imapper[i.dim]
            except KeyError:
                if i.dim in a.free_symbols:
                    # Special case: the Dimension appears within the alias but
                    # not as an Indexed index. Then, it needs to be added to
                    # the `writeto` region too
                    interval = i
                else:
                    # E.g., `x0_blk0` or (`a[y_m+1]` => `y not in imapper`)
                    intervals.append(i)
                    continue

            if not (writeto or
                    interval != interval.zero() or
                    (maxpar and SEQUENTIAL not in meta.properties.get(i.dim))):
                # The alias doesn't require a temporary Dimension along i.dim
                intervals.append(i)
                continue

            assert not i.dim.is_NonlinearDerived

            # `i.dim` is necessarily part of the write-to region, so
            # we have to adjust the Interval's stamp. For example, consider
            # `i=x[0,0]<1>` and `interval=x[-4,4]<0>`; here we need to
            # use `<1>` as stamp, which is what appears in `ispace`
            interval = interval.lift(i.stamp)

            # We further bump the interval stamp if we were requested to trade
            # fusion for more collapse-parallelism
            if maxpar:
                stamp = stampcache.setdefault(interval.dim, Stamp())
                interval = interval.lift(stamp)

            writeto.append(interval)
            intervals.append(interval)

            if i.dim.is_Incr:
                # Suitable IncrDimensions must be used to avoid OOB accesses.
                # E.g., r[xs][ys][z] => both `xs` and `ys` must be initialized such
                # that all accesses are within bounds. This requires traversing the
                # hierarchy of IncrDimensions to set `xs` (`ys`) in a way that
                # consecutive blocks access consecutive regions in `r` (e.g.,
                # `xs=x0_blk1-x0_blk0` with `blocklevels=2`; `xs=0` with
                # `blocklevels=1`, that is it degenerates in this case)
                try:
                    d = dmapper[i.dim]
                except KeyError:
                    dd = i.dim.parent
                    assert dd.is_Incr
                    if dd.parent.is_Incr:
                        # An IncrDimension in between IncrDimensions
                        m = i.dim.symbolic_min - i.dim.parent.symbolic_min
                    else:
                        m = 0
                    d = dmapper[i.dim] = IncrDimension("%ss" % i.dim.name, i.dim, m,
                                                       dd.symbolic_size, 1, dd.step)
                sub_iterators[i.dim] = d
            else:
                d = i.dim

            # Given the iteration `interval`, lower distances to indices
            for distance, indices in zip(a.distances, indicess):
                try:
                    indices.append(d - interval.lower + distance[interval.dim])
                except TypeError:
                    indices.append(d)

        # The alias write-to space
        writeto = IterationSpace(IntervalGroup(writeto), sub_iterators)

        # The alias iteration space
        ispace = IterationSpace(IntervalGroup(intervals, meta.ispace.relations),
                                meta.ispace.sub_iterators,
                                meta.ispace.directions)
        ispace = ispace.augment(sub_iterators)

        processed.append(
            ScheduledAlias(a.pivot, writeto, ispace, a.aliaseds, indicess, a.score))

    # The ScheduledAliases must be ordered so as to reuse as many of the
    # `ispace`'s IterationIntervals as possible in order to honor the
    # write-to region. Another fundamental reason for ordering is to ensure
    # deterministic code generation
    processed = sorted(processed, key=lambda i: cit(meta.ispace, i.ispace))

    return Schedule(*processed, dmapper=dmapper)
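
# --- A worked instance of the index-lowering line above,
# `d - interval.lower + distance[interval.dim]`, with made-up numbers. Suppose
# the write-to Interval along x is x[-4,4] (so `interval.lower == -4`) and one
# aliasing expression sits at distance -1 along x. The access at point `x - 1`
# then lands at offset `x + 3` within the temporary, whose extent covers the
# halo.

from sympy import Symbol

x = Symbol('x')
lower, distance = -4, -1
assert x - lower + distance == x + 3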
import abc

from cached_property import cached_property
from sympy import Expr

from devito.ir.support.vector import Vector, vmin, vmax
from devito.tools import (PartialOrderTuple, Stamp, as_list, as_tuple,
                          filter_ordered, frozendict, is_integer, toposort)
from devito.types import Dimension, ModuloDimension

__all__ = ['NullInterval', 'Interval', 'IntervalGroup', 'IterationSpace',
           'DataSpace', 'Forward', 'Backward', 'Any']


# The default Stamp, used by all new Intervals
S0 = Stamp()


class AbstractInterval(object):

    """
    An abstract representation of an iterated closed interval on Z.
    """

    __metaclass__ = abc.ABCMeta

    is_Null = False
    is_Defined = False

    def __init__(self, dim, stamp=S0):
        self.dim = dim
        self.stamp = stamp