def callback(self, clusters, prefix):
    """
    Skew the Clusters' iteration Intervals along the Dimension `d` nested
    innermost in `prefix`, rewriting accesses as `d -> d - skew_dim` so that
    the loop nest becomes wavefront-schedulable.

    Returns `clusters` unchanged unless `d` is SKEWABLE and there is exactly
    one surrounding SEQUENTIAL Dimension to skew against.
    """
    if not prefix:
        return clusters

    d = prefix[-1].dim

    processed = []
    for c in clusters:
        if SKEWABLE not in c.properties[d]:
            return clusters

        skew_dims = {i.dim for i in c.ispace
                     if SEQUENTIAL in c.properties[i.dim]}
        # Exactly one SEQUENTIAL Dimension must act as the skewing factor.
        # Using `!= 1` (rather than `> 1`) also guards against an empty set,
        # which would otherwise make the `pop()` below raise a KeyError
        if len(skew_dims) != 1:
            return clusters
        skew_dim = skew_dims.pop()

        # Since we are here, prefix is skewable and nested under a SEQUENTIAL loop
        intervals = []
        for i in c.ispace:
            # Only skew non-block Dimensions or the outermost block level
            if i.dim is d and (not d.is_Block or d._depth == 1):
                intervals.append(Interval(d, skew_dim, skew_dim))
            else:
                intervals.append(i)
        intervals = IntervalGroup(intervals, relations=c.ispace.relations)
        ispace = IterationSpace(intervals, c.ispace.sub_iterators,
                                c.ispace.directions)

        # Rewrite the array accesses consistently with the skewed Intervals
        exprs = xreplace_indices(c.exprs, {d: d - skew_dim})
        processed.append(c.rebuild(exprs=exprs, ispace=ispace))

    return processed
def clusterize(exprs):
    """
    Group a sequence of :class:`ir.Eq`s into one or more :class:`Cluster`s.

    Each tensor expression, together with the scalar expressions that
    precede it, becomes a PartialCluster; PartialClusters are then fused
    where legal and guarded where conditional.
    """
    clusters = ClusterGroup()
    # Global flow map: used to make iteration directions mutually consistent
    flowmap = detect_flow_directions(exprs)
    prev = None
    for idx, e in enumerate(exprs):
        if e.is_Tensor:
            # The scalars since the last tensor expression feed into `e`
            scalars = [i for i in exprs[prev:idx] if i.is_Scalar]

            # Iteration space
            ispace = IterationSpace.merge(e.ispace, *[i.ispace for i in scalars])

            # Enforce iteration directions
            fdirs, _ = force_directions(flowmap, lambda d: ispace.directions.get(d))
            ispace = IterationSpace(ispace.intervals, ispace.sub_iterators, fdirs)

            # Data space
            dspace = DataSpace.merge(e.dspace, *[i.dspace for i in scalars])

            # Prepare for next range
            prev = idx

            clusters.append(PartialCluster(scalars + [e], ispace, dspace))

    # Group PartialClusters together where possible
    clusters = groupby(clusters)

    # Introduce conditional PartialClusters
    clusters = guard(clusters)

    return clusters.finalize()
def ispace(self):
    """
    The union of the iteration spaces of all Clusters in this group.

    An empty group yields an empty IterationSpace.
    """
    if not self:
        return IterationSpace([])
    spaces = [c.ispace for c in self]
    return IterationSpace.generate('intersection', *spaces)
def decompose(ispace, d, block_dims): """ Create a new IterationSpace in which the `d` Interval is decomposed into a hierarchy of Intervals over ``block_dims``. """ # Create the new Intervals intervals = [] for i in ispace: if i.dim is d: intervals.append(i.switch(block_dims[0])) intervals.extend([i.switch(bd).zero() for bd in block_dims[1:]]) else: intervals.append(i) # Create the relations. # Example: consider the relation `(t, x, y)` and assume we decompose `x` over # `xbb, xb, xi`; then we decompose the relation as two relations, `(t, xbb, y)` # and `(xbb, xb, xi)` relations = [block_dims] for r in ispace.intervals.relations: relations.append([block_dims[0] if i is d else i for i in r]) # The level of a given Dimension in the hierarchy of block Dimensions level = lambda dim: len([i for i in dim._defines if i.is_Incr]) # Add more relations for n, i in enumerate(ispace): if i.dim is d: continue elif i.dim.is_Incr: # Make sure IncrDimensions on the same level stick next to each other. # For example, we want `(t, xbb, ybb, xb, yb, x, y)`, rather than say # `(t, xbb, xb, x, ybb, ...)` for bd in block_dims: if level(i.dim) >= level(bd): relations.append([bd, i.dim]) else: relations.append([i.dim, bd]) elif n > ispace.intervals.index(d): # The non-Incr subsequent Dimensions must follow the block Dimensions for bd in block_dims: relations.append([bd, i.dim]) else: # All other Dimensions must precede the block Dimensions for bd in block_dims: relations.append([i.dim, bd]) intervals = IntervalGroup(intervals, relations=relations) sub_iterators = dict(ispace.sub_iterators) sub_iterators.pop(d, None) sub_iterators.update({bd: ispace.sub_iterators.get(d, []) for bd in block_dims}) directions = dict(ispace.directions) directions.pop(d) directions.update({bd: ispace.directions[d] for bd in block_dims}) return IterationSpace(intervals, sub_iterators, directions)
def __new__(cls, *args, **kwargs):
    """
    Lower an Eq into a LoweredEq: indexify, apply substitutions, then
    attach metadata (iteration space, dimension ordering, increment flag,
    reads/writes).
    """
    # Parse input
    if len(args) == 1:
        input_expr = args[0]
        assert type(input_expr) != LoweredEq
        assert isinstance(input_expr, Eq)
    elif len(args) == 2:
        # Reconstructing from existing Eq. E.g., we end up here after xreplace
        stamp = kwargs.pop('stamp')
        expr = Eq.__new__(cls, *args, evaluate=False)
        assert isinstance(stamp, Eq)
        # Inherit the metadata from the stamp expression
        expr.is_Increment = stamp.is_Increment
        expr.ispace = stamp.ispace
        return expr
    else:
        raise ValueError("Cannot construct LoweredEq from args=%s "
                         "and kwargs=%s" % (str(args), str(kwargs)))

    # Indexification
    expr = indexify(input_expr)

    # Apply caller-provided substitution
    subs = kwargs.get('subs')
    if subs is not None:
        expr = expr.xreplace(subs)

    # Well-defined dimension ordering (time Dimensions first)
    ordering = dimension_sort(expr, key=lambda i: not i.is_Time)

    # Introduce space sub-dimensions if need to
    region = getattr(input_expr, '_region', DOMAIN)
    if region == INTERIOR:
        mapper = {i: SubDimension("%si" % i, i, 1, -1)
                  for i in ordering if i.is_Space}
        expr = expr.xreplace(mapper)
        ordering = [mapper.get(i, i) for i in ordering]

    # Compute iteration space
    intervals, iterators = compute_intervals(expr)
    intervals = sorted(intervals, key=lambda i: ordering.index(i.dim))
    directions, _ = compute_directions(expr, lambda i: Any)
    ispace = IterationSpace([i.negate() for i in intervals], iterators, directions)

    # Finally create the LoweredEq with all metadata attached
    expr = super(LoweredEq, cls).__new__(cls, expr.lhs, expr.rhs, evaluate=False)
    expr.is_Increment = getattr(input_expr, 'is_Increment', False)
    expr.ispace = ispace
    expr.dimensions = ordering
    expr.reads, expr.writes = detect_io(expr)

    return expr
def __new__(cls, *args, **kwargs):
    """
    Lower an Eq into a LoweredEq, attaching iteration space, data space,
    conditionals, and reads/writes metadata.
    """
    if len(args) == 1 and isinstance(args[0], LoweredEq):
        # origin: LoweredEq(devito.LoweredEq, **kwargs)
        input_expr = args[0]
        expr = Eq.__new__(cls, *input_expr.args, evaluate=False)
        for i in cls._state:
            # Caller-provided metadata takes precedence over the input's
            setattr(expr, '_%s' % i, kwargs.get(i) or getattr(input_expr, i))
        return expr
    elif len(args) == 1 and isinstance(args[0], Eq):
        # origin: LoweredEq(sympy.Eq)
        input_expr = expr = args[0]
    elif len(args) == 2:
        expr = Eq.__new__(cls, *args, evaluate=False)
        for i in cls._state:
            setattr(expr, '_%s' % i, kwargs.pop(i))
        return expr
    else:
        raise ValueError("Cannot construct LoweredEq from args=%s "
                         "and kwargs=%s" % (str(args), str(kwargs)))

    # Well-defined dimension ordering
    ordering = dimension_sort(expr)

    # Analyze the expression
    mapper = detect_accesses(expr)
    oobs = detect_oobs(mapper)
    conditionals = [i for i in ordering if i.is_Conditional]

    # The iteration space is constructed so that information always flows
    # from an iteration to another (i.e., no anti-dependences are created)
    directions, _ = force_directions(detect_flow_directions(expr), lambda i: Any)
    iterators = build_iterators(mapper)
    intervals = build_intervals(Stencil.union(*mapper.values()))
    intervals = IntervalGroup(intervals, relations=ordering.relations)
    ispace = IterationSpace(intervals.zero(), iterators, directions)

    # The data space is relative to the computational domain. Note that we
    # are deliberately dropping the intervals ordering (by turning `intervals`
    # into a list), as this is irrelevant (even more: dangerous) for data spaces
    intervals = [i if i.dim in oobs else i.zero() for i in intervals]
    intervals += [Interval(i, 0, 0) for i in ordering
                  if i not in ispace.dimensions + conditionals]
    parts = {k: IntervalGroup(build_intervals(v)) for k, v in mapper.items() if k}
    dspace = DataSpace(intervals, parts)

    # Finally create the LoweredEq with all metadata attached
    expr = super(LoweredEq, cls).__new__(cls, expr.lhs, expr.rhs, evaluate=False)
    expr._is_Increment = getattr(input_expr, 'is_Increment', False)
    expr._dspace = dspace
    expr._ispace = ispace
    expr._conditionals = tuple(conditionals)
    expr._reads, expr._writes = detect_io(expr)

    return expr
def decompose(ispace, d, block_dims): """ Create a new IterationSpace in which the `d` Interval is decomposed into a hierarchy of Intervals over ``block_dims``. """ # Create the new Intervals intervals = [] for i in ispace: if i.dim is d: intervals.append(i.switch(block_dims[0])) intervals.extend([i.switch(bd).zero() for bd in block_dims[1:]]) else: intervals.append(i) # Create the intervals relations # 1: `bbd > bd > d` relations = [tuple(block_dims)] # 2: Suitably replace `d` with all `bd`'s for r in ispace.relations: if d not in r: relations.append(r) continue for bd in block_dims: # Avoid e.g. `x > yb` if any(i._depth < bd._depth for i in r if i.is_Block): continue relations.append(tuple(bd if i is d else i for i in r)) # 3: Make sure BlockDimensions at same depth stick next to each other # E.g., `(t, xbb, ybb, xb, yb, x, y)`, and NOT e.g. `(t, xbb, xb, x, ybb, ...)` # NOTE: this is perfectly legal since: # TILABLE => (perfect nest & PARALLEL) => interchangeable for i in ispace.itdimensions: if not i.is_Block: continue for bd in block_dims: if i._depth < bd._depth: relations.append((i, bd)) intervals = IntervalGroup(intervals, relations=relations) sub_iterators = dict(ispace.sub_iterators) sub_iterators.pop(d, None) sub_iterators.update({bd: () for bd in block_dims[:-1]}) sub_iterators.update({block_dims[-1]: ispace.sub_iterators[d]}) directions = dict(ispace.directions) directions.pop(d) directions.update({bd: ispace.directions[d] for bd in block_dims}) return IterationSpace(intervals, sub_iterators, directions)
def decompose(ispace, d, block_dims): """ Create a new IterationSpace in which the `d` Interval is decomposed into a hierarchy of Intervals over ``block_dims``. """ # Create the new Intervals intervals = [] for i in ispace.intervals: if i.dim is d: intervals.append(i.switch(block_dims[0])) intervals.extend([i.switch(bd).zero() for bd in block_dims[1:]]) else: intervals.append(i) # Create the new "decomposed" relations. # Example: consider the relation `(t, x, y)` and assume we decompose `x` over # `xbb, xb, xi`; then we decompose the relation as two relations, `(t, xbb, y)` # and `(xbb, xb, xi)` relations = [block_dims] for r in ispace.intervals.relations: relations.append([block_dims[0] if i is d else i for i in r]) # Further, if there are other IncrDimensions, add relations such that # IncrDimensions at the same level stick together, thus we obtain for # example `(t, xbb, ybb, xb, yb, x, y)` instead of `(t, xbb, xb, x, ybb, ...)` for i in intervals: if not isinstance(i.dim, IncrDimension): continue for bd in block_dims: if bd._defines & i.dim._defines: break if len(i.dim._defines) > len(bd._defines): relations.append([bd, i.dim]) intervals = IntervalGroup(intervals, relations=relations) sub_iterators = dict(ispace.sub_iterators) sub_iterators.pop(d, None) sub_iterators.update( {bd: ispace.sub_iterators.get(d, []) for bd in block_dims}) directions = dict(ispace.directions) directions.pop(d) directions.update({bd: ispace.directions[d] for bd in block_dims}) return IterationSpace(intervals, sub_iterators, directions)
def clusterize(exprs): """Group a sequence of :class:`ir.Eq`s into one or more :class:`Cluster`s.""" # Group expressions based on data dependences groups = group_expressions(exprs) clusters = ClusterGroup() for g in groups: # Coerce iteration space of each expression in each group mapper = OrderedDict([(e, e.ispace) for e in g]) flowmap = detect_flow_directions(g) queue = list(g) while queue: v = queue.pop(0) intervals, sub_iterators, directions = mapper[v].args forced, clashes = force_directions(flowmap, lambda i: directions.get(i)) for e in g: intervals = intervals.intersection( mapper[e].intervals.drop(clashes)) directions = {i: forced[i] for i in directions} coerced_ispace = IterationSpace(intervals, sub_iterators, directions) # Need update propagation ? if coerced_ispace != mapper[v]: mapper[v] = coerced_ispace queue.extend([i for i in g if i not in queue]) # Wrap each tensor expression in a PartialCluster for k, v in mapper.items(): if k.is_Tensor: scalars = [i for i in g[:g.index(k)] if i.is_Scalar] clusters.append(PartialCluster(scalars + [k], v)) # Group PartialClusters together where possible clusters = groupby(clusters) # Introduce conditional PartialClusters clusters = guard(clusters) return clusters.finalize()
def __init__(self, exprs, ispace=None, guards=None, properties=None, syncs=None):
    """
    Initialize the Cluster state: the expressions (each wrapped within a
    ClusterizedEq bound to `ispace`), the iteration space, and the guards,
    synchronization operations, and per-Dimension properties, all of which
    default to empty when not supplied.
    """
    if not ispace:
        ispace = IterationSpace([])

    wrapped = [ClusterizedEq(e, ispace=ispace) for e in as_tuple(exprs)]
    self._exprs = tuple(wrapped)
    self._ispace = ispace

    self._guards = frozendict(guards if guards else {})
    self._syncs = frozendict(syncs if syncs else {})

    # Every Dimension in the iteration space gets a (possibly empty)
    # property set, so lookups never fail
    props = {} if properties is None else dict(properties)
    for interval in ispace.intervals:
        props.setdefault(interval.dim, set())
    self._properties = frozendict(props)
def callback(self, clusters, prefix):
    """
    Skew the Clusters' iteration Intervals along the Dimension `d` nested
    innermost in `prefix`, rewriting accesses as `d -> d - skew_dim`
    (classic loop skewing).
    """
    if not prefix:
        return clusters

    d = prefix[-1].dim

    processed = []
    for c in clusters:
        if SKEWABLE not in c.properties[d]:
            return clusters

        # Skewing the innermost loop is opt-in (`skewinner`)
        if d is c.ispace[-1].dim and not self.skewinner:
            return clusters

        skew_dims = {i.dim for i in c.ispace if SEQUENTIAL in c.properties[i.dim]}
        if len(skew_dims) > 1:
            return clusters
        # NOTE(review): `pop()` raises KeyError if `skew_dims` is empty —
        # presumably SKEWABLE implies a surrounding SEQUENTIAL loop; confirm
        skew_dim = skew_dims.pop()

        # The level of a given Dimension in the hierarchy of block Dimensions, used
        # to skew over the outer level of loops.
        level = lambda dim: len([i for i in dim._defines if i.is_Incr])

        # Since we are here, prefix is skewable and nested under a
        # SEQUENTIAL loop.
        intervals = []
        for i in c.ispace:
            if i.dim is d and level(d) <= 1:  # Skew only at level 0 or 1
                intervals.append(Interval(d, skew_dim, skew_dim))
            else:
                intervals.append(i)
        intervals = IntervalGroup(intervals, relations=c.ispace.relations)
        ispace = IterationSpace(intervals, c.ispace.sub_iterators,
                                c.ispace.directions)

        # Rewrite the array accesses consistently with the skewed Intervals
        exprs = xreplace_indices(c.exprs, {d: d - skew_dim})
        processed.append(c.rebuild(exprs=exprs, ispace=ispace,
                                   properties=c.properties))

    return processed
def __new__(cls, input_expr, subs=None):
    """
    Lower an Eq into a LoweredEq: indexify, apply `subs`, and attach the
    iteration space derived from the accessed data points.
    """
    # Sanity check
    assert type(input_expr) != LoweredEq
    assert isinstance(input_expr, Eq)

    # Indexification
    expr = indexify(input_expr)

    # Apply caller-provided substitution
    if subs is not None:
        expr = expr.xreplace(subs)

    expr = super(LoweredEq, cls).__new__(cls, expr.lhs, expr.rhs, evaluate=False)
    expr.is_Increment = getattr(input_expr, 'is_Increment', False)

    # Get the accessed data points
    stencil = Stencil(expr)

    # Well-defined dimension ordering (time Dimensions first)
    ordering = dimension_sort(expr, key=lambda i: not i.is_Time)

    # Split actual Intervals (the data spaces) from the "derived" iterators,
    # to build an IterationSpace
    iterators = OrderedDict()
    for i in ordering:
        if i.is_Derived:
            iterators.setdefault(i.parent, []).append(stencil.entry(i))
        else:
            iterators.setdefault(i, [])
    intervals = []
    for k, v in iterators.items():
        # Aggregate the offsets of `k` itself and of its derived iterators
        offs = set.union(set(stencil.get(k)), *[i.ofs for i in v])
        intervals.append(Interval(k, min(offs), max(offs)).negate())
    expr.ispace = IterationSpace(intervals, iterators)

    return expr
def clusterize(exprs): """Group a sequence of :class:`ir.Eq`s into one or more :class:`Cluster`s.""" # Group expressions based on data dependences groups = group_expressions(exprs) clusters = ClusterGroup() # Coerce iteration direction of each expression in each group for g in groups: mapper = OrderedDict([(e, e.directions) for e in g]) flowmap = detect_flow_directions(g) queue = list(g) while queue: k = queue.pop(0) directions, _ = force_directions(flowmap, lambda i: mapper[k].get(i)) directions = {i: directions[i] for i in mapper[k]} # Need update propagation ? if directions != mapper[k]: mapper[k] = directions queue.extend([i for i in g if i not in queue]) # Wrap each tensor expression in a PartialCluster for k, v in mapper.items(): if k.is_Tensor: scalars = [i for i in g[:g.index(k)] if i.is_Scalar] intervals, sub_iterators, _ = k.ispace.args ispace = IterationSpace(intervals, sub_iterators, v) clusters.append(PartialCluster(scalars + [k], ispace, k.dspace)) # Group PartialClusters together where possible clusters = groupby(clusters) # Introduce conditional PartialClusters clusters = guard(clusters) return clusters.finalize()
def clusterize(exprs):
    """Group a sequence of LoweredEqs into one or more Clusters."""
    # The iteration directions must be mutually consistent across all the
    # input expressions, so compute the global flow map up front
    flowmap = detect_flow_directions(exprs)

    retval = ClusterGroup()
    for expr in exprs:
        # Wrap the LoweredEq within a PartialCluster whose iteration
        # direction is enforced by the surrounding LoweredEqs
        fdirs, _ = force_directions(flowmap,
                                    lambda d: expr.ispace.directions.get(d))
        enforced = IterationSpace(expr.ispace.intervals,
                                  expr.ispace.sub_iterators,
                                  fdirs)
        retval.append(PartialCluster(expr, enforced, expr.dspace))

    # Fuse PartialClusters where legal
    retval = groupby(retval)

    # Attach guards, turning some into conditional PartialClusters
    retval = guard(retval)

    return retval.finalize()
def __new__(cls, *args, **kwargs):
    """
    Lower an Eq into a LoweredEq, attaching iteration space, data space,
    conditionals, and reads/writes metadata.
    """
    if len(args) == 1 and isinstance(args[0], LoweredEq):
        # origin: LoweredEq(devito.LoweredEq, **kwargs)
        input_expr = args[0]
        expr = sympy.Eq.__new__(cls, *input_expr.args, evaluate=False)
        for i in cls._state:
            # Caller-provided metadata takes precedence over the input's
            setattr(expr, '_%s' % i, kwargs.get(i) or getattr(input_expr, i))
        return expr
    elif len(args) == 1 and isinstance(args[0], Eq):
        # origin: LoweredEq(devito.Eq)
        input_expr = expr = args[0]
    elif len(args) == 2:
        expr = sympy.Eq.__new__(cls, *args, evaluate=False)
        for i in cls._state:
            setattr(expr, '_%s' % i, kwargs.pop(i))
        return expr
    else:
        raise ValueError("Cannot construct LoweredEq from args=%s "
                         "and kwargs=%s" % (str(args), str(kwargs)))

    # Well-defined dimension ordering
    ordering = dimension_sort(expr)

    # Analyze the expression
    mapper = detect_accesses(expr)
    oobs = detect_oobs(mapper)
    conditionals = [i for i in ordering if i.is_Conditional]

    # Construct Intervals for IterationSpace and DataSpace
    intervals = build_intervals(Stencil.union(*mapper.values()))
    iintervals = []  # iteration Intervals
    dintervals = []  # data Intervals
    for i in intervals:
        d = i.dim
        if d in oobs:
            # Out-of-bounds accesses: the data Interval keeps its extremes
            iintervals.append(i.zero())
            dintervals.append(i)
        else:
            iintervals.append(i.zero())
            dintervals.append(i.zero())

    # Construct the IterationSpace
    iintervals = IntervalGroup(iintervals, relations=ordering.relations)
    iterators = build_iterators(mapper)
    ispace = IterationSpace(iintervals, iterators)

    # Construct the DataSpace
    dintervals.extend([Interval(i, 0, 0) for i in ordering
                       if i not in ispace.dimensions + conditionals])
    parts = {k: IntervalGroup(build_intervals(v)).add(iintervals)
             for k, v in mapper.items() if k}
    dspace = DataSpace(dintervals, parts)

    # Lower all Differentiable operations into SymPy operations
    rhs = diff2sympy(expr.rhs)

    # Finally create the LoweredEq with all metadata attached
    expr = super(LoweredEq, cls).__new__(cls, expr.lhs, rhs, evaluate=False)
    expr._dspace = dspace
    expr._ispace = ispace
    expr._conditionals = tuple(conditionals)
    expr._reads, expr._writes = detect_io(expr)
    expr._is_Increment = input_expr.is_Increment
    expr._implicit_dims = input_expr.implicit_dims

    return expr
def __new__(cls, *args, **kwargs):
    """
    Lower an Eq into a LoweredEq, attaching iteration space, data space,
    and reads/writes metadata.
    """
    if len(args) == 1 and isinstance(args[0], LoweredEq):
        # origin: LoweredEq(devito.LoweredEq, **kwargs)
        input_expr = args[0]
        expr = Eq.__new__(cls, *input_expr.args, evaluate=False)
        for i in cls._state:
            # Caller-provided metadata takes precedence over the input's
            setattr(expr, '_%s' % i, kwargs.get(i) or getattr(input_expr, i))
        return expr
    elif len(args) == 1 and isinstance(args[0], Eq):
        # origin: LoweredEq(sympy.Eq)
        input_expr = expr = args[0]
    elif len(args) == 2:
        expr = Eq.__new__(cls, *args, evaluate=False)
        for i in cls._state:
            setattr(expr, '_%s' % i, kwargs.pop(i))
        return expr
    else:
        raise ValueError("Cannot construct LoweredEq from args=%s "
                         "and kwargs=%s" % (str(args), str(kwargs)))

    # Well-defined dimension ordering (time Dimensions first)
    ordering = dimension_sort(expr, key=lambda i: not i.is_Time)

    # Introduce space sub-dimensions if need to
    region = getattr(input_expr, '_region', DOMAIN)
    if region == INTERIOR:
        mapper = {i: SubDimension.middle("%si" % i, i, 1, 1)
                  for i in ordering if i.is_Space}
        expr = expr.xreplace(mapper)
        # Each SubDimension is placed right after its parent in the ordering
        for k, v in mapper.items():
            ordering.insert(ordering.index(k) + 1, v)

    # Analyze the expression
    mapper = detect_accesses(expr)
    oobs = detect_oobs(mapper)

    # The iteration space is constructed so that information always flows
    # from an iteration to another (i.e., no anti-dependences are created)
    directions, _ = force_directions(detect_flow_directions(expr), lambda i: Any)
    iterators = build_iterators(mapper)
    intervals = build_intervals(Stencil.union(*mapper.values()))
    intervals = sorted(intervals, key=lambda i: ordering.index(i.dim))
    ispace = IterationSpace([i.zero() for i in intervals], iterators, directions)

    # The data space is relative to the computational domain
    intervals = [i if i.dim in oobs else i.zero() for i in intervals]
    intervals += [Interval(i, 0, 0) for i in ordering
                  if i not in ispace.dimensions]
    parts = {k: IntervalGroup(build_intervals(v)) for k, v in mapper.items() if k}
    dspace = DataSpace(intervals, parts)

    # Finally create the LoweredEq with all metadata attached
    expr = super(LoweredEq, cls).__new__(cls, expr.lhs, expr.rhs, evaluate=False)
    expr._is_Increment = getattr(input_expr, 'is_Increment', False)
    expr._dspace = dspace
    expr._ispace = ispace
    expr._reads, expr._writes = detect_io(expr)

    return expr
def __new__(cls, *args, **kwargs):
    """
    Lower an Eq into a LoweredEq: indexify, apply substitutions, and attach
    the data and iteration spaces derived from the accessed data points.
    """
    # Parse input
    if len(args) == 1:
        input_expr = args[0]
        assert type(input_expr) != LoweredEq
        assert isinstance(input_expr, Eq)
    elif len(args) == 2:
        # Reconstructing from existing Eq. E.g., we end up here after xreplace
        expr = super(Eq, cls).__new__(cls, *args, evaluate=False)
        stamp = kwargs.get('stamp')
        assert isinstance(stamp, Eq)
        # Inherit the metadata from the stamp expression
        expr.is_Increment = stamp.is_Increment
        expr.dspace = stamp.dspace
        expr.ispace = stamp.ispace
        return expr
    else:
        raise ValueError("Cannot construct Eq from args=%s "
                         "and kwargs=%s" % (str(args), str(kwargs)))

    # Indexification
    expr = indexify(input_expr)

    # Apply caller-provided substitution
    subs = kwargs.get('subs')
    if subs is not None:
        expr = expr.xreplace(subs)

    # Well-defined dimension ordering (time Dimensions first)
    ordering = dimension_sort(expr, key=lambda i: not i.is_Time)

    # Introduce space sub-dimensions if need to
    region = getattr(input_expr, '_region', DOMAIN)
    if region == INTERIOR:
        mapper = {i: SubDimension("%si" % i, i, 1, -1)
                  for i in ordering if i.is_Space}
        expr = expr.xreplace(mapper)
        ordering = [mapper.get(i, i) for i in ordering]

    # Get the accessed data points
    stencil = Stencil(expr)

    # Split actual Intervals (the data spaces) from the "derived" iterators,
    # to build an IterationSpace
    iterators = OrderedDict()
    for i in ordering:
        if i.is_Stepping:
            iterators.setdefault(i.parent, []).append(stencil.entry(i))
        else:
            iterators.setdefault(i, [])
    intervals = []
    for k, v in iterators.items():
        # Aggregate the offsets of `k` itself and of its stepping iterators
        offs = set.union(set(stencil.get(k)), *[i.ofs for i in v])
        intervals.append(Interval(k, min(offs), max(offs)))

    # Finally create the LoweredEq with all metadata attached
    expr = super(LoweredEq, cls).__new__(cls, expr.lhs, expr.rhs, evaluate=False)
    expr.is_Increment = getattr(input_expr, 'is_Increment', False)
    expr.dspace = DataSpace(intervals)
    expr.ispace = IterationSpace([i.negate() for i in intervals], iterators)

    return expr
def callback(self, clusters, prefix, backlog=None, known_break=None):
    """
    Enforce a consistent iteration direction on `clusters`, recursively
    splitting them into smaller groups whenever coupled flow- and
    anti-dependences make a single schedule impossible.

    `backlog` accumulates the Clusters deferred to a later recursive call;
    `known_break` is the set of Dimensions along which a split was required.
    """
    if not prefix:
        return clusters
    known_break = known_break or set()
    backlog = backlog or []

    # Take the innermost Dimension -- no other Clusters other than those in
    # `clusters` are supposed to share it
    candidates = prefix[-1].dim._defines

    scope = Scope(exprs=flatten(c.exprs for c in clusters))

    # The nastiest case:
    # eq0 := u[t+1, x] = ... u[t, x]
    # eq1 := v[t+1, x] = ... v[t, x] ... u[t, x] ... u[t+1, x] ... u[t+2, x]
    # Here, `eq0` marches forward along `t`, while `eq1` has both a flow and an
    # anti dependence with `eq0`, which ultimately will require `eq1` to go in
    # a separate t-loop
    require_break = (scope.d_flow.cause & scope.d_anti.cause) & candidates
    if require_break and len(clusters) > 1:
        backlog = [clusters[-1]] + backlog
        # Try with increasingly smaller Cluster groups until the ambiguity is solved
        return self.callback(clusters[:-1], prefix, backlog, require_break)

    # If the flow- or anti-dependences are not coupled, one or more Clusters
    # might be scheduled separately, to increase parallelism (this is basically
    # what low-level compilers call "loop fission")
    for n, _ in enumerate(clusters):
        d_cross = scope.d_from_access(scope.a_query(n, 'R')).cross()
        if any(d.is_storage_volatile(candidates) for d in d_cross):
            break
        elif d_cross.cause & candidates:
            if n > 0:
                return self.callback(clusters[:n], prefix, clusters[n:] + backlog,
                                     (d_cross.cause & candidates) | known_break)
            break

    # Compute iteration direction: Backward along anti-dependences, Forward
    # along flow dependences (flow takes precedence), Forward by default
    direction = {d: Backward for d in candidates if d.root in scope.d_anti.cause}
    direction.update({d: Forward for d in candidates if d.root in scope.d_flow.cause})
    direction.update({d: Forward for d in candidates if d not in direction})

    # Enforce iteration direction on each Cluster
    processed = []
    for c in clusters:
        ispace = IterationSpace(c.ispace.intervals, c.ispace.sub_iterators,
                                {**c.ispace.directions, **direction})
        processed.append(Cluster(c.exprs, ispace, c.dspace))

    if not backlog:
        return processed

    # Handle the backlog -- the Clusters characterized by flow- and anti-dependences
    # along one or more Dimensions
    direction = {d: Any for d in known_break}
    for i, c in enumerate(list(backlog)):
        ispace = IterationSpace(c.ispace.intervals.lift(known_break),
                                c.ispace.sub_iterators,
                                {**c.ispace.directions, **direction})
        dspace = c.dspace.lift(known_break)
        backlog[i] = Cluster(c.exprs, ispace, dspace)

    return processed + self.callback(backlog, prefix)
def __new__(cls, *args, **kwargs):
    """
    Lower an Eq into a LoweredEq, attaching iteration space, conditionals,
    and reads/writes metadata.
    """
    if len(args) == 1 and isinstance(args[0], LoweredEq):
        # origin: LoweredEq(devito.LoweredEq, **kwargs)
        input_expr = args[0]
        expr = sympy.Eq.__new__(cls, *input_expr.args, evaluate=False)
        for i in cls._state:
            # Caller-provided metadata takes precedence over the input's
            setattr(expr, '_%s' % i, kwargs.get(i) or getattr(input_expr, i))
        return expr
    elif len(args) == 1 and isinstance(args[0], Eq):
        # origin: LoweredEq(devito.Eq)
        input_expr = expr = args[0]
    elif len(args) == 2:
        expr = sympy.Eq.__new__(cls, *args, evaluate=False)
        for i in cls._state:
            setattr(expr, '_%s' % i, kwargs.pop(i))
        return expr
    else:
        raise ValueError("Cannot construct LoweredEq from args=%s "
                         "and kwargs=%s" % (str(args), str(kwargs)))

    # Well-defined dimension ordering
    ordering = dimension_sort(expr)

    # Analyze the expression
    accesses = detect_accesses(expr)
    dimensions = Stencil.union(*accesses.values())

    # Separate out the SubIterators from the main iteration Dimensions, that
    # is those which define an actual iteration space
    iterators = {}
    for d in dimensions:
        if d.is_SubIterator:
            iterators.setdefault(d.root, set()).add(d)
        elif d.is_Conditional:
            # Use `parent`, not `root`, because a ConditionalDimension may
            # have a SubDimension as parent
            iterators.setdefault(d.parent, set())
        else:
            iterators.setdefault(d, set())

    # Construct the IterationSpace
    intervals = IntervalGroup([Interval(d, 0, 0) for d in iterators],
                              relations=ordering.relations)
    ispace = IterationSpace(intervals, iterators)

    # Construct the conditionals and replace the ConditionalDimensions in `expr`
    conditionals = {}
    for d in ordering:
        if not d.is_Conditional:
            continue
        if d.condition is None:
            conditionals[d] = GuardFactor(d)
        else:
            conditionals[d] = diff2sympy(lower_exprs(d.condition))
        if d.factor is not None:
            # Subsampled Dimension: index via integer division by the factor
            expr = uxreplace(expr, {d: IntDiv(d.index, d.factor)})
    conditionals = frozendict(conditionals)

    # Lower all Differentiable operations into SymPy operations
    rhs = diff2sympy(expr.rhs)

    # Finally create the LoweredEq with all metadata attached
    expr = super(LoweredEq, cls).__new__(cls, expr.lhs, rhs, evaluate=False)
    expr._ispace = ispace
    expr._conditionals = conditionals
    expr._reads, expr._writes = detect_io(expr)
    expr._is_Increment = input_expr.is_Increment
    expr._implicit_dims = input_expr.implicit_dims

    return expr
def callback(self, clusters, prefix):
    """
    Replace the SteppingDimension-based index functions along `d` (e.g.
    `t + 1` with `t` a SteppingDimension) with ModuloDimensions, so that
    circular (modulo) buffers are indexed correctly.
    """
    if not prefix:
        return clusters

    d = prefix[-1].dim

    subiters = flatten([c.ispace.sub_iterators.get(d, []) for c in clusters])
    subiters = {i for i in subiters if i.is_Stepping}
    if not subiters:
        return clusters

    # Collect the index access functions along `d`, e.g., `t + 1` where `t` is
    # a SteppingDimension for `d = time`. Keyed by allocated buffer size, since
    # equal index functions over different buffer sizes need distinct
    # ModuloDimensions
    mapper = DefaultOrderedDict(lambda: DefaultOrderedDict(set))
    for c in clusters:
        indexeds = [a.indexed for a in c.scope.accesses if a.function.is_Tensor]

        for i in indexeds:
            try:
                iaf = i.indices[d]
            except KeyError:
                continue

            # Sanity checks
            sis = iaf.free_symbols & subiters
            if len(sis) == 0:
                continue
            elif len(sis) == 1:
                si = sis.pop()
            else:
                raise InvalidOperator("Cannot use multiple SteppingDimensions "
                                      "to index into a Function")
            size = i.function.shape_allocated[d]
            assert is_integer(size)

            mapper[size][si].add(iaf)

    # Construct the ModuloDimensions
    mds = []
    for size, v in mapper.items():
        for si, iafs in list(v.items()):
            # Offsets are sorted so that the semantic order (t0, t1, t2) follows
            # SymPy's index ordering (t, t-1, t+1) afer modulo replacement so
            # that associativity errors are consistent. This corresponds to
            # sorting offsets {-1, 0, 1} as {0, -1, 1} assigning -inf to 0
            siafs = sorted(iafs, key=lambda i: -np.inf if i - si == 0 else (i - si))

            for iaf in siafs:
                name = '%s%d' % (si.name, len(mds))
                offset = uxreplace(iaf, {si: d.root})
                mds.append(ModuloDimension(name, si, offset, size, origin=iaf))

    # Replacement rule for ModuloDimensions: only apply to accesses into
    # Functions whose allocated size along `d` matches `size`
    def rule(size, e):
        try:
            return e.function.shape_allocated[d] == size
        except (AttributeError, KeyError):
            return False

    # Reconstruct the Clusters
    processed = []
    for c in clusters:
        # Apply substitutions to expressions
        # Note: In an expression, there could be `u[t+1, ...]` and `v[t+1, ...]`,
        # where `u` and `v` are TimeFunction with circular time buffers
        # (save=None) *but* different modulo extent. The `t+1` indices above
        # are therefore conceptually different, so they will be replaced with
        # the proper ModuloDimension through two different calls to
        # `xreplace_indices`
        exprs = c.exprs
        groups = as_mapper(mds, lambda d: d.modulo)
        for size, v in groups.items():
            mapper = {md.origin: md for md in v}
            func = partial(xreplace_indices, mapper=mapper, key=partial(rule, size))
            exprs = [e.apply(func) for e in exprs]

        # Augment IterationSpace with the newly-created ModuloDimensions
        ispace = IterationSpace(c.ispace.intervals,
                                {**c.ispace.sub_iterators, **{d: tuple(mds)}},
                                c.ispace.directions)

        processed.append(c.rebuild(exprs=exprs, ispace=ispace))

    return processed
def callback(self, clusters, prefix, backlog=None, known_flow_break=None):
    """
    Enforce a consistent iteration direction on `clusters`, recursively
    splitting off Clusters whose coupled flow- and anti-dependences would
    otherwise make a single schedule impossible.

    `backlog` accumulates the Clusters deferred to a later recursive call;
    `known_flow_break` is the set of Dimensions along which a split occurred.
    """
    if not prefix:
        return clusters

    # Take the innermost Dimension -- no other Clusters other than those in
    # `clusters` are supposed to share it
    candidates = prefix[-1].dim._defines

    scope = Scope(exprs=flatten(c.exprs for c in clusters))

    # The most nasty case:
    # eq0 := u[t+1, x] = ... u[t, x]
    # eq1 := v[t+1, x] = ... v[t, x] ... u[t, x] ... u[t+1, x] ... u[t+2, x]
    # Here, `eq0` marches forward along `t`, while `eq1` has both a flow and an
    # anti dependence with `eq0`, which ultimately will require `eq1` to go in
    # a separate t-loop
    require_flow_break = (scope.d_flow.cause & scope.d_anti.cause) & candidates
    if require_flow_break and len(clusters) > 1:
        backlog = [clusters[-1]] + (backlog or [])
        # Try with increasingly smaller Cluster groups until the ambiguity is solved
        return self.callback(clusters[:-1], prefix, backlog, require_flow_break)

    # Compute iteration direction: Backward along anti-dependences, Forward
    # along flow dependences (flow takes precedence), Forward by default
    direction = {d: Backward for d in candidates if d.root in scope.d_anti.cause}
    direction.update({d: Forward for d in candidates if d.root in scope.d_flow.cause})
    direction.update({d: Forward for d in candidates if d not in direction})

    # Enforce iteration direction on each Cluster
    processed = []
    for c in clusters:
        ispace = IterationSpace(c.ispace.intervals, c.ispace.sub_iterators,
                                {**c.ispace.directions, **direction})
        processed.append(Cluster(c.exprs, ispace, c.dspace))

    if backlog is None:
        return processed

    # Handle the backlog -- the Clusters characterized by flow+anti dependences along
    # one or more Dimensions
    direction = {d: Any for d in known_flow_break}
    for i, c in enumerate(as_tuple(backlog)):
        ispace = IterationSpace(c.ispace.intervals.lift(known_flow_break),
                                c.ispace.sub_iterators,
                                {**c.ispace.directions, **direction})
        backlog[i] = Cluster(c.exprs, ispace, c.dspace)

    return processed + self.callback(backlog, prefix)
def callback(self, clusters, prefix, backlog=None, known_break=None):
    """
    Enforce a well-defined iteration direction on ``clusters`` along the
    innermost Dimension of ``prefix``, recursively splitting the group when
    flow+anti dependence ambiguities arise or when a split would increase
    parallelism.

    Parameters
    ----------
    clusters : list of Cluster
        The Clusters to be processed.
    prefix : iterable of Interval
        The enclosing iteration Intervals; only the innermost one is inspected.
    backlog : list of Cluster, optional
        Clusters set aside by an earlier recursive call (internal use).
    known_break : set of Dimension, optional
        Dimensions along which a break was already detected (internal use).
    """
    if not prefix:
        return clusters

    known_break = known_break or set()
    backlog = backlog or []

    # Take the innermost Dimension -- no other Clusters other than those in
    # `clusters` are supposed to share it
    candidates = prefix[-1].dim._defines

    scope = Scope(exprs=flatten(c.exprs for c in clusters))

    # Handle the nastiest case -- ambiguity due to the presence of both a
    # flow- and an anti-dependence.
    #
    # Note: in most cases, `scope.d_anti.cause == {}` -- either because
    # `scope.d_anti == {}` or because the few anti dependences are not carried
    # in any Dimension. We exploit this observation so that we only compute
    # `d_flow`, which instead may be expensive, when strictly necessary
    maybe_break = scope.d_anti.cause & candidates
    if len(clusters) > 1 and maybe_break:
        require_break = scope.d_flow.cause & maybe_break
        if require_break:
            # Move the trailing Cluster to the backlog and retry with fewer Clusters
            backlog = [clusters[-1]] + backlog
            # Try with increasingly smaller ClusterGroups until the ambiguity is gone
            return self.callback(clusters[:-1], prefix, backlog, require_break)

    # Schedule Clusters over different IterationSpaces if this increases parallelism
    for i in range(1, len(clusters)):
        if self._break_for_parallelism(scope, candidates, i):
            return self.callback(clusters[:i], prefix, clusters[i:] + backlog,
                                 candidates | known_break)

    # Compute iteration direction. Note: the `Forward` updates deliberately
    # override `Backward` for Dimensions carrying a flow dependence
    idir = {d: Backward for d in candidates if d.root in scope.d_anti.cause}
    if maybe_break:
        idir.update({d: Forward for d in candidates if d.root in scope.d_flow.cause})
    idir.update({d: Forward for d in candidates if d not in idir})

    # Enforce iteration direction on each Cluster
    processed = []
    for c in clusters:
        ispace = IterationSpace(c.ispace.intervals, c.ispace.sub_iterators,
                                {**c.ispace.directions, **idir})
        processed.append(c.rebuild(ispace=ispace))

    if not backlog:
        return processed

    # Handle the backlog -- the Clusters characterized by flow- and anti-dependences
    # along one or more Dimensions
    idir = {d: Any for d in known_break}
    # Iterate over a copy since `backlog` is updated in place
    for i, c in enumerate(list(backlog)):
        ispace = IterationSpace(c.ispace.intervals.lift(known_break),
                                c.ispace.sub_iterators,
                                {**c.ispace.directions, **idir})
        dspace = c.dspace.lift(known_break)
        backlog[i] = c.rebuild(ispace=ispace, dspace=dspace)

    return processed + self.callback(backlog, prefix)
def __new__(cls, *args, **kwargs):
    """
    Create a LoweredEq, an Eq carrying iteration/data-space metadata.

    Three construction modes, distinguished by the number of positional args:

    * ``LoweredEq(expr)`` -- lower a plain Eq: compute dimension ordering,
      data accesses, iteration directions, and build the iteration and
      data spaces from scratch.
    * ``LoweredEq(lhs, rhs, stamp=...)`` -- build a new Eq from ``lhs`` and
      ``rhs``, copying all metadata from the ``stamp`` LoweredEq.
    * ``LoweredEq(expr, ispace, dspace, reads, writes)`` -- rebuild from an
      expression with explicitly provided metadata.

    Raises
    ------
    ValueError
        If the number of positional arguments matches none of the above.
    """
    if len(args) == 1:
        # origin: LoweredEq(expr)
        expr = input_expr = args[0]
        assert not isinstance(expr, LoweredEq) and isinstance(expr, Eq)
    elif len(args) == 2:
        # origin: LoweredEq(lhs, rhs, stamp=...)
        stamp = kwargs.pop('stamp')
        expr = Eq.__new__(cls, *args, evaluate=False)
        assert isinstance(stamp, Eq)
        # Copy all metadata over from the stamp
        expr.is_Increment = stamp.is_Increment
        expr._ispace, expr._dspace = stamp.ispace, stamp.dspace
        expr.reads, expr.writes = stamp.reads, stamp.writes
        return expr
    elif len(args) == 5:
        # origin: LoweredEq(expr, ispace, space)
        input_expr, ispace, dspace, reads, writes = args
        assert isinstance(ispace, IterationSpace) and isinstance(
            dspace, DataSpace)
        expr = Eq.__new__(cls, *input_expr.args, evaluate=False)
        expr.is_Increment = input_expr.is_Increment
        expr._ispace, expr._dspace = ispace, dspace
        expr.reads, expr.writes = reads, writes
        return expr
    else:
        raise ValueError("Cannot construct LoweredEq from args=%s "
                         "and kwargs=%s" % (str(args), str(kwargs)))

    # Well-defined dimension ordering (time Dimensions sorted first)
    ordering = dimension_sort(expr, key=lambda i: not i.is_Time)

    # Introduce space sub-dimensions if need to
    region = getattr(input_expr, '_region', DOMAIN)
    if region == INTERIOR:
        # Shrink each space Dimension by one point on either side
        mapper = {
            i: SubDimension("%si" % i, i, 1, -1)
            for i in ordering if i.is_Space
        }
        expr = expr.xreplace(mapper)
        ordering = [mapper.get(i, i) for i in ordering]

    # Analyze data accesses
    mapper = detect_accesses(expr)
    oobs = detect_oobs(mapper)

    # The iteration space is constructed so that information always flows
    # from an iteration to another (i.e., no anti-dependences are created)
    directions, _ = force_directions(detect_flow_directions(expr), lambda i: Any)
    intervals, iterators = build_intervals(mapper)
    intervals = sorted(intervals, key=lambda i: ordering.index(i.dim))
    ispace = IterationSpace([i.zero() for i in intervals], iterators, directions)

    # The data space is relative to the computational domain: only Intervals
    # over Dimensions with out-of-bounds accesses keep their extremes
    intervals = [i if i.dim in oobs else i.zero() for i in intervals]
    intervals += [
        Interval(i, 0, 0) for i in ordering if i not in ispace.dimensions
    ]
    parts = {
        k: IntervalGroup(Interval(i, min(j), max(j)) for i, j in v.items())
        for k, v in mapper.items()
    }
    dspace = DataSpace(intervals, parts)

    # Finally create the LoweredEq with all metadata attached
    expr = super(LoweredEq, cls).__new__(cls, expr.lhs, expr.rhs,
                                         evaluate=False)
    expr.is_Increment = getattr(input_expr, 'is_Increment', False)
    expr._dspace = dspace
    expr._ispace = ispace
    expr.reads, expr.writes = detect_io(expr)

    return expr