def written(self):
    """
    The `written` IterationSpace, that is the iteration space that must be
    iterated over in order to read all of the written buffer values.

    One Interval is collected for each Dimension of `self.function`, after
    applying `self.subdims_mapper`. A Dimension without an entry in
    `self.itintervals_mapper` is looked up through its root instead.
    """
    collected = []
    iterators = {}
    dirs = {}
    for fdim in self.function.dimensions:
        dim = fdim.xreplace(self.subdims_mapper)
        try:
            entry = self.itintervals_mapper[dim]
        except KeyError:
            # No entry for `dim` itself (e.g., dim=time_sub): use its root
            assert dim.is_NonlinearDerived
            dim = dim.root
            entry = self.itintervals_mapper[dim]
        interval, subits, direction = entry

        collected.append(interval)
        iterators[dim] = subits + as_tuple(self.sub_iterators[dim])
        dirs[dim] = direction

    # A single relation, listing the Dimensions in collection order
    ordering = tuple(itv.dim for itv in collected)
    group = IntervalGroup(collected, relations=(ordering,))

    return IterationSpace(group, iterators, dirs)
def writeto(self):
    """
    The `writeto` IterationSpace, that is the iteration space that must be
    iterated over in order to initialize the buffer.

    A buffer Dimension without an entry in `self.itintervals_mapper` gets
    `Interval(d, 0, 0)`, no sub-iterators and the `Forward` direction.
    """
    bdims = self.buffer.dimensions

    collected = []
    iterators = {}
    dirs = {}
    for dim in bdims:
        try:
            entry = self.itintervals_mapper[dim]
        except KeyError:
            # E.g., the contraction Dimension `db0`
            assert dim in self.contraction_mapper.values()
            entry = (Interval(dim, 0, 0), (), Forward)
        interval, subits, direction = entry

        collected.append(interval)
        iterators[dim] = subits
        dirs[dim] = direction

    # The buffer Dimensions themselves dictate the ordering
    group = IntervalGroup(collected, relations=(bdims,))

    return IterationSpace(group, iterators, dirs)
def _eliminate_inter_stencil_redundancies(self, cluster, template, **kwargs):
    """
    Search aliasing expressions and capture them into vector temporaries.

    Returns a list made of one Cluster per captured alias, followed by
    `cluster` rebuilt with the aliasing expressions replaced by accesses
    to the new temporaries.

    Examples
    --------
    1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]
       >>>
       ti[x,y,z] = a[x,y,z] + b[x,y,z]
       temp = ti[x,y,z]*c[t,x,y,z]

    2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
       temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]
       >>>
       ti[x,y,z] = a[x,y,z]*b[x,y,z]
       temp1 = 2.0*ti[x,y,z]
       temp2 = 3.0*ti[x,y,z+1]
    """
    # See collect.__doc__ for what an "alias" is
    aliases = collect(cluster.exprs)

    # The captured redundancies end up in space-varying temporaries; the
    # FlowGraph tells which expressions are time-invariant
    flow = FlowGraph(cluster.exprs)
    time_invariants = {}
    for node in flow.values():
        time_invariants[node.rhs] = flow.time_invariant(node)

    # Split the expressions into capture candidates and leftovers
    leftovers = []
    candidates = OrderedDict()
    for lhs, node in flow.items():
        rhs = node.rhs
        # The cost check keeps the memory footprint under control
        naliases = len(aliases.get(rhs))
        cost = estimate_cost(node, True)*naliases
        worth_it = ((cost >= self.MIN_COST_ALIAS and naliases > 1) or
                    (cost >= self.MIN_COST_ALIAS_INV and time_invariants[rhs]))
        if worth_it:
            candidates[rhs] = lhs
        else:
            leftovers.append(node)

    # One Cluster per alias, plus the substitution rules exposing the
    # new temporaries
    captured = []
    subs = {}
    for origin, alias in aliases.items():
        if not any(e in candidates for e in alias.aliased):
            continue

        # The write-to Intervals: all but the time ones
        writeto = IntervalGroup([
            Interval(itv.dim, *alias.relaxed_diameter.get(itv.dim, (0, 0)))
            for itv in cluster.ispace.intervals if not itv.dim.is_Time
        ])

        # Optimization: a SpaceDimension is dropped if it doesn't induce a
        # flow/anti dependence (the `offsets` of an Interval tell how much
        # halo is needed to honour such dependences)
        dep_inducing = [itv for itv in writeto if any(itv.offsets)]
        try:
            start = writeto.index(dep_inducing[0])
            writeto = IntervalGroup(writeto[start:])
        except IndexError:
            warning("Couldn't optimize some of the detected redundancies")

        # The temporary storing `alias`
        roots = [d.root for d in writeto.dimensions]
        padding = [(abs(itv.lower), abs(itv.upper)) for itv in writeto]
        array = Array(name=template(), dimensions=roots, halo=padding,
                      dtype=cluster.dtype)

        # The expression evaluating `alias`
        base = tuple(itv.dim - itv.lower for itv in writeto)
        expression = Eq(array[base], origin)

        # Substitution rules, so that the temporary can be used in place
        # of the aliasing expressions
        for aliased, distance in alias.with_distance:
            assert all(itv.dim in distance.labels for itv in writeto)
            shifted = [itv.dim - itv.lower + distance[itv.dim] for itv in writeto]
            temporary = array[shifted]
            if aliased in candidates:
                # Not in `candidates` if part of a composite alias
                subs[candidates[aliased]] = temporary
            subs[aliased] = temporary

        # The `alias` IterationSpace
        intervals, sub_iterators, directions = cluster.ispace.args
        ispace = IterationSpace(intervals.add(writeto), sub_iterators, directions)

        # The `alias` DataSpace
        parts = {}
        for f, accesses in detect_accesses(expression).items():
            if f:
                parts[f] = IntervalGroup(build_intervals(accesses)).add(ispace.intervals)
        dspace = DataSpace(cluster.dspace.intervals, parts)

        captured.append(Cluster([expression], ispace, dspace))

    # Switch temporaries in the expression trees
    rebuilt = cluster.rebuild([e.xreplace(subs) for e in leftovers])

    return captured + [rebuilt]
def _optimize_schedule_rotations(schedule, sregistry):
    """
    Transform the schedule such that the tensor temporaries "rotate" along
    the outermost Dimension. This trades a parallel Dimension for a smaller
    working set size.

    The ScheduledAliases are handled in groups sharing the same write-to
    space. A group whose outermost write-to Dimension has no entry in
    `schedule.dmapper` is forwarded untouched.
    """
    # Position of the rotations Dimension, that is the outermost one
    ridx = 0

    rotations = defaultdict(list)
    rescheduled = []
    for wspace, grouped in groupby(schedule, key=lambda sa: sa.writeto):
        members = list(grouped)

        candidate = wspace[ridx]
        d = candidate.dim
        try:
            ds = schedule.dmapper[d]
        except KeyError:
            # Can't do anything if `d` isn't an IncrDimension over a block
            rescheduled.extend(members)
            continue

        n = candidate.min_size
        assert n > 0

        lower = candidate.lower
        upper = candidate.upper

        ii = ModuloDimension('%sii' % d, ds, lower, incr=upper)
        cd = CustomDimension(name='%s%s' % (d, d), symbolic_min=ii,
                             symbolic_max=upper, symbolic_size=n)
        dsi = ModuloDimension('%si' % ds, cd, cd + ds - lower, n)

        modulos = OrderedDict()
        for sa in members:
            # Rewrite `indicess` so that it uses `xs0`, `xs1`, ...
            mds = []
            indicess = []
            for indices in sa.indicess:
                v = indices[ridx]
                if v not in modulos:
                    name = sregistry.make_name(prefix='%sr' % d.name)
                    modulos[v] = ModuloDimension(name, ds, v, n)
                md = modulos[v]
                mds.append(md)
                indicess.append(indices[:ridx] + [md] + indices[ridx + 1:])

            # Rewrite `writeto` by switching `d` to `dsi`
            wintervals = wspace.intervals.switch(d, dsi).zero(dsi)
            wsubits = dict(wspace.sub_iterators)
            wsubits[d] = dsi
            writeto = IterationSpace(wintervals, wsubits)

            # Rewrite `alias` by shifting `d` by the rotation Dimension
            alias = sa.alias.xreplace({d: d + cd})

            # Extend `ispace` to iterate over rotations
            # Note: `ridx + 1` is by construction in-bounds here
            d1 = writeto[ridx + 1].dim
            rintervals = IntervalGroup(Interval(cd, 0, 0), relations={(d, cd, d1)})
            rispace = IterationSpace(rintervals, {cd: dsi}, {cd: Forward})
            aispace = sa.ispace.zero(d).augment({d: mds + [ii]})
            ispace = IterationSpace.union(rispace, aispace)

            rescheduled.append(
                ScheduledAlias(alias, writeto, ispace, sa.aliaseds, indicess)
            )

        # Record the rotations introduced along `d`
        rotations[d].extend(modulos.values())

    return Schedule(*rescheduled, dmapper=schedule.dmapper, rmapper=rotations)
def lower_aliases(cluster, aliases, in_writeto, maxpar):
    """
    Create a Schedule from an AliasMapper.

    For each alias, the Intervals of `cluster.ispace` are visited in order
    to build the write-to space, the iteration space and the indices of the
    temporary that will store the alias.
    """
    dmapper = {}
    scheduled = []
    for alias, meta in aliases.items():
        # Interval lookup by Dimension; a nonlinear derived Dimension is
        # also reachable through its parent
        imapper = {itv.dim: itv for itv in meta.intervals}
        imapper.update({itv.dim.parent: itv for itv in meta.intervals
                        if itv.dim.is_NonlinearDerived})

        iteron = []
        writeto = []
        sub_iterators = {}
        indicess = [[] for _ in meta.distances]
        for i in cluster.ispace.intervals:
            dim = i.dim

            if dim not in imapper:
                # E.g., `x0_blk0` or (`a[y_m+1]` => `y not in imapper`)
                iteron.append(i)
                continue
            interval = imapper[dim]

            assert i.stamp >= interval.stamp

            needed = (writeto or interval != interval.zero() or
                      in_writeto(dim, cluster))
            if not needed:
                # The alias doesn't require a temporary Dimension along `dim`
                iteron.append(i)
                continue

            assert not dim.is_NonlinearDerived

            # `dim` is necessarily part of the write-to region, hence the
            # Interval's stamp must be adjusted. For example, with
            # `i=x[0,0]<1>` and `interval=x[-4,4]<0>`, the stamp to be used
            # is `<1>`, which is what appears in `cluster`
            interval = interval.lift(i.stamp)

            # The stamp is bumped further if we were requested to trade
            # fusion for more collapse-parallelism
            interval = interval.lift(interval.stamp + int(maxpar))

            writeto.append(interval)
            iteron.append(interval)

            if dim.is_Incr:
                # Suitable IncrDimensions must be used to avoid OOB accesses.
                # E.g., r[xs][ys][z] => both `xs` and `ys` must be initialized
                # such that all accesses are within bounds. The hierarchy of
                # IncrDimensions is traversed to set `xs` (`ys`) so that
                # consecutive blocks access consecutive regions in `r` (e.g.,
                # `xs=x0_blk1-x0_blk0` with `blocklevels=2`; `xs=0` with
                # `blocklevels=1`, that is it degenerates in this case)
                if dim not in dmapper:
                    parent = dim.parent
                    assert parent.is_Incr
                    if parent.parent.is_Incr:
                        # An IncrDimension in between IncrDimensions
                        m = dim.symbolic_min - parent.symbolic_min
                    else:
                        m = 0
                    dmapper[dim] = IncrDimension("%ss" % dim.name, dim, m,
                                                 parent.symbolic_size, 1, parent.step)
                d = dmapper[dim]
                sub_iterators[dim] = d
            else:
                d = dim

            # Lower the distances to indices, given the iteration `interval`
            for distance, indices in zip(meta.distances, indicess):
                indices.append(d - interval.lower + distance[interval.dim])

        # The alias write-to space
        wspace = IterationSpace(IntervalGroup(writeto), sub_iterators)

        # The alias iteration space
        ispace = IterationSpace(IntervalGroup(iteron, cluster.ispace.relations),
                                cluster.sub_iterators, cluster.directions)
        ispace = ispace.augment(sub_iterators)

        scheduled.append(
            ScheduledAlias(alias, wspace, ispace, meta.aliaseds, indicess)
        )

    # The ScheduledAliases must be ordered so as to reuse as many of the
    # `cluster`'s IterationIntervals as possible in order to honor the
    # write-to region. Ordering also ensures deterministic code generation
    scheduled.sort(key=lambda sa: cit(cluster.ispace, sa.ispace))

    return Schedule(*scheduled, dmapper=dmapper)
def _eliminate_inter_stencil_redundancies(self, cluster, template, **kwargs):
    """
    Search for redundancies across the expressions and expose them
    to the later stages of the optimisation pipeline by introducing
    new temporaries of suitable rank.

    Two type of redundancies are sought:

        * Time-invariants, and
        * Across different space points

    A sparse `cluster` is returned as is. Otherwise the result is the
    grouped alias Clusters followed by `cluster` rebuilt with the aliasing
    expressions replaced by accesses to the new temporaries.

    Examples
    ========
    Let ``t`` be the time dimension, ``x, y, z`` the space dimensions. Then:

    1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]
       >>>
       ti[x,y,z] = a[x,y,z] + b[x,y,z]
       temp = ti[x,y,z]*c[t,x,y,z]

    2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
       temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]
       >>>
       ti[x,y,z] = a[x,y,z]*b[x,y,z]
       temp1 = 2.0*ti[x,y,z]
       temp2 = 3.0*ti[x,y,z+1]
    """
    if cluster.is_sparse:
        return cluster

    # See collect.__doc__ for more information about "aliases"
    alias_mapper, aliases = collect(cluster.exprs)

    # The redundancies will be stored in space-varying temporaries
    trace = cluster.trace
    indices = trace.space_indices
    time_invariants = {}
    for node in trace.values():
        time_invariants[node.rhs] = trace.time_invariant(node)

    # Split the expressions into capture candidates and leftovers
    leftovers = []
    candidates = OrderedDict()
    for lhs, node in trace.items():
        rhs = node.rhs
        # The cost check keeps the memory footprint under control
        naliases = len(alias_mapper.get(rhs, []))
        cost = estimate_cost(node, True)*naliases
        expensive = cost >= self.thresholds['min-cost-alias']
        if expensive and (naliases > 1 or time_invariants[rhs]):
            candidates[rhs] = lhs
        else:
            leftovers.append(node)

    # One Cluster per alias, plus the substitution rules exposing the
    # new temporaries
    alias_clusters = ClusterGroup()
    rules = OrderedDict()
    for origin, alias in aliases.items():
        if not any(e in candidates for e in alias.aliased):
            continue

        # An iteration space suitable for /alias/
        _, sub_iterators, directions = cluster.ispace.args
        relaxed = [Interval(itv.dim, *alias.relaxed_diameter.get(itv.dim, itv.limits))
                   for itv in cluster.ispace.intervals]
        ispace = IterationSpace(relaxed, sub_iterators, directions)

        # Optimization: perhaps the cluster can be lifted outside the
        # time dimension
        if all(time_invariants[e] for e in alias.aliased):
            ispace = ispace.project(lambda d: not d.is_Time)

        # A symbolic function for /alias/
        intervals = ispace.intervals
        halo = []
        for d in indices:
            itv = intervals[d]
            halo.append((abs(itv.lower), abs(itv.upper)))
        function = Array(name=template(), dimensions=indices, halo=halo)
        base = tuple(d - intervals[d].lower for d in indices)
        expression = Eq(Indexed(function.indexed, *base), origin)

        # A data space suitable for /alias/
        accesses = detect_accesses(expression)
        parts = {f: IntervalGroup(build_intervals(a)).add(intervals)
                 for f, a in accesses.items() if f}
        dspace = DataSpace([itv.zero() for itv in intervals], parts)

        alias_clusters.append(Cluster([expression], ispace, dspace))

        # The substitution rules
        for aliased, distance in alias.with_distance:
            shifted = tuple(d - intervals[d].lower + off
                            for d, off in distance if d in indices)
            temporary = Indexed(function.indexed, *shifted)
            rules[candidates[aliased]] = temporary
            rules[aliased] = temporary

    # Group clusters together if possible
    alias_clusters = groupby(alias_clusters).finalize()
    alias_clusters.sort(key=lambda c: c.is_dense)

    # Switch temporaries in the expression trees
    rebuilt = cluster.rebuild([e.xreplace(rules) for e in leftovers])

    return alias_clusters + [rebuilt]
def _eliminate_inter_stencil_redundancies(self, cluster, template, **kwargs):
    """
    Search for redundancies across the expressions and expose them
    to the later stages of the optimisation pipeline by introducing
    new temporaries of suitable rank.

    Two type of redundancies are sought:

        * Time-invariants, and
        * Across different space points

    A sparse `cluster` is returned unchanged. Otherwise the grouped alias
    Clusters are returned, followed by `cluster` rebuilt on top of the
    new temporaries.

    Examples
    ========
    Let ``t`` be the time dimension, ``x, y, z`` the space dimensions. Then:

    1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]
       >>>
       ti[x,y,z] = a[x,y,z] + b[x,y,z]
       temp = ti[x,y,z]*c[t,x,y,z]

    2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
       temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]
       >>>
       ti[x,y,z] = a[x,y,z]*b[x,y,z]
       temp1 = 2.0*ti[x,y,z]
       temp2 = 3.0*ti[x,y,z+1]
    """
    if cluster.is_sparse:
        return cluster

    # Refer to collect.__doc__ for more information about "aliases"
    found, aliases = collect(cluster.exprs)

    # Space-varying temporaries will hold the redundancies
    graph = cluster.trace
    indices = graph.space_indices
    time_invariants = {}
    for expr in graph.values():
        time_invariants[expr.rhs] = graph.time_invariant(expr)

    # Look for the candidate expressions
    remaining = []
    candidates = OrderedDict()
    for target, expr in graph.items():
        rhs = expr.rhs
        # Cost check, so that the memory footprint stays under control
        naliases = len(found.get(rhs, []))
        cost = estimate_cost(expr, True)*naliases
        selected = cost >= self.MIN_COST_ALIAS and \
            (naliases > 1 or time_invariants[rhs])
        if selected:
            candidates[rhs] = target
        else:
            remaining.append(expr)

    # Build the alias Clusters together with the substitution rules
    # for the new temporaries
    alias_clusters = ClusterGroup()
    rules = OrderedDict()
    for origin, alias in aliases.items():
        if not any(a in candidates for a in alias.aliased):
            continue

        # The iteration space of /alias/
        _, sub_iterators, directions = cluster.ispace.args
        widened = []
        for itv in cluster.ispace.intervals:
            limits = alias.relaxed_diameter.get(itv.dim, itv.limits)
            widened.append(Interval(itv.dim, *limits))
        ispace = IterationSpace(widened, sub_iterators, directions)

        # Optimization: the cluster may be lifted outside the time dimension
        if all(time_invariants[a] for a in alias.aliased):
            ispace = ispace.project(lambda d: not d.is_Time)

        # The symbolic function of /alias/
        intervals = ispace.intervals
        halo = [(abs(intervals[d].lower), abs(intervals[d].upper))
                for d in indices]
        function = Array(name=template(), dimensions=indices, halo=halo)
        origin_access = tuple(d - intervals[d].lower for d in indices)
        expression = Eq(function[origin_access], origin)

        # The data space of /alias/
        parts = {}
        for f, accesses in detect_accesses(expression).items():
            if f:
                parts[f] = IntervalGroup(build_intervals(accesses)).add(intervals)
        dspace = DataSpace([itv.zero() for itv in intervals], parts)

        # The new Cluster for /alias/
        alias_clusters.append(Cluster([expression], ispace, dspace))

        # The substitution rules
        for aliased, distance in alias.with_distance:
            shifted = [d - intervals[d].lower + off
                       for d, off in distance if d in indices]
            temporary = function[shifted]
            rules[candidates[aliased]] = temporary
            rules[aliased] = temporary

    # Group clusters together if possible
    alias_clusters = groupby(alias_clusters).finalize()
    alias_clusters.sort(key=lambda c: c.is_dense)

    # Switch temporaries in the expression trees
    remaining = [e.xreplace(rules) for e in remaining]

    return alias_clusters + [cluster.rebuild(remaining)]
def iter(self, cluster, max_par):
    """
    Yield the aliases in scheduling order.

    The aliases can legally be scheduled in many different orders, but we
    privilege the one that minimizes storage while maximizing fusion.

    Parameters
    ----------
    cluster
        Provides the `ispace`, `properties`, `sub_iterators` and `directions`
        from which the write-to region and the iteration space of each
        alias are derived.
    max_par
        If true, a PARALLEL Dimension is added to the write-to region even
        if it would not otherwise be needed, thus trading storage for
        parallelism.

    Yields
    ------
    tuple
        `(alias, writeto, ispace, aliaseds, distances)`, shortest write-to
        region first. Ties keep the order in which `self.items()` produced
        the aliases.
    """
    items = []
    for alias, (intervals, aliaseds, distances) in self.items():
        # Interval lookup by Dimension; a nonlinear derived Dimension is
        # also reachable through its parent
        mapper = {i.dim: i for i in intervals}
        mapper.update({i.dim.parent: i for i in intervals
                       if i.dim.is_NonlinearDerived})

        # Becomes True as soon as a Dimension in `ispace` is found to
        # be independent of `intervals`
        flag = False
        iteron = []
        writeto = []
        for i in cluster.ispace.intervals:
            try:
                interval = mapper[i.dim]
            except KeyError:
                if not any(i.dim in d._defines for d in mapper):
                    # E.g., `t[0,0]<0>` in the case of t-invariant aliases,
                    # whereas if `i.dim` is `x0_blk0` in `x0_blk0[0,0]<0>` then
                    # we would not enter here
                    flag = True

                iteron.append(i)
                continue

            assert i.stamp >= interval.stamp

            # Does `i.dim` actually need to be a write-to Dimension ?
            if flag or interval != interval.zero():
                # Yes, so we also have to adjust the Interval's stamp.
                # E.g., `i=x[0,0]<1>` and `interval=x[-4,4]<0>`. We need to
                # use `<1>` which is the actual stamp used in `cluster`
                interval = interval.lift(i.stamp)
                iteron.append(interval)
                writeto.append(interval)
                flag = True

            elif max_par and PARALLEL in cluster.properties[i.dim]:
                # Not necessarily, but with `max_par` the user is
                # expressing the wish to trade-off storage for parallelism
                interval = interval.lift(i.stamp + 1)
                iteron.append(interval)
                writeto.append(interval)
                flag = True

            else:
                iteron.append(i)

        if writeto:
            writeto = IntervalGroup(writeto, cluster.ispace.relations)
        else:
            # E.g., an `alias` having 0-distance along all Dimensions
            writeto = IntervalGroup(intervals, cluster.ispace.relations)

        # Construct the IterationSpace within which the alias will be computed
        ispace = IterationSpace(IntervalGroup(iteron, cluster.ispace.relations),
                                cluster.sub_iterators, cluster.directions)
        ispace = ispace.augment(self.index_mapper)

        items.append((alias, writeto, ispace, aliaseds, distances))

    # Shortest write-to region first. `sorted` is stable, so the order is the
    # same as repeatedly extracting the first minimum, but without the
    # quadratic rescans and without the equality-based `list.remove`, which
    # compares whole tuples of symbolic objects
    yield from sorted(items, key=lambda item: len(item[1]))
def make_schedule(cluster, aliases, in_writeto, options):
    """
    Create a Schedule from an AliasMapper.

    For each alias, the Intervals of `cluster.ispace` are visited in order
    to build the write-to space, the iteration space and the indices of the
    temporary that will store the alias. The `cire-maxpar` entry of
    `options` controls the extra stamp bump applied to write-to Intervals.
    """
    max_par = options['cire-maxpar']

    dmapper = {}
    scheduled = []
    for alias, meta in aliases.items():
        # Interval lookup by Dimension; a nonlinear derived Dimension is
        # also reachable through its parent
        imapper = {itv.dim: itv for itv in meta.intervals}
        imapper.update({itv.dim.parent: itv for itv in meta.intervals
                        if itv.dim.is_NonlinearDerived})

        iteron = []
        writeto = []
        sub_iterators = {}
        indicess = [[] for _ in meta.distances]
        for i in cluster.ispace.intervals:
            dim = i.dim

            if dim not in imapper:
                # E.g., `x0_blk0` or (`a[y_m+1]` => `y not in imapper`)
                iteron.append(i)
                continue
            interval = imapper[dim]

            assert i.stamp >= interval.stamp

            needed = (writeto or interval != interval.zero() or
                      in_writeto(dim, cluster))
            if not needed:
                # The alias doesn't require a temporary Dimension along `dim`
                iteron.append(i)
                continue

            assert not dim.is_NonlinearDerived

            # `dim` is necessarily part of the write-to region, hence the
            # Interval's stamp must be adjusted. For example, with
            # `i=x[0,0]<1>` and `interval=x[-4,4]<0>`, the stamp to be used
            # is `<1>`, which is what appears in `cluster`
            interval = interval.lift(i.stamp)

            # The stamp is bumped further if we were requested to trade
            # fusion for more collapse-parallelism
            interval = interval.lift(interval.stamp + int(max_par))

            writeto.append(interval)
            iteron.append(interval)

            if dim.is_Incr:
                # Suitable ShiftedDimensions must be used to avoid OOB
                # accesses. E.g., r[xs][ys][z] => both `xs` and `ys` must
                # start at 0, not at `x0_blk0`
                if dim not in dmapper:
                    dmapper[dim] = ShiftedDimension(dim, name="%ss" % dim.name)
                d = dmapper[dim]
                sub_iterators[dim] = d
            else:
                d = dim

            # Lower the distances to indices, given the iteration `interval`
            for distance, indices in zip(meta.distances, indicess):
                indices.append(d - interval.lower + distance[interval.dim])

        # The alias write-to space
        wspace = IterationSpace(IntervalGroup(writeto), sub_iterators)

        # The alias iteration space
        ispace = IterationSpace(IntervalGroup(iteron, cluster.ispace.relations),
                                cluster.sub_iterators, cluster.directions)
        ispace = ispace.augment(sub_iterators)

        scheduled.append(
            ScheduledAlias(alias, wspace, ispace, meta.aliaseds, indicess)
        )

    # Sorting by write-to region makes code generation deterministic
    scheduled.sort(key=lambda sa: sa.writeto)

    return Schedule(*scheduled, dmapper=dmapper)
def process(candidates, aliases, cluster, template):
    """
    Create Clusters from aliasing expressions.

    Returns a 2-tuple `(clusters, subs)`: the Clusters computing the
    captured aliases and the substitution rules mapping the aliasing
    expressions (and their candidate targets) to the new temporaries.
    """
    captured = []
    subs = {}
    for origin, alias in aliases.items():
        if not any(e in candidates for e in alias.aliased):
            continue

        # The write-to Intervals: all but the time ones
        writeto = IntervalGroup([
            Interval(itv.dim, *alias.relaxed_diameter.get(itv.dim, (0, 0)))
            for itv in cluster.ispace.intervals if not itv.dim.is_Time
        ])

        # Optimization: a SpaceDimension is dropped if it doesn't induce a
        # flow/anti dependence (the `offsets` of an Interval tell how much
        # halo is required to honour such dependences)
        dep_inducing = [itv for itv in writeto if any(itv.offsets)]
        try:
            start = writeto.index(dep_inducing[0])
            writeto = IntervalGroup(writeto[start:])
        except IndexError:
            perf_adv("Could not optimize some of the detected redundancies")

        # The temporary storing `alias`
        roots = [d.root for d in writeto.dimensions]
        padding = [(abs(itv.lower), abs(itv.upper)) for itv in writeto]
        array = Array(name=template(), dimensions=roots, halo=padding,
                      dtype=cluster.dtype)

        # The expression evaluating `alias`; rules created for previously
        # processed aliases are applied to `origin`
        base = tuple(itv.dim - itv.lower for itv in writeto)
        expression = Eq(array[base], origin.xreplace(subs))

        # Substitution rules, so that the temporary can be used in place
        # of the aliasing expressions
        for aliased, distance in alias.with_distance:
            assert all(itv.dim in distance.labels for itv in writeto)
            shifted = [itv.dim - itv.lower + distance[itv.dim] for itv in writeto]
            temporary = array[shifted]
            if aliased in candidates:
                # Not in `candidates` if part of a composite alias
                subs[candidates[aliased]] = temporary
            subs[aliased] = temporary

        # The `alias` IterationSpace
        intervals, sub_iterators, directions = cluster.ispace.args
        ispace = IterationSpace(intervals.add(writeto), sub_iterators, directions)

        # Best-effort optimization of the `alias` IterationSpace: the
        # innermost IterationInterval is extended based on the vector length
        # if it is ROUNDABLE; a TypeError or KeyError leaves `ispace` as is
        try:
            innermost = ispace.itintervals[-1]
            if ROUNDABLE in cluster.properties[innermost.dim]:
                from devito.parameters import configuration
                vl = configuration['platform'].simd_items_per_reg(cluster.dtype)
                extra = Interval(innermost.dim, 0, innermost.interval.size % vl)
                ispace = ispace.add(extra)
        except (TypeError, KeyError):
            pass

        # The `alias` DataSpace
        parts = {}
        for f, accesses in detect_accesses(expression).items():
            if f:
                parts[f] = IntervalGroup(build_intervals(accesses)).add(ispace.intervals)
        dspace = DataSpace(cluster.dspace.intervals, parts)

        # The new Cluster for `alias`
        captured.append(
            cluster.rebuild(exprs=[expression], ispace=ispace, dspace=dspace)
        )

    return captured, subs
def _eliminate_inter_stencil_redundancies(self, cluster, template, **kwargs):
    """
    Search for redundancies across the expressions and expose them
    to the later stages of the optimisation pipeline by introducing
    new temporaries of suitable rank.

    Two type of redundancies are sought:

        * Time-invariants, and
        * Across different space points

    A sparse `cluster` is returned unchanged. Otherwise the grouped alias
    Clusters are returned, followed by `cluster` rebuilt on top of the
    new temporaries.

    Examples
    ========
    Let ``t`` be the time dimension, ``x, y, z`` the space dimensions. Then:

    1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]
       >>>
       ti[x,y,z] = a[x,y,z] + b[x,y,z]
       temp = ti[x,y,z]*c[t,x,y,z]

    2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
       temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]
       >>>
       ti[x,y,z] = a[x,y,z]*b[x,y,z]
       temp1 = 2.0*ti[x,y,z]
       temp2 = 3.0*ti[x,y,z+1]
    """
    if cluster.is_sparse:
        return cluster

    # Refer to collect.__doc__ for more information about "aliases"
    found, aliases = collect(cluster.exprs)

    # Space-varying temporaries will hold the redundancies
    trace = cluster.trace
    indices = trace.space_indices
    time_invariants = {}
    for node in trace.values():
        time_invariants[node.rhs] = trace.time_invariant(node)

    # All of the captured redundancies share the same shape
    shape = tuple(d.symbolic_extent for d in indices)

    # Look for the candidate expressions
    remaining = []
    candidates = OrderedDict()
    for target, node in trace.items():
        rhs = node.rhs
        # Cost check, so that the memory footprint stays under control
        naliases = len(found.get(rhs, []))
        cost = estimate_cost(node, True)*naliases
        expensive = cost >= self.thresholds['min-cost-alias']
        if expensive and (naliases > 1 or time_invariants[rhs]):
            candidates[rhs] = target
        else:
            remaining.append(Eq(target, rhs))

    # Build the alias Clusters together with the substitution rules
    # for the new temporaries
    alias_clusters = ClusterGroup()
    rules = OrderedDict()
    for position, (origin, alias) in enumerate(aliases.items()):
        if not any(a in candidates for a in alias.aliased):
            continue

        # The temporary is named after the position of the alias
        function = Array(name=template(position), shape=shape,
                         dimensions=indices).indexed

        # The new Cluster
        expression = Eq(Indexed(function, *indices), origin)
        intervals, sub_iterators, directions = cluster.ispace.args

        # Adjust the intervals
        box = alias.anti_stencil.boxify().negate()
        intervals = intervals.subtract(box)
        if all(time_invariants[a] for a in alias.aliased):
            time_dims = [d for d in intervals.dimensions if d.is_Time]
            intervals = intervals.drop(time_dims)
        ispace = IterationSpace(intervals, sub_iterators, directions)
        alias_clusters.append(Cluster([expression], ispace))

        # The substitution rules
        for aliased, distance in alias.with_distance:
            coordinates = tuple(sum([d, off]) for d, off in distance.items()
                                if d in indices)
            temporary = Indexed(function, *coordinates)
            rules[candidates[aliased]] = temporary
            rules[aliased] = temporary

    alias_clusters = groupby(alias_clusters).finalize()
    alias_clusters.sort(key=lambda c: c.is_dense)

    # Switch temporaries in the expression trees
    remaining = [e.xreplace(rules) for e in remaining]

    return alias_clusters + [cluster.rebuild(remaining)]