def offset_from_centre(d, indices):
    if d in indices:
        p = d
        offset = d - min(indices)
        assert is_integer(offset)

    elif len(indices) == 1:
        p = indices[0]
        offset = 0

    else:
        # E.g., `time/factor-1` and `time/factor+1` present among the
        # indices in `index_mapper`, but not `time/factor`. We reconstruct
        # `time/factor` -- the starting point at `time_m` or `time_M`
        assert len(indices) > 0
        v = indices[0]

        try:
            p = sum(v.args[1:])
            if not ((p - v).is_Integer or (p - v).is_Symbol):
                raise ValueError
        except (IndexError, ValueError):
            raise NotImplementedError("Cannot apply buffering with nonlinear "
                                      "index functions (found `%s`)" % v)

        try:
            # Start assuming e.g. `list(m) = [time - 1, time + 2]`
            offset = p - min(indices)
        except TypeError:
            # Actually, e.g. `list(m) = [time/factor - 1, time/factor + 2]`
            offset = p - vmin(*[Vector(i) for i in indices])[0]

    return p, offset
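
# Illustrative sketch (not part of the original module) of the expected behaviour,
# assuming SymPy/Devito symbolic indices. With `indices = [time - 1, time, time + 1]`
# and `d = time`, the centre is `time` itself and `offset = time - (time - 1) = 1`,
# so `p, offset = time, 1`. With only the off-centre accesses
# `indices = [time/factor - 1, time/factor + 1]`, the centre `time/factor` is
# reconstructed from the first index and `offset` is again 1.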
def lastmap(self):
    """
    A mapper from contracted Dimensions to a 2-tuple of indices representing,
    respectively, the "last" write to the buffer and the "last" read from the
    buffered Function. For example, `{time: (sb1, time+1)}`.
    """
    mapper = {}
    for d, m in self.index_mapper.items():
        try:
            func = max if self.written.directions[d.root] is Forward else min
            v = func(m)
        except TypeError:
            func = vmax if self.written.directions[d.root] is Forward else vmin
            v = func(*[Vector(i) for i in m])[0]
        mapper[d] = Map(m[v], v)

    return mapper
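
# Illustrative sketch (an assumed example, not taken from the original module):
# for a buffered `u(time, x)` written forward in time with accesses `u[time]`
# and `u[time+1]`, `index_mapper` maps `time+1` to a ModuloDimension such as
# `sb1`, so `lastmap` would return `{time: Map(sb1, time+1)}` -- i.e., the last
# write to the buffer occurs at `ub[sb1]` and the last read of `u` at `u[time+1]`.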
def actions_from_update_memcpy(cluster, clusters, prefix, actions):
    it = prefix[-1]
    d = it.dim
    direction = it.direction

    # Prepare the data to instantiate a PrefetchUpdate SyncOp
    e = cluster.exprs[0]

    size = 1

    function = e.rhs.function
    fetch = e.rhs.indices[d]
    ifetch = fetch.subs(d, d.symbolic_min)
    fcond = None

    if direction is Forward:
        pfetch = fetch + 1
    else:
        pfetch = fetch - 1
    pcond = None

    target = e.lhs.function
    tstore0 = e.lhs.indices[d]

    # If fetching into e.g. `ub[sb1]`, we'll need to prefetch into e.g. `ub[sb0]`
    if is_integer(tstore0):
        tstore = tstore0
    else:
        assert tstore0.is_Modulo
        subiters = [md for md in cluster.sub_iterators[d]
                    if md.parent is tstore0.parent]
        osubiters = sorted(subiters, key=lambda i: Vector(i.offset))
        n = osubiters.index(tstore0)
        if direction is Forward:
            tstore = osubiters[(n + 1) % len(osubiters)]
        else:
            tstore = osubiters[(n - 1) % len(osubiters)]

    # Turn `cluster` into a prefetch Cluster
    expr = uxreplace(e, {tstore0: tstore, fetch: pfetch})
    guards = {d: And(cluster.guards.get(d, True),
                     GuardBoundNext(function.indices[d], direction))}
    syncs = {d: [PrefetchUpdate(d, size, function, fetch, ifetch, fcond,
                                pfetch, pcond, target, tstore)]}
    pcluster = cluster.rebuild(exprs=expr, guards=guards, syncs=syncs)

    # Since we're turning `e` into a prefetch, we need to:
    # 1) attach a WaitPrefetch SyncOp to the first Cluster accessing `target`
    # 2) insert the prefetch Cluster right after the last Cluster accessing `target`
    # 3) drop the original Cluster performing a memcpy-based fetch
    n = clusters.index(cluster)
    first = None
    last = None
    for c in clusters[n + 1:]:
        if target in c.scope.reads:
            if first is None:
                first = c
            last = c
    assert first is not None
    assert last is not None

    actions[first].syncs[d].append(
        WaitPrefetch(d, size, function, fetch, ifetch, fcond,
                     pfetch, pcond, target, tstore))
    actions[last].insert.append(pcluster)
    actions[cluster].drop = True

    return last, pcluster
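
# Illustrative sketch (an assumed example, not from the original module): given a
# fetch Cluster performing `ub[sb1, x] = u[time + 1, x]` along a forward `time`
# loop, this routine rebuilds it as a prefetch of the next slot, roughly
# `ub[sb0, x] = u[time + 2, x]`, guarded by `GuardBoundNext(time, Forward)` and
# carrying a PrefetchUpdate SyncOp; the Clusters consuming `ub` then receive a
# matching WaitPrefetch SyncOp, and the original memcpy-based fetch is dropped.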
def __init__(self, function, contracted_dims, accessv, options, sregistry,
             bds=None, mds=None):
    # Parse compilation options
    async_degree = options['buf-async-degree']
    space = options['buf-mem-space']
    dtype = options['buf-dtype'](function)

    self.function = function
    self.accessv = accessv

    self.contraction_mapper = {}
    self.index_mapper = defaultdict(dict)
    self.sub_iterators = defaultdict(list)
    self.subdims_mapper = DefaultOrderedDict(set)

    # Create the necessary ModuloDimensions for indexing into the buffer
    # E.g., `u[time,x] + u[time+1,x]` -> `ub[sb0,x] + ub[sb1,x]`, where `sb0`
    # and `sb1` are ModuloDimensions starting at `time` and `time+1` respectively
    dims = list(function.dimensions)
    for d in contracted_dims:
        assert d in function.dimensions

        # Determine the buffer size, and therefore the span of the ModuloDimension,
        # along the contracting Dimension `d`
        indices = filter_ordered(i.indices[d] for i in accessv.accesses)
        slots = [i.subs({d: 0, d.spacing: 1}) for i in indices]
        try:
            size = max(slots) - min(slots) + 1
        except TypeError:
            # E.g., special case `slots=[-1 + time/factor, 2 + time/factor]`
            # Resort to the fast vector-based comparison machinery (rather than
            # the slower sympy.simplify)
            slots = [Vector(i) for i in slots]
            size = int((vmax(*slots) - vmin(*slots) + 1)[0])

        if async_degree is not None:
            if async_degree < size:
                warning("Ignoring provided asynchronous degree as it'd be "
                        "too small for the required buffer (provided %d, "
                        "but need at least %d for `%s`)"
                        % (async_degree, size, function.name))
            else:
                size = async_degree

        # Replace `d` with a suitable CustomDimension `bd`
        name = sregistry.make_name(prefix='db')
        bd = bds.setdefault((d, size), CustomDimension(name, 0, size-1, size, d))
        self.contraction_mapper[d] = dims[dims.index(d)] = bd

        # Finally create the ModuloDimensions as children of `bd`
        if size > 1:
            # Note: indices are sorted so that the semantic order (sb0, sb1, sb2)
            # follows SymPy's index ordering (time, time-1, time+1) after modulo
            # replacement, so that associativity errors are consistent. This very
            # same strategy is also applied in clusters/algorithms/Stepper
            p, _ = offset_from_centre(d, indices)
            indices = sorted(indices,
                             key=lambda i: -np.inf if i - p == 0 else (i - p))
            for i in indices:
                name = sregistry.make_name(prefix='sb')
                md = mds.setdefault((bd, i), ModuloDimension(name, bd, i, size))
                self.index_mapper[d][i] = md
                self.sub_iterators[d.root].append(md)
        else:
            assert len(indices) == 1
            self.index_mapper[d][indices[0]] = 0

    # Track the SubDimensions used to index into `function`
    for e in accessv.mapper:
        m = {i.root: i for i in e.free_symbols
             if isinstance(i, Dimension) and (i.is_Sub or not i.is_Derived)}
        for d, v in m.items():
            self.subdims_mapper[d].add(v)
    if any(len(v) > 1 for v in self.subdims_mapper.values()):
        # Non-uniform SubDimensions. At this point we're going to raise
        # an exception. It's either illegal or still unsupported
        for v in self.subdims_mapper.values():
            for d0, d1 in combinations(v, 2):
                if d0.overlap(d1):
                    raise InvalidOperator("Cannot apply `buffering` to `%s` as it "
                                          "is accessed over the overlapping "
                                          "SubDimensions `<%s, %s>`"
                                          % (function, d0, d1))
        raise NotImplementedError("`buffering` does not support multiple "
                                  "non-overlapping SubDimensions yet.")
    else:
        self.subdims_mapper = {d: v.pop() for d, v in self.subdims_mapper.items()}

    # Build and sanity-check the buffer IterationIntervals
    self.itintervals_mapper = {}
    for e in accessv.mapper:
        for i in e.ispace.itintervals:
            v = self.itintervals_mapper.setdefault(i.dim, i.args)
            if v != i.args:
                raise NotImplementedError("Cannot apply `buffering` as the buffered "
                                          "function `%s` is accessed over multiple, "
                                          "non-compatible iteration spaces along the "
                                          "Dimension `%s`" % (function.name, i.dim))
    # Also add IterationIntervals for initialization along `x`, should `xi` be
    # the only written Dimension in the `x` hierarchy
    for d, (interval, _, _) in list(self.itintervals_mapper.items()):
        for i in d._defines:
            self.itintervals_mapper.setdefault(i, (interval.relaxed, (), Forward))

    # Finally create the actual buffer
    self.buffer = Array(name=sregistry.make_name(prefix='%sb' % function.name),
                        dimensions=dims,
                        dtype=dtype,
                        halo=function.halo,
                        space=space)
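
# Illustrative sketch (an assumed example, not part of the original module): for
# `function = u(time, x)` accessed as `u[time, x]` and `u[time + 1, x]` with
# `contracted_dims = [time]`, the constructor computes `size = 2`, replaces
# `time` with a CustomDimension `db0` of extent 2, builds ModuloDimensions
# `sb0` (origin `time`) and `sb1` (origin `time + 1`), and allocates the buffer
# as an Array `ub(db0, x)`, so that the accesses become `ub[sb0, x]` and
# `ub[sb1, x]`.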