def _compact_temporaries(exprs):
    """
    Remove the temporaries that merely alias a leaf or a function.

    The input is first turned into SSA form via ``makeit_ssa``. An expression
    is dropped when its LHS is flagged ``is_Symbol`` and its RHS is either a
    leaf (according to ``q_leaf``) or flagged ``is_Function``. The RHS of each
    dropped expression is substituted into the surviving expressions.

    Parameters
    ----------
    exprs : iterable
        The expressions to be compacted.

    Returns
    -------
    list
        The surviving expressions, in the order produced by ``makeit_ssa``,
        after all substitutions have been applied.
    """
    ssa = makeit_ssa(exprs)

    def is_droppable(e):
        return e.lhs.is_Symbol and (q_leaf(e.rhs) or e.rhs.is_Function)

    # LHS -> RHS of every expression that is going to disappear
    mapper = {e.lhs: e.rhs for e in ssa if is_droppable(e)}

    def saturate(e):
        # A dropped temporary may itself refer to another dropped temporary,
        # so keep substituting until the expression stops changing
        while True:
            replaced = uxreplace(e, mapper)
            if replaced == e:
                return replaced
            e = replaced

    return [saturate(e) for e in ssa if e.lhs not in mapper]
def _compact_temporaries(exprs):
    """
    Remove the Temps that merely alias a leaf or a function.

    The input is first turned into SSA form via ``makeit_ssa``. An expression
    is dropped when its LHS is a ``Temp`` and its RHS is either a leaf
    (according to ``q_leaf``) or flagged ``is_Function``. The RHS of each
    dropped expression is substituted into the surviving expressions.

    Parameters
    ----------
    exprs : iterable
        The expressions to be compacted.

    Returns
    -------
    list
        The surviving expressions, in the order produced by ``makeit_ssa``,
        after all substitutions have been applied.
    """
    ssa = makeit_ssa(exprs)

    # Only expressions such as `t0 = s` whose LHS is a Temp qualify. Temps are
    # captured by CSE and are therefore local by construction, so removing
    # them is safe. A generic Symbol might instead be read by a later Cluster,
    # e.g. `for (i = ...) { a = b; for (j = a ...) ...`
    mapper = {}
    for e in ssa:
        if not isinstance(e.lhs, Temp):
            continue
        if q_leaf(e.rhs) or e.rhs.is_Function:
            mapper[e.lhs] = e.rhs

    processed = []
    for e in ssa:
        if e.lhs in mapper:
            # Dropped temporary
            continue

        # Retained: substitute until the expression stops changing, since a
        # dropped temporary may refer to another dropped temporary
        current = e
        while True:
            replaced = uxreplace(current, mapper)
            if replaced == current:
                break
            current = replaced
        processed.append(replaced)

    return processed
def _extract_sum_of_products(self, cluster, template, **kwargs):
    """
    Pull the sub-expressions in sum-of-product form out of ``cluster`` and
    bind each of them to a temporary.

    Parameters
    ----------
    cluster : Cluster
        Provides the expressions to process (``cluster.exprs``) and the data
        type of the temporaries (``cluster.dtype``).
    template : callable
        Called without arguments to obtain the name of each new temporary.
    **kwargs
        Accepted but not used.

    Returns
    -------
    The outcome of ``cluster.rebuild`` applied to the processed expressions.
    """
    def make():
        # A fresh, indexified Scalar for each extracted sub-expression
        return Scalar(name=template(), dtype=cluster.dtype).indexify()

    def costmodel(e):
        # Neither leaves nor terminal operations are worth a temporary
        return not q_leaf(e) and not q_terminalop(e)

    processed, _ = yreplace(cluster.exprs, make, q_sum_of_product, costmodel)

    return cluster.rebuild(processed)
def _extract_sum_of_products(self, cluster, template, **kwargs):
    """
    Pull the sub-expressions in sum-of-product form out of ``cluster`` and
    bind each of them to a temporary.

    Parameters
    ----------
    cluster : Cluster
        Provides the expressions to process (``cluster.exprs``) and the data
        type of the temporaries (``cluster.dtype``).
    template : callable
        Called without arguments to obtain the name of each new temporary.
    **kwargs
        Accepted but not used.

    Returns
    -------
    The outcome of ``cluster.rebuild`` applied to the processed expressions.
    """
    def make():
        # A fresh, indexified Scalar for each extracted sub-expression
        name = template()
        return Scalar(name=name, dtype=cluster.dtype).indexify()

    def costmodel(e):
        # Neither leaves nor terminal operations are worth a temporary
        return not q_leaf(e) and not q_terminalop(e)

    processed, _ = xreplace_constrained(cluster.exprs, make, q_sum_of_product,
                                        costmodel)

    return cluster.rebuild(processed)
def callbacks_sops(context, n):
    """
    Build the callbacks used to search for sum-of-products.

    Parameters
    ----------
    context
        Accepted but not used.
    n : int
        Controls "how big" the extracted sum-of-products will be; the search
        depth is ``2*n + 1``.

    Returns
    -------
    tuple of callables
        ``(extractor, model, ignore_collected, selector)``.
    """
    # In typical FD codes:
    #
    #   add(mul, mul, ...) -> stems from first order derivative
    #   add(mul(add(mul, mul, ...), ...), ...) -> stems from second order derivative
    #
    # The former is caught with `depth=1`, the latter with `depth=3`
    depth = 2*n + 1

    def extractor(e):
        return q_sum_of_product(e, depth)

    def model(e):
        return not (q_leaf(e) or q_terminalop(e, depth - 1))

    def ignore_collected(g):
        return len(g) <= 1

    def selector(c, n):
        return c >= MIN_COST_ALIAS and n > 1

    return extractor, model, ignore_collected, selector
def potential_max_deriv_order(exprs):
    """
    Estimate the maximum FD derivative order in a list of expressions.

    Parameters
    ----------
    exprs : iterable
        The expressions to inspect.

    Returns
    -------
    int
        The largest number of nested Adds found along any path of any
        expression; 0 if ``exprs`` is empty.
    """
    # NOTE: the Derivative(...) information might be propagated down from the
    # symbolic language, but users may write their own custom expansions
    # "by hand" (i.e., without resorting to Derivative(...)). So, rather than
    # looking for Derivative(...), the following heuristic is used:
    #
    #   add(mul, mul, ...) -> stems from first order derivative
    #   add(mul(add(mul, mul, ...), ...), ...) -> stems from second order derivative
    #   ...
    def nadds(e):
        if q_leaf(e):
            return 0
        deepest = max([nadds(a) for a in e.args], default=0)
        return int(e.is_Add) + deepest

    return max([nadds(e) for e in exprs], default=0)
def search_potential_deriv(expr, n, c=0):
    """
    Collect the sub-expressions at depth `n` that potentially stem from FD
    derivatives.

    Parameters
    ----------
    expr : expr-like
        The expression to search.
    n : int
        The depth, counted in nested Muls, at which Muls are collected.
    c : int, optional
        The number of Muls already crossed; used by the recursion.

    Returns
    -------
    list
        The Muls found at depth `n`; empty if `expr` is a leaf or a Pow.
    """
    assert n >= c >= 0

    if q_leaf(expr) or expr.is_Pow:
        return []

    if expr.is_Mul:
        if c == n:
            # Target depth reached, no need to descend any further
            return [expr]
        # Crossing a Mul brings the search one level deeper
        c = c + 1

    return flatten([search_potential_deriv(a, n, c) for a in expr.args])
def _doit(expr):
    """
    Recursively process `expr` bottom-up through ``_doit_handle``.

    Functions and leaves (as per ``q_function`` and ``q_leaf``), as well as
    objects raising AttributeError while being queried, go straight to
    ``_doit_handle`` with no terms. Any other expression is rebuilt,
    unevaluated, from its processed arguments and then handed to
    ``_doit_handle`` together with the terms gathered from the arguments.

    Returns
    -------
    Whatever ``_doit_handle`` returns; the recursion unpacks it as a pair
    ``(expression, term)``.
    """
    try:
        is_terminal = q_function(expr) or q_leaf(expr)
        if is_terminal:
            # Do not waste time
            return _doit_handle(expr, [])
    except AttributeError:
        # E.g., `Injection`
        return _doit_handle(expr, [])

    # Process the arguments first, keeping the two outputs in separate lists
    pairs = [_doit(a) for a in expr.args]
    args = [ax for ax, _ in pairs]
    terms = [term for _, term in pairs]

    rebuilt = expr.func(*args, evaluate=False)

    return _doit_handle(rebuilt, terms)
def _compact_temporaries(exprs):
    """
    Remove the temporaries that merely alias a leaf or a function.

    The input is first turned into SSA form via ``makeit_ssa``. An expression
    is dropped when its LHS is flagged ``is_Symbol`` and its RHS is either a
    leaf (according to ``q_leaf``) or flagged ``is_Function``. The RHS of each
    dropped expression is substituted into the surviving expressions.

    Parameters
    ----------
    exprs : iterable
        The expressions to be compacted.

    Returns
    -------
    list
        The surviving expressions, in the order produced by ``makeit_ssa``,
        after all substitutions have been applied.
    """
    ssa = makeit_ssa(exprs)

    def is_droppable(e):
        return e.lhs.is_Symbol and (q_leaf(e.rhs) or e.rhs.is_Function)

    # LHS -> RHS of every expression that is going to disappear
    mapper = {e.lhs: e.rhs for e in ssa if is_droppable(e)}

    processed = []
    for e in ssa:
        if e.lhs in mapper:
            # Dropped temporary
            continue

        # Retained: apply the substitutions, repeatedly
        replaced, _ = yreplace(e, mapper, repeat=True)
        assert len(replaced) == 1
        processed.extend(replaced)

    return processed
def compact_temporaries(temporaries, leaves):
    """
    Remove the temporaries that merely alias a leaf or a function.

    A FlowGraph is built out of ``temporaries + leaves``. A node is dropped
    when it is flagged ``is_Scalar``, its RHS is either a leaf (according to
    ``q_leaf``) or flagged ``is_Function``, and the set of its readers is not
    a subset of the LHSs of ``leaves``. The RHS of each dropped node is
    substituted into the surviving ones.

    Parameters
    ----------
    temporaries : list
        The expressions defining the temporaries.
    leaves : list
        The expressions reading the temporaries.

    Returns
    -------
    list
        The surviving expressions, in FlowGraph order, after substitution.
    """
    exprs = temporaries + leaves
    targets = {i.lhs for i in leaves}

    graph = FlowGraph(exprs)

    def is_droppable(v):
        if not v.is_Scalar:
            return False
        if not (q_leaf(v.rhs) or v.rhs.is_Function):
            return False
        return not v.readby.issubset(targets)

    # Key -> RHS of every node that is going to disappear
    mapper = {k: v.rhs for k, v in graph.items() if is_droppable(v)}

    processed = []
    for k, v in graph.items():
        if k in mapper:
            # Dropped temporary
            continue

        # /v/ is retained: apply the substitutions, repeatedly
        replaced, _ = xreplace_constrained(v, mapper, repeat=True)
        assert len(replaced) == 1
        processed.extend(replaced)

    return processed
def compact_temporaries(temporaries, leaves):
    """
    Remove the temporaries consisting of a single symbol.

    A FlowGraph is built out of ``temporaries + leaves``. A node is dropped
    when it is flagged ``is_Scalar``, its RHS is either a leaf (according to
    ``q_leaf``) or flagged ``is_Function``, and the set of its readers is not
    a subset of the LHSs of ``leaves``. The RHS of each dropped node is
    substituted into the surviving ones.

    Parameters
    ----------
    temporaries : list
        The expressions defining the temporaries.
    leaves : list
        The expressions reading the temporaries.

    Returns
    -------
    list
        The surviving expressions, in FlowGraph order, after substitution.
    """
    exprs = temporaries + leaves
    targets = {i.lhs for i in leaves}

    graph = FlowGraph(exprs)

    # Key -> RHS of every node that is going to disappear
    mapper = {}
    for k, v in graph.items():
        trivial_rhs = q_leaf(v.rhs) or v.rhs.is_Function
        if v.is_Scalar and trivial_rhs and not v.readby.issubset(targets):
            mapper[k] = v.rhs

    processed = []
    for k, v in graph.items():
        if k in mapper:
            # Dropped temporary
            continue

        # /v/ is retained: apply the substitutions, repeatedly
        replaced, _ = xreplace_constrained(v, mapper, repeat=True)
        assert len(replaced) == 1
        processed.extend(replaced)

    return processed
def cire(cluster, template, mode, options, platform):
    """
    Cross-iteration redundancies elimination.

    Parameters
    ----------
    cluster : Cluster
        The Cluster to be optimized.
    template : callable
        Builds the symbols (temporaries) that store the redundant expressions.
    mode : str
        The kind of redundancy searched for. Accepted: ['invariants', 'sops'].
        * 'invariants': sub-expressions that are invariant w.r.t. one or more
          Dimensions.
        * 'sops': sums-of-products, that is redundancies are searched across
          all expressions in sum-of-product form.
    options : dict
        The entries read by this pass are:
        * 'min-storage': if True, the pass will try to minimize the amount of
          storage introduced for the tensor temporaries. This might also
          reduce the operation count, but it might affect fusion and
          therefore data locality. Defaults to False (legacy).
        * 'cire-repeats': a mapping from `mode` to the maximum number of
          extraction rounds. All of its values must be greater than 0.
    platform : Platform
        The underlying platform. Used to optimize the shape of the introduced
        tensor symbols.

    Returns
    -------
    list
        The Clusters created for the Aliases, round after round, followed by
        the rebuilt input Cluster.

    Examples
    --------
    1) 'invariants'. An expensive sub-expression invariant w.r.t. `t`

    t0 = (cos(a[x,y,z])*sin(b[x,y,z]))*c[t,x,y,z]

    becomes

    t1[x,y,z] = cos(a[x,y,z])*sin(b[x,y,z])
    t0 = t1[x,y,z]*c[t,x,y,z]

    2) 'sops'. Redundant sub-expressions in sum-of-product form (here the sum
    degenerates to a single product).

    t0 = 2.0*a[x,y,z]*b[x,y,z]
    t1 = 3.0*a[x,y,z+1]*b[x,y,z+1]

    becomes

    t2[x,y,z] = a[x,y,z]*b[x,y,z]
    t0 = 2.0*t2[x,y,z]
    t1 = 3.0*t2[x,y,z+1]
    """
    # Sanity checks
    assert mode in ['invariants', 'sops']
    assert all(i > 0 for i in options['cire-repeats'].values())

    # Relevant options
    min_storage = options['min-storage']

    # Callbacks: `extractor` (extraction rule, rebuilt at each round from the
    # current context), `model` (extraction model), `selector` (selection rule)
    if mode == 'invariants':
        def extractor(context):
            is_time_invariant = make_is_time_invariant(context)

            def rule(e):
                return is_time_invariant(e)

            return rule

        def model(e):
            return estimate_cost(e, True) >= MIN_COST_ALIAS_INV

        def selector(c, n):
            return c >= MIN_COST_ALIAS_INV and n >= 1

    elif mode == 'sops':
        def extractor(context):
            def rule(e):
                return q_sum_of_product(e)

            return rule

        def model(e):
            return not (q_leaf(e) or q_terminalop(e))

        def selector(c, n):
            return c >= MIN_COST_ALIAS and n > 1

    # Actual CIRE
    done = []
    context = cluster.exprs
    for _ in range(options['cire-repeats'][mode]):
        # Extract the expressions that might alias
        exprs, extracted = extract(cluster, extractor(context), model, template)
        if not extracted:
            # Nothing to work with, stop here
            break

        # Search for aliasing expressions
        aliases = collect(extracted, min_storage)

        # Discard the aliasing expressions with a bad flops/memory trade-off
        chosen, others = choose(exprs, aliases, selector)
        if not chosen:
            # Nothing worth an Alias, stop here
            break

        # Create the Aliases and assign them to Clusters
        clusters, subs = process(cluster, chosen, aliases, template, platform)

        # From now on, `cluster` uses the newly created Aliases
        cluster = rebuild(cluster, others, aliases, subs)

        # The next round sees all of the expressions produced so far
        done.extend(clusters)
        context = flatten(c.exprs for c in done) + list(cluster.exprs)

    done.append(cluster)

    return done