def common_subexprs_elimination(exprs, make, mode='default'): """ Perform common sub-expressions elimination, or CSE. Note: the output is guaranteed to be topologically sorted. Parameters ---------- exprs : expr-like or list of expr-like One or more expressions to which CSE is applied. make : callable Build symbols to store temporary, redundant values. mode : str, optional The CSE algorithm applied. Accepted: ['default']. """ # Note: not defaulting to SymPy's CSE() function for three reasons: # - it also captures array index access functions (eg, i+1 in A[i+1] and B[i+1]); # - it sometimes "captures too much", losing factorization opportunities; # - very slow # TODO: a second "sympy" mode will be provided, relying on SymPy's CSE() but # also ensuring some sort of post-processing assert mode == 'default' # Only supported mode ATM processed = list(exprs) mapped = [] while True: # Detect redundancies counted = count(mapped + processed, q_xop).items() targets = OrderedDict([(k, estimate_cost(k, True)) for k, v in counted if v > 1]) if not targets: break # Create temporaries hit = max(targets.values()) picked = [k for k, v in targets.items() if v == hit] mapper = OrderedDict([(e, make()) for i, e in enumerate(picked)]) # Apply replacements processed = [e.xreplace(mapper) for e in processed] mapped = [e.xreplace(mapper) for e in mapped] mapped = [DummyEq(v, k) for k, v in reversed(list(mapper.items()))] + mapped # Prepare for the next round for k in picked: targets.pop(k) processed = mapped + processed # At this point we may have useless temporaries (e.g., r0=r1). Let's drop them processed = _compact_temporaries(processed) # Perform topological sorting so that reads-after-writes are honored processed = _topological_sort(processed) return processed
def common_subexprs_elimination(exprs, make, mode='default'): """ Perform common sub-expressions elimination, or CSE. Note: the output is not guranteed to be topologically sorted. Parameters ---------- exprs : expr-like or list of expr-like One or more expressions to which CSE is applied. make : callable Build symbols to store temporary, redundant values. mode : str, optional The CSE algorithm applied. Accepted: ['default']. """ # Note: not defaulting to SymPy's CSE() function for three reasons: # - it also captures array index access functions (eg, i+1 in A[i+1] and B[i+1]); # - it sometimes "captures too much", losing factorization opportunities; # - very slow # TODO: a second "sympy" mode will be provided, relying on SymPy's CSE() but # also ensuring some sort of post-processing assert mode == 'default' # Only supported mode ATM processed = list(exprs) mapped = [] while True: # Detect redundancies counted = count(mapped + processed, q_op).items() targets = OrderedDict([(k, estimate_cost(k)) for k, v in counted if v > 1]) if not targets: break # Create temporaries hit = max(targets.values()) picked = [k for k, v in targets.items() if v == hit] mapper = OrderedDict([(e, make()) for i, e in enumerate(picked)]) # Apply replacements processed = [e.xreplace(mapper) for e in processed] mapped = [e.xreplace(mapper) for e in mapped] mapped = [Eq(v, k) for k, v in reversed(list(mapper.items()))] + mapped # Prepare for the next round for k in picked: targets.pop(k) processed = mapped + processed # Simply renumber the temporaries in ascending order mapper = {i.lhs: j.lhs for i, j in zip(mapped, reversed(mapped))} processed = [e.xreplace(mapper) for e in processed] return processed
def common_subexprs_elimination(exprs, make, mode='default'): """ Perform common subexpressions elimination. Note: the output is not guranteed to be topologically sorted. :param exprs: The target SymPy expression, or a collection of SymPy expressions. :param make: A function to construct symbols used for replacement. The function takes as input an integer ID; ID is computed internally and used as a unique identifier for the constructed symbols. """ # Note: not defaulting to SymPy's CSE() function for three reasons: # - it also captures array index access functions (eg, i+1 in A[i+1] and B[i+1]); # - it sometimes "captures too much", losing factorization opportunities; # - very slow # TODO: a second "sympy" mode will be provided, relying on SymPy's CSE() but # also ensuring some sort of post-processing assert mode == 'default' # Only supported mode ATM processed = list(exprs) mapped = [] while True: # Detect redundancies counted = count(mapped + processed, q_op).items() targets = OrderedDict([(k, estimate_cost(k)) for k, v in counted if v > 1]) if not targets: break # Create temporaries hit = max(targets.values()) picked = [k for k, v in targets.items() if v == hit] mapper = OrderedDict([(e, make(len(mapped) + i)) for i, e in enumerate(picked)]) # Apply repleacements processed = [e.xreplace(mapper) for e in processed] mapped = [e.xreplace(mapper) for e in mapped] mapped = [Eq(v, k) for k, v in reversed(list(mapper.items()))] + mapped # Prepare for the next round for k in picked: targets.pop(k) processed = mapped + processed # Simply renumber the temporaries in ascending order mapper = {i.lhs: j.lhs for i, j in zip(mapped, reversed(mapped))} processed = [e.xreplace(mapper) for e in processed] return processed
def _cse(maybe_exprs, make, mode='default'): """ Main common sub-expressions elimination routine. Note: the output is guaranteed to be topologically sorted. Parameters ---------- maybe_exprs : expr-like or list of expr-like or Cluster One or more expressions to which CSE is applied. make : callable Build symbols to store temporary, redundant values. mode : str, optional The CSE algorithm applied. Accepted: ['default']. """ # Note: not defaulting to SymPy's CSE() function for three reasons: # - it also captures array index access functions (eg, i+1 in A[i+1] and B[i+1]); # - it sometimes "captures too much", losing factorization opportunities; # - very slow # TODO: a second "sympy" mode will be provided, relying on SymPy's CSE() but # also ensuring some form of post-processing assert mode == 'default' # Only supported mode ATM # Just for flexibility, accept either Clusters or exprs if isinstance(maybe_exprs, Cluster): cluster = maybe_exprs processed = list(cluster.exprs) scope = cluster.scope else: processed = list(maybe_exprs) scope = Scope(maybe_exprs) # Some sub-expressions aren't really "common" -- that's the case of Dimension- # independent data dependences. For example: # # ... = ... a[i] + 1 ... # a[i] = ... # ... = ... a[i] + 1 ... # # `a[i] + 1` will be excluded, as there's a flow Dimension-independent data # dependence involving `a` exclude = {i.source.access for i in scope.d_flow.independent()} mapped = [] while True: # Detect redundancies counted = count(mapped + processed, q_xop).items() targets = OrderedDict([(k, estimate_cost(k, True)) for k, v in counted if v > 1]) # Rule out Dimension-independent data dependencies targets = OrderedDict([(k, v) for k, v in targets.items() if not k.free_symbols & exclude]) if not targets: break # Create temporaries hit = max(targets.values()) picked = [k for k, v in targets.items() if v == hit] mapper = OrderedDict([(e, make()) for i, e in enumerate(picked)]) # Apply replacements processed = [uxreplace(e, mapper) for e in processed] mapped = [uxreplace(e, mapper) for e in mapped] mapped = [Eq(v, k) for k, v in reversed(list(mapper.items()))] + mapped # Update `exclude` for the same reasons as above -- to rule out CSE across # Dimension-independent data dependences exclude.update({i for i in mapper.values()}) # Prepare for the next round for k in picked: targets.pop(k) processed = mapped + processed # At this point we may have useless temporaries (e.g., r0=r1). Let's drop them processed = _compact_temporaries(processed) return processed