def visit_EListComprehension(self, e):
    """Infer the type of a list comprehension: a list if every pulled-from
    collection is a list, otherwise a bag."""
    collection_types = OrderedSet()
    with self.scope():
        for clause in e.clauses:
            self.visit(clause)
            if isinstance(clause, syntax.CPull) and clause.e.type is not DEFAULT_TYPE:
                collection_types.add(clause.e.type)
        self.visit(e.e)
    if all(isinstance(t, syntax.TList) for t in collection_types):
        e.type = syntax.TList(e.e.type)
    else:
        e.type = syntax.TBag(e.e.type)
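# A standalone sketch of the typing rule above, using plain strings in place of
# the syntax module's type nodes (`comprehension_result_type` is a hypothetical
# helper, not part of the type checker):
def comprehension_result_type(element_type, source_types):
    """All pulled-from collections are lists -> list result; otherwise a bag."""
    if all(t.startswith("List[") for t in source_types):
        return "List[{}]".format(element_type)
    return "Bag[{}]".format(element_type)

# comprehension_result_type("Int", ["List[Int]"])                == "List[Int]"
# comprehension_result_type("Int", ["List[Int]", "Bag[Int]"])    == "Bag[Int]"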
def cleanup(self):
    """
    Remove unused state, queries, and updates.
    """

    # sort of like mark-and-sweep
    queries_to_keep = OrderedSet(q.name for q in self.query_specs if q.visibility == Visibility.Public)
    state_vars_to_keep = OrderedSet()
    changed = True
    while changed:
        changed = False
        for qname in list(queries_to_keep):
            if qname in self.query_impls:
                for sv in free_vars(self.query_impls[qname]):
                    if sv not in state_vars_to_keep:
                        state_vars_to_keep.add(sv)
                        changed = True
                for e in all_exps(self.query_impls[qname].ret):
                    if isinstance(e, ECall):
                        if e.func not in queries_to_keep:
                            queries_to_keep.add(e.func)
                            changed = True
        for op in self.op_specs:
            for ((ht, op_name), code) in self.handle_updates.items():
                if op.name == op_name:
                    for qname in _queries_used_by(code):
                        if qname not in queries_to_keep:
                            queries_to_keep.add(qname)
                            changed = True
            for sv in state_vars_to_keep:
                for qname in _queries_used_by(self.updates[(sv, op.name)]):
                    if qname not in queries_to_keep:
                        queries_to_keep.add(qname)
                        changed = True

    # remove old specs
    for q in list(self.query_specs):
        if q.name not in queries_to_keep:
            self.query_specs.remove(q)

    # remove old implementations
    for qname in list(self.query_impls.keys()):
        if qname not in queries_to_keep:
            del self.query_impls[qname]

    # remove old state vars
    self.concrete_state = [
        v for v in self.concrete_state
        if any(v[0] in free_vars(q) for q in self.query_impls.values())]

    # remove old method implementations
    for k in list(self.updates.keys()):
        v, op_name = k
        if v not in [var for (var, exp) in self.concrete_state]:
            del self.updates[k]
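# A standalone sketch of the mark-and-sweep fixpoint that cleanup() runs, with a
# plain dict standing in for the query/state dependency information (the real
# method walks Cozy expressions via free_vars and all_exps instead of this toy
# `deps` map):
def reachable(roots, deps):
    """Return every name transitively reachable from `roots` via `deps`."""
    keep = set(roots)
    changed = True
    while changed:
        changed = False
        for name in list(keep):
            for dep in deps.get(name, ()):
                if dep not in keep:
                    keep.add(dep)
                    changed = True
    return keep

# reachable({"get"}, {"get": ["helper"], "helper": ["state_x"], "unused": ["state_y"]})
# == {"get", "helper", "state_x"}; "unused" and "state_y" would be swept.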
def max_storage_size(e, freebies: [Exp] = []):
    """Build an expression for the largest storage size over all EStateVar
    subexpressions of `e`."""
    sizes = OrderedSet()
    for x in all_exps(e):
        if isinstance(x, EStateVar):
            sizes.add(storage_size(x.e, freebies))
    return max_of(*sizes, type=INT)
class Learner(object):
    """Enumerative search core: enumerates candidate expressions by size and
    yields replacements that make the watched target expression cheaper."""

    def __init__(self, target, assumptions, binders, state_vars, args,
                 legal_free_vars, examples, cost_model, builder,
                 stop_callback, hints, solver):
        self.binders = OrderedSet(binders)
        self.state_vars = OrderedSet(state_vars)
        self.args = OrderedSet(args)
        self.legal_free_vars = legal_free_vars
        self.stop_callback = stop_callback
        self.cost_model = cost_model
        self.builder = builder
        self.seen = SeenSet()
        self.assumptions = assumptions
        self.hints = list(hints)
        self.solver = solver
        self.reset(examples)
        self.watch(target)

    def compare_costs(self, c1, c2):
        self._on_cost_cmp()
        solver = self.solver
        if solver is not None:
            return c1.compare_to(c2, solver=solver)
        else:
            return c1.compare_to(c2, assumptions=self.assumptions)

    def reset(self, examples):
        _fates.clear()
        self.cache = Cache(binders=self.binders, args=self.args)
        self.current_size = -1
        self.examples = list(examples)
        self.all_examples = instantiate_examples(self.examples, self.binders)
        self.seen.clear()
        self.builder_iter = ()
        self.last_progress = 0
        self.backlog = None
        self.backlog_counter = 0
        self._start_minor_it()

    def _check_seen_wf(self):
        if enforce_seen_wf.value:
            for (e, pool, fp, size, cost) in self.seen.items():
                fpnow = self._fingerprint(e)
                if fp != fpnow:
                    print("#" * 40)
                    print(pprint(e))
                    print(fp)
                    print(fpnow)
                    assert False

    def is_legal_in_pool(self, e, pool):
        try:
            return exp_wf(e, state_vars=self.state_vars, args=self.args, pool=pool, assumptions=self.assumptions)
        except ExpIsNotWf:
            return False

    def watch(self, new_target):
        print("watching new target...")
        self.backlog_counter = 0
        self.target = new_target
        self.roots = OrderedSet()
        types = OrderedSet()
        for e in itertools.chain(all_exps(new_target), *[all_exps(h) for h in self.hints]):
            if isinstance(e, ELambda):
                continue
            for pool in ALL_POOLS:
                exp = e
                if pool == STATE_POOL:
                    exp = strip_EStateVar(e)
                fvs = free_vars(exp)
                if all(v in self.legal_free_vars for v in fvs) and self.is_legal_in_pool(exp, pool):
                    _on_exp(exp, "new root", pool_name(pool))
                    exp._root = True
                    self.roots.add((exp, pool))
                    if pool == STATE_POOL and all(v in self.state_vars for v in fvs):
                        self.roots.add((EStateVar(exp).with_type(exp.type), RUNTIME_POOL))
                    types.add(exp.type)
                else:
                    _on_exp(exp, "rejected root", pool_name(pool))
        for b in self.binders:
            types.add(b.type)
        for t in types:
            self.roots.add((construct_value(t), RUNTIME_POOL))
        self.roots = list(self.roots)
        self.roots.sort(key=lambda tup: tup[0].size())
        self._watches = group_by(
            enumerate_fragments2(new_target),
            k=lambda ctx: (ctx.pool, ctx.e.type),
            v=lambda ctxs: sorted(ctxs, key=lambda ctx: -ctx.e.size()))
        print("done!")

    def _fingerprint(self, e):
        self.fpcount += 1
        # bs = tuple(sorted(free_vars(e) & self.binders))
        bs = (len(free_vars(e) & self.binders),)
        return fingerprint(e, self.all_examples) + bs

    def _watched_contexts(self, pool, type):
        return self._watches.get((pool, type), ())
        # return sorted(list(enumerate_fragments2(self.target)), key=lambda ctx: -ctx.e.size())

    def _possible_replacements(self, e, pool, cost):
        """
        Yields watched expressions that appear as worse versions of the given
        expression. There may be more than one.
        """
        # return
        free_binders = OrderedSet(v for v in free_vars(e) if v in self.binders)
        for ctx in self._watched_contexts(pool, e.type):
            watched_e = ctx.e
            p = ctx.pool
            r = ctx.replace_e_with
            assert e.type == watched_e.type
            assert p == pool
            _on_exp(e, "considering replacement of", watched_e)
            # if e.type != watched_e.type:
            #     # _on_exp(e, "wrong type")
            #     continue
            # if p != pool:
            #     # _on_exp(e, "wrong pool")
            #     continue
            if e == watched_e:
                # _on_exp(e, "no change")
                continue
            unbound_binders = [b for b in free_binders if b not in ctx.bound_vars]
            if unbound_binders:
                _on_exp(e, "skipped exp with free binders", ", ".join(b.id for b in unbound_binders))
                continue
            if CHECK_SUBST_COST:
                watched_cost = self.cost_model.cost(watched_e, pool=pool)
                ordering = self.compare_costs(cost, watched_cost)
                if ordering == Cost.WORSE:
                    _on_exp(e, "skipped worse replacement", pool_name(pool), watched_e)
                    continue
                if ordering == Cost.UNORDERED:
                    _on_exp(e, "skipped equivalent replacement", pool_name(pool), watched_e)
                    # print(" e1 = {!r}".format(e))
                    # print(" e2 = {!r}".format(watched_e))
                    continue
            # assert all(eval_bulk(self.assumptions, self.all_examples))
            if all(eval_bulk(EEq(self.target, r(e)), self.all_examples)):
                yield (watched_e, e, ctx.facts, r)
            else:
                _on_exp(e, "visited pointless replacement", watched_e)

    def pre_optimize(self, e, pool):
        """
        Optimize `e` by replacing its subexpressions with the best cached
        versions available (or leaving them untouched if they are new).
        """
        if not hasattr(e, "_accel"):
            return e
        top_level = e
        class V(BottomUpRewriter):
            def visit_EStateVar(_, e):
                return EStateVar(self.pre_optimize(e.e, STATE_POOL)).with_type(e.type)
            def visit_ELambda(_, e):
                if e.arg not in self.binders and e.arg in free_vars(e.body):
                    # Derp! Someone made an expression that uses an illegal
                    # binder. There is no way to compute a fingerprint for the
                    # body, unfortunately, so we just stop here.
                    return e
                return ELambda(e.arg, super().visit_ADT(e.body)) # optimize children
            def visit_Exp(_, e): # do not shadow `self`
                if e is top_level:
                    return super().visit_ADT(e) # optimize children
                fp = self._fingerprint(e)
                prev = self.seen.find_one(pool, fp)
                if prev is None:
                    return super().visit_ADT(e) # optimize children
                prev_exp, prev_size, prev_cost = prev
                if prev_exp == e:
                    return prev_exp
                cost = self.cost_model.cost(e, pool)
                ordering = self.compare_costs(cost, prev_cost)
                if ordering == Cost.BETTER:
                    return super().visit_ADT(e) # optimize children
                else:
                    # NOTE: no need to optimize children; if it is cached, then
                    # it is presumably already the best possible.
                    # if not alpha_equivalent(e, prev_exp):
                    #     print("*** rewriting {} to {}".format(pprint(e), pprint(prev_exp)), file=sys.stderr)
                    return prev_exp
        res = None
        try:
            res = V().visit(e)
            assert exp_wf(res, state_vars=self.state_vars, args=self.args, pool=pool, assumptions=self.assumptions)
            if hasattr(e, "_tag"):
                res._tag = e._tag
            return res
        except Exception:
            traceback.print_exc(file=sys.stdout)
            print("FAILED TO PREOPTIMIZE {} ---> {}".format(pprint(e), pprint(res)))
            print(repr(e))
            return e

    def _start_minor_it(self):
        now = datetime.datetime.now()
        if hasattr(self, "mstart"):
            duration = now - self.mstart
            print("> minor duration: {}".format(duration))
            print("> next() calls: {}".format(self.ncount))
            print("> total exps: {}".format(self.ecount))
            print("> exps/s: {}".format(self.ecount / duration.total_seconds()))
            print("> cost comparisons: {}".format(self.ccount))
            print("> fingerprints: {}".format(self.fpcount))
        if self.current_size >= 0:
            print("minor iteration {}, |cache|={}".format(self.current_size, len(self.cache)))
        self.mstart = now
        self.ecount = 0
        self.ccount = 0
        self.fpcount = 0
        self.ncount = 0

    def _on_exp(self, e, pool):
        # print("next() <<< {p:10} {e}".format(e=pprint(e), p=pool_name(pool)))
        self.ecount += 1

    def _on_cost_cmp(self):
        self.ccount += 1

    def next(self):
        target_cost = self.cost_model.cost(self.target, RUNTIME_POOL)
        self.ncount += 1
        while True:
            if self.backlog is not None:
                if self.stop_callback():
                    raise StopException()
                (e, pool, cost) = self.backlog
                improvements = list(self._possible_replacements(e, pool, cost))
                if self.backlog_counter < len(improvements):
                    i = improvements[self.backlog_counter]
                    self.backlog_counter += 1
                    return i
                else:
                    self.backlog = None
                    self.backlog_counter = 0

            for (e, pool) in self.builder_iter:
                self._on_exp(e, pool)
                if self.stop_callback():
                    raise StopException()

                # # Stopgap measure... long story --Calvin
                # bad = False
                # for x in all_exps(e):
                #     if isinstance(x, EStateVar):
                #         if any(v not in self.state_vars for v in free_vars(x.e)):
                #             bad = True
                #             _on_exp(e, "skipping due to illegal free vars under EStateVar")
                # if bad:
                #     continue

                new_e = self.pre_optimize(e, pool) if preopt.value else e
                if new_e is not e:
                    _on_exp(e, "preoptimized", new_e)
                    e = new_e

                cost = self.cost_model.cost(e, pool)

                if pool == RUNTIME_POOL and (self.cost_model.is_monotonic() or hyperaggressive_culling.value) and self.compare_costs(cost, target_cost) == Cost.WORSE:
                    _on_exp(e, "too expensive", cost, target_cost)
                    continue

                fp = self._fingerprint(e)
                prev = list(self.seen.find_all(pool, fp))
                should_add = True
                if not prev:
                    _on_exp(e, "new", pool_name(pool))
                elif any(alpha_equivalent(e, ee) for (ee, _, _) in prev):
                    _on_exp(e, "duplicate")
                    should_add = False
                else:
                    better_than = None
                    worse_than = None
                    for prev_exp, prev_size, prev_cost in prev:
                        self._on_cost_cmp()
                        ordering = self.compare_costs(cost, prev_cost)
                        assert ordering in (Cost.WORSE, Cost.BETTER, Cost.UNORDERED)
                        if enforce_strong_progress.value and ordering != Cost.WORSE:
                            bad = find_one(all_exps(e), lambda ee: alpha_equivalent(ee, prev_exp))
                            if bad:
                                _on_exp(e, "failed strong progress requirement", bad)
                                should_add = False
                                break
                        _on_exp(e, ordering, pool_name(pool), prev_exp)
                        if ordering == Cost.UNORDERED:
                            continue
                        elif ordering == Cost.BETTER:
                            better_than = (prev_exp, prev_size, prev_cost)
                            _on_exp(prev_exp, "found better alternative", e)
                            self.cache.evict(prev_exp, size=prev_size, pool=pool)
                            self.seen.remove(prev_exp, pool, fp)
                            if (self.cost_model.is_monotonic() or hyperaggressive_culling.value) and hyperaggressive_eviction.value:
                                for (cached_e, size, p) in list(self.cache):
                                    if p != pool:
                                        continue
                                    if prev_exp in all_exps(cached_e):
                                        _on_exp(cached_e, "evicted since it contains", prev_exp)
                                        self.cache.evict(cached_e, size=size, pool=pool)
                        else:
                            should_add = False
                            worse_than = (prev_exp, prev_size, prev_cost)
                            # break
                    if worse_than and better_than:
                        print("Uh-oh! Strange cost relationship between")
                        print(" (1) this exp: {}".format(pprint(e)))
                        print(" (2) prev. A: {}".format(pprint(worse_than[0])))
                        print(" (3) prev. B: {}".format(pprint(better_than[0])))
                        print("e1 = {}".format(repr(e)))
                        print("e2 = {}".format(repr(worse_than[0])))
                        print("e3 = {}".format(repr(better_than[0])))
                        print("(1) vs (2): {}".format(cost.compare_to(worse_than[2], self.assumptions)))
                        print("(2) vs (3): {}".format(worse_than[2].compare_to(better_than[2], self.assumptions)))
                        print("(3) vs (1): {}".format(better_than[2].compare_to(cost, self.assumptions)))
                        # raise Exception("insane cost model behavior")

                if should_add:
                    self.cache.add(e, pool=pool, size=self.current_size)
                    self.seen.add(e, pool, fp, self.current_size, cost)
                    self.last_progress = self.current_size
                else:
                    continue

                for pr in self._possible_replacements(e, pool, cost):
                    self.backlog = (e, pool, cost)
                    self.backlog_counter = 1
                    return pr

            if self.last_progress < (self.current_size + 1) // 2:
                raise NoMoreImprovements("hit termination condition")

            self.current_size += 1
            self.builder_iter = self.builder.build(self.cache, self.current_size)
            if self.current_size == 0:
                self.builder_iter = itertools.chain(self.builder_iter, list(self.roots))
            for f, ct in sorted(_fates.items(), key=lambda x: x[1], reverse=True):
                print(" {:6} | {}".format(ct, f))
            _fates.clear()
            self._start_minor_it()
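# A minimal sketch of how a caller might drive this class (the argument values
# and the verification step are placeholders; the real improvement loop around
# Learner does considerably more bookkeeping):
#
#   learner = Learner(target, assumptions, binders, state_vars, args,
#                     legal_free_vars, examples, cost_model, builder,
#                     stop_callback, hints, solver)
#   try:
#       while True:
#           watched_e, new_e, facts, replace = learner.next()
#           candidate = replace(new_e)    # splice the cheaper subexpression into the target
#           # ...verify `candidate` against the spec; if it checks out, call
#           # learner.watch(candidate) to continue improving from there...
#   except NoMoreImprovements:
#       pass                              # enumeration stopped making progress
#   # next() can also raise StopException if stop_callback() fires.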