def break_link(self, evalnode, args):
    """Remove ``evalnode`` from the links recorded for ``args``.

    The link table is keyed by the comma-joined ``str_hash`` of each
    argument.  After removal ``evalnode.out_link`` is reset to None.

    Raises:
        RException: if no links exist for these arguments, or if
            ``evalnode`` is not one of them.
    """
    args_hash = ','.join([x.str_hash for x in args])
    if args_hash not in self.links:
        raise RException("Something went wrong breaking links in mem")
    linked = self.links[args_hash]
    if evalnode not in linked:
        raise RException("Something went wrong breaking links in mem, 2")
    del linked[evalnode]
    evalnode.out_link = None
def sample(self, args=None):
    """Wrap a procedure or XRP in a memoizing XRP.

    Args:
        args: list holding exactly one value whose ``type`` is
            'procedure' or 'xrp'.

    Returns:
        An XRPValue wrapping a new ``mem_proc_XRP`` built from that value
        and ``self.engine_type``.

    Raises:
        RException: if ``args`` is missing or does not hold exactly one
            element, or if the element is not a procedure/xrp.
    """
    # ``args`` defaults to None; report that as an arity error instead of
    # letting len(None) fail with a TypeError.
    if args is None or len(args) != 1:
        raise RException("Mem takes exactly one argument")
    procedure = args[0]
    if procedure.type != 'procedure' and procedure.type != 'xrp':
        raise RException("Can only mem procedures")
    mem_proc = mem_proc_XRP(procedure, self.engine_type)
    return XRPValue(mem_proc)
def incorporate(self, val, args=None):
    """Record one more application of the memoized procedure at ``args``.

    The value currently reported by the inner engine for these arguments
    must equal ``val``; the per-arguments application count is then
    incremented.

    Raises:
        RException: if these arguments have no directive id yet, or if the
            engine's current value differs from ``val``.
    """
    args_hash = ','.join([x.str_hash for x in args])
    if args_hash not in self.ids:
        raise RException(
            "Engine bug in mem.  Did not sample before incorporating?")
    # TODO get actual node, and evaluate (without reflip)
    cur_val = self.engine.report_value(self.ids[args_hash])
    # Explicit __eq__ call: the comparison result is an object with a .bool field.
    same = val.__eq__(cur_val)
    if not same.bool:
        raise RException("Engine bug in mem.  Incongruous values")
    self.count[args_hash] += 1
def follow_prior(names, niter=1000, burnin=100, timer=True, printiters=0):
    """Collect values of the named variables while running inference.

    Reruns the program, performs ``burnin`` inference steps, registers a
    predict for every name, then calls ``infer()`` ``niter`` times and
    records each variable's current value after every step.  Values whose
    type is 'procedure' or 'xrp' are not recorded.

    Args:
        names: variable names to track; 'TIME' is reserved.
        niter: number of recorded inference iterations.
        burnin: argument passed to the initial ``infer`` call.
        timer: when true, per-iteration wall-clock durations are stored
            under the key 'TIME'.
        printiters: when positive, print progress every that many iterations.

    Returns:
        dict mapping each name (and optionally 'TIME') to its list of
        samples; names that produced no samples are removed.

    Raises:
        RException: if a variable is called 'TIME'.
    """
    rerun()
    samples = {}
    evalnodes = {}
    infer(burnin)
    for name in names:
        if name in ['TIME']:
            raise RException("shouldn't have a variable called TIME")
        samples[name] = []
        # Only the directive id is needed here; the predicted value is read
        # from the evalnode on every iteration below.
        (_, directive_id) = predict(var(name))
        evalnodes[name] = directives.engine.predicts[directive_id]
    if timer:
        samples['TIME'] = []

    for n in range(niter):
        if printiters > 0 and n % printiters == 0:
            # Single pre-formatted argument: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s iters" % (n,))
        started = time.time()
        infer()
        for name in names:
            val = evalnodes[name].val
            if val.type != 'procedure' and val.type != 'xrp':
                samples[name].append(val)
        elapsed = time.time() - started
        if timer:
            samples['TIME'].append(elapsed)

    for name in names:
        if len(samples[name]) == 0:
            del samples[name]
    return samples
def get_lookups(self, name, evalnode):
    """Return the lookups recorded for assumed variable ``name``.

    ``evalnode`` must be the very node registered for ``name`` in
    ``self.assumes``.  A fresh empty dict is returned when nothing has been
    recorded for the name.

    Raises:
        RException: if ``evalnode`` is not the node assumed under ``name``.
    """
    if self.assumes[name] is not evalnode:
        raise RException("Wrong evalnode getting lookups for %s" % name)
    if name not in self.lookups:
        return {}
    return self.lookups[name]
def evaluate(self, xrp_force_val=None, restore=False):
    """Evaluate this node's expression in its environment and store the result.

    Args:
        xrp_force_val: value to force; replaced by ``self.observe_val`` when
            the node is observed.
        restore: forwarded unchanged to ``evaluate_recurse``.

    Returns:
        The value produced by ``evaluate_recurse``.

    Raises:
        RException: if a value is forced but evaluation did not mark the node
            as a random XRP application.
    """
    # NOTE(review): block nesting below was reconstructed from a
    # whitespace-collapsed source -- confirm against the original file.
    expr = self.expression
    env = self.env
    # Remember the previous active children so that the ones not re-activated
    # by this evaluation can be unevaluated at the end.
    old_active_children = self.active_children
    self.active_children = {}
    if self.observed:
        xrp_force_val = self.observe_val
    # Undo the XRP applications recorded previously before re-evaluating.
    for (xrp, val, args) in self.xrp_applies:
        self.remove_xrp(xrp, val, args)
    self.xrp_applies = []
    # TODO: use directive ids...
    val = self.evaluate_recurse(expr, env, 0, 0, xrp_force_val, restore)
    if not self.random_xrp_apply:
        if xrp_force_val is not None:
            raise RException("Can only force re-scoring XRP applications")
        assert self.assume or self.predict
    self.set_val(val)
    self.active = True
    for addition in old_active_children:
        if addition not in self.active_children:
            evalnode = self.children[addition]
            evalnode.unevaluate()
    return val
def __init__(self, op, children):
    """Build an 'apply' expression applying ``op`` to ``children``.

    Raises:
        RException: if ``op`` is a 'function' expression declaring fewer
            variables than there are children.
    """
    self.initialize()
    self.type = 'apply'
    self.op = op
    self.children = children
    if self.op.type == 'function':
        if len(self.op.vars) < len(self.children):
            raise RException('Applying function to too many arguments!')
def randbelow(self, max = 0):
    """Return ``int(self.random() * max)``.

    With the default ``max`` of 0 the bound is 9007199254740992 (2**53).
    The random draw happens before the argument is validated.

    Raises:
        RException: if ``max`` is negative.
    """
    r = self.random()
    if max < 0:
        raise RException("Randbelow got a negative argument")
    if max == 0:
        max = r_uint(9007199254740992)
    return int(r * max)
def sample_prior(names, niter=1000, timer=True, printiters=0):
    """Collect values of the named variables from repeated fresh runs.

    For each of ``niter`` iterations the program is rerun and every name is
    predicted once.  Values whose type is 'procedure' or 'xrp' are not
    recorded.

    Args:
        names: variable names to sample; 'TIME' is reserved.
        niter: number of reruns.
        timer: when true, per-iteration wall-clock durations are stored
            under the key 'TIME'.
        printiters: when positive, print progress every that many iterations.

    Returns:
        dict mapping each name (and optionally 'TIME') to its list of
        samples; names that produced no samples are removed.

    Raises:
        RException: if a variable is called 'TIME'.
    """
    samples = {}
    for name in names:
        if name in ['TIME']:
            raise RException("shouldn't have a variable called TIME")
        samples[name] = []
    if timer:
        samples['TIME'] = []

    for n in range(niter):
        if printiters > 0 and n % printiters == 0:
            # Single pre-formatted argument: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s iters" % (n,))
        started = time.time()
        rerun()
        for name in names:
            (val, _) = predict(var(name))
            if val.type != 'procedure' and val.type != 'xrp':
                samples[name].append(val)
        elapsed = time.time() - started
        if timer:
            samples['TIME'].append(elapsed)

    for name in names:
        if len(samples[name]) == 0:
            del samples[name]
    return samples
def get(self, stack):
    """Return the entry stored for ``stack``.

    The stack is converted to a tuple and looked up first in ``self.db``
    and then in ``self.db_noise``.

    Raises:
        RException: if neither table contains the stack.
    """
    key = tuple(stack)
    if key in self.db:
        return self.db[key]
    if key in self.db_noise:
        return self.db_noise[key]
    raise RException('Failed to get stack %s' % str(key))
def test_prior(niter=1000, burnin=100, countup=True, timer=True):
    """Sample every assumed variable with both ``follow_prior`` and ``sample_prior``.

    Args:
        niter: iterations passed to both samplers.
        burnin: burn-in passed to ``follow_prior``.
        countup: when true, each sample list is replaced by ``count_up`` of it.
        timer: passed to both samplers; also controls whether 'TIME' entries
            are counted up.

    Returns:
        Tuple ``(d1, d2)``: the ``sample_prior`` and ``follow_prior`` results.

    Raises:
        RException: if an assumed variable is called 'TIME', or if the two
            samplers disagree on which variables produced samples.
    """
    varnames = []
    # TODO : get all the observed variables
    assumes = directives.engine.assumes
    if directives.engine_type in ('reduced traces', 'traces'):
        # Both trace engines expose assumes as id -> evalnode.  The reserved
        # name check now runs for both of them, not only for 'traces'.
        for id in assumes:
            assume_name = assumes[id].assume_name
            if assume_name in ['TIME']:
                raise RException("shouldn't have an assume_name called TIME")
            varnames.append(assume_name)
    else:
        for (varname, expr) in assumes:
            if varname in ['TIME']:
                raise RException("shouldn't have a varname called TIME")
            varnames.append(varname)

    d2 = follow_prior(varnames, niter, burnin, timer)
    d1 = sample_prior(varnames, niter, timer)

    for name in varnames:
        if name in d1:
            if name not in d2:
                raise RException(
                    "sample_prior has varname that follow_prior doesn't: %s" %
                    name)
            if countup:
                d1[name] = count_up(d1[name])
                d2[name] = count_up(d2[name])
        elif name in d2:
            raise RException(
                "follow_prior has varname that sample_prior doesn't: %s" %
                name)
    if countup and timer:
        d1['TIME'] = count_up(d1['TIME'])
        d2['TIME'] = count_up(d2['TIME'])
    return (d1, d2)
def propagate_link(self, evalnode, val, restore_inactive):
    """Set ``evalnode.val`` to ``val`` and propagate to every node that looked it up.

    Raises:
        RException: if ``evalnode`` is not the node registered in
            ``self.assumes`` under its ``assume_name``.
    """
    name = evalnode.assume_name
    if self.assumes[name] is not evalnode:
        raise RException("Wrong evalnode getting lookups for %s" % name)
    evalnode.val = val
    dependents = list_nodes(self.get_lookups(name, evalnode))
    for dependent in dependents:
        evalnode.propagate_to(dependent, restore_inactive)
def lookup(self, name):
    """Find ``name`` in this environment or the nearest ancestor defining it.

    Returns:
        Tuple ``(value, env)`` where ``env`` is the environment in which the
        name was found.

    Raises:
        RException: if no environment up to the root defines ``name``.
    """
    if name in self.assignments:
        return (self.assignments[name], self)
    if self.parent is None:
        # The message was previously raised with a bare, unformatted '%s'.
        raise RException('Variable %s undefined' % name)
        # raise RException('Variable %s undefined in env:\n%s' % (name, self.__str__()))
    return self.parent.lookup(name)
def get_cdf(valuedict, start, end, bucketsize, normalizebool=True):
    """Return the running sums of ``get_pdf`` over the buckets in [start, end).

    The result starts with 0 and has one further entry per bucket, each
    being the previous entry plus that bucket's density.

    Raises:
        RException: if ``normalizebool`` is true and the final sum is not
            strictly between 0.999 and 1.001.
    """
    numbuckets = int(math.floor((end - start) / bucketsize))
    density = get_pdf(valuedict, start, end, bucketsize, normalizebool)

    cumulative = [0]
    running = 0
    for bucket in range(numbuckets):
        running = running + density[bucket]
        cumulative.append(running)

    if normalizebool and not (0.999 < cumulative[-1] < 1.001):
        raise RException("Cumulative distribution should be 1, at the end")
    return cumulative
def forget(self, id):
    """Mark observe/predict directive ``id`` inactive and forward to the engine.

    The stored tuple for the directive is rewritten with its ``active`` flag
    set to False, then ``self.engine.forget(id)`` is called.

    Raises:
        RException: if the directive is an assume, was never recorded as an
            observe/predict, or is already inactive.
    """
    kind = self.directives[id]
    if kind == 'observe':
        if id not in self.observes:
            raise RException("id %d was never observed" % id)
        (expr, val, active) = self.observes[id]
        if not active:
            raise RException("observe %d was already forgotten" % id)
        self.observes[id] = (expr, val, False)
    elif kind == 'predict':
        if id not in self.predicts:
            raise RException("id %d was never predicted" % id)
        (expr, active) = self.predicts[id]
        if not active:
            raise RException("predict %d was already forgotten" % id)
        self.predicts[id] = (expr, False)
    else:
        raise RException("Cannot forget assumes")
    self.engine.forget(id)
def observe(self, expr, obs_val, id):
    """Record an observation of ``expr`` and evaluate it with the value forced.

    Stores ``(expr, obs_val)`` under ``id`` and evaluates ``expr`` in
    ``self.env`` without reflipping, with ``[id]`` as the stack and
    ``obs_val`` as the forced XRP value.

    Returns:
        ``expr.hashval``.

    Raises:
        RException: if ``expr.hashval`` is already a key of ``self.observes``.
    """
    # NOTE(review): the duplicate check looks up expr.hashval, while entries
    # are stored under ``id`` -- confirm which key is intended.
    hashval = expr.hashval
    if hashval in self.observes:
        raise RException('Already observed %s' % str(expr))
    self.observes[id] = (expr, obs_val)
    # bit of a hack, here, to make it recognize same things as with noisy_expr
    self.evaluate(expr,
                  self.env,
                  reflip=False,
                  stack=[id],
                  xrp_force_val=obs_val)
    return expr.hashval
def randomKey(self):
    """Return a randomly chosen key, favouring larger weights.

    Repeatedly picks a uniformly random index into ``self.idToKey`` and
    accepts the corresponding key when ``p * max < weight``, where ``p`` is a
    fresh random number, ``max`` is ``self.heap.max()`` and ``weight`` is the
    second element stored for the key in ``self.dict``.

    Raises:
        RException: if there are no keys.
    """
    if len(self.idToKey) == 0:
        raise RException("No keys to get")
    max_weight = self.heap.max()
    while True:
        index = rrandom.random.randbelow(len(self.idToKey))
        key = self.idToKey[index]
        p = rrandom.random.random()
        (_, weight) = self.dict[key]
        if p * max_weight < weight:
            return key
def forget(self, id):
    """Unevaluate the observe or predict node registered under ``id``.

    Observes are searched before predicts.

    Raises:
        RException: if ``id`` is neither an observe nor a predict.
    """
    if id in self.observes:
        table = self.observes
    elif id in self.predicts:
        table = self.predicts
    else:
        raise RException("Can only forget predicts and observes")
    table[id].unevaluate()
    # The entry stays in its table; removal is disabled in the source:
    #del table[id]
def remove_xrp(self, evalnode):
    """Remove ``evalnode``'s XRP application from the recorded choices.

    Adds the log of the XRP's weight for the node's arguments to
    ``self.old_to_new_q`` on a best-effort basis, then drops the node from
    ``self.choices`` and from the db.

    Raises:
        RException: if the node's hashval is not in ``self.choices``.
    """
    xrp = evalnode.xrp
    try:
        self.old_to_new_q += math.log(xrp.weight(evalnode.args))
    except Exception:
        # This fails when restoring/keeping, for example.  Still best-effort,
        # but no longer swallows KeyboardInterrupt/SystemExit like the
        # previous bare ``except:`` did.
        pass
    if evalnode.hashval not in self.choices:
        raise RException("Choices did not already have this evalnode")
    del self.choices[evalnode.hashval]
    self.delete_from_db(evalnode.hashval)
def infer(self):
    """Perform one inference step: reflip a random choice, then keep or undo it.

    The proposal is undone with ``self.restore()`` when
    ``new_p + new_to_old_q - old_p - old_to_new_q`` is below the log of a
    fresh random number; otherwise it is kept and counted as accepted.
    Every proposal is counted as made.

    Raises:
        RException: if no random key can be drawn.
    """
    try:
        hashval = self.randomKey()
    except Exception:
        # Narrowed from a bare ``except:`` so interpreter-exit signals
        # (KeyboardInterrupt, SystemExit) are not rewritten into this error.
        raise RException("Program has no randomness!")

    old_p, old_to_new_q, new_p, new_to_old_q = self.reflip(hashval)
    p = rrandom.random.random()
    log_ratio = new_p + new_to_old_q - old_p - old_to_new_q
    # math.log(0.0) raises ValueError; log(p) tends to -inf as p -> 0, so a
    # draw of exactly 0 can never reject.
    if p > 0 and log_ratio < math.log(p):
        self.restore()
    else:
        self.keep()
        self.add_accepted_proposal(hashval)
    self.add_made_proposal(hashval)
def add_xrp(self, xrp, args, evalnode):
    """Register ``evalnode`` as a new XRP application with arguments ``args``.

    Stores the arguments on the node, adds the log of the XRP's weight to
    ``self.new_to_old_q`` on a best-effort basis, then records the node in
    ``self.choices`` and in the db together with its weight.

    Raises:
        RException: if the node's hashval is already present in
            ``self.weighted_db``, ``self.db`` or ``self.choices``.
    """
    weight = xrp.weight(args)
    evalnode.setargs(args)
    try:
        self.new_to_old_q += math.log(weight)
    except Exception:
        # This is only necessary if we're reflipping.  Still best-effort, but
        # no longer swallows KeyboardInterrupt/SystemExit like the previous
        # bare ``except:`` did.
        pass
    hashval = evalnode.hashval
    # __contains__ is called explicitly on the two db objects, as in the
    # original code.
    if (self.weighted_db.__contains__(hashval)
            or self.db.__contains__(hashval)
            or (hashval in self.choices)):
        raise RException("DB already had this evalnode")
    self.choices[evalnode.hashval] = evalnode
    self.add_to_db(evalnode.hashval, weight)
def report_directives(self, desired_directive_type=""):
    """List the recorded directives, optionally restricted to one type.

    Args:
        desired_directive_type: 'assume', 'observe' or 'predict' to filter,
            or "" (default) to report every directive.

    Returns:
        A list with one ``[id, type, expression, value]`` entry (all strings)
        per reported directive.  Assume/predict values come from
        ``self.report_value``; observe values are the stored observation.

    Raises:
        RException: if a directive has an unknown type.
    """
    # A disabled variant in the original source built dicts with the keys
    # "directive-id", "directive-type", "directive-expression", "name" and
    # "value" instead of these flat lists.
    directive_report = []
    for id in range(len(self.directives)):
        directive_type = self.directives[id]
        if desired_directive_type not in ["", directive_type]:
            continue
        if directive_type == 'assume':
            (varname, expr) = self.assumes[id]
            value = self.report_value(id)
        elif directive_type == 'observe':
            (expr, value, active) = self.observes[id]
        elif directive_type == 'predict':
            (expr, active) = self.predicts[id]
            value = self.report_value(id)
        else:
            raise RException("Invalid directive %s" % directive_type)
        directive_report.append(
            [str(id), directive_type, expr.__str__(), value.__str__()])
    return directive_report
def delete(self, key):
    """Remove ``key`` from the heap.

    The key's slot is swapped with the last slot (when they differ), the
    last slot is popped, and the key is dropped from ``self.index`` and
    ``self.values``.  If a swap took place, the element now sitting in the
    vacated slot is re-heapified upwards and then downwards.

    Raises:
        RException: if ``key`` is not in the heap.
    """
    if key not in self.index:
        raise RException("Key not in heap")
    pos = self.index[key]
    last = len(self.heap) - 1
    swapped = pos != last
    if swapped:
        self.swap(pos, last)
    removed = self.heap.pop()
    assert removed == key
    assert self.index[key] == last
    del self.index[key]
    del self.values[key]
    if swapped:
        pos = self.heapify_up(pos)
        pos = self.heapify_down(pos)
def rerun(self):
    """Reset the engine and replay every recorded directive in id order.

    Assumes are always replayed; observes and predicts only when their
    stored ``active`` flag is true.

    Raises:
        RException: if a directive has an unknown type.
    """
    self.reset_engine()
    for id in range(len(self.directives)):
        kind = self.directives[id]
        if kind == 'assume':
            (varname, expr) = self.assumes[id]
            self.engine.assume(varname, expr, id)
        elif kind == 'observe':
            (expr, val, active) = self.observes[id]
            if active:
                self.engine.observe(expr, val, id)
        elif kind == 'predict':
            (expr, active) = self.predicts[id]
            if active:
                self.engine.predict(expr, id)
        else:
            raise RException("Invalid directive")
def __init__(self, procedure, engine_type="traces"):
    """Set up a memoizing XRP around ``procedure`` with its own inner engine.

    Args:
        procedure: the value being memoized; it is assumed into the inner
            engine under the name 'f' with directive id 0.
        engine_type: "traces" (default) or "reduced traces".

    Raises:
        RException: for any other ``engine_type``.
    """
    self.initialize()
    self.resample = True

    if engine_type == "traces":
        self.engine = Traces()
    elif engine_type == "reduced traces":
        self.engine = ReducedTraces()
    else:
        raise RException("Engine type not implemented")
    self.engine.assume('f', ConstExpression(procedure), 0)

    self.procedure = procedure
    self.n = len(procedure.vars)
    self.argsdict = {}
    self.hash = rrandom.random.randbelow()

    # Per-arguments bookkeeping, all keyed by args_hash.
    self.ids = {}    # args_hash -> directive id
    self.count = {}  # args_hash -> number of applications with these args
    self.links = {}  # args_hash -> set of evalnodes
    self.id = 0
def entry_point(argv):
    """Create the engine, wrap it in ``Directives`` and run; returns 0.

    ``argv`` is not consulted: the engine type is fixed to 't' (traces).

    Raises:
        RException: if the engine type matches no known alias.
    """
    engine_type = 't'
    reduced_aliases = [
        'rt', 'reduced', 'reduced_trace', 'reduced_traces', 'reducedtrace',
        'reducedtraces'
    ]
    trace_aliases = ['t', 'trace', 'traces']

    if engine_type in reduced_aliases:
        engine = ReducedTraces()
    elif engine_type in trace_aliases:
        engine = Traces()
    #elif engine_type in ['r', 'db', 'randomdb']:
    #  engine = RandomDB()
    else:
        raise RException("Engine %s is not implemented" % engine_type)

    directives = Directives(engine)
    run(directives)
    return 0
def reflip(self, hashval):
    """Propose a change for the random choice identified by ``hashval``.

    When ``hashval`` is in ``self.choices`` the corresponding node is
    reflipped (an "application reflip"); otherwise the XRP stored in
    ``self.xrps[hashval]`` gets a new internal state and its nodes receive
    new values.

    Returns:
        Tuple ``(old_p, old_to_new_q, new_p, new_to_old_q)``.

    Raises:
        RException: if the chosen node is not a random XRP application or
            previously had value None.
    """
    # NOTE(review): block nesting below was reconstructed from a
    # whitespace-collapsed source -- confirm against the original file.
    if self.debug:
        print self

    if hashval in self.choices:
        self.application_reflip = True
        self.reflip_node = self.choices[hashval]
        if not self.reflip_node.random_xrp_apply:
            raise RException(
                "Reflipping something which isn't a random xrp application"
            )
        if self.reflip_node.val is None:
            raise RException(
                "Reflipping something which previously had value None")
    else:
        self.application_reflip = False  # internal reflip
        (self.reflip_xrp, nodes) = self.xrps[hashval]
        self.nodes = list_nodes(nodes)

    # Reset the accumulators that are folded into the q terms further down.
    self.eval_p = 0
    self.uneval_p = 0

    old_p = self.p
    self.old_to_new_q = -math.log(self.weight())

    if self.application_reflip:
        self.old_val = self.reflip_node.val
        self.new_val = self.reflip_node.reflip()
    else:
        # TODO: this is copied from traces.  is it correct?
        args_list = []
        self.old_vals = []
        for node in self.nodes:
            args_list.append(node.args)
            self.old_vals.append(node.val)
        self.old_to_new_q += math.log(self.reflip_xrp.state_weight())
        old_p += self.reflip_xrp.theta_prob()

        self.new_vals, q_forwards, q_back = self.reflip_xrp.theta_mh_prop(
            args_list, self.old_vals)
        self.old_to_new_q += q_forwards
        # NOTE(review): new_to_old_q is assigned afresh a few lines below,
        # which discards this q_back contribution -- confirm intent.
        self.new_to_old_q += q_back

        for i in range(len(self.nodes)):
            node = self.nodes[i]
            val = self.new_vals[i]
            node.set_val(val)
            node.propagate_up(False)

    new_p = self.p
    self.new_to_old_q = -math.log(self.weight())
    self.old_to_new_q += self.eval_p
    self.new_to_old_q += self.uneval_p

    if not self.application_reflip:
        new_p += self.reflip_xrp.theta_prob()
        self.new_to_old_q += math.log(self.reflip_xrp.state_weight())

    if self.debug:
        if self.application_reflip:
            print "\nCHANGING VAL OF ", self.reflip_node, "\n FROM : ", self.old_val, "\n TO : ", self.new_val, "\n"
            if (self.old_val.__eq__(self.new_val)).bool:
                print "SAME VAL"
        else:
            print "TRANSITIONING STATE OF ", self.reflip_xrp

        print "new db", self
        print "\nq(old -> new) : ", math.exp(self.old_to_new_q)
        print "q(new -> old) : ", math.exp(self.new_to_old_q)
        print "p(old) : ", math.exp(old_p)
        print "p(new) : ", math.exp(new_p)
        print 'transition prob : ', math.exp(new_p + self.new_to_old_q - old_p - self.old_to_new_q), "\n"
        print "\n-----------------------------------------\n"

    return old_p, self.old_to_new_q, new_p, self.new_to_old_q
def report_value(self, id):
    """Return the current value of the node for directive ``id``.

    Raises:
        RException: if the node is not active.
    """
    node = self.get_directive_node(id)
    if node.active:
        return node.val
    raise RException("Error.  Perhaps this directive was forgotten?")
def evaluate_recurse(self,
                     expr,
                     env,
                     hashval,
                     addition,
                     xrp_force_val=None,
                     restore=False):
    """Recursively evaluate ``expr`` in ``env`` and return its value.

    Args:
        expr: expression to evaluate; dispatch is on ``expr.type``
            ('value', 'variable', 'if', 'let', 'apply', 'function').
        env: environment used for variable lookup and binding.
        hashval: hash of the enclosing evaluation; combined with
            ``addition`` via ``rhash.hash_pair`` before use.
        addition: value mixed into ``hashval`` for this sub-evaluation.
        xrp_force_val: value handed to ``apply_random_xrp`` for a
            non-resampling XRP applied where the combined hash is 0.
        restore: forwarded to recursive calls and to ``get_child``.

    Raises:
        RException: on an arity mismatch when applying a procedure, when the
            operator is neither procedure nor xrp, or on an unknown
            expression type.
    """
    # NOTE(review): block nesting below was reconstructed from a
    # whitespace-collapsed source -- confirm against the original file.
    hashval = rhash.hash_pair(hashval, addition)

    if expr.type == 'value':
        val = expr.val
    elif expr.type == 'variable':
        (val, lookup_env) = env.lookup(expr.name)
        self.addlookup(expr.name, lookup_env)
    # TODO: get rid of.  Works with proper if, but not in traces
    elif expr.type == 'if':
        cond = self.evaluate_recurse(expr.cond, env, hashval, 1, None, restore)
        if cond.bool:
            val = self.evaluate_recurse(expr.true, env, hashval, 2, None,
                                        restore)
        else:
            val = self.evaluate_recurse(expr.false, env, hashval, 3, None,
                                        restore)
    elif expr.type == 'let':
        # TODO: this really is a let*
        # Does environment stuff work properly?
        n = len(expr.vars)
        assert len(expr.expressions) == n
        values = []
        new_env = env
        for i in range(n):  # Bind variables
            # Each binding gets its own child environment, so later
            # expressions are evaluated with the earlier bindings in scope.
            new_env = new_env.spawn_child()
            val = self.evaluate_recurse(expr.expressions[i], new_env, hashval,
                                        i + 2, None, restore)
            values.append(val)
            new_env.set(expr.vars[i], values[i])
            if val.type == 'procedure':
                val.env = new_env
        val = self.evaluate_recurse(expr.body, new_env, hashval, 1, None,
                                    restore)
    elif expr.type == 'apply':
        n = len(expr.children)
        op = self.evaluate_recurse(expr.op, env, hashval, 1, None, restore)
        args = [
            self.evaluate_recurse(expr.children[i], env, hashval, i + 2, None,
                                  restore) for i in range(n)
        ]

        if op.type == 'procedure':
            if hashval == 0:
                self.args = args
            if n != len(op.vars):
                # The first number is the expected arity (len(op.vars)); it
                # used to print the child count a second time.
                raise RException(
                    'Procedure should have %d arguments.  \nVars were \n%s\n, but had %d children.'
                    % (len(op.vars), op.vars, len(expr.children)))
            new_env = op.env.spawn_child()
            for i in range(n):
                new_env.set(op.vars[i], args[i])
            addition = rhash.hash_many([x.__hash__() for x in args])
            val = self.evaluate_recurse(op.body, new_env, hashval, addition,
                                        None, restore)
        elif op.type == 'xrp':
            xrp = op.xrp
            if not xrp.resample:
                if hashval == 0:
                    self.random_xrp_apply = True
                    val = self.apply_random_xrp(xrp, args, xrp_force_val)
                else:
                    child = self.get_child(hashval, env, expr, restore)
                    val = child.val
            else:
                val = xrp.sample(args)
                self.add_xrp(xrp, val, args)
                # Recorded on the node alongside the add_xrp call.
                self.xrp_applies.append((xrp, val, args))
            assert val is not None
        else:
            raise RException(
                'Must apply either a procedure or xrp.  Instead got expression %s'
                % str(op))
    elif expr.type == 'function':
        n = len(expr.vars)
        new_env = env.spawn_child()
        # The procedure is built with ``env``; ``new_env`` above is not used
        # afterwards.
        val = Procedure(expr.vars, expr.body, env)
    else:
        raise RException('Invalid expression type %s' % expr.type)
    return val
def add_assume(self, name, evalnode):
    """Register ``evalnode`` as the node for assumed variable ``name``.

    Raises:
        RException: if ``name`` has already been assumed.
    """
    already_assumed = name in self.assumes
    if already_assumed:
        raise RException("Already assumed something with this name")
    self.assumes[name] = evalnode