def lp_propose(grammar, x, y, resampleProbability=lambdaOne, xZ=None):
    """
    Returns the log probability of starting at x and ending up at y from a regeneration move.
    Any node is a candidate if the trees are identical except for what's below those nodes
    (although what's below *can* be identical!)

    NOTE: This does NOT take into account insert/delete.
    NOTE: Not so simple because we must count multiple paths.
    NOTE: This is currently not correct because it will mess up with bound variables, which
          now have unique names. It also seems to add too many rules to the grammar,
          probably via recurse_up.
    """
    RP = -Infinity

    if isinstance(x, FunctionNode) and isinstance(y, FunctionNode) and x.returntype == y.returntype:

        # compute the normalizer
        if xZ is None:
            xZ = x.sample_node_normalizer(resampleProbability=resampleProbability)

        # Well, we could select x's root to go to y, but we must recompute y under the current grammar
        with BVRuleContextManager(grammar, x, recurse_up=True):
            RP = logplusexp(RP, log(1.0 * resampleProbability(x)) - log(xZ) + grammar.log_probability(y))

        if x.name == y.name and x.args is not None and y.args is not None and len(x.args) == len(y.args):

            # how many kids are not equal, and where was the last?
            mismatch_count, mismatch_index = 0, 0
            for i, xa, ya in zip(xrange(len(x.args)), x.args, y.args):
                if xa != ya:  # checks whole subtree!
                    mismatch_count += 1
                    mismatch_index = i
                    if mismatch_count > 1:
                        break  # can't win

            if mismatch_count > 1:  # we would have to have selected x,y themselves to regenerate
                pass
            elif mismatch_count == 1:
                # we could propose to x, or to x.args[mismatch_index], but nothing else
                # (nothing else will fix the mismatch)
                with BVRuleContextManager(grammar, x, recurse_up=False):  # recurse, but keep track of bv
                    RP = logplusexp(RP, lp_propose(grammar, x.args[mismatch_index], y.args[mismatch_index],
                                                   resampleProbability=resampleProbability, xZ=xZ))
            else:
                # identical trees -- we could propose to any node, so that's just the tree
                # probability below, convolved with the resample probability
                for xi in x.iterate_subnodes(grammar, recurse_up=True):
                    if xi is not x:  # but we already counted the root (NOTE: must be "is", not ==)
                        # use grammar.log_probability since the grammar may have changed with bv
                        RP = logplusexp(RP, log(1.0 * resampleProbability(xi)) - log(xZ) + grammar.log_probability(xi))

    return RP
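# lp_propose accumulates log-probability over every distinct proposal path with
# logplusexp. A minimal numeric sketch of that accumulation (the two path
# log-probabilities are hypothetical; logplusexp and Infinity are assumed to come
# from LOTlib.Miscellaneous, as in the functions here):
def _demo_path_accumulation():
    from math import log, exp
    paths = [log(0.10), log(0.05)]  # log-prob of two distinct ways to reach y
    total = -Infinity
    for lp in paths:
        total = logplusexp(total, lp)
    assert abs(exp(total) - 0.15) < 1e-9  # path probabilities add linearly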
def probs_data_rule(grammar, rule, data, probs=np.arange(0, 2, 0.2), num_iters=10000, alpha=0.9):
    """Return the probability of a set of data under each of a range of probabilities for a given rule.

    Args:
        grammar (LOTlib.Grammar): The grammar.
        rule (LOTlib.GrammarRule): A specific rule whose probability will be varied.
            Use get_rule to get the GrammarRule for a name, e.g. 'union_'.
        data (list): List of (input_data, output_data) pairs.
        probs: Iterable of probability values to try for `rule`.

    Returns:
        list: Probability of human data for each value in `probs`.

    Example:
        >> data = [([2, 8, 16], {4: (10, 2), 6: (4, 8), 12: (7, 5)}),   # (data set 1)
        ..         ([3, 9, 13], {6: (11, 1), 5: (3, 9), 12: (8, 4)})]   # (data set 2)
        >> rule = get_rule('union_')    # as described in Args
        >> probs_data_rule(G.grammar, rule, data, probs=[0., 1., 2., 3., 4.])
        [-0.923, -2.48, -5.12, -0.44, -6.36]

    """
    dist = []
    orig_p = rule.p
    for p in probs:
        rule.p = p
        # logplusexp is binary, so fold it over the per-dataset log probabilities
        p_human = reduce(logplusexp, [prob_data(grammar, d[0], d[1], num_iters, alpha) for d in data], -Infinity)
        dist.append(p_human)
    rule.p = orig_p
    return dist
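# The sweep above works by temporarily mutating rule.p and restoring it afterward.
# A tiny stand-in object (hypothetical, not a LOTlib class) shows that
# save/restore pattern in isolation:
def _demo_rule_sweep():
    class FakeRule(object):
        def __init__(self, p):
            self.p = p
    rule = FakeRule(1.0)
    orig_p = rule.p
    scores = []
    for p in [0.5, 1.0, 2.0]:
        rule.p = p
        scores.append(-rule.p)  # stand-in for scoring data under the reweighted grammar
    rule.p = orig_p
    assert rule.p == 1.0  # the rule is left untouched after the sweep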
def prob_data(grammar, input_data, output_data, num_iters=10000, alpha=0.9):
    """Compute the probability of generating human data given our grammar & input data.

    Args:
        grammar (LOTlib.Grammar): The grammar.
        input_data (list): List of integers; the likelihood of the model is initially computed with these.
        output_data (dict): Maps each output item to a tuple (# yes, # no) of responses in human data.

    Returns:
        float: Estimated probability of generating human data.

    """
    model_likelihoods = likelihood_data(grammar, input_data, output_data, num_iters, alpha)

    p_output = -Infinity
    for o in output_data.keys():
        p = model_likelihoods[o]
        k = output_data[o][0]        # num. yes responses
        n = k + output_data[o][1]    # num. trials
        bc = factorial(n) / (factorial(k) * factorial(n - k))    # binomial coefficient
        p_o = log(bc) + (k * p) + (n - k) * log1mexp(p)          # log version
        p_output = logplusexp(p_output, p_o)
        # p_gen_human_data[o] = bc * pow(p, k) * pow(1-p, n-k)   # linear version
    return p_output
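# The binomial term above is computed in log space; a quick numeric check
# (pure math, no LOTlib; values illustrative) that the log version matches
# the commented-out linear one:
def _demo_binomial_term():
    from math import log, exp, factorial
    p, k, n = 0.7, 3, 5
    bc = factorial(n) / (factorial(k) * factorial(n - k))
    linear = bc * pow(p, k) * pow(1.0 - p, n - k)
    logged = log(bc) + k * log(p) + (n - k) * log(1.0 - p)
    assert abs(exp(logged) - linear) < 1e-9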
def cons_d(x, y):
    out = defaultdict(lambdaMinusInfinity)
    for a, av in x.items():
        for b, bv in y.items():
            out[a + b] = logplusexp(out[a + b], av + bv)
    return out
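# cons_d convolves two string distributions (outcome -> log-probability);
# a small sketch with illustrative inputs, assuming logplusexp as used above:
def _demo_cons_d():
    from math import log, exp
    x = {'a': log(0.5), 'b': log(0.5)}
    y = {'c': log(1.0)}
    out = cons_d(x, y)
    assert abs(exp(out['ac']) - 0.5) < 1e-9
    assert abs(exp(out['bc']) - 0.5) < 1e-9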
def or_d(x, y):
    out = defaultdict(lambdaMinusInfinity)
    # note: this sums only the two cases where exactly one argument is True
    out[True] = logplusexp(x.get(True, -Infinity) + y.get(False, -Infinity),
                           x.get(False, -Infinity) + y.get(True, -Infinity))
    out[False] = log1mexp(out[True])
    return out
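# A numeric sketch of or_d on two boolean distributions (values illustrative):
def _demo_or_d():
    from math import log, exp
    x = {True: log(0.5), False: log(0.5)}
    y = {True: log(0.5), False: log(0.5)}
    out = or_d(x, y)
    # P(exactly one True) = 0.25 + 0.25 = 0.5
    assert abs(exp(out[True]) - 0.5) < 1e-9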
def likelihood_data(grammar, input_data, output_data, num_iters=10000, alpha=0.9):
    """Generate a set of hypotheses, and use these to estimate the likelihood of generating the human data.

    This is taken as a weighted sum over all hypotheses.

    Args:
        input_data (list): List of input integers.
        output_data (dict): Maps each output item to a tuple (# yes, # no) of human responses.

    Returns:
        dict: Each output key returns the summed likelihood of that single data point. Keys are
            the same as those of the argument `output_data`.

    """
    hypotheses = mh_sample(input_data, grammar=grammar, num_iters=num_iters, alpha=alpha)
    Z = normalizing_constant(hypotheses)

    likelihoods = defaultdict(lambda: -Infinity)
    for h in hypotheses:
        w = h.posterior_score - Z
        for o in output_data.keys():
            old_likelihood = h.likelihood    # TODO: is h.compute_likelihood updating posterior_score each loop?
            weighted_likelihood = h.compute_likelihood([o]) + w
            h.likelihood = old_likelihood
            likelihoods[o] = logplusexp(likelihoods[o], weighted_likelihood)
    return likelihoods
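# The weights above are posterior scores normalized in log space, so that
# exp(score - Z) sums to 1 over the hypotheses. A minimal numeric sketch of
# that normalization (scores illustrative; the normalizer built by folding
# logplusexp, assumed available as above):
def _demo_posterior_weights():
    from math import exp
    scores = [-1.0, -2.0, -3.0]    # stand-in posterior scores
    Z = reduce(logplusexp, scores, -Infinity)
    weights = [exp(s - Z) for s in scores]
    assert abs(sum(weights) - 1.0) < 1e-9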
def add(self, x):
    if (not self.unique) or x not in self.set:
        v = getattr(x, self.key)
        if not isnan(v):
            self.Z = logplusexp(self.Z, v)
        if self.unique:
            self.set.add(x)
def if_d(prb, x, y):
    out = defaultdict(lambdaMinusInfinity)
    pt = prb[True]
    pf = prb[False]
    for a, av in x.items():
        out[a] = av + pt
    for b, bv in y.items():
        out[b] = logplusexp(out[b], bv + pf)
    return out
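# if_d mixes two outcome distributions by the probability of the condition;
# a small sketch with illustrative values:
def _demo_if_d():
    from math import log, exp
    prb = {True: log(0.25), False: log(0.75)}
    x = {'a': log(1.0)}
    y = {'b': log(1.0)}
    out = if_d(prb, x, y)
    assert abs(exp(out['a']) - 0.25) < 1e-9
    assert abs(exp(out['b']) - 0.75) < 1e-9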
def prob_data_rule(grammar, rule, data, p, num_iters=10000, alpha=0.9):
    """Return the probability of a set of data given a single p value for a rule."""
    orig_p = rule.p
    rule.p = p
    p_human = -Infinity    # log-space accumulator (0 would mean probability 1)
    for d in data:
        # get the probability of producing this data pair, and add it to the total
        p_human_d = prob_data(grammar, d[0], d[1], num_iters, alpha)
        p_human = logplusexp(p_human, p_human_d)
    rule.p = orig_p
    return p_human
def compute_outcomes(f, *args, **kwargs):
    """
    Return a dictionary of outcomes using our RandomContext tools, giving each possible
    trace (up to the given depth) and its probability.

    f here is a function of a context, as in f(context, *args).

    kwargs['Cfirst'] controls whether C is the first or the last argument to f.
    It cannot be anywhere else.

    In kwargs you can pass "catchandpass" as a tuple of exceptions to catch and do nothing with.
    """
    out = defaultdict(lambdaMinusInfinity)  # dict from strings to lps that we accumulate

    cs = ContextSet()           # this is the "open" set of contexts we need to explore
    cs.add(RandomContext(cs))   # add a single context with no history

    i = 0
    while len(cs) > 0:
        # pop an element from the context set
        # TODO: we should probably do a heapq of the highest-probability sequences
        context = cs.pop()
        # print "CTX", context.lp, context#, " \t", cs.Q

        try:
            # figure out the ordering of where C is passed to the lambda
            if kwargs.get('Cfirst', True):  # does C go at the beginning or the end?
                # when we call context.flip, we may update cs with new paths to explore
                v = f(context, *args)
            else:
                newargs = args + (context,)
                v = f(*newargs)
            # print ">>>", v

            # add up the lp for this outcome
            out[v] = logplusexp(out[v], context.lp)
        except kwargs.get('catchandpass', None) as e:
            pass
        except ContextSizeException:  # prune that path
            pass

        if i >= kwargs.get('maxit', 1000):
            return out  # TODO: hmm, we could either return the partial answer here or raise an exception

        if len(cs) > kwargs.get('maxcontext', 1000):
            # sometimes we can generate way too many contexts, so let's avoid that
            raise TooManyContextsException

        i += 1

    return out
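# A self-contained sketch of the idea behind compute_outcomes: enumerate every
# trace of a function whose only randomness is a fixed number of coin flips, and
# accumulate log-probability per outcome. enumerate_flip_outcomes is
# hypothetical -- it is not the LOTlib RandomContext machinery, just the same idea:
def enumerate_flip_outcomes(f, n_flips, p=0.5):
    from itertools import product
    from math import log
    out = defaultdict(lambdaMinusInfinity)
    for bits in product([True, False], repeat=n_flips):
        lp = sum(log(p) if b else log(1.0 - p) for b in bits)
        flips = iter(bits)
        v = f(lambda: next(flips))  # f draws its "random" flips from this fixed trace
        out[v] = logplusexp(out[v], lp)
    return out

# e.g. the number of heads in two fair flips:
# enumerate_flip_outcomes(lambda flip: flip() + flip(), 2)
#   -> {2: log(0.25), 1: log(0.5), 0: log(0.25)}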
def add_to_collapsed_trees(t):
    resps = ';'.join(map(str, get_tree_set_responses(t, TESTING_SET)))
    tprior = grammar.log_probability(t)

    if resps in collapsed_forms:  # add to the existing collapsed form if no recursion
        collapsed_prob = grammar.log_probability(collapsed_forms[resps])
        collapsed_forms[resps].my_log_probability = logplusexp(collapsed_prob, tprior)
        if tprior > collapsed_forms[resps].display_tree_probability:  # display the most concise form
            collapsed_forms[resps] = t
            collapsed_forms[resps].display_tree_probability = tprior
    else:
        collapsed_forms[resps] = t
        collapsed_forms[resps].display_tree_probability = tprior
        t.my_log_probability = tprior  # FunctionNode uses this value when we call log_probability()
        print ">>", all_tree_count, len(collapsed_forms), t, tprior
def next(self):
    nxt = MultipleChainMCMC.next(self)  # get the next one
    idx = self.chain_idx

    if nxt not in self.seen:
        self.chainZ[idx] = logplusexp(self.chainZ[idx], nxt.posterior_score)
        self.seen.add(nxt)

    # Process the situation where we need to re-organize
    if self.nsamples % (self.within_steps * self.nchains) == 0 and self.nsamples > 0:
        self.refresh()

    self.nsamples += 1
    return nxt
def compute_single_likelihood(self, datum):
    assert isinstance(datum.output, dict)

    hp = self(*datum.input)  # output dictionary, output -> probabilities
    assert isinstance(hp, dict)

    s = 0.0
    for k, dc in datum.output.items():
        # probability of generating k under random typing; the +1 is for an EOS marker
        lp = -log(self.alphabet_size + 1) * (len(k) + 1) + log(1.0 - datum.alpha)
        if k in hp:
            lp = logplusexp(lp, hp[k] + log(datum.alpha))  # if we could have been generated
        s += dc * lp
    return s
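# The likelihood above mixes "random typing" noise with the model's own string
# distribution: log((1-alpha)*q_noise + alpha*p_model), built with logplusexp.
# A quick numeric check of that mixture (values illustrative):
def _demo_noise_mixture():
    from math import log, exp
    alpha, q_noise, p_model = 0.9, 0.001, 0.2
    lp = logplusexp(log(1.0 - alpha) + log(q_noise), log(alpha) + log(p_model))
    assert abs(exp(lp) - ((1.0 - alpha) * q_noise + alpha * p_model)) < 1e-9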
def lp_propose(self, x, y, resampleProbability=lambdaOne, xZ=None):
    """
    Returns the log probability of starting at x and ending up at y from a regeneration move.
    Any node is a candidate if the trees are identical except for what's below those nodes
    (although what's below *can* be identical!)

    NOTE: This does NOT take into account insert/delete.
    NOTE: Not so simple because we must count multiple paths.
    NOTE: This is currently not correct because it will mess up with bound variables, which
          now have unique names. It also seems to add too many rules to the grammar,
          probably via recurse_up.
    """
    RP = -Infinity

    if isinstance(x, FunctionNode) and isinstance(y, FunctionNode) and x.returntype == y.returntype:

        # compute the normalizer
        if xZ is None:
            xZ = x.sample_node_normalizer(resampleProbability=resampleProbability)

        # Well, we could select x's root to go to y, but we must recompute y under the current grammar
        with BVRuleContextManager(self.grammar, x, recurse_up=True):
            RP = logplusexp(RP, log(1.0 * resampleProbability(x)) - log(xZ) + self.grammar.log_probability(y))

        if x.name == y.name and x.args is not None and y.args is not None and len(x.args) == len(y.args):

            # how many kids are not equal, and where was the last?
            mismatch_count, mismatch_index = 0, 0
            for i, xa, ya in zip(xrange(len(x.args)), x.args, y.args):
                if xa != ya:  # checks whole subtree!
                    mismatch_count += 1
                    mismatch_index = i
                    if mismatch_count > 1:
                        break  # can't win

            if mismatch_count > 1:  # we would have to have selected x,y themselves to regenerate
                pass
            elif mismatch_count == 1:
                # we could propose to x, or to x.args[mismatch_index], but nothing else
                # (nothing else will fix the mismatch)
                with BVRuleContextManager(self.grammar, x, recurse_up=False):  # recurse, but keep track of bv
                    RP = logplusexp(RP, self.lp_propose(x.args[mismatch_index], y.args[mismatch_index],
                                                        resampleProbability=resampleProbability, xZ=xZ))
            else:
                # identical trees -- we could propose to any node, so that's just the tree
                # probability below, convolved with the resample probability
                for xi in x.iterate_subnodes(self.grammar, recurse_up=True):
                    if xi is not x:  # but we already counted the root (NOTE: must be "is", not ==)
                        # use grammar.log_probability since the grammar may have changed with bv
                        RP = logplusexp(RP, log(1.0 * resampleProbability(xi)) - log(xZ) + self.grammar.log_probability(xi))

    return RP
def process(self, x):
    v = getattr(x, self.key)
    if not isnan(v):
        self.Z = logplusexp(self.Z, v)
    return x
def equal_d(x, y):
    peq = -Infinity
    for a, v in x.items():
        peq = logplusexp(peq, v + y.get(a, -Infinity))  # P(x=a, y=a)
    return {True: peq, False: log1mexp(peq)}
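# equal_d marginalizes the probability that two independent distributions
# produce the same outcome; a small sketch with illustrative values:
def _demo_equal_d():
    from math import log, exp
    x = {'a': log(0.5), 'b': log(0.5)}
    y = {'a': log(0.5), 'b': log(0.5)}
    out = equal_d(x, y)
    # P(equal) = 0.25 + 0.25 = 0.5
    assert abs(exp(out[True]) - 0.5) < 1e-9
    assert abs(exp(out[False]) - 0.5) < 1e-9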
def car_d(x):
    out = defaultdict(lambdaMinusInfinity)
    for a, av in x.items():
        v = a[1] if len(a) > 1 else ''
        out[v] = logplusexp(out[v], av)
    return out
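# car_d maps each outcome string through the indexing rule above (the element at
# index 1 when the string is long enough, else the empty string) and merges the
# log-probabilities of strings that collide; a small sketch with illustrative values:
def _demo_car_d():
    from math import log, exp
    x = {'ab': log(0.5), 'cb': log(0.25), 'd': log(0.25)}
    out = car_d(x)
    assert abs(exp(out['b']) - 0.75) < 1e-9   # 'ab' and 'cb' both map to 'b'
    assert abs(exp(out['']) - 0.25) < 1e-9    # too-short strings map to ''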