def compute_proposal_probability(self, grammar, t1, t2, resampleProbability=lambdaOne, recurse=True):
    """
    Log probability of proposing tree t2 from tree t1 by copying one node of t1 over another.

    grammar             -- passed through to nodes_equal_except_parents
    t1, t2              -- the tree before and after the proposal
    resampleProbability -- unnormalized weight with which each node of t1 is sampled
    recurse             -- if True, every ancestor of the differing node is also
                           considered as a possible target of the copy

    Returns logsumexp over the log probability of every (source, target) choice considered.
    """
    chosen_node1, chosen_node2 = least_common_difference(t1, t2)

    lps = []
    if chosen_node1 is None:
        # any node in the tree could have been copied
        # (presumably least_common_difference found no differing node -- confirm)
        for node in t1:
            # weight of the nodes that match `node` and so could have been the copied source
            could_be_source = lambda x: 1.0 * nodes_equal_except_parents(grammar, x, node) * resampleProbability(x)

            # `node` matches itself, so its own weight is removed from the source mass
            lp_of_choosing_source = (nicelog(t1.sample_node_normalizer(could_be_source) - could_be_source(node))
                                     - nicelog(t1.sample_node_normalizer(resampleProbability)))

            # The target is the node being considered. chosen_node1 is always None
            # in this branch, so it must not be used as the target here.
            lp_of_choosing_target = t1.sampling_log_probability(node, resampleProbability=resampleProbability)

            lps += [lp_of_choosing_source + lp_of_choosing_target]
    else:
        # we have a specific path up the tree
        while chosen_node1:
            # sources are the nodes of t1 that match what now sits at this position in t2
            could_be_source = lambda x: 1.0 * nodes_equal_except_parents(grammar, x, chosen_node2) * resampleProbability(x)

            lp_of_choosing_source = (nicelog(t1.sample_node_normalizer(could_be_source))
                                     - nicelog(t1.sample_node_normalizer(resampleProbability)))
            lp_of_choosing_target = t1.sampling_log_probability(chosen_node1, resampleProbability=resampleProbability)

            lps += [lp_of_choosing_source + lp_of_choosing_target]

            if recurse:
                # the copy could equally have replaced any ancestor of the difference
                chosen_node1 = chosen_node1.parent
                chosen_node2 = chosen_node2.parent
            else:
                chosen_node1 = None  # only the differing node itself; end the loop

    return logsumexp(lps)
def compute_single_likelihood(self, datum, llcounts, sm=0.1):
    """
    Log likelihood of one datum under smoothed counts.

    datum    -- object whose .output is a dict from function outputs to observed counts
    llcounts -- mapping from outputs to counts; it is indexed with every key of datum.output
    sm       -- smoothing counts are added to existing bins of counts

    Returns the sum over keys k of datum.output of
        datum.output[k] * (nicelog(llcounts[k] + sm) - nicelog(z + sm * K))
    where z is the total of llcounts and K is the number of keys in datum.output.
    """
    assert isinstance(datum.output, dict), "Data supplied to SimpleGenerativeHypothesis must be a dict of function outputs to counts"

    # z is already the scalar total of all counts; it must not be summed again
    z = sum(llcounts.values())

    return sum([datum.output[k] * (nicelog(llcounts[k] + sm) - nicelog(z + sm * len(datum.output.keys())))
                for k in datum.output.keys()])
def compute_likelihood(self, data, shortcut=-Infinity, nsamples=512, sm=0.1, **kwargs):
    """
    Total smoothed log likelihood of data, divided by self.likelihood_temperature.

    For every datum the counts are recomputed with make_ll_counts (nsamples
    samples) and stored on self.ll_counts. As soon as the running total drops
    below shortcut, -Infinity is returned instead.
    """
    running_ll = 0.0

    for datum in data:
        # recompute the counts for this input through simulation
        self.ll_counts = self.make_ll_counts(datum.input, nsamples=nsamples)
        z = sum(self.ll_counts.values())

        datum_ll = 0
        for k in datum.output.keys():
            smoothed = nicelog(self.ll_counts[k] + sm)
            normalizer = nicelog(z + sm * len(datum.output.keys()))
            datum_ll += datum.output[k] * (smoothed - normalizer)
        running_ll += datum_ll

        # the shortcut is compared against the un-tempered total
        if running_ll < shortcut:
            return -Infinity

    return running_ll / self.likelihood_temperature
def compute_single_likelihood(self, datum, llcounts=None, nsamples=512, sm=0.1):
    """
    Log likelihood of one datum under smoothed counts.

    sm smoothing counts are added to existing bins of counts (just to prevent badness).
    llcounts is optional so the counts can be cached externally; when it is
    None they are computed here with make_ll_counts. The normalizer uses
    nsamples, not the total of llcounts.
    """
    assert isinstance(datum.output, dict), "Data supplied to SimpleGenerativeHypothesis must be a dict (function outputs to counts)"

    # compute if not passed in
    if llcounts is None:
        llcounts = self.make_ll_counts(datum.input, nsamples=nsamples)

    total = 0
    for k in datum.output.keys():
        log_numerator = nicelog(llcounts[k] + sm)
        log_denominator = nicelog(nsamples + sm * len(datum.output.keys()))
        total += datum.output[k] * (log_numerator - log_denominator)
    return total
def compute_single_likelihood(self, datum, llcounts, sm=0.1):
    """
    Log likelihood of one datum under smoothed counts.

    sm smoothing counts are added to existing bins of counts. The normalizer
    is the total of llcounts plus sm for every key of datum.output.
    """
    assert isinstance(datum.output, dict), "Data supplied to SimpleGenerativeHypothesis must be a dict of function outputs to counts"

    z = sum(llcounts.values())

    total = 0
    for k in datum.output.keys():
        log_numerator = nicelog(llcounts[k] + sm)
        log_denominator = nicelog(z + sm * len(datum.output.keys()))
        total += datum.output[k] * (log_numerator - log_denominator)
    return total
def compute_proposal_probability(self, grammar, t1, t2, resampleProbability=lambdaOne):
    """
    Log probability that an insertion turned t1 into t2.

    An insertion is only possible when the differing node of t1 is roughly
    equal to one of the arguments of the differing node of t2; otherwise
    -Infinity is returned.
    """
    node_1, node_2 = least_common_difference(t1, t2)

    insertion_possible = (node_1 and node_2 and
                          any([nodes_are_roughly_equal(arg, node_1) for arg in None2Empty(node_2.args)]))
    if not insertion_possible:
        return -Infinity  # the trees cannot be identical if we performed an insertion

    # choosing node_1: only nodes we can insert on count, weighted by resampleProbability
    def insertable_weight(t):
        return can_insert_FunctionNode(t, grammar) * resampleProbability(t)

    lp_choosing_node_1 = t1.sampling_log_probability(node_1, resampleProbability=insertable_weight)

    # choosing the rule: uniform over the insertable rules for node_1's return type
    insertable_rules = [rule for rule in grammar.rules[node_1.returntype] if can_insert_GrammarRule(rule)]
    lp_choosing_rule = -nicelog(len(insertable_rules))

    # choosing the slot: uniform over node_2's arguments that have node_1's return type
    same_type_slots = [i for i, child in enumerate(node_2.args) if child.returntype == node_1.returntype]
    lp_choosing_replacement = -nicelog(len(same_type_slots))

    # every argument that is significantly different from node_1 had to be generated
    lp_generation = 0
    for arg in node_2.args:
        same_as_node_1 = (arg.name == node_1.name and
                          arg.returntype == node_1.returntype and
                          arg.args == node_1.args)
        if same_as_node_1:
            continue
        with BVRuleContextManager(grammar, node_2, recurse_up=True):
            lp_generation += grammar.log_probability(arg)

    lp_copy_making_node_2 = lp_choosing_rule + lp_choosing_replacement + lp_generation
    return lp_choosing_node_1 + lp_copy_making_node_2
def compute_proposal_probability(self, grammar, t1, t2, resampleProbability=lambdaOne, recurse=True):
    """
    Log probability of proposing tree t2 from tree t1 by copying one node of t1 over another.

    grammar             -- passed through to nodes_equal_except_parents
    t1, t2              -- the tree before and after the proposal
    resampleProbability -- unnormalized weight with which each node of t1 is sampled
    recurse             -- if True, every ancestor of the differing node is also
                           considered as a possible target of the copy

    Returns logsumexp over the log probability of every (source, target) choice considered.
    """
    chosen_node1, chosen_node2 = least_common_difference(t1, t2)

    lps = []
    if chosen_node1 is None:
        # any node in the tree could have been copied
        # (presumably least_common_difference found no differing node -- confirm)
        for node in t1:
            # weight of the nodes that match `node` and so could have been the copied source
            could_be_source = lambda x: 1.0 * nodes_equal_except_parents(grammar, x, node) * resampleProbability(x)

            # `node` matches itself, so its own weight is removed from the source mass
            lp_of_choosing_source = (nicelog(t1.sample_node_normalizer(could_be_source) - could_be_source(node))
                                     - nicelog(t1.sample_node_normalizer(resampleProbability)))

            # The target is the node being considered. chosen_node1 is always None
            # in this branch, so it must not be used as the target here.
            lp_of_choosing_target = t1.sampling_log_probability(node, resampleProbability=resampleProbability)

            lps += [lp_of_choosing_source + lp_of_choosing_target]
    else:
        # we have a specific path up the tree
        while chosen_node1:
            # sources are the nodes of t1 that match what now sits at this position in t2
            could_be_source = lambda x: 1.0 * nodes_equal_except_parents(grammar, x, chosen_node2) * resampleProbability(x)

            lp_of_choosing_source = (nicelog(t1.sample_node_normalizer(could_be_source))
                                     - nicelog(t1.sample_node_normalizer(resampleProbability)))
            lp_of_choosing_target = t1.sampling_log_probability(chosen_node1, resampleProbability=resampleProbability)

            lps += [lp_of_choosing_source + lp_of_choosing_target]

            if recurse:
                # the copy could equally have replaced any ancestor of the difference
                chosen_node1 = chosen_node1.parent
                chosen_node2 = chosen_node2.parent
            else:
                chosen_node1 = None  # only the differing node itself; end the loop

    return logsumexp(lps)
def compute_proposal_probability(self, grammar, t1, t2, resampleProbability=lambdaOne, **kwargs):
    """
    Log probability of generating t2 from t1 under the mixture of self.proposers.

    Each proposer's log probability is shifted by the log of its entry in
    self.proposer_weights, and the results are combined with logsumexp.
    Extra keyword arguments are forwarded to every proposer.
    """
    weighted_lps = []
    for position, sub_proposer in enumerate(self.proposers):
        lp_under_proposer = sub_proposer.compute_proposal_probability(
            grammar, t1, t2, resampleProbability=resampleProbability, **kwargs)
        log_weight = nicelog(self.proposer_weights[position])
        weighted_lps.append(log_weight + lp_under_proposer)

    return logsumexp(weighted_lps)
def compute_proposal_probability(self, grammar, t1, t2, resampleProbability=lambdaOne):
    """
    Log probability that a deletion turned t1 into t2.

    A deletion is only possible when the differing node of t1 has an argument
    roughly equal to the differing node of t2; otherwise -Infinity is returned.
    """
    node_1, node_2 = least_common_difference(t1, t2)

    deletion_possible = (node_1 and node_2 and
                         any([nodes_are_roughly_equal(arg, node_2) for arg in None2Empty(node_1.args)]))
    if not deletion_possible:
        return -Infinity  # no possible deletion

    # choose node_1 under the weighting dp_rp builds from resampleProbability
    lp_choosing_node_1 = t1.sampling_log_probability(node_1, dp_rp(resampleProbability))

    # then choose uniformly among node_1's replicating children
    n_replicating_children = len(list_replicating_children(node_1))
    lp_choosing_child = -nicelog(n_replicating_children)

    return lp_choosing_node_1 + lp_choosing_child
def compute_single_likelihood(self, datum, llcounts=None, nsamples=512, sm=0.1):
    """
    Log likelihood of one datum under smoothed counts.

    sm smoothing counts are added to existing bins of counts (just to prevent badness).
    llcounts is optional so the counts can be cached externally; when it is
    None they are computed here with make_ll_counts. The normalizer uses
    nsamples, not the total of llcounts.
    """
    assert isinstance(datum.output, dict), "Data supplied to SimpleGenerativeHypothesis must be a dict (function outputs to counts)"

    # compute if not passed in
    if llcounts is None:
        llcounts = self.make_ll_counts(datum.input, nsamples=nsamples)

    total = 0
    for k in datum.output.keys():
        log_numerator = nicelog(llcounts[k] + sm)
        log_denominator = nicelog(nsamples + sm * len(datum.output.keys()))
        total += datum.output[k] * (log_numerator - log_denominator)
    return total
def compute_proposal_probability(self, grammar, t1, t2, resampleProbability=lambdaOne):
    """
    Log probability that an insertion turned t1 into t2.

    An insertion is only possible when the differing node of t1 is roughly
    equal to one of the arguments of the differing node of t2; otherwise
    -Infinity is returned. The node is sampled under can_insert_FunctionNode;
    the resampleProbability argument is not used by this version.
    """
    node_1, node_2 = least_common_difference(t1, t2)

    insertion_possible = (node_1 and node_2 and
                          any([nodes_are_roughly_equal(arg, node_1) for arg in None2Empty(node_2.args)]))
    if not insertion_possible:
        return -Infinity  # the trees cannot be identical if we performed an insertion

    # choosing node_1 among the nodes we can insert on
    lp_choosing_node_1 = t1.sampling_log_probability(node_1, resampleProbability=can_insert_FunctionNode)

    # choosing the rule: uniform over the insertable rules for node_1's return type
    insertable_rules = [rule for rule in grammar.rules[node_1.returntype] if can_insert_GrammarRule(rule)]
    lp_choosing_rule = -nicelog(len(insertable_rules))

    # choosing the slot: uniform over node_2's arguments that have node_1's return type
    same_type_slots = [i for i, child in enumerate(node_2.args) if child.returntype == node_1.returntype]
    lp_choosing_replacement = -nicelog(len(same_type_slots))

    # every argument that is significantly different from node_1 had to be generated
    lp_generation = 0
    for arg in node_2.args:
        same_as_node_1 = (arg.name == node_1.name and
                          arg.returntype == node_1.returntype and
                          arg.args == node_1.args)
        if same_as_node_1:
            continue
        with BVRuleContextManager(grammar, node_2, recurse_up=True):
            lp_generation += grammar.log_probability(arg)

    lp_copy_making_node_2 = lp_choosing_rule + lp_choosing_replacement + lp_generation
    return lp_choosing_node_1 + lp_copy_making_node_2
def compute_proposal_probability(self, grammar, t1, t2, resampleProbability=lambdaOne):
    """
    Log probability that a deletion turned t1 into t2.

    A deletion is only possible when the differing node of t1 has an argument
    roughly equal to the differing node of t2; otherwise -Infinity is returned.
    """
    node_1, node_2 = least_common_difference(t1, t2)

    deletion_possible = (node_1 and node_2 and
                         any([nodes_are_roughly_equal(arg, node_2) for arg in None2Empty(node_1.args)]))
    if not deletion_possible:
        return -Infinity  # no possible deletion

    # choose node_1 under the weighting dp_rp builds from resampleProbability
    lp_choosing_node_1 = t1.sampling_log_probability(node_1, dp_rp(resampleProbability))

    # then choose uniformly among node_1's replicating children
    n_replicating_children = len(list_replicating_children(node_1))
    lp_choosing_child = -nicelog(n_replicating_children)

    return lp_choosing_node_1 + lp_choosing_child
def compute_proposal_probability(self, grammar, t1, t2, resampleProbability=lambdaOne, **kwargs):
    """
    Log probability of generating t2 from t1 under the mixture of self.proposers.

    Each proposer's log probability is shifted by the log of its entry in
    self.proposer_weights, and the results are combined with logsumexp.
    Extra keyword arguments are forwarded to every proposer.
    """
    weighted_lps = []
    for position, sub_proposer in enumerate(self.proposers):
        lp_under_proposer = sub_proposer.compute_proposal_probability(
            grammar, t1, t2, resampleProbability=resampleProbability, **kwargs)
        log_weight = nicelog(self.proposer_weights[position])
        weighted_lps.append(log_weight + lp_under_proposer)

    return logsumexp(weighted_lps)
output={'h e s': size, 'm e s': size, 'm e g': size, 'h e g': size, 'm e n': size, 'h e m': size, 'm e k': size, 'k e s': size, 'h e k': size, 'k e N': size, 'k e g': size, 'h e n': size, 'm e N': size, 'k e n': size, 'h e N': size, 'f e N': size, 'g e N': size, 'n e N': size, 'n e s': size, 'f e n': size, 'g e n': size, 'g e m': size, 'f e m': size, 'g e k': size, 'f e k': size, 'f e g': size, 'f e s': size, 'n e g': size, 'k e m': size, 'n e m': size, 'g e s': size, 'n e k': size})] mdata = make_data(100) results = dict() for h in space: #likelihood per data point h.likelihood = h.likelihood/sum(mdata[0].output.values()) for damt in xrange(1,100): posterior_score = [h.prior + h.likelihood * damt for h in space if 'll_counts' in dir(h)] print "Starting analysis for: " + str(damt) + " data points. Ughhhhh" pdata = logsumexp(posterior_score) for w in all_with_vowel: results[(w, damt)] =[np.exp(logsumexp([nicelog(h.ll_counts[w] + 1e-6) - nicelog(sum(h.ll_counts.values())+(1e-6*len(h.ll_counts.keys()))) + (posterior_score[p] - pdata) for p, h in enumerate(space) if 'll_counts' in dir(h)])), damt] print results.keys() print results.values() with open('/home/Jenna/Desktop/Warker/'+str(options.file), 'w') as f: f.write("Word,Probability,Data\n") for k,v in results.iteritems(): f.write(k[0] + ',') f.write(str(v[0])+',') f.write(str(v[1])) f.write('\n')
def insert_delete_proposal(grammar, t):
    """
    Propose a change to a copy of t: with probability 1/2 insert a node above
    a sampled node, otherwise delete a sampled node by promoting one of its children.

    Returns [newt, f - b], where f is the log probability of making this
    proposal and b the log probability of the move that undoes it.

    Raises ProposalFailedException when no node can be sampled, when the
    sampled rule has no argument slot of the right type (insert), or when the
    sampled node has no child of its own return type (delete). If no rule can
    be inserted, the unchanged copy is returned with f - b = 0.0.
    """
    newt = copy(t)

    if random() < 0.5:  # So we insert

        # Choose a node at random to insert on
        # TODO: We could precompute the nonterminals we can do this move on, if we wanted
        try:
            ni, lp = newt.sample_subnode(can_insert_FunctionNode)
        except NodeSamplingException:
            raise ProposalFailedException

        # Since it's an insert, see if there is a (replicating) rule that expands
        # from ni.returntype to some ni.returntype
        replicating_rules = [r for r in grammar.rules[ni.returntype] if can_insert_GrammarRule(r)]
        if len(replicating_rules) == 0:
            return [newt, 0.0]

        # sample a rule uniformly (not under the predicate)
        r = sample1(replicating_rules)

        # the functionNode we are building
        fn = r.make_FunctionNodeStub(grammar, ni.parent)

        # figure out which arg will be the existing ni
        replicatingindices = [i for i, a in enumerate(fn.args) if a == ni.returntype]
        # The original `assert replicatingindices > 0` compared a list with an
        # int and could never fail; check the length and fail the proposal instead.
        if len(replicatingindices) == 0:
            raise ProposalFailedException

        replace_i = sample1(replicatingindices)  # choose the one to replace
        fn.args[replace_i] = copy(ni)  # the one we replace

        ## Now expand the other args, with the right rules in the grammar
        with BVRuleContextManager(grammar, fn, recurse_up=True):
            # and generate the args below
            for i, a in enumerate(fn.args):
                if i != replace_i:
                    fn.args[i] = grammar.generate(a)  # else generate like normal

        # we need a count of how many kids are the same afterwards
        # (must be taken before ni is overwritten by setto)
        after_same_children = sum([x == ni for x in fn.args])

        ni.setto(fn)

        with BVRuleContextManager(grammar, fn, recurse_up=True):
            # what is the prob mass of the new stuff?
            new_lp_below = sum([grammar.log_probability(fn.args[i])
                                if (i != replace_i and isFunctionNode(fn.args[i])) else 0.
                                for i in range(len(fn.args))])

            # What is the new normalizer?
            newZ = newt.sample_node_normalizer(can_delete_FunctionNode)
            assert newZ > 0

            # To sample forward: choose the node ni, choose the replicating rule,
            # choose which "to" to expand (we could have put it on any of the
            # replicating rules that are identical), and generate the rest of the tree
            f = lp + (-log(len(replicating_rules))) + (log(after_same_children) - log(len(replicatingindices))) + new_lp_below

            # To go backwards, choose the inserted rule, and any of the identical
            # children, out of all replicators. The delete move below picks the
            # promoted child among the replicating children only (nrk), so the
            # denominator is the number of replicating slots, not len(fn.args).
            b = (log(1.0 * can_delete_FunctionNode(fn)) - log(newZ)) + (log(after_same_children) - log(len(replicatingindices)))

    else:  # A delete move!

        # Sample a node at random
        try:
            ni, lp = newt.sample_subnode(can_delete_FunctionNode)  # this could raise exception

            # Really, it had to be not None
            if ni.args is None:
                raise NodeSamplingException
        except NodeSamplingException:
            raise ProposalFailedException

        # Figure out which of my children have the same type as me
        replicating_kid_indices = [i for i in range(len(ni.args))
                                   if isFunctionNode(ni.args[i]) and ni.args[i].returntype == ni.returntype]

        nrk = len(replicating_kid_indices)  # how many replicating kids
        if nrk == 0:
            raise ProposalFailedException

        replicating_rules = [r for r in grammar.rules[ni.returntype] if can_delete_GrammarRule(r)]
        assert len(replicating_rules) > 0  # better be some or where did ni come from?

        samplei = sample1(replicating_kid_indices)  # who to promote; NOTE: not done via any weighting

        # We need to be in the right grammar state to evaluate log_probability
        with BVRuleContextManager(grammar, ni.args[samplei], recurse_up=True):

            # Now we must count the multiple ways we could go forward or back
            # Here, we could have sampled any of them equivalent to ni.args[samplei]
            before_same_children = sum([x == ni.args[samplei] for x in ni.args])

            # the lp of everything we'd have to create going backwards
            old_lp_below = sum([grammar.log_probability(ni.args[i])
                                if (i != samplei and isFunctionNode(ni.args[i])) else 0.
                                for i in range(len(ni.args))])

            # and replace it
            ni.args[samplei].parent = ni.parent  # update this first ;; TODO: IS THIS NECESSARY?
            ni.setto(ni.args[samplei])

        # And compute f/b probs
        newZ = newt.sample_node_normalizer(resampleProbability=can_insert_FunctionNode)

        # To go forward, choose the node, and then from all equivalent children
        f = lp + (log(before_same_children) - log(nrk))

        # To go back, choose the node, choose the replicating rule, choose where
        # to put it, and generate the rest of the tree
        b = (nicelog(1.0 * can_insert_FunctionNode(ni)) - nicelog(newZ)) + -nicelog(len(replicating_rules)) + (nicelog(before_same_children) - nicelog(nrk)) + old_lp_below

    return [newt, f - b]
def flip_d(p):
    """ return a dict giving the nicelog of p for True and of 1 - p for False """
    lp_true = nicelog(p)
    lp_false = nicelog(1. - p)
    return {True: lp_true, False: lp_false}
def sampling_log_probability(self, node, resampleProbability=lambdaOne):
    """
    Log probability of sampling node: the log of its resampleProbability
    weight minus the log of this tree's normalizer for the same weighting.
    """
    log_weight = nicelog(1.0 * resampleProbability(node))
    log_normalizer = nicelog(self.sample_node_normalizer(resampleProbability=resampleProbability))
    return log_weight - log_normalizer
def sample_uniform_d(s):
    """ return a dict mapping every element of s to the same value, -nicelog(len(s)) """
    uniform_lp = -nicelog(len(s))
    return dict.fromkeys(s, uniform_lp)
def sampling_log_probability(self, node, resampleProbability=lambdaOne):
    """
    Log probability of sampling node: the log of its resampleProbability
    weight minus the log of this tree's normalizer for the same weighting.
    """
    log_weight = nicelog(1.0 * resampleProbability(node))
    log_normalizer = nicelog(self.sample_node_normalizer(resampleProbability=resampleProbability))
    return log_weight - log_normalizer
def flip_d(p):
    """ return a dict giving the nicelog of p for True and of 1 - p for False """
    lp_true = nicelog(p)
    lp_false = nicelog(1.0 - p)
    return {True: lp_true, False: lp_false}
def sample_uniform_d(s):
    """ return a dict mapping every element of s to the same value, -nicelog(len(s)) """
    uniform_lp = -nicelog(len(s))
    return dict.fromkeys(s, uniform_lp)
def insert_delete_proposal(grammar, t):
    """
    Propose a change to a copy of t: with probability 1/2 insert a node above
    a sampled node, otherwise delete a sampled node by promoting one of its children.

    Returns [proposal, f - b], where f is the log probability of making this
    proposal and b the log probability of the move that undoes it.

    Raises ProposalFailedException whenever the chosen move cannot be carried out.
    """
    proposal = copy(t)

    if random() < 0.5:  # insert!

        # Choose a node at random to insert on
        # TODO: We could precompute the nonterminals we can do this move on, if we wanted
        try:
            target, lp_target = proposal.sample_subnode(can_insert_FunctionNode)
        except NodeSamplingException:
            raise ProposalFailedException

        # is there a rule that expands from target.returntype to some target.returntype?
        candidate_rules = [rule for rule in grammar.rules[target.returntype] if can_insert_GrammarRule(rule)]
        if not candidate_rules:
            raise ProposalFailedException

        # sample a rule and build the stub of the node we are inserting
        chosen_rule = sample1(candidate_rules)
        inserted = chosen_rule.make_FunctionNodeStub(grammar, target.parent)

        # figure out which arg will be the existing target
        slots = [idx for idx, arg in enumerate(inserted.args) if arg == target.returntype]
        if not slots:  # should never happen
            raise ProposalFailedException
        kept_slot = sample1(slots)  # choose the one to replace

        ## Now expand the other args, with the right rules in the grammar
        with BVRuleContextManager(grammar, inserted, recurse_up=True):
            for idx, arg in enumerate(inserted.args):
                if idx == kept_slot:
                    inserted.args[idx] = copy(target)  # the one we replace
                else:
                    inserted.args[idx] = grammar.generate(arg)  # else generate like normal

        # we need a count of how many kids are the same afterwards
        n_same_after = sum([child == target for child in inserted.args])

        # perform the insertion
        target.setto(inserted)

        # TODO: fix the fact that there are potentially multiple backward steps to give the equivalent tree
        # need to use the right grammar for log_probability calculations
        with BVRuleContextManager(grammar, inserted, recurse_up=True):

            # what is the prob mass of the new stuff?
            lp_generated = sum([
                grammar.log_probability(child) if (idx != kept_slot and isFunctionNode(child)) else 0.0
                for idx, child in enumerate(inserted.args)
            ])

            # What is the new normalizer?
            z_delete = proposal.sample_node_normalizer(can_delete_FunctionNode)
            assert z_delete > 0

            # forward: choose the node, choose the replicating rule, choose which
            # "to" to expand, and generate the rest of the tree
            f = (lp_target
                 - nicelog(len(candidate_rules))
                 + (nicelog(n_same_after) - nicelog(len(slots)))
                 + lp_generated)

            # backward: choose the inserted node, choose one of the children identical
            # to the original node, and deterministically delete
            lp_pick_inserted = nicelog(1.0 * can_delete_FunctionNode(inserted)) - nicelog(z_delete)
            b = lp_pick_inserted + (nicelog(n_same_after) - nicelog(len(slots)))

    else:  # delete!

        try:
            # sample a node at random
            target, lp_target = proposal.sample_subnode(can_delete_FunctionNode)  # this could raise exception
            if target.args is None:  # doesn't have children to promote
                raise NodeSamplingException
        except NodeSamplingException:
            raise ProposalFailedException

        # Figure out which of the children have the same type as the node itself
        promotable = [idx for idx, child in enumerate(target.args)
                      if isFunctionNode(child) and child.returntype == target.returntype]
        nrk = len(promotable)  # how many replicating kids
        if nrk == 0:
            raise ProposalFailedException

        candidate_rules = [rule for rule in grammar.rules[target.returntype] if can_delete_GrammarRule(rule)]
        assert len(candidate_rules) > 0  # better be some or where did the node come from?

        promoted_idx = sample1(promotable)  # who to promote; NOTE: not done via any weighting

        # We need to be in the right grammar state to evaluate log_probability
        with BVRuleContextManager(grammar, target.args[promoted_idx], recurse_up=True):

            # Now we must count the multiple ways we could go forward or back:
            # any child equal to the promoted one would have given the same tree
            n_same_before = sum([child == target.args[promoted_idx] for child in target.args])

            # the lp of everything we'd have to create going backwards
            lp_regenerate = sum([
                grammar.log_probability(child) if (idx != promoted_idx and isFunctionNode(child)) else 0.0
                for idx, child in enumerate(target.args)
            ])

            # and replace it
            target.setto(target.args[promoted_idx])

        z_insert = proposal.sample_node_normalizer(resampleProbability=can_insert_FunctionNode)

        # forward: choose the node, and then from all equivalent children
        f = lp_target + (log(n_same_before) - log(nrk))

        # backward: choose the node, choose the replicating rule, choose where to
        # put it, and generate the rest of the tree
        lp_pick_node = nicelog(1.0 * can_insert_FunctionNode(target)) - nicelog(z_insert)
        b = (lp_pick_node
             - nicelog(len(candidate_rules))
             + (nicelog(n_same_before) - nicelog(nrk))
             + lp_regenerate)

    return [proposal, f - b]