Example #1
 def compute_proposal_probability(self,grammar, t1, t2, resampleProbability=lambdaOne, recurse=True):
     chosen_node1 , chosen_node2 = least_common_difference(t1,t2)
 
     lps = []
     if chosen_node1 is None: # any node in the tree could have been copied
         for node in t1:
             could_be_source = lambda x: 1.0 * nodes_equal_except_parents(grammar,x,node) * resampleProbability(x)
             lp_of_choosing_source = (nicelog(t1.sample_node_normalizer(could_be_source) - could_be_source(node)) - nicelog(t1.sample_node_normalizer(resampleProbability)))
             lp_of_choosing_target = t1.sampling_log_probability(node, resampleProbability=resampleProbability)
             lps += [lp_of_choosing_source + lp_of_choosing_target]
     else: # we have a specific path up the tree
         while chosen_node1:
             could_be_source = lambda x: 1.0 * nodes_equal_except_parents(grammar,x,chosen_node2) * resampleProbability(x)
 
             lp_of_choosing_source = nicelog(t1.sample_node_normalizer(could_be_source)) - nicelog(t1.sample_node_normalizer(resampleProbability))
             lp_of_choosing_target = t1.sampling_log_probability(chosen_node1,resampleProbability=resampleProbability)
             lps += [lp_of_choosing_source + lp_of_choosing_target]
 
             if recurse:
                 chosen_node1 = chosen_node1.parent
                 chosen_node2 = chosen_node2.parent
             else:
                 chosen_node1 = None
 
     return logsumexp(lps)
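
All of these examples come from LOTlib and share a handful of small helpers. For reference, here is a minimal sketch of what those helpers are assumed to do (the real LOTlib definitions may differ in detail): nicelog is a log that tolerates zero, lambdaOne is the constant-1 default resample probability, and logsumexp is the usual numerically stable log-domain sum.

from math import log, exp

Infinity = float("inf")

def nicelog(x):
    # log(x), but map nonpositive arguments to -Infinity instead of raising
    return log(x) if x > 0 else -Infinity

def lambdaOne(*args, **kwargs):
    # default resample probability: every node is weighted equally
    return 1.0

def logsumexp(lps):
    # numerically stable log(sum(exp(lp) for lp in lps))
    m = max(lps)
    if m == -Infinity:
        return -Infinity
    return m + log(sum(exp(lp - m) for lp in lps))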
Example #2
    def compute_single_likelihood(self, datum, llcounts, sm=0.1):
        """
        sm smoothing counts are added to existing bins of counts.
        """

        assert isinstance(datum.output, dict), "Data supplied to SimpleGenerativeHypothesis must be a dict of function outputs to counts"

        z = sum(llcounts.values())
        return sum([ datum.output[k] * (nicelog(llcounts[k] + sm) - nicelog(z + sm*len(datum.output.keys())) ) for k in datum.output.keys() ])
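
To make the smoothing concrete: each observed output k contributes count_k * log((llcounts[k] + sm) / (z + sm*K)), where K is the number of observed output types. A hand-checkable standalone sketch of the same computation (names and data hypothetical):

from math import log

def smoothed_loglik(output, llcounts, sm=0.1):
    # the same smoothed multinomial log-likelihood as compute_single_likelihood
    z = sum(llcounts.values())
    K = len(output)
    return sum(c * (log(llcounts[k] + sm) - log(z + sm * K))
               for k, c in output.items())

print(smoothed_loglik({'a': 3, 'b': 1}, {'a': 7, 'b': 1}))
# 3*log(7.1/8.2) + 1*log(1.1/8.2) ~= -2.441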
Example #3
    def compute_likelihood(self, data, shortcut=-Infinity, nsamples=512, sm=0.1, **kwargs):
        # For each datum, recompute its output counts (ll_counts) by forward simulation

        ll = 0.0
        for datum in data:
            self.ll_counts = self.make_ll_counts(datum.input, nsamples=nsamples)
            z = sum(self.ll_counts.values())
            ll += sum([datum.output[k]*(nicelog(self.ll_counts[k]+sm) - nicelog(z+sm*len(datum.output.keys()))) for k in datum.output.keys()])
            if ll < shortcut:
                return -Infinity

        return ll / self.likelihood_temperature
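
The shortcut argument is an early-exit bound: every datum contributes a nonpositive log-likelihood term, so once the running total falls below shortcut it can never recover and the loop can bail out with -Infinity. A minimal standalone sketch of that pattern (hypothetical names):

def bounded_sum(per_datum_lls, shortcut=float("-inf")):
    # running sum of nonpositive log-likelihood terms with early exit:
    # once the partial sum drops below `shortcut` it cannot recover
    ll = 0.0
    for term in per_datum_lls:
        ll += term
        if ll < shortcut:
            return float("-inf")
    return ll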
Example #4
    def compute_single_likelihood(self, datum, llcounts=None, nsamples=512, sm=0.1):
        """
        sm smoothing counts are added to existing bins of counts (just to prevent badness).
        This can take an optional llcounts in order to allow us to cache this externally.
        """
        assert isinstance(datum.output, dict), "Data supplied to SimpleGenerativeHypothesis must be a dict (function outputs to counts)"

        if llcounts is None: # compute if not passed in
            llcounts = self.make_ll_counts(datum.input, nsamples=nsamples)

        return sum([ datum.output[k] * (nicelog(llcounts[k] + sm)-nicelog(nsamples + sm*len(datum.output.keys())) ) for k in datum.output.keys() ])
Example #5
    def compute_likelihood(self, data, shortcut=-Infinity, nsamples=512, sm=0.1, **kwargs):
        # For each datum, recompute its output counts (ll_counts) by forward simulation

        ll = 0.0
        for datum in data:
            self.ll_counts = self.make_ll_counts(datum.input, nsamples=nsamples)
            z = sum(self.ll_counts.values())
            ll += sum([datum.output[k]*(nicelog(self.ll_counts[k]+sm) - nicelog(z+sm*len(datum.output.keys()))) for k in datum.output.keys()])
            if ll < shortcut:
                return -Infinity

        return ll / self.likelihood_temperature
Example #6
    def compute_single_likelihood(self, datum, llcounts, sm=0.1):
        """
                sm smoothing counts are added to existing bins of counts
        """

        assert isinstance(
            datum.output, dict
        ), "Data supplied to SimpleGenerativeHypothesis must be a dict of function outputs to counts"

        z = sum(llcounts.values())
        return sum([
            datum.output[k] * (nicelog(llcounts[k] + sm) -
                               nicelog(z + sm * len(datum.output.keys())))
            for k in datum.output.keys()
        ])
Example #7
    def compute_proposal_probability(self,
                                     grammar,
                                     t1,
                                     t2,
                                     resampleProbability=lambdaOne):
        node_1, node_2 = least_common_difference(t1, t2)

        if (node_1 and node_2 and any([
                nodes_are_roughly_equal(arg, node_1)
                for arg in None2Empty(node_2.args)
        ])):

            lp_choosing_node_1 = t1.sampling_log_probability(
                node_1,
                resampleProbability=lambda t: can_insert_FunctionNode(
                    t, grammar) * resampleProbability(t))

            lp_choosing_rule = -nicelog(
                len(filter(can_insert_GrammarRule,
                           grammar.rules[node_1.returntype])))
            lp_choosing_replacement = -nicelog(
                len(filter(lambda i: node_2.args[i].returntype == node_1.returntype,
                           xrange(len(node_2.args)))))

            lp_generation = []
            for arg in node_2.args:
                if not (arg.name == node_1.name and
                        arg.returntype == node_1.returntype and
                        arg.args == node_1.args):  # if the nodes are significantly different
                    with BVRuleContextManager(grammar, node_2,
                                              recurse_up=True):
                        lp_generation += [grammar.log_probability(arg)]

            lp_copy_making_node_2 = lp_choosing_rule + lp_choosing_replacement + sum(
                lp_generation)

            return lp_choosing_node_1 + lp_copy_making_node_2
        else:
            return -Infinity  # the trees cannot be identical if we performed an insertion
Example #8
    def compute_proposal_probability(self,
                                     grammar,
                                     t1,
                                     t2,
                                     resampleProbability=lambdaOne,
                                     recurse=True):
        chosen_node1, chosen_node2 = least_common_difference(t1, t2)

        lps = []
        if chosen_node1 is None:  # any node in the tree could have been copied
            for node in t1:
                could_be_source = lambda x: 1.0 * nodes_equal_except_parents(
                    grammar, x, node) * resampleProbability(x)
                lp_of_choosing_source = (
                    nicelog(
                        t1.sample_node_normalizer(could_be_source) -
                        could_be_source(node)) -
                    nicelog(t1.sample_node_normalizer(resampleProbability)))
                lp_of_choosing_target = t1.sampling_log_probability(
                    node, resampleProbability=resampleProbability)
                lps += [lp_of_choosing_source + lp_of_choosing_target]
        else:  # we have a specific path up the tree
            while chosen_node1:
                could_be_source = lambda x: 1.0 * nodes_equal_except_parents(
                    grammar, x, chosen_node2) * resampleProbability(x)

                lp_of_choosing_source = nicelog(
                    t1.sample_node_normalizer(could_be_source)) - nicelog(
                        t1.sample_node_normalizer(resampleProbability))
                lp_of_choosing_target = t1.sampling_log_probability(
                    chosen_node1, resampleProbability=resampleProbability)
                lps += [lp_of_choosing_source + lp_of_choosing_target]

                if recurse:
                    chosen_node1 = chosen_node1.parent
                    chosen_node2 = chosen_node2.parent
                else:
                    chosen_node1 = None

        return logsumexp(lps)
Example #9
 def compute_proposal_probability(self,grammar, t1, t2, resampleProbability=lambdaOne, **kwargs):
     """
         sum over all possible ways of generating t2 from t1 over all
         proposers, adjusted for their weight
     """
     lps = []
     for idx,proposer in enumerate(self.proposers):
         lp = proposer.compute_proposal_probability(grammar,t1,t2,
                                                    resampleProbability=resampleProbability,
                                                    **kwargs)
         lw = nicelog(self.proposer_weights[idx])
         lps += [lw+lp]
     return logsumexp(lps)
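
Since each proposer reports a log probability, the weighted mixture sum_i w_i * p_i(t2 | t1) is assembled in log space as logsumexp([log(w_i) + lp_i]). A quick numeric check of that identity, using the logsumexp sketch above (toy numbers):

from math import log

ws = [0.25, 0.75]   # toy proposer weights
ps = [0.10, 0.02]   # toy per-proposer proposal probabilities

direct = log(sum(w * p for w, p in zip(ws, ps)))
via_logs = logsumexp([log(w) + log(p) for w, p in zip(ws, ps)])
assert abs(direct - via_logs) < 1e-12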
Example #10
    def compute_proposal_probability(self,grammar,t1,t2,resampleProbability=lambdaOne):
        node_1,node_2 = least_common_difference(t1,t2)

        if (node_1 and node_2 and
            any([nodes_are_roughly_equal(arg,node_2) for arg in
                 None2Empty(node_1.args)])):

            lp_choosing_node_1 = t1.sampling_log_probability(node_1,dp_rp(resampleProbability))
            lp_choosing_child = -nicelog(len(list_replicating_children(node_1)))
            return lp_choosing_node_1 + lp_choosing_child

        else: # no possible deletion
            return -Infinity
Example #11
    def compute_single_likelihood(self,
                                  datum,
                                  llcounts=None,
                                  nsamples=512,
                                  sm=0.1):
        """
        sm smoothing counts are added to existing bins of counts (just to prevent badness).
        This can take an optional llcounts in order to allow us to cache this externally.
        """
        assert isinstance(
            datum.output, dict
        ), "Data supplied to SimpleGenerativeHypothesis must be a dict (function outputs to counts)"

        if llcounts is None:  # compute if not passed in
            llcounts = self.make_ll_counts(datum.input, nsamples=nsamples)

        return sum([
            datum.output[k] *
            (nicelog(llcounts[k] + sm) -
             nicelog(nsamples + sm * len(datum.output.keys())))
            for k in datum.output.keys()
        ])
Example #12
    def compute_proposal_probability(self,grammar,t1,t2,resampleProbability=lambdaOne):
        node_1,node_2 = least_common_difference(t1,t2)

        if (node_1 and node_2 and any([nodes_are_roughly_equal(arg,node_1) for arg in None2Empty(node_2.args)])):

            lp_choosing_node_1 =  t1.sampling_log_probability(node_1,resampleProbability=can_insert_FunctionNode)

            lp_choosing_rule = -nicelog(len(filter(can_insert_GrammarRule, grammar.rules[node_1.returntype])))
            lp_choosing_replacement = -nicelog(len(filter( lambda i: node_2.args[i].returntype == node_1.returntype, xrange(len(node_2.args)))))

            lp_generation = []
            for arg in node_2.args:
                if not (arg.name == node_1.name and
                        arg.returntype == node_1.returntype and
                        arg.args == node_1.args): # if the nodes are significantly different
                    with BVRuleContextManager(grammar, node_2, recurse_up=True):
                        lp_generation += [grammar.log_probability(arg)]

            lp_copy_making_node_2 = lp_choosing_rule + lp_choosing_replacement + sum(lp_generation)

            return lp_choosing_node_1 + lp_copy_making_node_2
        else:
            return -Infinity # the trees cannot be identical if we performed an insertion
Example #13
    def compute_proposal_probability(self,
                                     grammar,
                                     t1,
                                     t2,
                                     resampleProbability=lambdaOne):
        node_1, node_2 = least_common_difference(t1, t2)

        if (node_1 and node_2 and any([
                nodes_are_roughly_equal(arg, node_2)
                for arg in None2Empty(node_1.args)
        ])):

            lp_choosing_node_1 = t1.sampling_log_probability(
                node_1, dp_rp(resampleProbability))
            lp_choosing_child = -nicelog(len(
                list_replicating_children(node_1)))
            return lp_choosing_node_1 + lp_choosing_child

        else:  # no possible deletion
            return -Infinity
Example #14
 def compute_proposal_probability(self,
                                  grammar,
                                  t1,
                                  t2,
                                  resampleProbability=lambdaOne,
                                  **kwargs):
     """
         sum over all possible ways of generating t2 from t1 over all
         proposers, adjusted for their weight
     """
     lps = []
     for idx, proposer in enumerate(self.proposers):
         lp = proposer.compute_proposal_probability(
             grammar,
             t1,
             t2,
             resampleProbability=resampleProbability,
             **kwargs)
         lw = nicelog(self.proposer_weights[idx])
         lps += [lw + lp]
     return logsumexp(lps)
Example #15
                             output={'h e s': size, 'm e s': size, 'm e g': size, 'h e g': size, 'm e n': size, 'h e m': size, 'm e k': size, 'k e s': size, 'h e k': size, 'k e N': size, 'k e g': size, 'h e n': size, 'm e N': size, 'k e n': size, 'h e N': size, 'f e N': size, 'g e N': size, 'n e N': size, 'n e s': size, 'f e n': size, 'g e n': size, 'g e m': size, 'f e m': size, 'g e k': size, 'f e k': size, 'f e g': size, 'f e s': size, 'n e g': size, 'k e m': size, 'n e m': size, 'g e s': size, 'n e k': size})]

mdata = make_data(100)


results = dict()
for h in space:
    #likelihood per data point
    h.likelihood = h.likelihood/sum(mdata[0].output.values())
for damt in xrange(1,100):
    hyps = [h for h in space if 'll_counts' in dir(h)]
    posterior_score = [h.prior + h.likelihood * damt for h in hyps]
    print "Starting analysis for: " + str(damt) + " data points. Ughhhhh"
    pdata = logsumexp(posterior_score)
    for w in all_with_vowel:
        results[(w, damt)] = [np.exp(logsumexp([nicelog(h.ll_counts[w] + 1e-6)
                                                - nicelog(sum(h.ll_counts.values()) + 1e-6*len(h.ll_counts.keys()))
                                                + (posterior_score[p] - pdata)
                                                for p, h in enumerate(hyps)])), damt]
print results.keys()
print results.values()




with open('/home/Jenna/Desktop/Warker/'+str(options.file), 'w') as f:
    f.write("Word,Probability,Data\n")
    for k,v in results.iteritems():
        f.write(k[0] + ',')
        f.write(str(v[0])+',')
        f.write(str(v[1]))
        f.write('\n')
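
The expression stored in results is a posterior-weighted model average: each hypothesis's smoothed predictive probability for w is weighted by its normalized posterior exp(posterior_score - pdata), and the weighted sum is again taken with logsumexp. The same computation on toy numbers, using the logsumexp sketch above (all values hypothetical):

import numpy as np
from math import log

post = [-1.0, -2.0, -4.0]   # unnormalized log-posterior scores
pred = [0.30, 0.10, 0.80]   # each hypothesis's predictive P(w)

pdata = logsumexp(post)     # log of the posterior normalizer
p_w = np.exp(logsumexp([log(pred[i]) + post[i] - pdata for i in range(len(post))]))
# identical to: sum_i pred[i] * exp(post[i]) / sum_i exp(post[i])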

Example #16
def insert_delete_proposal(grammar, t):
    newt = copy(t)

    if random() < 0.5: # So we insert

        # Choose a node at random to insert on
        # TODO: We could precompute the nonterminals we can do this move on, if we wanted
        try:
            ni, lp = newt.sample_subnode(can_insert_FunctionNode)
        except NodeSamplingException:
            raise ProposalFailedException

        # Since it's an insert, see if there is a (replicating) rule that expands
        # from ni.returntype to some ni.returntype
        replicating_rules = filter(can_insert_GrammarRule, grammar.rules[ni.returntype])
        if len(replicating_rules) == 0:  return [newt, 0.0]

        # sample a rule and compute its probability (not under the predicate)
        r = sample1(replicating_rules)

        # the functionNode we are building
        fn = r.make_FunctionNodeStub(grammar, ni.parent)

        # figure out which arg will be the existing ni
        replicatingindices = filter( lambda i: fn.args[i] == ni.returntype, xrange(len(fn.args)))
        assert len(replicatingindices) > 0 # since that's what a replicating rule is
        replace_i = sample1(replicatingindices) # choose the one to replace
        fn.args[replace_i] = copy(ni) # the one we replace

        ## Now expand the other args, with the right rules in the grammar
        with BVRuleContextManager(grammar, fn, recurse_up=True):

            # and generate the args below
            for i,a in enumerate(fn.args):
                if i != replace_i:
                    fn.args[i] = grammar.generate(a) # else generate like normal

        # we need a count of how many kids are the same afterwards
        after_same_children = sum([x==ni for x in fn.args])

        ni.setto(fn)

        with BVRuleContextManager(grammar, fn, recurse_up=True):

            # what is the prob mass of the new stuff?
            new_lp_below =  sum([ grammar.log_probability(fn.args[i]) if (i!=replace_i and isFunctionNode(fn.args[i])) else 0. for i in xrange(len(fn.args))])
            # What is the new normalizer?
            newZ = newt.sample_node_normalizer(can_delete_FunctionNode)
            assert newZ > 0
            # To sample forward: choose the node ni, choose the replicating rule, choose which "to" to expand (we could have put it on any of the replicating rules that are identical), and generate the rest of the tree
            f = lp + (-log(len(replicating_rules))) + (log(after_same_children)-log(len(replicatingindices))) + new_lp_below
            # To go backwards, choose the inserted rule, and any of the identical children, out of all replicators
            b = (log(1.0*can_delete_FunctionNode(fn)) - log(newZ)) + (log(after_same_children) - log(len(fn.args)))

    else: # A delete move!

        # Sample a node at random
        try:
            ni, lp = newt.sample_subnode(can_delete_FunctionNode) # this could raise exception

            # Really, it had to be not None
            if ni.args is None:
                raise NodeSamplingException

        except NodeSamplingException:
            raise ProposalFailedException

        # Figure out which of my children have the same type as me
        replicating_kid_indices = filter(lambda i: isFunctionNode(ni.args[i]) and ni.args[i].returntype == ni.returntype, range(len(ni.args)))
        nrk = len(replicating_kid_indices) # how many replicating kids
        if nrk == 0:
            raise ProposalFailedException

        replicating_rules = filter(can_delete_GrammarRule, grammar.rules[ni.returntype])
        assert len(replicating_rules) > 0 # better be some or where did ni come from?

        samplei = sample1(replicating_kid_indices) # who to promote; NOTE: not done via any weighting

        # We need to be in the right grammar state to evaluate log_probability
        with BVRuleContextManager(grammar, ni.args[samplei], recurse_up=True):

            # Now we must count the multiple ways we could go forward or back
            # Here, we could have sampled any of them equivalent to ni.args[i]
            before_same_children = sum([x==ni.args[samplei] for x in ni.args ]) # how many are the same after?

            # the lp of everything we'd have to create going backwards
            old_lp_below = sum([ grammar.log_probability(ni.args[i]) if (i!=samplei and isFunctionNode(ni.args[i])) else 0. for i in xrange(len(ni.args))])

            # and replace it
            ni.args[samplei].parent = ni.parent # update this first ;; TODO: IS THIS NECESSARY?
            ni.setto( ni.args[samplei] )

            # And compute f/b probs
            newZ = newt.sample_node_normalizer(resampleProbability=can_insert_FunctionNode)
            # To go forward, choose the node, and then from all equivalent children
            f = lp + (log(before_same_children) - log(nrk))
            # To go back, choose the node, choose the replicating rule, choose where to put it, and generate the rest of the tree
            b = (nicelog(1.0*can_insert_FunctionNode(ni)) - nicelog(newZ))  + -nicelog(len(replicating_rules)) + (nicelog(before_same_children) - nicelog(nrk)) + old_lp_below

    return [newt, f-b]
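
insert_delete_proposal returns the new tree together with f - b, the forward-minus-backward log proposal probability. In a Metropolis-Hastings step that correction enters the acceptance ratio; a minimal sketch, with score standing in for whatever posterior score the surrounding sampler uses (hypothetical wrapper, not LOTlib's own sampler code):

from math import exp
from random import random

def mh_accept(score_old, score_new, fb):
    # fb is f - b as returned above; accept with probability
    # min(1, exp(score_new - score_old - fb))
    log_ratio = score_new - score_old - fb
    return log_ratio >= 0 or random() < exp(log_ratio)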
Example #17
def flip_d(p):
    return {True: nicelog(p), False: nicelog(1.-p)}
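
flip_d packages a Bernoulli distribution as a dict from outcome to log probability; for example:

from math import exp

d = flip_d(0.25)
# {True: log(0.25), False: log(0.75)} ~= {True: -1.386, False: -0.288}
assert abs(exp(d[True]) + exp(d[False]) - 1.0) < 1e-12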
Example #18
 def sampling_log_probability(self, node, resampleProbability=lambdaOne):
     return nicelog(1.0 * resampleProbability(node)) - nicelog(
         self.sample_node_normalizer(
             resampleProbability=resampleProbability))
Example #19
def sample_uniform_d(s):
    """ return a unifom sample of the set s """
    l = -nicelog(len(s))
    return {x: l for x in s }
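
sample_uniform_d is the analogous constructor for a uniform distribution over a finite set: every element maps to -log(len(s)), so the values exponentiate to a proper distribution. For example:

from math import exp, log

d = sample_uniform_d({'a', 'b', 'c', 'd'})
assert all(abs(v + log(4)) < 1e-12 for v in d.values())
assert abs(sum(exp(v) for v in d.values()) - 1.0) < 1e-12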
Example #20
 def sampling_log_probability(self,node,resampleProbability=lambdaOne):
     return nicelog(1.0*resampleProbability(node)) - nicelog(self.sample_node_normalizer(resampleProbability=resampleProbability))
Example #21
def flip_d(p):
    return {True: nicelog(p), False: nicelog(1.0 - p)}
Example #22
def sample_uniform_d(s):
    """ return a unifom sample of the set s """
    l = -nicelog(len(s))
    return {x: l for x in s}
Example #23
def insert_delete_proposal(grammar, t):
    newt = copy(t)

    if random() < 0.5:  # insert!

        # Choose a node at random to insert on
        # TODO: We could precompute the nonterminals we can do this move on, if we wanted
        try:
            ni, lp = newt.sample_subnode(can_insert_FunctionNode)
        except NodeSamplingException:
            raise ProposalFailedException

        # is there a rule that expands from ni.returntype to some ni.returntype?
        replicating_rules = filter(can_insert_GrammarRule, grammar.rules[ni.returntype])
        if len(replicating_rules) == 0:
            raise ProposalFailedException

        # sample a rule
        r = sample1(replicating_rules)

        # the functionNode we are building
        fn = r.make_FunctionNodeStub(grammar, ni.parent)

        # figure out which arg will be the existing ni
        replicatingindices = filter(lambda i: fn.args[i] == ni.returntype, xrange(len(fn.args)))
        if len(replicatingindices) <= 0:  # should never happen
            raise ProposalFailedException

        replace_i = sample1(replicatingindices)  # choose the one to replace

        ## Now expand the other args, with the right rules in the grammar
        with BVRuleContextManager(grammar, fn, recurse_up=True):

            for i, a in enumerate(fn.args):
                if i == replace_i:
                    fn.args[i] = copy(ni)  # the one we replace
                else:
                    fn.args[i] = grammar.generate(a)  # else generate like normal

        # we need a count of how many kids are the same afterwards
        after_same_children = sum([x == ni for x in fn.args])

        # perform the insertion
        ni.setto(fn)

        # TODO: fix the fact that there are potentially multiple backward steps to give the equivalent tree
        # need to use the right grammar for log_probability calculations
        with BVRuleContextManager(grammar, fn, recurse_up=True):

            # what is the prob mass of the new stuff?
            new_lp_below = sum(
                [
                    grammar.log_probability(fn.args[i]) if (i != replace_i and isFunctionNode(fn.args[i])) else 0.0
                    for i in xrange(len(fn.args))
                ]
            )

            # What is the new normalizer?
            newZ = newt.sample_node_normalizer(can_delete_FunctionNode)
            assert newZ > 0

            # forward: choose the node ni, choose the replicating rule, choose which "to" to expand, and generate the rest of the tree
            f = (
                lp
                - nicelog(len(replicating_rules))
                + (nicelog(after_same_children) - nicelog(len(replicatingindices)))
                + new_lp_below
            )
            # backward: choose the inserted node, choose one of the children identical to the original ni, and deterministically delete
            b = (nicelog(1.0 * can_delete_FunctionNode(fn)) - nicelog(newZ)) + (
                nicelog(after_same_children) - nicelog(len(replicatingindices))
            )

    else:  # delete!

        try:  # sample a node at random
            ni, lp = newt.sample_subnode(can_delete_FunctionNode)  # this could raise exception

            if ni.args is None:  # doesn't have children to promote
                raise NodeSamplingException

        except NodeSamplingException:
            raise ProposalFailedException

        # Figure out which of my children have the same type as me
        replicating_kid_indices = filter(
            lambda i: isFunctionNode(ni.args[i]) and ni.args[i].returntype == ni.returntype, range(len(ni.args))
        )
        nrk = len(replicating_kid_indices)  # how many replicating kids
        if nrk == 0:
            raise ProposalFailedException

        replicating_rules = filter(can_delete_GrammarRule, grammar.rules[ni.returntype])
        assert len(replicating_rules) > 0  # better be some or where did ni come from?

        samplei = sample1(replicating_kid_indices)  # who to promote; NOTE: not done via any weighting

        # We need to be in the right grammar state to evaluate log_probability
        with BVRuleContextManager(grammar, ni.args[samplei], recurse_up=True):

            # Now we must count the multiple ways we could go forward or back
            # Here, we could have sampled any of them equivalent to ni.args[i]
            before_same_children = sum([x == ni.args[samplei] for x in ni.args])  # how many are the same after?

            # the lp of everything we'd have to create going backwards
            old_lp_below = sum(
                [
                    grammar.log_probability(ni.args[i]) if (i != samplei and isFunctionNode(ni.args[i])) else 0.0
                    for i in xrange(len(ni.args))
                ]
            )

            # and replace it
            ni.setto(ni.args[samplei])

            newZ = newt.sample_node_normalizer(resampleProbability=can_insert_FunctionNode)

            # forward: choose the node, and then from all equivalent children
            f = lp + (log(before_same_children) - log(nrk))
            # backward: choose the node, choose the replicating rule, choose where to put it, and generate the rest of the tree
            b = (
                (nicelog(1.0 * can_insert_FunctionNode(ni)) - nicelog(newZ))
                - nicelog(len(replicating_rules))
                + (nicelog(before_same_children) - nicelog(nrk))
                + old_lp_below
            )

    return [newt, f - b]