Beispiel #1
0
    def test_RegenerationProposal(self):
        from LOTlib.Inference.Proposals.RegenerationProposal import RegenerationProposal
        rp = RegenerationProposal(self.grammar)

        for tree in self.trees:
            cnt = Counter()
            for _ in xrange(NSAMPLES):
                p, fb = rp.propose_tree(tree)
                cnt[p] += 1

                # Check the proposal
                self.check_tree(p)

            ## check that the proposals are what they should be -- rp.lp_propose is correct!
            obsc = [cnt[t] for t in self.trees]
            expc = [exp(self.grammar.log_probability(t))*sum(obsc) for t in self.trees]
            csq, pv = chisquare([cnt[t] for t in self.trees],
                                [exp(rp.lp_propose(tree, x))*NSAMPLES for x in self.trees])

            # Look at some
            # print ">>>>>>>>>>>", tree
            # for p in self.trees:
            #     print "||||||||||", p
            #     v = rp.lp_propose(tree,p)
            #     print "V=",v

            for c, e, tt in zip([cnt[t] for t in self.trees],
                               [exp(rp.lp_propose(tree, x))*NSAMPLES for x in self.trees],
                               self.trees):
                print c, e, tt, rp.lp_propose(tree,tt)

            self.assertGreater(pv, 0.001, msg="Sampler failed chi squared!")
Beispiel #2
0
    def test_lp_regenerate_propose_to(self):
        # import the grammar
        from Grammars import lp_regenerate_propose_to_grammar
        self.G = lp_regenerate_propose_to_grammar.g
        # the RegenerationProposal class
        rp = RegenerationProposal(self.G)
        numTests = 100
        # Sample 1000 trees from the grammar, and run a chi-squared test for each of them
        for i in break_ctrlc(range(numTests)):
            # keep track of expected and actual counts
            # expected_counts = defaultdict(int) # a dictionary whose keys are trees and values are the expected number of times we should be proposing to this tree
            actual_counts = defaultdict(int) # same as expected_counts, but stores the actual number of times we proposed to a given tree
            tree = self.G.generate('START')

            # Regenerate some number of trees at random
            numTrees = 1000
            for j in range(numTrees):
                newtree = rp.propose_tree(tree)[0]
                # trees.append(newtree)
                actual_counts[newtree] += 1
            # see if the frequency with which each category of trees is generated matches the
            # expected counts using a chi-squared test
            chisquared, p = self.get_pvalue(tree, actual_counts, numTrees)
            # print chisquared, p
            # if p > 0.01/1000, test passes
            self.assertTrue(p > 0.01/numTests, "Trees are not being generated according to the expected log probabilities")
            if i % 10 == 0 and i != 0: print i, "lp_regenerate_propose_to tests..."
        print numTests, "lp_regenerate_propose_to tests..."
Beispiel #3
0
 def test_log_probability_proposals_FiniteWithoutBVArgs(self):
     """Check ``log_probability()`` of proposed trees on FiniteWithoutBVArgs.

     100 times: generate a tree from 'START', propose a new tree from it, and
     require that the proposed tree's ``log_probability()`` agrees (within
     1e-8) with the value computed by ``FiniteWithoutBVArgs.log_probability``.
     """
     # import the grammar
     from Grammars import FiniteWithoutBVArgs
     self.G = FiniteWithoutBVArgs.g
     # the RegenerationProposal class
     rp = RegenerationProposal(self.G)
     # sample from G 100 times
     for _ in range(100):
         X = self.G.generate('START')
         # propose to a new tree
         Y = rp.propose_tree(X)[0]
         # reference value computed by the grammar module itself
         prob = FiniteWithoutBVArgs.log_probability(Y)
         # assertAlmostEqual reports both values on failure and treats two
         # equal infinite log probabilities as equal (their difference is NaN).
         self.assertAlmostEqual(prob, Y.log_probability(), delta=0.00000001)
class RestrictedRegenProposal(RegenerationProposal):
    """
    A standard regen proposal but with a restriction on which types are valid to
    regenerate. Specify *EITHER* a whitelist (of valid types) or a blacklist (of
    invalid types).

    If both are given, a non-empty whitelist takes precedence and the blacklist
    is ignored. If neither is given (or both are empty), every node is valid.
    """
    def __init__(self, grammar, whitelist=None, blacklist=None, **kwargs):
        """Store the restriction and build the wrapped RegenerationProposal.

        grammar   -- passed to the wrapped RegenerationProposal
        whitelist -- container of return types that may be regenerated
        blacklist -- container of return types that may not be regenerated
        kwargs    -- forwarded unchanged to RegenerationProposal
        """
        # Assign explicitly rather than self.__dict__.update(locals()): that
        # idiom also stored `self` on the instance, creating a reference cycle.
        self.grammar = grammar
        self.whitelist = whitelist
        self.blacklist = blacklist
        self.kwargs = kwargs
        # NOTE(review): the base-class __init__ is not invoked here; all
        # proposing is delegated to this wrapped instance.
        self.regen_proposal = RegenerationProposal(grammar, **kwargs)

    def propose_tree(self, tree):
        """Propose from *tree*, passing the type restriction as resampleProbability."""
        def isvalid(node):
            # A non-empty whitelist wins over any blacklist.
            if self.whitelist:
                return node.returntype in self.whitelist
            if self.blacklist:
                return node.returntype not in self.blacklist
            return True

        return self.regen_proposal.propose_tree(tree, resampleProbability=isvalid)
 def __init__(self, grammar, whitelist=None, blacklist=None, **kwargs):
     """Store the type restriction and build the wrapped RegenerationProposal.

     grammar   -- passed to the wrapped RegenerationProposal
     whitelist -- optional container of allowed types
     blacklist -- optional container of disallowed types
     kwargs    -- forwarded unchanged to RegenerationProposal
     """
     # Assign explicitly rather than self.__dict__.update(locals()): that
     # idiom also stored `self` on the instance, creating a reference cycle.
     self.grammar = grammar
     self.whitelist = whitelist
     self.blacklist = blacklist
     self.kwargs = kwargs
     self.regen_proposal = RegenerationProposal(grammar, **kwargs)
Beispiel #6
0
 def propose_tree(self, t):
     """Delegate to RegenerationProposal.propose_tree with a per-node callable.

     The callable passed as ``resampleProbability`` returns its argument's
     ``resample_p`` attribute, or 1.0 when the attribute is absent.
     """
     def resample_weight(node):
         return getattr(node, 'resample_p', 1.0)

     return RegenerationProposal.propose_tree(
         self, t, resampleProbability=resample_weight)
Beispiel #7
0
 def propose_tree(self, t):
     """Call the base propose_tree, reading ``resample_p`` off each argument.

     ``resampleProbability`` receives a function that yields the given
     object's ``resample_p`` attribute and falls back to 1.0 if it has none.
     """
     def node_resample_p(node):
         return getattr(node, 'resample_p', 1.0)

     proposal = RegenerationProposal.propose_tree(
         self, t, resampleProbability=node_resample_p)
     return proposal
Beispiel #8
0
    # Register two more rules on the grammar: a 'lambda' rule producing
    # LAMBDA_WORD (with bv_type='WORD') and an 'apply_' rule that takes a
    # LAMBDA_WORD and a WORD and produces a WORD.
    grammar.add_rule('LAMBDA_WORD', 'lambda', ['WORD'], 1.0, bv_type='WORD')
    grammar.add_rule('WORD', 'apply_', ['LAMBDA_WORD', 'WORD'], 1.0)

    p = InverseInlineProposer(grammar)
    # The bare string literal below is disabled code (it is evaluated as a
    # no-op expression), kept for manually inspecting proposals.
    """
        # Just look at some proposals
        for _ in xrange(200):
            t = grammar.generate()
            print ">>", t
            #assert t.check_parent_refs()

            for _ in xrange(10):
                t =  p.propose_tree(t)[0]
                print "\t", t

        """
    # Run MCMC -- more informative about f-b errors
    from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler

    from LOTlib.Inference.Proposals.MixtureProposal import MixtureProposal
    from LOTlib.Inference.Proposals.RegenerationProposal import RegenerationProposal

    # Starting hypothesis whose proposal function mixes the inverse-inline
    # proposer with a plain regeneration proposer.
    h = make_h0(proposal_function=MixtureProposal(
        [InverseInlineProposer(grammar),
         RegenerationProposal(grammar)]))
    # NOTE(review): 100 is presumably the number of data points -- confirm
    # against generate_data.
    data = generate_data(100)
    # The loop rebinds `h` to each item yielded by the sampler and prints its
    # scores, its knower pattern, and the hypothesis itself.
    for h in break_ctrlc(MHSampler(h, data)):
        print h.posterior_score, h.prior, h.likelihood, get_knower_pattern(
            h), h