Example #1
def run_one(iteration, proposal_type, proposal_param=None):

    m = None
    if proposal_type == 'InsertDeleteMixture':
        m = MixtureProposal(
            [RegenerationProposal(grammar),
             InsertDeleteProposal(grammar)],
            probs=[proposal_param, 1. - proposal_param])
    elif proposal_type == 'RegenerationProposal':
        m = RegenerationProposal(grammar)
    else:
        raise NotImplementedError(proposal_type)

    # define a wrapper to set this proposal
    def wrapped_make_h0():
        h0 = make_h0()
        h0.set_proposal_function(m)
        return h0

    sampler = MultipleChainMCMC(wrapped_make_h0,
                                data,
                                steps=options.SAMPLES,
                                nchains=options.CHAINS)

    # Run evaluate on it, printing to the right locations
    evaluate_sampler(
        sampler,
        prefix="\t".join(
            map(str,
                [options.MODEL, iteration, proposal_type, proposal_param])),
        out_hypotheses=out_hypotheses,
        out_aggregate=out_aggregate)
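For orientation, here is a minimal sketch of how run_one might be driven from an outer loop. The iteration count and the mixture weights below are illustrative assumptions, not values taken from the original script:

# Hypothetical driver: sweep proposal types and (for the mixture) the regeneration weight.
for iteration in range(10):
    run_one(iteration, 'RegenerationProposal')
    for p in [0.25, 0.5, 0.75]:
        run_one(iteration, 'InsertDeleteMixture', proposal_param=p)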
Example #2
    def test_lp_regenerate_propose_to(self):
        # import the grammar
        from LOTlibTest.Grammars import lp_regenerate_propose_to_grammar
        self.G = lp_regenerate_propose_to_grammar.g
        # the RegenerationProposal class
        rp = RegenerationProposal(self.G)
        numTests = 100
        # Sample numTests trees from the grammar, and run a chi-squared test for each of them
        for i in lot_iter(range(numTests)):
            # keep track of expected and actual counts
            # expected_counts = defaultdict(int) # a dictionary whose keys are trees and values are the expected number of times we should be proposing to this tree
            actual_counts = defaultdict(int) # same as expected_counts, but stores the actual number of times we proposed to a given tree
            tree = self.G.generate('START')

            # Regenerate some number of trees at random
            numTrees = 1000
            for j in range(numTrees):
                newtree = rp.propose_tree(tree)[0]
                # trees.append(newtree)
                actual_counts[newtree] += 1
            # see if the frequency with which each category of trees is generated matches the
            # expected counts using a chi-squared test
            chisquared, p = self.get_pvalue(tree, actual_counts, numTrees)
            # print chisquared, p
            # if p > 0.01/numTests, the test passes (Bonferroni-corrected threshold)
            self.assertTrue(p > 0.01/numTests, "Trees are not being generated according to the expected log probabilities")
            if i % 10 == 0 and i != 0: print i, "lp_regenerate_propose_to tests..."
        print numTests, "lp_regenerate_propose_to tests..."
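The get_pvalue helper is not shown in this excerpt. As a rough sketch of the comparison it presumably performs, one could turn the proposal's log probabilities into expected counts and apply a chi-squared test. The function below, including its name and its log_probs argument, is an illustrative assumption and not the test suite's actual implementation:

import math
from scipy.stats import chisquare

def approx_pvalue(log_probs, actual_counts, num_samples):
    # log_probs: {tree: log probability of proposing to that tree}
    # actual_counts: {tree: observed number of proposals to that tree}
    # Assumes log_probs covers every tree the proposal can reach, so the
    # expected counts sum to num_samples, as chisquare requires.
    trees = list(log_probs.keys())
    expected = [num_samples * math.exp(log_probs[t]) for t in trees]
    observed = [actual_counts.get(t, 0) for t in trees]
    stat, p = chisquare(f_obs=observed, f_exp=expected)
    return stat, p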
Example #3
    def test_lp_regenerate_propose_to(self):
        # import the grammar
        from LOTlibTest.Grammars import lp_regenerate_propose_to_grammar
        self.G = lp_regenerate_propose_to_grammar.g
        # the RegenerationProposal class
        rp = RegenerationProposal(self.G)
        numTests = 100
        # Sample numTests trees from the grammar, and run a chi-squared test for each of them
        for i in lot_iter(range(numTests)):
            # keep track of expected and actual counts
            # expected_counts = defaultdict(int) # a dictionary whose keys are trees and values are the expected number of times we should be proposing to this tree
            actual_counts = defaultdict(
                int
            )  # same as expected_counts, but stores the actual number of times we proposed to a given tree
            tree = self.G.generate('START')

            # Regenerate some number of trees at random
            numTrees = 1000
            for j in range(numTrees):
                newtree = rp.propose_tree(tree)[0]
                # trees.append(newtree)
                actual_counts[newtree] += 1
            # see if the frequency with which each category of trees is generated matches the
            # expected counts using a chi-squared test
            chisquared, p = self.get_pvalue(tree, actual_counts, numTrees)
            # print chisquared, p
            # if p > 0.01/numTests, the test passes (Bonferroni-corrected threshold)
            self.assertTrue(
                p > 0.01 / numTests,
                "Trees are not being generated according to the expected log probabilities"
            )
            if i % 10 == 0 and i != 0:
                print i, "lp_regenerate_propose_to tests..."
        print numTests, "lp_regenerate_propose_to tests..."
Example #4
    def test_log_probability_proposals_FiniteWithoutBVArgs(self):
        # import the grammar
        from LOTlibTest.Grammars import FiniteWithoutBVArgs
        self.G = FiniteWithoutBVArgs.g
        # the RegenerationProposal class
        rp = RegenerationProposal(self.G)
        # sample from G 100 times
        for i in range(100):
            X = self.G.generate('START')
            # propose to a new tree
            Y = rp.propose_tree(X)[0]
            # compute the log probability manually via the test grammar module
            prob = FiniteWithoutBVArgs.log_probability(Y)
            # print X, Y, prob, Y.log_probability(), prob - Y.log_probability()
            # check that it's equal to .log_probability()
            self.assertTrue(math.fabs(prob - Y.log_probability()) < 0.00000001)
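Minor note on the tolerance check: the hand-rolled absolute-difference assertion on the last line can also be written with unittest's built-in helper. A drop-in alternative for that assertion (places=8 is roughly the 1e-8 threshold used above):

            self.assertAlmostEqual(prob, Y.log_probability(), places=8)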
Example #5
    def __init__(self,
                 grammar,
                 value=None,
                 f=None,
                 start='START',
                 ALPHA=0.9,
                 maxnodes=25,
                 args=['x'],
                 proposal_function=None,
                 **kwargs):
        """
                *grammar* - The grammar for the hypothesis (specified in Grammar.py)

                *value* - the value for the hypothesis

                *f* - if specified, we don't recompile the whole function

                *start* - The start symbol for the grammar

                *ALPHA* - parameter for compute_single_likelihood that

                *maxnodes* - the maximum amount of nodes that the grammar can have

                *args* - The arguments to the function

                *proposal_function* - function that tells the program how to transition from one tree to another
                (by default, it uses the RegenerationProposal function)
        """

        # save all of our keyword arguments on self
        self.__dict__.update(locals())
        if value is None:
            value = grammar.generate(self.start)

        FunctionHypothesis.__init__(self,
                                    value=value,
                                    f=f,
                                    args=args,
                                    **kwargs)
        # Save a proposal function
        ## TODO: How to handle this in copying?
        if proposal_function is None:
            self.proposal_function = RegenerationProposal(self.grammar)

        self.likelihood = 0.0
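A minimal construction sketch, assuming this constructor is LOTHypothesis.__init__ (as the import in the next example suggests) and that a grammar object is already defined. With no proposal_function argument, a RegenerationProposal over the grammar is created internally:

# Default: a RegenerationProposal over `grammar` is built for us.
h = LOTHypothesis(grammar, args=['x'])

# Explicit proposal function, mirroring the MCMC example below.
rp = RegenerationProposal(grammar)
h = LOTHypothesis(grammar, args=['x', 'y'], ALPHA=0.9, proposal_function=rp)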
Example #6
for a, b in itertools.product(objects, objects):

    myinput = [a, b]

    # opposites (n/p) interact; x interacts with nothing
    myoutput = (a[0] != b[0]) and (a[0] != 'x') and (b[0] != 'x')

    data.append(FunctionData(input=myinput, output=myoutput))
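# To make the interaction rule concrete (illustration only: the real `objects`
# list is defined earlier in the full script and is not shown in this excerpt),
# suppose the objects are strings such as 'n1', 'p1', 'x1', where the first
# character encodes the type. Then:
#   ('n1', 'p1') -> True    (opposite types interact)
#   ('n1', 'n2') -> False   (same type)
#   ('x1', 'p1') -> False   ('x' interacts with nothing)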

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Run mcmc
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if __name__ == "__main__":

    from LOTlib.Proposals.RegenerationProposal import RegenerationProposal
    #mp = MixtureProposal([RegenerationProposal(grammar), InsertDeleteProposal(grammar)] )
    mp = RegenerationProposal(grammar)

    from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
    # ALPHA trades off against the amount of data; 0.999 assumes essentially no noise,
    # though that is not necessary.
    h0 = LOTHypothesis(grammar, args=['x', 'y'], ALPHA=0.999, proposal_function=mp)

    from LOTlib.Inference.MetropolisHastings import mh_sample
    for h in mh_sample(h0, data, 4000000, skip=100):
        print h.posterior_score, h.likelihood, h.prior, cleanFunctionNodeString(h)
        #print map( lambda d: h(*d.input), data)
        #print "\n"