Ejemplo n.º 1
0
    def __init__(self, h0, data, prior_schedule=None, likelihood_schedule=None, **kwargs):
        """Initialize the sampler; both temperature schedules default to a constant 1.0."""
        MHSampler.__init__(self, h0, data, **kwargs)

        # Fall back to untempered (constant 1.0) schedules when none are supplied.
        self.prior_schedule = ConstantSchedule(1.0) if prior_schedule is None else prior_schedule
        self.likelihood_schedule = ConstantSchedule(1.0) if likelihood_schedule is None else likelihood_schedule
def run(data_amount):
    print "Starting chain on %s data points" % data_amount
    data = makeVariableLexiconData(eval(options.word),
                                   options.word,
                                   the_context,
                                   n=data_amount,
                                   s=options.s,
                                   alpha=options.alpha,
                                   verbose=True)

    h0 = KinshipLexicon(words=[options.word], alpha=options.alpha)
    h0.set_word(
        options.word,
        LOTHypothesis(grammar, value=None, display='lambda recurse_, C, X:%s'))

    hyps = TopN(N=options.top_count)

    mhs = MHSampler(h0,
                    data,
                    options.steps,
                    likelihood_temperature=options.llt,
                    prior_temperature=options.prior_temp)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        if samples_yielded % 1000 == 0:
            print h.prior, h.likelihood, h
        hyps.add(h)

    return hyps
Ejemplo n.º 3
0
def run(data_pts):
    print "Start run on ", str(data_pts)

    y = [pt.Y for pt in data_pts]
    filename = "".join(y)

    hyps = TopN(N=options.TOP_COUNT)
    h0 = KinshipLexicon(alpha=options.ALPHA)
    h0.set_word('Word', LOTHypothesis(my_grammar, value=None, display='lambda recurse_, C, X:%s'))
    mhs = MHSampler(h0, data_pts, options.STEPS, likelihood_temperature=options.llt)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        hyps.add(h)

    with open(options.OUT_PATH + filename + '.pkl', 'w') as f:
        pickle.dump(hyps, f)

    return filename, hyps
Ejemplo n.º 4
0
def run(hypothesis, data_amount):
    print "Starting chain on %s data points" % data_amount
    data = makeLexiconData(target,
                           four_gen_tree_context,
                           n=data_amount,
                           alpha=options.alpha,
                           verbose=True)

    h0 = KinshipLexicon(alpha=options.alpha)
    for w in target_words:
        h0.set_word(
            w,
            LOTHypothesis(grammar=my_grammar,
                          value=hypothesis.value[w].value,
                          display='lambda recurse_, C, X: %s'))

    hyps = TopN(N=options.top_count)

    mhs = MHSampler(h0,
                    data,
                    options.steps,
                    likelihood_temperature=options.llt,
                    prior_temperature=options.prior_temp)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        if samples_yielded % 100 == 0:
            pass  #print h.likelihood, h.prior, h
        hyps.add(h)

    import pickle
    print 'Writing ' + data[0].X + data[0].Y + str(
        data_amount) + data[0].word + '.pkl'
    with open(
            'Chains/' + data[0].X + data[0].Y + str(data_amount) +
            data[0].word + '.pkl', 'w') as f:
        pickle.dump(hyps, f)

    return hyps
Ejemplo n.º 5
0
Archivo: Shared.py Proyecto: sa-/LOTlib
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Make up some data
# Let's give data from a simple conjunction (note this example data is not exhaustive)

from LOTlib.DataAndObjects import FunctionData, Obj

# FunctionData takes a list of arguments and a return value. The arguments are objects (which are handled correctly automatically
# by is_color_ and is_shape_
# Labeled examples: only the red square is a positive instance.
data = [
    FunctionData([Obj(shape='square', color='red')], True),
    FunctionData([Obj(shape='square', color='blue')], False),
    FunctionData([Obj(shape='triangle', color='blue')], False),
    FunctionData([Obj(shape='triangle', color='red')], False),
]

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Other standard exports

from LOTlib.Hypotheses.RationalRulesLOTHypothesis import RationalRulesLOTHypothesis


def make_h0(value=None):
    """Construct an initial RationalRules hypothesis over the DNF grammar."""
    hypothesis = RationalRulesLOTHypothesis(grammar=DNF, value=value, rrAlpha=1.0)
    return hypothesis


if __name__ == "__main__":

    from LOTlib.Inference.MetropolisHastings import MHSampler

    for h in MHSampler(make_h0(), data):
        print h
Ejemplo n.º 6
0
    def next(self):
        """Advance one MH step, first refreshing both temperatures from their schedules."""
        self.prior_temperature = self.prior_schedule.next()
        self.likelihood_temperature = self.likelihood_schedule.next()
        return MHSampler.next(self)
Ejemplo n.º 7
0
def run_one(iteration, sampler_type):
    """Evaluate a single sampler configuration.

    Builds the sampler named by `sampler_type` from a dispatch table
    (replacing the original ~190-line if/elif chain) and runs
    `evaluate_sampler` on it, tagging output rows with the model,
    iteration, and sampler type.

    Raises AssertionError on an unknown `sampler_type` (same as before).
    """
    h0 = make_h0()

    # Zero-argument factories: only the requested sampler is constructed.
    factories = {
        # Plain MH at increasing likelihood temperatures.
        'mh_sample_A': lambda: MHSampler(h0, data, options.SAMPLES, likelihood_temperature=1.0),
        'mh_sample_B': lambda: MHSampler(h0, data, options.SAMPLES, likelihood_temperature=1.1),
        'mh_sample_C': lambda: MHSampler(h0, data, options.SAMPLES, likelihood_temperature=1.25),
        'mh_sample_D': lambda: MHSampler(h0, data, options.SAMPLES, likelihood_temperature=2.0),
        'mh_sample_E': lambda: MHSampler(h0, data, options.SAMPLES, likelihood_temperature=5.0),
        # Particle swarm, sweeping within_steps.
        'particle_swarm_s_A': lambda: ParticleSwarm(make_h0, data, steps=options.SAMPLES, within_steps=10),
        'particle_swarm_s_B': lambda: ParticleSwarm(make_h0, data, steps=options.SAMPLES, within_steps=100),
        'particle_swarm_s_C': lambda: ParticleSwarm(make_h0, data, steps=options.SAMPLES, within_steps=200),
        # Particle swarm, sweeping temp_sd.
        'particle_swarm_t_A': lambda: ParticleSwarm(make_h0, data, steps=options.SAMPLES, within_steps=100, temp_sd=0.0001),
        'particle_swarm_t_B': lambda: ParticleSwarm(make_h0, data, steps=options.SAMPLES, within_steps=100, temp_sd=0.1),
        'particle_swarm_t_C': lambda: ParticleSwarm(make_h0, data, steps=options.SAMPLES, within_steps=100, temp_sd=1.0),
        # Prior-resampling particle swarm, same two sweeps.
        'particle_swarm_prior_sample_s_A': lambda: ParticleSwarmPriorResample(make_h0, data, steps=options.SAMPLES, within_steps=10),
        'particle_swarm_prior_sample_s_B': lambda: ParticleSwarmPriorResample(make_h0, data, steps=options.SAMPLES, within_steps=100),
        'particle_swarm_prior_sample_s_C': lambda: ParticleSwarmPriorResample(make_h0, data, steps=options.SAMPLES, within_steps=200),
        'particle_swarm_prior_sample_t_A': lambda: ParticleSwarmPriorResample(make_h0, data, steps=options.SAMPLES, within_steps=100, temp_sd=0.0001),
        'particle_swarm_prior_sample_t_B': lambda: ParticleSwarmPriorResample(make_h0, data, steps=options.SAMPLES, within_steps=100, temp_sd=0.1),
        'particle_swarm_prior_sample_t_C': lambda: ParticleSwarmPriorResample(make_h0, data, steps=options.SAMPLES, within_steps=100, temp_sd=1.0),
        # Independent parallel chains.
        'multiple_chains_A': lambda: MultipleChainMCMC(make_h0, data, steps=options.SAMPLES, nchains=5),
        'multiple_chains_B': lambda: MultipleChainMCMC(make_h0, data, steps=options.SAMPLES, nchains=10),
        'multiple_chains_C': lambda: MultipleChainMCMC(make_h0, data, steps=options.SAMPLES, nchains=100),
        # Parallel tempering with increasingly spread temperature ladders.
        'parallel_tempering_A': lambda: ParallelTemperingSampler(make_h0, data, steps=options.SAMPLES, within_steps=10, temperatures=[1.0, 1.025, 1.05], swaps=1, yield_only_t0=False),
        'parallel_tempering_B': lambda: ParallelTemperingSampler(make_h0, data, steps=options.SAMPLES, within_steps=10, temperatures=[1.0, 1.25, 1.5], swaps=1, yield_only_t0=False),
        'parallel_tempering_C': lambda: ParallelTemperingSampler(make_h0, data, steps=options.SAMPLES, within_steps=10, temperatures=[1.0, 2.0, 5.0], swaps=1, yield_only_t0=False),
        # Taboo MCMC at increasing penalties.
        'taboo_A': lambda: TabooMCMC(h0, data, steps=options.SAMPLES, skip=0, penalty=0.001),
        'taboo_B': lambda: TabooMCMC(h0, data, steps=options.SAMPLES, skip=0, penalty=0.010),
        'taboo_C': lambda: TabooMCMC(h0, data, steps=options.SAMPLES, skip=0, penalty=0.100),
        'taboo_D': lambda: TabooMCMC(h0, data, steps=options.SAMPLES, skip=0, penalty=1.000),
        'taboo_E': lambda: TabooMCMC(h0, data, steps=options.SAMPLES, skip=0, penalty=10.000),
        # Partition MCMC at increasing partition depths.
        'partitionMCMC_d1': lambda: PartitionMCMC(grammar, make_h0, data, 1, steps=options.SAMPLES),
        'partitionMCMC_d2': lambda: PartitionMCMC(grammar, make_h0, data, 2, steps=options.SAMPLES),
        'partitionMCMC_d3': lambda: PartitionMCMC(grammar, make_h0, data, 3, steps=options.SAMPLES),
        'partitionMCMC_d4': lambda: PartitionMCMC(grammar, make_h0, data, 4, steps=options.SAMPLES),
    }

    # Preserve the original failure mode (AssertionError with the same message).
    assert sampler_type in factories, "Bad sampler type: %s" % sampler_type
    sampler = factories[sampler_type]()

    # Run evaluate on it, printing to the right locations
    evaluate_sampler(sampler,
                     prefix="\t".join(
                         map(str, [options.MODEL, iteration, sampler_type])),
                     out_hypotheses=out_hypotheses,
                     out_aggregate=out_aggregate,
                     print_every=options.PRINTEVERY)
Ejemplo n.º 8
0
# Generate some data (DATA_SIZE points).

data = generate_data(DATA_SIZE)

# A starting hypothesis (later ones are created by .propose, called inside
# LOTlib.MetropolisHastings).
h0 = NumberExpression(grammar)

# Store the best N=1000 hypotheses we've found.
allhyp = FiniteBestSet(max=True, N=1000)

from LOTlib.Inference.MetropolisHastings import MHSampler

# A bunch of different MCMC algorithms to try. mh_sample is from the Rational Rules paper and generally works very well.
# Alternatives, kept for reference:
#for h in  LOTlib.Inference.TemperedTransitions.tempered_transitions_sample(initial_hyp, data, 500000, skip=0, temperatures=[1.0, 1.25, 1.5]):
#for h in  LOTlib.Inference.ParallelTempering.parallel_tempering_sample(initial_hyp, data, STEPS, within_steps=10, yield_all=True, temperatures=[1.0,1.05, 1.1]):
for h in MHSampler(h0, data, STEPS, skip=SKIP):
    # Optional trace of each sample's knower pattern, prior, and likelihood.
    if TRACE:
        print q(get_knower_pattern(
            h)), h.compute_prior(), h.compute_likelihood(data), q(h)

    # add h to our priority queue, with priority of its log probability, h.posterior_score
    allhyp.push(h, h.posterior_score)

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
## now re-evaluate everything we found on new data
#huge_data = generate_data(LARGE_DATA_SIZE)

#save this with a huge data set -- eval with average ll
#H = allhyp.get_sorted()

# compute the posterior for each hypothesis
Ejemplo n.º 9
0
"""
        Define a new kind of LOTHypothesis, that gives regex strings.
        These have a special interpretation function that compiles differently than straight python eval.
"""

from LOTlib import lot_iter
from LOTlib.Inference.MetropolisHastings import MHSampler
from LOTlib.Miscellaneous import qq

from Shared import data, make_h0

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if __name__ == "__main__":

    for h in lot_iter(MHSampler(make_h0(), data, steps=10000)):
        print h.posterior_score, h.prior, h.likelihood, qq(h)