def run(data_amount):
    print "Starting chain on %s data points" % data_amount

    data = makeTreeLexiconData(target, the_context, n=data_amount,
                               alpha=options.alpha, epsilon=options.epsilon,
                               verbose=True)

    # Start from a lexicon that maps each target word to a fresh hypothesis
    h0 = KinshipLexicon(alpha=options.alpha)
    for w in target_words:
        h0.set_word(w, LOTHypothesis(my_grammar, display='lambda recurse_, C, X: %s'))

    hyps = TopN(N=options.top_count)

    mhs = MHSampler(h0, data, options.steps,
                    likelihood_temperature=options.llt,
                    prior_temperature=options.prior_temp)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        if samples_yielded % 100 == 0:
            print h.prior, h.likelihood, h
        hyps.add(h)

    import pickle
    print 'Writing ' + data[0].X + data[0].Y + str(data_amount) + data[0].word + '.pkl'
    with open('Chains/' + data[0].X + data[0].Y + str(data_amount) + data[0].word + '.pkl', 'w') as f:
        pickle.dump(hyps, f)

    return hyps
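# A minimal sketch (not part of the original script) of how the pickle written
# by run() could be reloaded and inspected. The x/y/word arguments mirror the
# data[0].X, data[0].Y, and data[0].word components of the filename above and
# are assumptions about the caller's context.
import pickle

def load_chain(x, y, data_amount, word):
    path = 'Chains/' + x + y + str(data_amount) + word + '.pkl'
    with open(path, 'r') as f:
        hyps = pickle.load(f)
    # TopN.get_all(sorted=True) yields hypotheses ordered by posterior score
    for h in hyps.get_all(sorted=True):
        print h.posterior_score, h
    return hyps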
"""
Run inference on each target concept and save the output
"""
import pickle

from LOTlib import break_ctrlc
from LOTlib.TopN import TopN
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
from Model import *
from TargetConcepts import TargetConcepts

NDATA = 20       # How many data points for each function?
NSTEPS = 100000  # How many MCMC steps to run per concept
BEST_N = 500     # How many hypotheses from each concept to store

# Where we keep track of all hypotheses (across concepts)
all_hypotheses = TopN(N=BEST_N)

if __name__ == "__main__":

    # Now loop over each target concept and get a set of hypotheses
    for i, f in enumerate(TargetConcepts):

        # Set up the hypothesis
        h0 = make_hypothesis()

        # Set up some data
        data = make_data(NDATA, f)

        # Now run some MCMC, keeping the top BEST_N samples for this concept
        fs = TopN(N=BEST_N, key="posterior_score")
        fs.add(break_ctrlc(MHSampler(h0, data, steps=NSTEPS, trace=False)))
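        # Hedged sketch (not in the original fragment) of the step this script
        # builds toward: fold each per-concept TopN into all_hypotheses and
        # pickle the combined set. The output filename is an assumption.
        all_hypotheses.update(fs)

    with open('all_hypotheses.pkl', 'w') as f:
        pickle.dump(all_hypotheses, f)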
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Hypothesis
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.Hypotheses.RationalRulesLOTHypothesis import RationalRulesLOTHypothesis

def make_hypothesis(grammar=grammar, **kwargs):
    return RationalRulesLOTHypothesis(grammar=grammar, rrAlpha=1.0, **kwargs)

if __name__ == "__main__":

    from LOTlib.TopN import TopN
    hyps = TopN(N=1000)

    from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
    from LOTlib import break_ctrlc

    mhs = MHSampler(make_hypothesis(), make_data(), 1000000,
                    likelihood_temperature=1., prior_temperature=1.)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        h.ll_decay = 0.  # zero out likelihood decay so stored hypotheses are comparable
        hyps.add(h)

    import pickle
    with open('HypothesisSpace.pkl', 'w') as f:
        pickle.dump(hyps, f)
    d.update(pickle.load(f))

if options.filename2 is not None:
    print "Loading Space 2: " + options.filename2
    with open(options.filename2, 'r') as f:
        d.update(pickle.load(f))

Mass = set()
for a in range(1, 25, 2) + range(25, 251, 25):
    print "Grabbing Top " + str(options.Nsize) + " from " + str(a) + ' dp'
    data = makeZipfianLexiconData(target, four_gen_tree_context, n=a,
                                  alpha=0.9, s=0.0, epsilon=0.0)

    simplicity_mass = TopN(N=options.Nsize)
    reuse_mass = TopN(N=options.Nsize)

    for h in d:
        # Score each hypothesis under the reuse prior...
        h.posterior_score = h.compute_likelihood(data) + compute_reuse_prior(h)
        reuse_mass.add(h)
        # ...and under the standard simplicity prior
        h.compute_posterior(data)
        simplicity_mass.add(h)

    Mass.update(simplicity_mass)
    Mass.update(reuse_mass)

print "Writing output file for " + str(len(Mass)) + ' hypotheses.'
with open(options.out_path, 'w') as f:
    pickle.dump(Mass, f)
c1 = vanilla_conditions(True, False)[0:2]
c2 = vanilla_conditions(False, True)[0:1]

for to_seq in c1:
    for from_seq in c2:
        print_star("")
        print from_seq, to_seq

        data = [FunctionData(alpha=ALPHA,
                             input=[from_seq],
                             output={to_seq: len(to_seq)})]

        h0 = MyHypothesis()
        step = 0
        tn = TopN(N=N_H)

        # Stream from the sampler to a printer
        for h in MHSampler(h0, data, steps=STEPS, acceptance_temperature=5.):
            tn.add(h)

        print
        for h in tn.get_all(sorted=True):
            out = h(from_seq)
            if len(out) >= len(to_seq):
                hd = hamming_distance(out[:len(to_seq)], to_seq)
            else:
                hd = 15  # fixed penalty when the output is shorter than the target
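# hamming_distance is assumed to be defined elsewhere in the project; a
# minimal sketch of the intended behavior (number of mismatched positions
# between two equal-length sequences) would be:
def hamming_distance(a, b):
    assert len(a) == len(b), "sequences must be the same length"
    return sum(x != y for x, y in zip(a, b))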
    databundle = [response, weights]
    generator = MHSampler(learner, databundle, STEPS, skip=SKIP)
    for g in generator:
        hypset.add(VectorizedLexicon_to_SimpleLexicon(g), g.posterior_score)
    return hypset

####################################################################################
## Main running
####################################################################################

# Load the trees from a file
my_finite_trees = pickle.load(open(IN_PATH))
print "# Done loading", len(my_finite_trees), "trees"

# Give an index to each hypothesis
for i, h in enumerate(my_finite_trees):
    hyp2index[h] = i

# Compute or load the proposal distribution
get_proposal_dist(my_finite_trees)

DATA_AMOUNTS = range(0, 2050, 100)
allret = map(run, DATA_AMOUNTS)

# Combine into a single hypothesis set and save
outhyp = TopN()
for r in allret:
    outhyp.update(r)
pickle.dump(outhyp, open(OUT_PATH, 'w'))
def runparts(size, x, p):
    # Problem: right now this only records the last partition, never saving the others.
    print "Start: " + str(x) + " on this many: " + str(size)
    try:
        # Make a new TopN for each data amount
        topn = TopN(N=200, key="posterior_score")
        print "Starting on partition ", p

        # Now we have to go in and fill in the nodes that are nonterminals.
        # We can do this with generate.
        v = grammar.generate(copy(p))
        h0 = MyHypothesis(grammar, value=v)

        # Each observed syllable occurs `size` times in the data
        syllables = ['n i k', 'h i N', 'f a n', 'g i f', 'm a N', 'f a m', 'g i k', 'k a n',
                     'f a f', 'g i n', 'g i m', 'g i s', 's i f', 's i n', 'n i s', 's i m',
                     's i k', 'h a N', 'f i N', 'h i m', 'h i n', 'h a m', 'n i N', 'h i k',
                     'f a s', 'f i n', 'h i f', 'n i m', 'g i N', 'h a g', 's i N', 'n i n',
                     'f i m', 's i s', 'h i s', 'n a s', 'k a s', 'f i s', 'n i f', 'm i n',
                     's a s', 'f a g', 'k a g', 'k a f', 's a m', 'n a f', 'n a g', 'm i N',
                     's a g', 'f i k', 'k a m', 'n a n', 's a f', 'n a m', 'm a s', 'h a f',
                     'h a s', 'n a N', 'm i s', 's a n', 's a N', 'm i k', 'f a N', 'm i m',
                     'm a g', 'm a f', 'f i f', 'k a N', 'h a n', 'm a n', 'm a m', 'm i f']
        data = [FunctionData(input=[], output={s: size for s in syllables})]

        for h in break_ctrlc(MHSampler(h0, data, steps=options.steps, trace=False)):
            # print "\t", h.posterior_score, h
            topn.add(h)

        return size, set(topn)

    except Exception as e:
        print "*** Exception ignored: ", e
        # If we fail, return an empty set
        return size, set()
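# Hedged sketch (not in the original) of how the (size, hypothesis-set) pairs
# returned by runparts might be merged across partitions, addressing the
# "only recording last partition" problem noted above. `results` is a
# hypothetical list of runparts return values.
from collections import defaultdict

hyps_by_size = defaultdict(set)
for size, hyp_set in results:
    hyps_by_size[size].update(hyp_set)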
from LOTlib.TopN import TopN
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
from LOTlib.Projects.Quantifier.Model import *

ALPHA = 0.9
SAMPLES = 100000
DATA_SIZE = 1000

if __name__ == "__main__":

    ## Sample the target data
    data = generate_data(DATA_SIZE)

    W = 'every'

    # To use a LOTHypothesis, the data need an "output" field that is True/False
    # for whether the utterance was the target word. LOTHypothesis.compute_likelihood
    # then checks whether we match whether the word was said, ignoring the other
    # words -- which is why this is a pseudolikelihood.
    for di in data:
        di.output = (di.utterance == W)
        # print (di.word == W)

    FBS = TopN(N=100)

    H = LOTHypothesis(grammar, display='lambda A,B,S: %s', ALPHA=ALPHA)

    # Now just run the sampler with a LOTHypothesis
    for s in MHSampler(H, data, SAMPLES, skip=10):
        # print s.posterior_score, "\t", s.prior, "\t", s.likelihood, "\n", s, "\n\n"
        FBS.add(s)

    for k in reversed(FBS.get_all(sorted=True)):
        print k.posterior_score, k.prior, k.likelihood, k
from LOTlib.DataAndObjects import FunctionData

# Now our data takes input x=3 and maps it to 12.
# What could the function be?
data = [FunctionData(input=[3], output=12, alpha=0.95)]

########################################
## Actually run
########################################

from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
from LOTlib.SampleStream import *

## First let's make a bunch of hypotheses
from LOTlib.TopN import TopN

tn = TopN(1000)

h0 = MyHypothesisX()
for h in MHSampler(h0, data, steps=100000):  # run more steps
    tn.add(h)

# Store these in a list (tn.get_all returns a generator by default)
hypotheses = list(tn.get_all())

# Compute the normalizing constant
from LOTlib.Miscellaneous import logsumexp
z = logsumexp([h.posterior_score for h in hypotheses])

## Now compute a matrix of how likely each input is to go
## to each output
M = 20  # an MxM matrix of values
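# With the normalizing constant z in hand, each hypothesis's posterior
# probability is exp(posterior_score - z). A quick sanity check (added here,
# not in the original) that the probabilities sum to ~1:
from math import exp
posterior_probs = [exp(h.posterior_score - z) for h in hypotheses]
print sum(posterior_probs)  # should be close to 1.0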
        collapsed_prob = grammar.log_probability(collapsed_forms[resps])
        collapsed_forms[resps].my_log_probability = logplusexp(collapsed_prob, tprior)

        if tprior > collapsed_forms[resps].display_tree_probability:
            # Display the most concise form
            collapsed_forms[resps] = t
            collapsed_forms[resps].display_tree_probability = tprior
    else:
        collapsed_forms[resps] = t
        collapsed_forms[resps].display_tree_probability = tprior
        t.my_log_probability = tprior  # FunctionNode uses this value when we call log_probability()
        print ">>", all_tree_count, len(collapsed_forms), t, tprior

############################################
### Now actually enumerate the trees

for t in grammar.enumerate(d=DEPTH):
    if 'presup_(False' in str(t):
        continue
    if not check_expansion(t):
        continue
    if t.count_subnodes() <= MAX_NODES:
        add_to_collapsed_trees(t)
        all_tree_count += 1
        print ">", t, grammar.log_probability(t)

## For kinder saving and unsaving:
upq = TopN()
for k in collapsed_forms.values():
    upq.add(LOTHypothesis(grammar, k, display='lambda context: %s'), 0.0)

pickle.dump(upq, open(OUT, 'w'))
print "Total tree count: ", all_tree_count
        if self.nsamples in self.adapt_at:  ## TODO: Maybe make this faster?
            self.adapt_temperatures()

        return ret


if __name__ == "__main__":

    from LOTlib import break_ctrlc
    from LOTlib.Examples.Number2015.Model import generate_data, make_h0

    data = generate_data(300)

    from LOTlib.TopN import TopN
    z = Z(unique=True)
    tn = TopN(N=10)

    from LOTlib.Miscellaneous import logrange

    sampler = AdaptiveParallelTemperingSampler(make_h0, data, steps=1000000,
                                               yield_only_t0=False,
                                               whichtemperature='acceptance_temperature',
                                               temperatures=logrange(1.0, 10.0, 10))

    for h in break_ctrlc(tn(z(sampler))):
        # print sampler.chain_idx, h.posterior_score, h
        pass

    for x in tn.get_all(sorted=True):
        print x.posterior_score, x

    print z
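# For reference: logrange(1.0, 10.0, 10) above is LOTlib's logarithmically
# spaced range, which we take to be equivalent to the geometric ladder below.
# This is an assumption about logrange's behavior, shown only to clarify the
# temperature schedule:
from math import log10
temperatures = [10.0 ** (i * log10(10.0) / 9.0) for i in range(10)]  # 1.0 ... 10.0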
# from LOTlib.TopN import TopN
# topChoice = TopN(N=10)
# print "\n\n\n\n\n\n_________1 data point____________\n"
# for h in MHSampler(h0, data, steps=5000):
#     print h.posterior_score
#     topChoice.add(h)
# else:
#     print "\n\n\n\n\n_________2 data point____________\n"
#     for h in MHSampler(h0, data_1, steps=5000):
#         print h.posterior_score

# Run and show only the top choices
from LOTlib.TopN import TopN

topChoice = TopN(N=10)
posProbs = []
stepNum = 40000

for step, h in enumerate(MHSampler(h0, make_data(data_size=1), steps=stepNum)):
    if step % 5000 == 0:
        print('current step: %d, current posterior score: %f' % (step, h.posterior_score))
    posProbs.append(h.posterior_score)
    topChoice.add(h)
    h0 = h

# for step, h in enumerate(MHSampler(h0, make_data(data_size=2), steps=stepNum)):
#     if step % 5000 == 0:
#         print('current step: %d, current posterior score: %f' % (step, h.posterior_score))
#     posProbs.append(h.posterior_score)
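# posProbs is collected above but never used in this fragment; a hedged
# sketch of one natural use -- plotting the chain's posterior-score trace.
# matplotlib is an added dependency, not part of the original script.
import matplotlib.pyplot as plt

plt.plot(posProbs)
plt.xlabel('MH step')
plt.ylabel('posterior score')
plt.savefig('posterior_trace.png')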