def run(llt=1.0):
    """Run one MH chain at likelihood temperature llt and return the top hypotheses."""
    h0 = CCGLexicon(make_hypothesis, words=all_words, alpha=0.9, palpha=0.9,
                    likelihood_temperature=llt)

    # Keep only the 10 best hypotheses seen during sampling
    fbs = FiniteBestSet(N=10)

    from LOTlib.Inference.MetropolisHastings import mh_sample
    for h in lot_iter(mh_sample(h0, data, SAMPLES)):
        fbs.add(h, h.posterior_score)

    return fbs
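A minimal sketch of how this run function might be driven, assuming the surrounding CCG example supplies make_hypothesis, all_words, data, and SAMPLES; the temperature values and chain count below are illustrative, not taken from the original:

# Hypothetical driver: run chains at a few likelihood temperatures and pool
# their top hypotheses into one FiniteBestSet.
if __name__ == "__main__":
    best = FiniteBestSet(N=10)
    for llt in [0.5, 1.0, 2.0]:   # illustrative temperatures
        best.merge(run(llt))

    for h in best.get_all(sorted=True):
        print h.posterior_score, h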
def run(*args): """The running function.""" # starting hypothesis -- here this generates at random h0 = GaussianLOTHypothesis(grammar) # We store the top 100 from each run pq = FiniteBestSet(N=100, max=True, key="posterior_score") pq.add(MHSampler(h0, data, STEPS, skip=SKIP)) return pq
def run(*args):
    # starting hypothesis -- here this generates at random
    h0 = GaussianLOTHypothesis(grammar, prior_temperature=PRIOR_TEMPERATURE)

    # We store the top 100 from each run
    pq = FiniteBestSet(100, max=True, key="posterior_score")
    pq.add(mh_sample(h0, data, STEPS, skip=SKIP))

    return pq
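These run functions return a FiniteBestSet so that results from independent chains can be pooled; a sketch of one way to do that (the chain count and the pooling step are assumed here, not taken from the original):

# Hypothetical driver: run several independent chains and merge their top-100 sets.
CHAINS = 4   # assumed value

all_best = FiniteBestSet(N=100, max=True, key="posterior_score")
for fs in map(run, range(CHAINS)):
    all_best.merge(fs)

for h in all_best.get_all(sorted=True):
    print h.posterior_score, h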
import pickle

from LOTlib import lot_iter  # lets ctrl-C stop sampling cleanly
from LOTlib.FiniteBestSet import FiniteBestSet
from LOTlib.Inference.MetropolisHastings import MHSampler
from Model import *

NDATA = 50    # How many total data points?
NSTEPS = 10000
BEST_N = 100  # How many top hypotheses to store for each concept
OUTFILE = "hypotheses.pkl"

# Where we keep track of all hypotheses (across concepts)
all_hypotheses = FiniteBestSet()

if __name__ == "__main__":

    # Now loop over each target concept and get a set of hypotheses
    for i, f in enumerate(TARGET_CONCEPTS):

        # Set up the hypothesis
        h0 = LOTHypothesis(grammar, start='START', args=['x'])

        # Set up some data
        data = generate_data(NDATA, f)

        # Now run some MCMC
        fs = FiniteBestSet(N=BEST_N, key="posterior_score")
        fs.add(lot_iter(MHSampler(h0, data, steps=NSTEPS, trace=False)))

        all_hypotheses.merge(fs)

    pickle.dump(all_hypotheses, open(OUTFILE, 'w'))
import pickle

from LOTlib import break_ctrlc
from LOTlib.FiniteBestSet import FiniteBestSet
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
from Model import *
from TargetConcepts import TargetConcepts

NDATA = 20    # How many data points for each function?
NSTEPS = 100000
BEST_N = 500  # How many top hypotheses to store for each concept

# Where we keep track of all hypotheses (across concepts)
all_hypotheses = FiniteBestSet()

if __name__ == "__main__":

    # Now loop over each target concept and get a set of hypotheses
    for i, f in enumerate(TargetConcepts):

        # Set up the hypothesis
        h0 = make_hypothesis()

        # Set up some data
        data = make_data(NDATA, f)

        # Now run some MCMC
        fs = FiniteBestSet(N=BEST_N, key="posterior_score")
        fs.add(break_ctrlc(MHSampler(h0, data, steps=NSTEPS, trace=False)))

        all_hypotheses.merge(fs)

    pickle.dump(all_hypotheses, open("hypotheses.pkl", "w"))
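Once either script finishes, the pickled FiniteBestSet can be read back for later analysis; a minimal sketch (only the filename comes from the scripts above, the rest is illustrative):

import pickle

# Hypothetical follow-up: reload the stored hypotheses and inspect the best ones.
with open("hypotheses.pkl", "r") as f:
    hypotheses = pickle.load(f)

for h in hypotheses.get_all(sorted=True):
    print h.posterior_score, h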
import heapq
from collections import defaultdict
from random import random

import LOTlib
from LOTlib.FiniteBestSet import FiniteBestSet
from LOTlib.Miscellaneous import q
# NOTE: get_knower_pattern (used in the debug printout below) is assumed to be
# provided by the surrounding model, as in the Number example.


def novelty_search(h0s, data, grammar, props=10, novelty_advantage=100):
    """
    Search through hypotheses, maintaining a queue of good ones. We propose to ones
    based on their posterior and how much they've been proposed to in the past.
    See heapweight(h) below -- it determines how we trade off posterior and novelty
    in the search.

    SO: You are searched further if you are good, and in a "novel" part of the space.

    TODO: We could make this track the performance of proposals from a given hypothesis?
    """
    # last time we proposed here, what proportion were novel? If we haven't done any, set to 1.0
    novelty = defaultdict(float)
    froms = defaultdict(int)  # how many times did we propose from this?
    tos = defaultdict(int)    # how many times did we propose to this?

    FS = FiniteBestSet(N=10)

    # When we add something to the heap, what weight does it have?
    # This should prefer high log probability, but also it should
    # keep us from looking at things too much. Lower weight = popped sooner.
    def heapweight(h):
        return -h.lp - novelty[h]*novelty_advantage

    openset = []
    for h0 in h0s:
        if h0 not in novelty:
            h0.compute_posterior(data)
            heapq.heappush(openset, (heapweight(h0), h0))
            novelty[h0] = 1.0  # treat as totally novel
            FS.add(h0, h0.lp)

    while not LOTlib.SIG_INTERRUPTED:
        lph, h = heapq.heappop(openset)

        froms[h] += 1

        print "\n"
        print len(openset), "\t", h.lp, "\t", heapweight(h), "\t", novelty[h], "\t", froms[h], tos[h], "\t", q(h)
        for x in FS.get_all(sorted=True):
            print "\t", x.lp, "\t", heapweight(x), "\t", novelty[x], "\t", q(get_knower_pattern(h)), "\t", froms[x], tos[x], "\t", q(x)

        # Store all together so we know who to update (we make their novelty the same as their parent's)
        proposals = [h.propose()[0] for i in xrange(props)]
        new_proposals = []  # which are new?
        novelprop = 0
        for p in proposals:
            if p not in novelty:
                p.compute_posterior(data)
                FS.add(p, p.lp)
                novelty[p] = "ERROR"  # just keep track -- should be overwritten later
                novelprop += 1
                new_proposals.append(p)
            tos[p] += 1

        # the parent's novelty is the proportion of its proposals that were new
        novelty[h] = float(novelprop) / float(props)

        # new proposals inherit (a random fraction of) the novelty from the parent
        for p in new_proposals:
            novelty[p] = random() * novelty[h]
            heapq.heappush(openset, (heapweight(p), p))

        # and put myself back on the heap, but with the new proposal numbers
        heapq.heappush(openset, (heapweight(h), h))
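A sketch of how novelty_search might be invoked, assuming data, grammar, and a make_hypothesis constructor are provided by the surrounding model, as in the scripts above; the number of random seed hypotheses is illustrative:

# Hypothetical driver: seed the queue with a few random hypotheses and search
# until interrupted (ctrl-C sets LOTlib.SIG_INTERRUPTED, which ends the loop above).
if __name__ == "__main__":
    h0s = [make_hypothesis() for _ in xrange(10)]   # illustrative number of seeds
    novelty_search(h0s, data, grammar)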