import pickle

from LOTlib.TopN import TopN
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler

# RUNS, SAMPLES, GRAMMAR_TYPE, target, generate_data, make_my_hypothesis,
# my_weight_function, and GriceanQuantifierLexicon are assumed to be defined
# elsewhere in this script or its Model module.

def construct_hypothesis_space(data_size):
    """Run RUNS chains of MH sampling on data sets of size data_size,
    checkpointing the running top-25 set every 1000 samples."""
    all_hypotheses = TopN()
    print 'Data size:', data_size
    for i in range(RUNS):
        print 'Run:', i
        hypotheses = TopN(25)
        data = generate_data(data_size)

        # A lexicon learner with one hypothesis per word of the target
        learner = GriceanQuantifierLexicon(make_my_hypothesis, my_weight_function)
        for w in target.all_words():
            learner.set_word(w, make_my_hypothesis())

        for j, h in enumerate(MHSampler(learner, data, SAMPLES, skip=0), start=1):
            hypotheses.add(h)
            # Checkpoint the current top hypotheses every 1000 samples
            if j % 1000 == 0:
                path = 'data/hypset_%s_%d_%d.pickle' % (GRAMMAR_TYPE, data_size, j)
                with open(path, 'wb') as f:
                    pickle.dump(hypotheses, f)

        all_hypotheses.update(hypotheses)
    return all_hypotheses
def get_hypotheses():
    """For each data size, load the largest checkpoint that made it to disk."""
    all_hypotheses = TopN()
    for data_size in range(100, MAX_DATA_SIZE, 100):
        hypotheses = None
        sample_size = 10000
        # Walk down from the largest expected checkpoint until one is found
        # on disk; give up once sample_size reaches zero.
        while hypotheses is None and sample_size > 0:
            path = 'data/hypset_%s_%d_%d.pickle' % (GRAMMAR_TYPE, data_size, sample_size)
            try:
                with open(path, 'rb') as f:
                    hypotheses = pickle.load(f)
            except IOError:
                sample_size -= 1000
        if hypotheses is not None:
            all_hypotheses.update(hypotheses)
    return all_hypotheses
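# A minimal driver sketch -- an assumption, not part of the original source.
# It wires the two functions above together: construct_hypothesis_space()
# writes the 'data/hypset_*' checkpoints as a side effect, and
# get_hypotheses() later reloads the largest surviving checkpoint per data
# size. MAX_DATA_SIZE is assumed to be defined alongside the other globals.
if __name__ == "__main__":
    for data_size in range(100, MAX_DATA_SIZE, 100):
        construct_hypothesis_space(data_size)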
import pickle

from LOTlib import break_ctrlc
from LOTlib.TopN import TopN
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
from Model import *
from TargetConcepts import TargetConcepts

NDATA = 20        # How many data points for each function?
NSTEPS = 100000   # How many MCMC steps to run per concept
BEST_N = 500      # How many hypotheses from each concept to store

# Where we keep track of all hypotheses (across concepts)
all_hypotheses = TopN(N=BEST_N)

if __name__ == "__main__":
    # Loop over each target concept and collect a set of hypotheses
    for i, f in enumerate(TargetConcepts):
        # Set up the starting hypothesis
        h0 = make_hypothesis()

        # Set up some data labeled by the target concept
        data = make_data(NDATA, f)

        # Run MCMC, keeping the BEST_N highest-posterior hypotheses
        fs = TopN(N=BEST_N, key="posterior_score")
        for h in break_ctrlc(MHSampler(h0, data, steps=NSTEPS, trace=False)):
            fs.add(h)

        all_hypotheses.update(fs)

    with open("hypotheses.pkl", 'wb') as f_out:
        pickle.dump(all_hypotheses, f_out)
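# A usage sketch -- an assumption, not part of the original source. Assuming
# TopN is iterable (as in recent LOTlib versions), the saved set can be
# reloaded and ranked by posterior score for a quick look at what was kept:
#
#   with open("hypotheses.pkl", 'rb') as f:
#       hyps = pickle.load(f)
#   for h in sorted(hyps, key=lambda x: x.posterior_score, reverse=True)[:10]:
#       print h.posterior_score, h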
    # Tail of the run() function used in the main block below; the earlier
    # part of its body (which defines learner, response, weights, hypset,
    # STEPS, and SKIP) is not shown in this excerpt.
    databundle = [response, weights]
    generator = MHSampler(learner, databundle, STEPS, skip=SKIP)
    for g in generator:
        # Convert each vectorized sample back to a SimpleLexicon before storing
        hypset.add(VectorizedLexicon_to_SimpleLexicon(g), g.posterior_score)
    return hypset

####################################################################################
## Main running
####################################################################################

# Load the trees from a file
with open(IN_PATH, 'rb') as f:
    my_finite_trees = pickle.load(f)
print "# Done loading", len(my_finite_trees), "trees"

# Give an index to each hypothesis
hyp2index = {}   # assumed not to be pre-populated elsewhere
for i, h in enumerate(my_finite_trees):
    hyp2index[h] = i

# Compute or load proposals
get_proposal_dist(my_finite_trees)

# Run inference at each amount of data
DATA_AMOUNTS = range(0, 2050, 100)
allret = map(run, DATA_AMOUNTS)

# Combine into a single hypothesis set and save
outhyp = TopN()
for r in allret:
    outhyp.update(r)

with open(OUT_PATH, 'wb') as f:
    pickle.dump(outhyp, f)
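# Note -- an assumption, not part of the original source. The map() above runs
# the data amounts serially under Python 2. If run()'s argument and return
# value pickle cleanly across processes (not guaranteed for LOTlib lexicons),
# the same line could be dispatched to a process pool instead:
#
#   from multiprocessing import Pool
#   allret = Pool().map(run, DATA_AMOUNTS)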