def run(data_amount): print "Starting chain on %s data points" % data_amount data = makeVariableLexiconData(eval(options.word), options.word, the_context, n=data_amount, s=options.s, alpha=options.alpha, verbose=True) h0 = KinshipLexicon(words=[options.word], alpha=options.alpha) h0.set_word( options.word, LOTHypothesis(grammar, value=None, display='lambda recurse_, C, X:%s')) hyps = TopN(N=options.top_count) mhs = MHSampler(h0, data, options.steps, likelihood_temperature=options.llt, prior_temperature=options.prior_temp) for samples_yielded, h in break_ctrlc(enumerate(mhs)): if samples_yielded % 1000 == 0: print h.prior, h.likelihood, h hyps.add(h) return hyps
def standard_sample(make_hypothesis, make_data, skip=9, show=True, N=100, save_top='top.pkl', alsoprint='None', **kwargs): """ Just a simplified interface for sampling, allowing printing (showing), returning the top, and saving. This is used by many examples, and is meant to easily allow running with a variety of parameters. NOTE: This skip is a skip *only* on printing **kwargs get passed to sampler """ if LOTlib.SIG_INTERRUPTED: return TopN() # So we don't waste time! h0 = make_hypothesis() data = make_data() best_hypotheses = TopN(N=N) f = eval(alsoprint) for i, h in enumerate(break_ctrlc(MHSampler(h0, data, **kwargs))): best_hypotheses.add(h) if show and i%(skip+1) == 0: print i, \ h.posterior_score, \ h.prior, \ h.likelihood, \ f(h) if f is not None else '', \ qq(cleanFunctionNodeString(h)) if save_top is not None: print "# Saving top hypotheses" with open(save_top, 'w') as f: pickle.dump(best_hypotheses, f) return best_hypotheses
def probe_MHsampler(h, language, options, name, size=64, data=None, init_size=None, iters_per_stage=None, sampler=None, ret_sampler=False): get_data = language.sample_data_as_FuncData evaluation_data = get_data(size, max_length=options.FINITE) if data is None: if init_size is None: data = evaluation_data else: data = get_data(n=size, max_length=init_size) if sampler is None: sampler = MHSampler(h, data) else: sampler.data = data best_hypotheses = TopN(N=options.TOP_COUNT) iter = 0 for h in sampler: if iter == options.STEPS: break if iter % 100 == 0: print '---->', iter best_hypotheses.add(h) if iter % options.PROBE == 0: for h in best_hypotheses: h.compute_posterior(evaluation_data) Z = logsumexp([h.posterior_score for h in best_hypotheses]) pr_data = get_data(1024, max_length=options.FINITE) weighted_score = 0 for h in best_hypotheses: precision, recall = language.estimate_precision_and_recall(h, pr_data) if precision + recall != 0: f_score = precision * recall / (precision + recall) weighted_score += np.exp(h.posterior_score - Z) * f_score weighted_score *= 2 to_file([[iter, Z, weighted_score]], name) if init_size is not None and iter % iters_per_stage == 0: init_size += 2 sampler.data = get_data(n=size, max_length=init_size) iter += 1 if ret_sampler: return sampler
def run(data_amount): print "Starting chain on %s data points"%data_amount data = makeLexiconData(target, four_gen_tree_context, n=data_amount, alpha=options.alpha) h0 = KinshipLexicon(alpha=options.alpha) for w in target_words: h0.set_word(w, LOTHypothesis(my_grammar, args=['recurse_','C', 'X'])) hyps = TopN(N=options.top_count) mhs = MHSampler(h0, data, options.steps, likelihood_temperature=options.llt, prior_temperature=options.prior_temp) for samples_yielded, h in break_ctrlc(enumerate(mhs)): hyps.add(h) return hyps
def run(data_pts): print "Start run on ", str(data_pts) y = [pt.Y for pt in data_pts] filename = "".join(y) hyps = TopN(N=options.TOP_COUNT) h0 = KinshipLexicon(alpha=options.ALPHA) h0.set_word('Word', LOTHypothesis(my_grammar, value=None, display='lambda recurse_, C, X:%s')) mhs = MHSampler(h0, data_pts, options.STEPS, likelihood_temperature=options.llt) for samples_yielded, h in break_ctrlc(enumerate(mhs)): hyps.add(h) with open(options.OUT_PATH + filename + '.pkl', 'w') as f: pickle.dump(hyps, f) return filename, hyps
def run(data_amount): print "Starting chain on %s data points" % data_amount data = makeVariableLexiconData(eval(options.word), options.word, the_context, n=data_amount, s=options.s, alpha=options.alpha, verbose=True) h0 = KinshipLexicon(words=[options.word], alpha=options.alpha) h0.set_word(options.word, LOTHypothesis(grammar, value=None, args=['recurse_', 'C', 'X'])) hyps = TopN(N=options.top_count) mhs = MHSampler(h0, data, options.steps, likelihood_temperature=options.llt, prior_temperature=options.prior_temp) for samples_yielded, h in break_ctrlc(enumerate(mhs)): #if samples_yielded % 100 == 0: # print h.prior, h.likelihood, h hyps.add(h) return hyps
# MPI: fan chains out over workers and pool their hypothesis sets.
if options.mpi:
    hypotheses = set()
    hypo_sets = MPI_unorderedmap(mpirun, [[d] for d in data * options.chains])
    for hypo_set in hypo_sets:
        hypotheses = hypotheses.union(hypo_set)
# No MPI: run every (data point x chain) sequentially in this process.
else:
    hypotheses = set()
    if options.grammar_scale:
        grammar = grammar_gamma(grammar, options.grammar_scale)
    for d in data * options.chains:
        h0 = NumberGameHypothesis(grammar=grammar, domain=options.domain, alpha=options.alpha)
        mh_sampler = MHSampler(h0, [d], options.iters)
        chain_hypos = TopN(N=options.N)
        for h in break_ctrlc(mh_sampler):
            chain_hypos.add(h)
        hypotheses = hypotheses.union(chain_hypos.get_all())

# --------------------------------------------------------------------------------------------------------
# Save hypotheses.
# `with` guarantees the pickle is flushed and the file closed; the original
# opened the file and never closed it.
with open(options.filename, "wb") as f:
    pickle.dump(hypotheses, f)
# MPI: fan chains out over workers and pool their hypothesis sets.
if options.mpi:
    hypotheses = set()
    hypo_sets = MPI_unorderedmap(mpirun, [[d] for d in data * options.chains])
    for hypo_set in hypo_sets:
        hypotheses = hypotheses.union(hypo_set)
# No MPI: run every (data point x chain) sequentially in this process.
else:
    hypotheses = set()
    if options.grammar_scale:
        grammar = grammar_gamma(grammar, options.grammar_scale)
    for d in data * options.chains:
        h0 = NumberGameHypothesis(grammar=grammar, domain=options.domain, alpha=options.alpha)
        mh_sampler = MHSampler(h0, [d], options.iters)
        chain_hypos = TopN(N=options.N)
        for h in break_ctrlc(mh_sampler):
            chain_hypos.add(h)
        hypotheses = hypotheses.union(chain_hypos.get_all())

# --------------------------------------------------------------------------------------------------------
# Save hypotheses.
# `with` guarantees the pickle is flushed and the file closed; the original
# opened the file and never closed it.
with open(options.filename, "wb") as f:
    pickle.dump(hypotheses, f)