import itertools

import numpy as np

import LOTlib
from LOTlib.Miscellaneous import logsumexp, qq
from LOTlib.Inference.Samplers.StandardSample import standard_sample
from LOTlib.MPI.MPI_map import MPI_map

# NOTE: `options` (providing TOP_COUNT and STEPS), DATA_RANGE, make_hypothesis,
# and make_data are assumed to be defined by the enclosing script (e.g. via its
# option parsing); the import paths above follow LOTlib's layout.


def run(make_hypothesis, make_data, data_size):
    """
    Run the sampler on data_size amounts of data and return the top TOP_COUNT hypotheses found.
    """
    if LOTlib.SIG_INTERRUPTED:
        return set()

    return standard_sample(make_hypothesis,
                           lambda: make_data(data_size),
                           N=options.TOP_COUNT,
                           steps=options.STEPS,
                           show=False,
                           save_top=None)
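
# `is_valid` is used below but not defined in this file. A minimal sketch of
# what it presumably checks (an assumption, not the original helper): that a
# log posterior score is a usable finite number.
def is_valid(score):
    # Hypothetical stand-in: reject None, nan, and +/-inf scores.
    return score is not None and np.isfinite(score)
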
# Estimate, for each amount of data, how much posterior mass falls on the "long"
# hypotheses: those whose program string is more than 46 characters beyond the
# 'lambda recurse_: flatten2str(,sep="")' boilerplate wrapper.
for i in xrange(10):
    print 'estimating for data_size: %i' % i
    data = make_data(i)

    seen = set()
    inf_sum = 0.0
    all_sum = 0.0
    for topN in out:  # `out` holds the top-N sets from the MPI runs (see note below)
        for h in topN:
            if h in seen:
                continue
            seen.add(h)

            poster_mass = h.compute_posterior(data)
            if not is_valid(poster_mass):
                continue

            poster_mass = np.exp(poster_mass)
            all_sum += poster_mass
            if len(str(h)) - len('lambda recurse_: flatten2str(,sep="")') > 46:
                inf_sum += poster_mass

    prob = 0.0
    if all_sum != 0:
        prob = inf_sum / all_sum
    # The original computed `prob` but never reported it; printing it is an assumption:
    print 'P(long hypothesis | %i data points) = %f' % (i, prob)
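
# NOTE (assumption): `out`, iterated above, is not defined in this snippet.
# Given the MPI driver below, it is presumably the list of top-N sets that
# MPI_map returns, i.e. something like:
#   out = MPI_map(run, list(itertools.product([make_hypothesis], [make_data], DATA_RANGE)))
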
# Make a list of arguments to map over
args = list(itertools.product([make_hypothesis], [make_data], DATA_RANGE))

# Run on MPI
results = MPI_map(run, args)

# Collapse all returned sets into a single set of hypotheses
hypotheses = set()
for r in results:
    hypotheses.update(r)  # add the i-th run's results to the set

# Now go through each hypothesis and print out some summary stats
for data_size in DATA_RANGE:
    evaluation_data = make_data(data_size)

    # Now update everyone's posterior
    for h in hypotheses:
        h.compute_posterior(evaluation_data)

    # Compute the normalizing constant. This is the log of the sum of the probabilities.
    Z = logsumexp([h.posterior_score for h in hypotheses])

    for h in hypotheses:
        # Compute the number of different strings we generate
        generated_strings = set([h() for _ in xrange(1000)])

        # Print out some info. We can use np.exp(h.posterior_score - Z) here because Z is
        # computed via logsumexp and so is numerically stable. This gives the hypothesis's
        # posterior probability at this amount of data.
        print data_size, np.exp(h.posterior_score - Z), h.posterior_score, h.prior, h.likelihood, len(generated_strings), qq(h)
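
# Why logsumexp for Z (illustrative numbers, not from this run): with log scores
# around -1000, summing raw exponentials underflows to 0.0, while logsumexp
# keeps the computation in log space:
#   >>> np.log(np.sum(np.exp(np.array([-1000.0, -1001.0]))))   # -inf (underflow)
#   >>> logsumexp([-1000.0, -1001.0])                          # approx -999.687
#
# Launching (an assumption about the exact command; LOTlib's MPI_map is built on
# mpi4py): start the script under MPI, e.g.
#   mpiexec -n 8 python search.py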