Example #1
def run(make_hypothesis, make_data, data_size):
    """
    Run the sampler on data_size amount of data and return the top options.TOP_COUNT hypotheses found.
    """
    if LOTlib.SIG_INTERRUPTED:
        return set()

    return standard_sample(make_hypothesis,
                           lambda: make_data(data_size),
                           N=options.TOP_COUNT,
                           steps=options.STEPS,
                           show=False, save_top=None)
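The helper above assumes a module-level options object (for TOP_COUNT and STEPS) plus LOTlib's standard_sample and SIG_INTERRUPTED. As a minimal, self-contained sketch of what that options object could look like, here it is built with argparse; the attribute names come from the code above, while the flag names and defaults are invented for illustration:

import argparse

# Hypothetical option parsing -- only TOP_COUNT and STEPS are used by run() above.
parser = argparse.ArgumentParser()
parser.add_argument("--top-count", dest="TOP_COUNT", type=int, default=100)    # hypotheses to keep
parser.add_argument("--steps", dest="STEPS", type=int, default=100000)         # sampler steps per run
options = parser.parse_args([])  # no CLI args here; just take the defaults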
Example #2
# #
# # print check_list[95][0]
# # print check_list[95][0].prior, check_list[95][0].likelihood, check_list[95][0].posterior_score
#
# cnt = 0
# for e in check_list:
#     if not is_valid(e[1]): continue
#     if cnt == 100: break
#     print e[0], len(str(e[0])) - len('lambda recurse_: flatten2str(,sep="")')
#     print np.exp(e[0].prior), e[0].likelihood, e[0].posterior_score
#     cnt += 1


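# Assumed context (not shown in this snippet): `out` holds the sets of top
# hypotheses returned by run(), and is_valid(), make_data() and np are defined
# by the surrounding script. The loop below estimates, for each amount of data,
# the share of posterior mass on hypotheses whose string form is more than 46
# characters longer than the bare 'lambda recurse_: flatten2str(,sep="")' template.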
for i in xrange(10):
    print 'estimating for data_size: %i' % i
    data = make_data(i)

    seen = set()
    inf_sum = 0
    all_sum = 0
    for topN in out:
        for h in topN:
            if h not in seen:
                seen.add(h)
                poster_mass = h.compute_posterior(data)
                if not is_valid(poster_mass): continue
                poster_mass = np.exp(poster_mass)
                all_sum += poster_mass
                if len(str(h)) - len('lambda recurse_: flatten2str(,sep="")') > 46:
                    inf_sum += poster_mass
    prob = 0
    if all_sum != 0: prob = inf_sum / all_sum
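Summing np.exp(...) values as above can underflow to zero when every posterior_score is strongly negative. A small self-contained sketch (not part of the original example) of the same share-of-mass ratio computed with a max-shift, which cancels out of the ratio and keeps the weights representable:

import numpy as np

def long_hypothesis_mass(log_scores, is_long):
    """Share of posterior mass falling on 'long' hypotheses, computed stably."""
    log_scores = np.asarray(log_scores, dtype=float)
    is_long = np.asarray(is_long, dtype=bool)
    if log_scores.size == 0 or not is_long.any():
        return 0.0
    w = np.exp(log_scores - log_scores.max())  # shift so the largest weight is exactly 1
    return float(w[is_long].sum() / w.sum())

# e.g. long_hypothesis_mass([-1200.0, -1198.5, -1207.0], [False, True, False])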
Example #3
    # Make a list of arguments to map over
    args = list(itertools.product([make_hypothesis], [make_data], DATA_RANGE))

    # run on MPI
    results = MPI_map(run, args)

    # collapse all returned sets
    hypotheses = set()
    for r in results:
        hypotheses.update(r) # add each worker's result set to the combined set

    # Now go through each hypothesis and print out some summary stats
    for data_size in DATA_RANGE:

        evaluation_data = make_data(data_size)

        # Now update everyone's posterior
        for h in hypotheses:
            h.compute_posterior(evaluation_data)

        # compute the normalizing constant. This is the log of the sum of the probabilities
        Z = logsumexp([h.posterior_score for h in hypotheses])

        for h in hypotheses:
            # compute the number of distinct strings this hypothesis generates
            generated_strings = set([h() for _ in xrange(1000)])

            # Print some summary stats. We can use np.exp(h.posterior_score - Z) here because
            # Z was computed via logsumexp, which is numerically stable.
            # This gives the hypothesis' posterior probability at this amount of data.
            print data_size, np.exp(h.posterior_score-Z), h.posterior_score, h.prior, h.likelihood, len(generated_strings), qq(h)
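Note that the example relies on helpers such as MPI_map, logsumexp and qq from the LOTlib ecosystem; their exact import paths depend on the version in use. To illustrate the logsumexp comment above, a tiny self-contained numpy example with made-up scores, showing why np.exp(h.posterior_score - Z) stays finite where np.exp(h.posterior_score) alone would underflow:

import numpy as np

scores = np.array([-1200.0, -1203.0, -1210.0])  # hypothetical posterior scores
Z = np.logaddexp.reduce(scores)                 # log of the total probability mass
posterior = np.exp(scores - Z)                  # normalized posterior per hypothesis
assert np.isclose(posterior.sum(), 1.0)
# np.exp(scores) by itself underflows to all zeros here, losing the comparison entirely.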