def run(data_amount):
    print "Starting chain on %s data points" % data_amount
    # Sample data for the target word from the context (eval resolves the
    # word's meaning function by name).
    data = makeVariableLexiconData(eval(options.word),
                                   options.word,
                                   the_context,
                                   n=data_amount,
                                   s=options.s,
                                   alpha=options.alpha,
                                   verbose=True)

    # Initial hypothesis: a one-word lexicon with a fresh LOTHypothesis.
    h0 = KinshipLexicon(words=[options.word], alpha=options.alpha)
    h0.set_word(
        options.word,
        LOTHypothesis(grammar, value=None, display='lambda recurse_, C, X:%s'))

    hyps = TopN(N=options.top_count)  # keep only the top_count best hypotheses

    mhs = MHSampler(h0,
                    data,
                    options.steps,
                    likelihood_temperature=options.llt,
                    prior_temperature=options.prior_temp)

    # Run the chain, printing a progress line every 1000 samples.
    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        if samples_yielded % 1000 == 0:
            print h.prior, h.likelihood, h
        hyps.add(h)

    return hyps
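
run() returns a TopN of hypotheses; a typical driver maps it over several data amounts and pools the results. A sketch (DATA_AMOUNTS and the output filename are assumptions, not part of this example):

import pickle

DATA_AMOUNTS = [10, 50, 100, 250]  # hypothetical schedule of data sizes

all_hyps = set()
for amt in DATA_AMOUNTS:
    all_hyps.update(run(amt).get_all())

with open('hypotheses.pkl', 'wb') as f:
    pickle.dump(all_hyps, f)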
Example #2
def standard_sample(make_hypothesis, make_data, skip=9, show=True, N=100,
                    save_top='top.pkl', alsoprint='None', **kwargs):
    """
        A simplified interface for sampling that supports printing (showing),
        returning the top N hypotheses, and saving them to disk.
        Used by many examples; meant to make it easy to run with a variety of parameters.
        NOTE: `skip` controls *only* how often samples are printed, not sampling itself.
        **kwargs are passed through to the sampler.
    """
    if LOTlib.SIG_INTERRUPTED:
        return TopN()  # So we don't waste time!

    h0 = make_hypothesis()
    data = make_data()

    best_hypotheses = TopN(N=N)

    f = eval(alsoprint)  # eval'ing the default string 'None' yields None

    for i, h in enumerate(break_ctrlc(MHSampler(h0, data, **kwargs))):
        best_hypotheses.add(h)

        if show and i%(skip+1) == 0:
            print i, \
                h.posterior_score, \
                h.prior, \
                h.likelihood, \
                f(h) if f is not None else '', \
                qq(cleanFunctionNodeString(h))

    if save_top is not None:
        print "# Saving top hypotheses"
        with open(save_top, 'wb') as f:  # binary mode for pickle
            pickle.dump(best_hypotheses, f)

    return best_hypotheses
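
A minimal usage sketch for standard_sample. The factories here are hypothetical stand-ins (NumberGameHypothesis is used the same way as in the examples further down, and the exact FunctionData shape depends on the hypothesis's likelihood); `steps` is forwarded to MHSampler via **kwargs:

def make_hypothesis():
    return NumberGameHypothesis(grammar=grammar, domain=100, alpha=0.99)

def make_data():
    return [FunctionData(input=[], output=[2, 4, 8])]

top = standard_sample(make_hypothesis, make_data, N=50, steps=10000)
for h in top:
    print h.posterior_score, h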
Example #3
def probe_MHsampler(h, language, options, name, size=64, data=None, init_size=None,
                    iters_per_stage=None, sampler=None, ret_sampler=False):
    # Run an MH chain from h, periodically probing the current top hypotheses:
    # their log-normalizer Z and a posterior-weighted F-score are appended to `name`.
    get_data = language.sample_data_as_FuncData
    evaluation_data = get_data(size, max_length=options.FINITE)

    if data is None:
        if init_size is None:
            data = evaluation_data
        else:
            data = get_data(n=size, max_length=init_size)

    if sampler is None:
        sampler = MHSampler(h, data)
    else:
        sampler.data = data

    best_hypotheses = TopN(N=options.TOP_COUNT)

    i = 0  # step counter; the original shadowed the builtin `iter`

    for h in sampler:
        if i == options.STEPS: break
        if i % 100 == 0: print '---->', i

        best_hypotheses.add(h)

        if i % options.PROBE == 0:

            # Re-score the current top hypotheses on the held-out evaluation data.
            for hyp in best_hypotheses:
                hyp.compute_posterior(evaluation_data)
            Z = logsumexp([hyp.posterior_score for hyp in best_hypotheses])

            # Posterior-weighted F1: sum over hypotheses of exp(score - Z) * 2PR/(P+R).
            pr_data = get_data(1024, max_length=options.FINITE)
            weighted_score = 0
            for hyp in best_hypotheses:
                precision, recall = language.estimate_precision_and_recall(hyp, pr_data)
                if precision + recall != 0:
                    f_score = precision * recall / (precision + recall)
                    weighted_score += np.exp(hyp.posterior_score - Z) * f_score
            weighted_score *= 2

            to_file([[i, Z, weighted_score]], name)

        # Staged training: periodically grow the maximum data length.
        if init_size is not None and i % iters_per_stage == 0:
            init_size += 2
            sampler.data = get_data(n=size, max_length=init_size)

        i += 1

    if ret_sampler:
        return sampler
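
The probe reduces to: normalize the top set's posterior scores with logsumexp, then average each hypothesis's F1 = 2*P*R/(P+R) under weights exp(score - Z). A self-contained sketch of just that computation (the helper name and the (precision, recall, score) triples are hypothetical):

import numpy as np

def weighted_f1(scored):
    # scored: list of (precision, recall, posterior_score) triples
    scores = np.array([s for _, _, s in scored])
    Z = np.logaddexp.reduce(scores)  # log-normalizer over the top set
    total = 0.0
    for p, r, s in scored:
        if p + r != 0:
            total += np.exp(s - Z) * (2.0 * p * r / (p + r))
    return total

# e.g. weighted_f1([(0.9, 0.8, -12.3), (0.7, 0.9, -14.1)])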
Example #4
def run(data_amount):
    print "Starting chain on %s data points" % data_amount
    data = makeLexiconData(target, four_gen_tree_context, n=data_amount, alpha=options.alpha)

    h0 = KinshipLexicon(alpha=options.alpha)
    for w in target_words:
        h0.set_word(w, LOTHypothesis(my_grammar, args=['recurse_', 'C', 'X']))

    hyps = TopN(N=options.top_count)

    mhs = MHSampler(h0, data, options.steps, likelihood_temperature=options.llt, prior_temperature=options.prior_temp)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        hyps.add(h)

    return hyps
Example #5
def run(data_pts):
    print "Start run on ", str(data_pts)

    y = [pt.Y for pt in data_pts]
    filename = "".join(y)

    hyps = TopN(N=options.TOP_COUNT)
    h0 = KinshipLexicon(alpha=options.ALPHA)
    h0.set_word('Word', LOTHypothesis(my_grammar, value=None, display='lambda recurse_, C, X:%s'))
    mhs = MHSampler(h0, data_pts, options.STEPS, likelihood_temperature=options.llt)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        hyps.add(h)

    with open(options.OUT_PATH + filename + '.pkl', 'wb') as f:  # binary mode for pickle
        pickle.dump(hyps, f)

    return filename, hyps
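
The pickled TopN can be reloaded later for analysis (a sketch; the path shown is hypothetical, built the way run() builds it from the data string):

import pickle

with open('out/ABAB.pkl', 'rb') as f:
    hyps = pickle.load(f)
for h in hyps:
    print h.posterior_score, h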
Example #6
def run(data_amount):
    print "Starting chain on %s data points" % data_amount
    data = makeVariableLexiconData(eval(options.word), options.word, the_context, n=data_amount, s=options.s,
                                   alpha=options.alpha, verbose=True)

    h0 = KinshipLexicon(words=[options.word], alpha=options.alpha)
    h0.set_word(options.word, LOTHypothesis(grammar, value=None, args=['recurse_', 'C', 'X']))

    hyps = TopN(N=options.top_count)

    mhs = MHSampler(h0, data, options.steps, likelihood_temperature=options.llt,
                    prior_temperature=options.prior_temp)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        # if samples_yielded % 100 == 0:
        #     print h.prior, h.likelihood, h
        hyps.add(h)

    return hyps
Example #7
    # MPI: each worker runs one chain via `mpirun` (defined elsewhere in this
    # script) and returns its hypothesis set; union the results.
    if options.mpi:
        hypotheses = set()
        hypo_sets = MPI_unorderedmap(mpirun, [[d] for d in data * options.chains])
        for hypo_set in hypo_sets:
            hypotheses = hypotheses.union(hypo_set)

    # No MPI
    else:
        hypotheses = set()

        if options.grammar_scale:
            grammar = grammar_gamma(grammar, options.grammar_scale)

        for d in data * options.chains:
            h0 = NumberGameHypothesis(grammar=grammar, domain=options.domain, alpha=options.alpha)
            mh_sampler = MHSampler(h0, [d], options.iters)

            chain_hypos = TopN(N=options.N)
            for h in break_ctrlc(mh_sampler):
                chain_hypos.add(h)
            hypotheses = hypotheses.union(chain_hypos.get_all())

    # --------------------------------------------------------------------------------------------------------
    # Save hypotheses

    with open(options.filename, "wb") as f:  # close the file properly after dumping
        pickle.dump(hypotheses, f)
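
The MPI branch assumes a worker function mpirun defined elsewhere in the script. A plausible single-chain worker mirroring the sequential branch (a sketch under that assumption, not the script's actual definition):

def mpirun(d):
    # One chain on a single datum; pool the top N hypotheses it finds.
    h0 = NumberGameHypothesis(grammar=grammar, domain=options.domain, alpha=options.alpha)
    chain_hypos = TopN(N=options.N)
    for h in break_ctrlc(MHSampler(h0, [d], options.iters)):
        chain_hypos.add(h)
    return chain_hypos.get_all()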
