Example #1
def run(llt=1.0):

    # starting hypothesis: a lexicon with one hypothesis per word, at likelihood temperature llt
    h0 = CCGLexicon(make_hypothesis, words=all_words, alpha=0.9, palpha=0.9, likelihood_temperature=llt)

    # keep only the 10 best hypotheses found while sampling
    fbs = FiniteBestSet(N=10)

    from LOTlib.Inference.MetropolisHastings import mh_sample
    for h in lot_iter(mh_sample(h0, data, SAMPLES)):
        fbs.add(h, h.posterior_score)

    return fbs
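
FiniteBestSet in these examples simply keeps the N highest-scoring hypotheses seen during sampling. As a rough, self-contained illustration of that bookkeeping (not LOTlib's implementation; the TopN class and its methods are invented names), the same idea can be written with a bounded min-heap:

import heapq

class TopN(object):
    """Illustrative stand-in for FiniteBestSet: keep the N best (item, score) pairs."""

    def __init__(self, N=10):
        self.N = N
        self._heap = []   # min-heap of (score, counter, item); the worst kept item is at index 0
        self._count = 0   # insertion counter, used as a tie-breaker so items are never compared

    def add(self, item, score):
        entry = (score, self._count, item)
        self._count += 1
        if len(self._heap) < self.N:
            heapq.heappush(self._heap, entry)
        elif score > self._heap[0][0]:
            heapq.heapreplace(self._heap, entry)   # evict the current worst

    def get_all(self, sort=False):
        entries = list(self._heap)
        if sort:
            entries.sort()   # worst first, best last
        return [item for (_, _, item) in entries]

Each call to add() either grows the set up to N entries or evicts the current worst, so memory stays bounded no matter how long the sampler runs.
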
Example #2
def run(*args):

    # starting hypothesis -- here this generates at random
    h0 = GaussianLOTHypothesis(grammar, prior_temperature=PRIOR_TEMPERATURE)

    # We store the top 100 from each run
    pq = FiniteBestSet(100, max=True, key="posterior_score")
    pq.add(mh_sample(h0, data, STEPS, skip=SKIP))

    return pq
Example #3
def run(data_size):

    print "Running ", data_size

    # We store the top 100 from each run
    hypset = FiniteBestSet(TOP_COUNT, max=True)

    # initialize the data
    data = generate_data(data_size)

    # starting hypothesis -- here this generates at random
    learner = GriceanQuantifierLexicon(make_my_hypothesis, my_weight_function)

    # set_word with no value defaults to generating a hypothesis from the grammar
    for w in target.all_words():
        learner.set_word(w)

    # populate the finite sample by running the sampler for this many steps
    for x in mh_sample(learner, data, SAMPLES, skip=0):
        hypset.push(x, x.posterior_score)

    return hypset
Example #4
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Inference.Samplers.MetropolisHastings import mh_sample
from LOTlib.Examples.Quantifier.Model import *

ALPHA = 0.9
SAMPLES = 100000
DATA_SIZE = 1000

if __name__ == "__main__":

    ## sample the target data
    data = generate_data(DATA_SIZE)

    W = 'every'

    # To use this as a LOTHypothesis, each data point needs an "output" field that is
    # True/False for whether it is the target word. LOTHypothesis.compute_likelihood then
    # checks whether we match whether a word was said, ignoring the other words --
    # that's why it's a pseudolikelihood.
    for di in data:
        di.output = (di.word == W)
        #print (di.word == W)

    FBS = FiniteBestSet(max=True, N=100)

    H = LOTHypothesis(grammar, args=['A', 'B', 'S'], ALPHA=ALPHA)
    # Now just run the sampler with a LOTHypothesis
    for s in mh_sample(H, data, SAMPLES, skip=10):
        #print s.lp, "\t", s.prior, "\t", s.likelihood, "\n", s, "\n\n"
        FBS.push(s, s.lp)

    for k in reversed(FBS.get_all(sorted=True)):
        print k.lp, k.prior, k.likelihood, k
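
The "output" trick above turns the word production data into a binary prediction problem: the likelihood only asks whether the hypothesis's True/False prediction matches whether the target word was actually said. A minimal sketch of such a noisy-match pseudolikelihood, assuming data points carry the boolean output field set in the loop above (the function name and the exact noise model are illustrative, not LOTlib's compute_likelihood):

from math import log

def pseudo_log_likelihood(predict, data, alpha=0.9):
    # predict: any callable mapping a data point to True/False
    # alpha:   probability of producing the observed output when the prediction matches
    ll = 0.0
    for di in data:
        match = (predict(di) == di.output)
        ll += log(alpha if match else 1.0 - alpha)
    return ll

Here alpha plays the role ALPHA plays above: values near 1 penalize mismatches heavily, while lower values make the likelihood more forgiving.
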
Example #5
def run(data_size):

    print "Running ", data_size

    # We store the top 100 from each run
    hypset = FiniteBestSet(TOP_COUNT, max=True)

    # initialize the data
    data = generate_data(data_size)

    # starting hypothesis -- here this generates at random
    learner = GriceanQuantifierLexicon(make_my_hypothesis, my_weight_function)

    # set_word with no value defaults to generating a hypothesis from the grammar
    for w in target.all_words():
        learner.set_word(w)

    # populate the finite sample by running the sampler for this many steps
    for x in mh_sample(learner, data, SAMPLES, skip=0):
        hypset.push(x, x.posterior_score)

    return hypset


if __name__ == "__main__":

    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # MPI interface

    # Map. SimpleMPI falls back to an ordinary map if we are not running under MPI
    allret = MPI_map(run, map(lambda x: [x],
                              DATA_AMOUNTS * CHAINS))  # this many chains

    ## combine into a single hypothesis set and save
    outhyp = FiniteBestSet(max=True)
    for r in allret:
        print "# Merging ", len(r)
        outhyp.merge(r)

    import pickle
    with open(OUT_PATH, 'wb') as f:
        pickle.dump(outhyp, f)
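
MPI_map above dispatches one run(...) call per element of the argument list across MPI ranks (SimpleMPI falls back to an ordinary map when MPI is not running) and returns one result per call for the master process to merge. The same map-then-merge pattern can be sketched with only the standard library; multiprocessing.Pool stands in for MPI_map here, and run_chain with its toy return value is purely hypothetical:

from multiprocessing import Pool

def run_chain(data_size):
    # Stand-in for run(data_size): a real chain would return a FiniteBestSet of hypotheses;
    # here each worker just returns a list of (score, description) pairs.
    return [(-float(data_size), "best hypothesis for data_size=%d" % data_size)]

if __name__ == "__main__":
    data_amounts = [10, 100, 1000]   # illustrative stand-in for DATA_AMOUNTS
    chains = 4                       # illustrative stand-in for CHAINS
    pool = Pool()
    allret = pool.map(run_chain, data_amounts * chains)
    pool.close()
    pool.join()

    # merge the per-chain results and keep the overall best,
    # the job FiniteBestSet.merge does in the example above
    merged = sorted(sum(allret, []), reverse=True)
    for score, description in merged[:5]:
        print("%s\t%s" % (score, description))
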
Example #6
def run(*args):
    #print "# Running data"

    global hypotheses

    data_size = args[0]

    # how often do you get the right representation / the right response,
    # overall and under the literal and presuppositional readings
    p_representation = defaultdict(int)
    p_response = defaultdict(int)
    p_representation_literal = defaultdict(int)
    p_response_literal = defaultdict(int)
    p_representation_presup = defaultdict(int)
    p_response_presup = defaultdict(int)

    #print "# Generating data"
    data = generate_data(data_size)

    # recompute these
    #print "# Computing posterior"
    #[ x.unclear_functions() for x in hypotheses ]
    [x.compute_posterior(data) for x in hypotheses]

    # normalize the posterior in fs
    #print "# Computing normalizer"
    Z = logsumexp([x.posterior_score for x in hypotheses])

    # and output the top hypotheses
    qq = FiniteBestSet(max=True, N=25)
    for h in hypotheses:
        qq.push(h, h.posterior_score)  # get the tops
    for i, h in enumerate(qq.get_all(sorted=True)):
        for w in h.all_words():
            fprintn(8,
                    data_size,
                    i,
                    w,
                    h.posterior_score,
                    q(h.value[w]),
                    f=options.OUT_PATH + "-hypotheses." + str(get_rank()) +
                    ".txt")

    # and compute the probability of being correct
    #print "# Computing correct probability"
    for h in hypotheses:
        hstr = str(h)
        #print data_size, len(data), exp(h.posterior_score), correct[ str(h)+":"+w ]
        for w in words:
            p = exp(h.posterior_score - Z)
            key = w + ":" + hstr

            p_representation[w] += p * (agree_pct[key] == 1.)
            # if we always agree with the target, then we count as having the right representation
            p_representation_presup[w] += p * (agree_pct_presup[key] == 1.)
            p_representation_literal[w] += p * (agree_pct_literal[key] == 1.)

            # and just how often does the hypothesis agree?
            p_response[w] += p * agree_pct[key]
            p_response_presup[w] += p * agree_pct_presup[key]
            p_response_literal[w] += p * agree_pct_literal[key]

    #print "# Outputting"

    for w in words:
        fprintn(10,
                str(get_rank()),
                q(w),
                data_size,
                p_representation[w],
                p_representation_presup[w],
                p_representation_literal[w],
                p_response[w],
                p_response_presup[w],
                p_response_literal[w],
                f=options.OUT_PATH + "-stats." + str(get_rank()) + ".txt")

    return 0
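
The normalization step above stays entirely in log space: Z is the logsumexp of the posterior scores, and each hypothesis then gets weight exp(posterior_score - Z), so the weights sum to one even when the raw scores are far too negative to exponentiate directly. A self-contained version of that step (the example presumably gets logsumexp from a library via its star imports; this one is written out for clarity):

from math import exp, log

def logsumexp(xs):
    # numerically stable log(sum(exp(x) for x in xs))
    m = max(xs)
    return m + log(sum(exp(x - m) for x in xs))

scores = [-1200.3, -1201.0, -1205.7]      # e.g. posterior_score values
Z = logsumexp(scores)
weights = [exp(s - Z) for s in scores]    # these sum to 1.0
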
Example #7

def make_h0(value=None):
    return GaussianLOTHypothesis(grammar, value=value)


if __name__ == "__main__":

    # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # the running function

    def run(*args):

        # starting hypothesis -- here this generates at random
        h0 = GaussianLOTHypothesis(grammar,
                                   prior_temperature=PRIOR_TEMPERATURE)

        # We store the top 100 from each run
        pq = FiniteBestSet(100, max=True, key="posterior_score")
        pq.add(mh_sample(h0, data, STEPS, skip=SKIP))

        return pq

    finitesample = FiniteBestSet(max=True)  # the finite sample of all
    results = map(run, [[None]] * CHAINS)  # Run on a single core
    finitesample.merge(results)

    ## and display
    for r in finitesample.get_all(decreasing=False, sorted=True):
        print r.posterior_score, r.prior, r.likelihood, qq(str(r))
Example #8
def run(llt=1.0):

    # starting hypothesis: a lexicon with one hypothesis per word, at likelihood temperature llt
    h0 = CCGLexicon(make_hypothesis, words=all_words, alpha=0.9, palpha=0.9, likelihood_temperature=llt)

    # keep only the 10 best hypotheses found while sampling
    fbs = FiniteBestSet(N=10)

    from LOTlib.Inference.MetropolisHastings import mh_sample
    for h in lot_iter(mh_sample(h0, data, SAMPLES)):
        fbs.add(h, h.posterior_score)

    return fbs


## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
### MPI map
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from SimpleMPI.MPI_map import MPI_map, is_master_process

allret = MPI_map(run, map(lambda x: [x], [0.01, 0.1, 1.0] * 100))  # 100 runs per likelihood temperature

if is_master_process():

    allfbs = FiniteBestSet(max=True)
    allfbs.merge(allret)

    H = allfbs.get_all()

    for h in H:
        h.likelihood_temperature = 0.01  # re-score at a fixed likelihood temperature -- which data set do we want here?
        h.compute_posterior(data)

    # show each hypothesis, sorted by posterior score
    for h in sorted(H, key=lambda h: h.posterior_score):
        print h.posterior_score, h.prior, h.likelihood, h.likelihood_temperature
        print h

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Play around with some different inference schemes