def run(data_amount):
    """Run one MH chain on `data_amount` data points and return the top hypotheses."""
    print "Starting chain on %s data points" % data_amount
    data = makeVariableLexiconData(eval(options.word),
                                   options.word,
                                   the_context,
                                   n=data_amount,
                                   s=options.s,
                                   alpha=options.alpha,
                                   verbose=True)

    h0 = KinshipLexicon(words=[options.word], alpha=options.alpha)
    h0.set_word(
        options.word,
        LOTHypothesis(grammar, value=None, display='lambda recurse_, C, X:%s'))

    hyps = TopN(N=options.top_count)

    mhs = MHSampler(h0,
                    data,
                    options.steps,
                    likelihood_temperature=options.llt,
                    prior_temperature=options.prior_temp)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        if samples_yielded % 1000 == 0:
            print h.prior, h.likelihood, h
        hyps.add(h)

    return hyps
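
# A minimal usage sketch (not from the original source): driving run() over several data
# amounts and pickling each TopN result. The data amounts and output filenames here are
# hypothetical; the real script presumably takes them from `options`.
if __name__ == "__main__":
    import pickle
    for amount in [25, 50, 100]:  # hypothetical data amounts
        top_hyps = run(amount)
        with open('chain_' + str(amount) + '.pkl', 'w') as f:
            pickle.dump(top_hyps, f)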
Example 2
def scheme_generate():
    """ This generates random scheme code with cons, cdr, and car, and evaluates it on some simple list
    structures.

    No inference here -- just random sampling from a grammar.
    """

    example_input = [
        [],
        [[]],
        [[], []],
        [[[]]]
        ]

    ## Generate some and print out unique ones
    seen = set()
    for i in break_ctrlc(xrange(10000)):
        x = grammar.generate('START')

        if x not in seen:
            seen.add(x)

            # make the function node version
            f = LOTHypothesis(grammar, value=x, args=['x'])

            print x.log_probability(), x
            for ei in example_input:
                print "\t", ei, " -> ", f(ei)
Example 3
def make_hypothesis():

    h = CCGLexicon(alpha=0.9, palpha=0.9, likelihood_temperature=1.0)
    for w in all_words:
        h.set_word(w, LOTHypothesis(grammar, args=['C']))

    return h
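
# A hedged usage sketch (not in the original): running MH from this starting lexicon in the
# style of the other examples. Left commented out because `data` is not defined in this snippet.
#
# h0 = make_hypothesis()
# for h in break_ctrlc(MHSampler(h0, data, 100000)):
#     print h.posterior_score, h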
Example 4
def run(damount):
    lexicon, L, hugeData = normalize(damount)
    words = target.all_words()

    def propose(current_state, bag=lexicon, probs=L):
        # Cycle deterministically through the words, resampling one word's hypothesis at a
        # time from the pre-normalized bag of alternatives (probabilities are in log space).
        mod = len(current_state.all_words())
        w = words[propose.inx % mod]
        proposal = copy(current_state)
        proposal.value[w].value = weighted_sample(bag[w], probs=probs[w], log=True).value
        propose.inx += 1
        return proposal

    propose.inx = 0  # persistent counter: which word gets resampled next
    proposer = lambda x: propose(x)

    h0 = KinshipLexicon(alpha=options.alpha,
                        epsilon=options.epsilon,
                        s=options.s)
    for w in target.all_words():
        h0.set_word(
            w, LOTHypothesis(my_grammar, display='lambda recurse_, C, X: %s'))

    gs = Gibbs(h0, hugeData, proposer=proposer, steps=options.samples)
    hyps = TopN(N=options.top_count)
    for s, h in enumerate(gs):
        hyps.add(h)
        print h.prior, h.likelihood, h
    return hyps
Example 5
def make_hypothesis(**kwargs):

    h = EvenOddLexicon(**kwargs)

    for w in WORDS:
        h.set_word(w, LOTHypothesis(grammar, args=['lexicon', 'x']))

    return h
Example 6
def make_hypothesis(**kwargs):

    h = EvenOddLexicon(**kwargs)

    for w in WORDS:
        h.set_word(w, LOTHypothesis(grammar, display='lambda lexicon, x: %s'))

    return h
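
# A hedged note (not in the original): this appears to build the same hypotheses as the
# previous example -- `display='lambda lexicon, x: %s'` spells out the wrapper lambda that
# `args=['lexicon', 'x']` would otherwise produce. A quick check, left commented out:
#
# h = make_hypothesis()
# for w in WORDS:
#     print w, h.value[w]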
Example 7
def run(data_pts):
    print "Start run on ", str(data_pts)

    y = [pt.Y for pt in data_pts]
    filename = "".join(y)

    hyps = TopN(N=options.TOP_COUNT)
    h0 = KinshipLexicon(alpha=options.ALPHA)
    h0.set_word('Word', LOTHypothesis(my_grammar, value=None, display='lambda recurse_, C, X:%s'))
    mhs = MHSampler(h0, data_pts, options.STEPS, likelihood_temperature=options.llt)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        hyps.add(h)

    with open(options.OUT_PATH + filename + '.pkl', 'w') as f:
        pickle.dump(hyps, f)

    return filename, hyps
Example 8
def run(hypothesis, data_amount):
    print "Starting chain on %s data points" % data_amount
    data = makeLexiconData(target,
                           four_gen_tree_context,
                           n=data_amount,
                           alpha=options.alpha,
                           verbose=True)

    h0 = KinshipLexicon(alpha=options.alpha)
    for w in target_words:
        h0.set_word(
            w,
            LOTHypothesis(grammar=my_grammar,
                          value=hypothesis.value[w].value,
                          display='lambda recurse_, C, X: %s'))

    hyps = TopN(N=options.top_count)

    mhs = MHSampler(h0,
                    data,
                    options.steps,
                    likelihood_temperature=options.llt,
                    prior_temperature=options.prior_temp)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        if samples_yielded % 100 == 0:
            pass  #print h.likelihood, h.prior, h
        hyps.add(h)

    import pickle
    filename = data[0].X + data[0].Y + str(data_amount) + data[0].word + '.pkl'
    print 'Writing ' + filename
    with open('Chains/' + filename, 'w') as f:
        pickle.dump(hyps, f)

    return hyps
Example 9

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Main running code
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

if __name__ == "__main__":

    from optparse import OptionParser
    from LOTlib import break_ctrlc
    from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler

    parser = OptionParser()
    parser.add_option("--in",
                      dest="IN",
                      type="string",
                      help="Input data file",
                      default=DEFAULT_DATA)
    options, _ = parser.parse_args()

    words, data = load_words_and_data(options.IN)
    L0 = PureLambdaLexicon(likelihood_temperature=1.0)
    for w in words:
        L0.set_word(w, LOTHypothesis(grammar, args=[], maxnodes=15))

    for L in break_ctrlc(MHSampler(L0, data)):
        # print_lexicon_and_data(L, data) # If you want to see all the output for each data point, use this

        print L.posterior_score, L.prior, L.likelihood
        print L, "\n"
Example 10
def make_ho(value=None):
    # ALPHA here trades off with the amount of data; currently assuming no noise, but that's not necessary.
    return LOTHypothesis(grammar, value=value, args=['x', 'y'], ALPHA=0.999)
Example 11
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Standard exports
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis


def make_ho(value=None):
    # ALPHA here trades off with the amount of data; currently assuming no noise, but that's not necessary.
    return LOTHypothesis(grammar, value=value, args=['x', 'y'], ALPHA=0.999)


if __name__ == "__main__":
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Run mcmc
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    from LOTlib.Proposals.RegenerationProposal import *
    #mp = MixtureProposal([RegenerationProposal(grammar), InsertDeleteProposal(grammar)] )
    mp = RegenerationProposal(grammar)

    # ALPHA here trades off with the amount of data; currently assuming no noise, but that's not necessary.
    h0 = LOTHypothesis(grammar, args=['x', 'y'], ALPHA=0.999, proposal_function=mp)

    from LOTlib.Inference.MetropolisHastings import mh_sample
    for h in mh_sample(h0, data, 4000000, skip=100):
        print h.posterior_score, h.likelihood, h.prior, cleanFunctionNodeString(h)
        print map(lambda d: h(*d.input), data)
        print "\n"
Example 12
def make_hypothesis():
    return LOTHypothesis(grammar, args=['C'])
Example 13
        collapsed_prob = grammar.log_probability(collapsed_forms[resps])
        collapsed_forms[resps].my_log_probability = logplusexp(collapsed_prob, tprior)
        if tprior > collapsed_forms[resps].display_tree_probability: # display the most concise form
            collapsed_forms[resps] = t
            collapsed_forms[resps].display_tree_probability = tprior
    else:
        collapsed_forms[resps] = t
        collapsed_forms[resps].display_tree_probability = tprior
        t.my_log_probability = tprior # FunctionNode uses this value when we call log_probability()
        print ">>", all_tree_count, len(collapsed_forms),  t, tprior

############################################
### Now actually enumerate trees
for t in grammar.enumerate(d=DEPTH):
    if 'presup_(False' in str(t):
        continue
    if not check_expansion(t):
        continue
    if t.count_subnodes() <= MAX_NODES:
        add_to_collapsed_trees(t)
        all_tree_count += 1
        print ">", t, grammar.log_probability(t)

## for easier saving and loading later:
upq = TopN()
for k in collapsed_forms.values():
    upq.add(LOTHypothesis(grammar, k, display='lambda context: %s'), 0.0)
pickle.dump(upq, open(OUT, 'w'))

print "Total tree count: ", all_tree_count
Example 14
    #'9': lambda context: (presup_(cardinalityeq_(context.A, context.B), nonempty_(context.A))),
    #'10': lambda context: (presup_(cardinalitygt_(context.B, context.A), nonempty_(context.A))),

    #
    # 'few': lambda context: presup_(
    #     True, cardinalitygt_(3, intersection_(context.A, context.B))),
    # 'many': lambda context: presup_(
    #     True, cardinalitygt_(intersection_(context.A, context.B), 3)),
    # 'half': lambda context: presup_(
    #     nonempty_(context.A), cardinalityeq_(intersection_(context.A, context.B),
    #                                          setdifference_(context.A, context.B)))
}

target = H.GriceanQuantifierLexicon(make_my_hypothesis, my_weight_function)
for w, f in target_functions.items():
    target.set_word(w, LOTHypothesis(G.grammar, value='SET_IN_TARGET', f=f))

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#~~~ Generate data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


def generate_data(data_size):
    all_words = target.all_words()
    data = []

    for i in break_ctrlc(xrange(data_size)):
        # A context is a set of men, pirates, and everything; the word functions are applied to it to get truth values.
        context = sample_context()
        word = target.sample_utterance(all_words, context)
        data.append(
Example 15
# Or we can make them as hypotheses (functions of S):
#for i in xrange(100):
#print LOTHypothesis(grammar, args=['S'])

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Or real inference:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DataAndObjects import FunctionData, Obj  # for nicely managing data
from LOTlib.Inference.MetropolisHastings import mh_sample  # for running MCMC

# Make up some data -- here just one set containing {red, red, green} colors
data = [ FunctionData(input=[ {Obj(color='red'), Obj(color='red'), Obj(color='green')} ], \
                      output=True) ]

# Create an initial hypothesis
h0 = LOTHypothesis(grammar, args=['S'])

# OR if we want to specify and use insert/delete proposals
#from LOTlib.Proposals import *
#h0 = LOTHypothesis(grammar, proposal_function=MixtureProposal(grammar, [RegenerationProposal(grammar), InsertDeleteProposal(grammar)] ) )

if __name__ == "__main__":

    # MCMC!
    for h in mh_sample(h0, data, 4000):  # run sampler
        #for h in unique(mh_sample(h0, data, 4000)): # get unique samples
        # hypotheses' .prior, .likelihood, and .posterior_score are set in mh_sample
        print h.likelihood, h.prior, h.posterior_score, h
Example 16
File: Shared.py  Project: sa-/LOTlib
def make_my_hypothesis():
    return LOTHypothesis(grammar, args=['context'])
Example 17
def make_hyps():
    return LOTHypothesis(default_grammar,
                         value=None,
                         display='lambda recurse_, C, X:%s')
Example 18
def updateLexicon(lexicon, grammar=default_grammar, **kwargs):
    h = KinshipLexicon(**kwargs)
    for w in lexicon.all_words():
        hw = lexicon.value[w]
        hw.grammar = grammar
        h.set_word(w, hw)
    return h
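
# A hedged sketch (not from the original) of how updateLexicon might be used: take the word
# hypotheses of an existing lexicon (e.g. one unpickled from an earlier chain), point them at
# a new grammar object, and wrap them in a fresh KinshipLexicon built with the given keyword
# arguments. All names below are hypothetical.
#
# import pickle
# old_lexicon = pickle.load(open('some_chain.pkl'))
# new_lexicon = updateLexicon(old_lexicon, grammar=new_grammar, alpha=0.9, epsilon=0.99, s=0.0)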


if __name__ == "__main__":

    from Model.Givens import english_words, four_gen_tree_context, english
    from Model.Data import makeTreeLexiconData, makeZipfianLexiconData, engFreq
    from Grammar import makeGrammar
    #rgrammar = makeGrammar(['Mira','Snow','charming','rump','neal','baelfire','Emma','Regina','henry','Maryann','ego'],
    #                         compositional=True, terms=['X','objects','all'], nterms=['Tree', 'Set', 'Gender'],
    #                         recursive=True, words=english_words)
    gramm = makeGrammar(four_gen_tree_context.objects,
                        nterms=['Tree', 'Set', 'Gender', 'Generation'])
    h0 = KinshipLexicon(alpha=0.9, epsilon=0.99, s=0.0)
    for w in english_words:
        h0.set_word(w, LOTHypothesis(gramm,
                                     display='lambda recurse_, C, X: %s'))

    for _ in xrange(10):
        dat = makeZipfianLexiconData(english,
                                     four_gen_tree_context,
                                     engFreq,
                                     n=10)
        print h0.compute_posterior(dat)
Example 19
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
from LOTlib.Projects.Quantifier.Model import *

ALPHA = 0.9
SAMPLES = 100000
DATA_SIZE = 1000

if __name__ == "__main__":

    ## sample the target data
    data = generate_data(DATA_SIZE)

    W = 'every'

    # To use this as a LOTHypothesis, each data point needs an "output" field that is True/False
    # for whether the uttered word was the target word. LOTHypothesis.compute_likelihood then
    # checks whether we match that yes/no judgment, ignoring the other words -- which is why this
    # is a pseudo-likelihood.
    for di in data:
        di.output = (di.utterance == W)
        #print (di.word == W)

    FBS = TopN(N=100)

    H = LOTHypothesis(grammar, display='lambda A,B,S: %s', ALPHA=ALPHA)
    # Now just run the sampler with a LOTHypothesis
    for s in MHSampler(H, data, SAMPLES, skip=10):
        #print s.lp, "\t", s.prior, "\t", s.likelihood, "\n", s, "\n\n"
        FBS.push(s, s.lp)

    for k in reversed(FBS.get_all(sorted=True)):
        print k.lp, k.prior, k.likelihood, k
Example 20
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Inference.Samplers.MetropolisHastings import mh_sample
from LOTlib.Examples.Quantifier.Model import *

ALPHA = 0.9
SAMPLES = 100000
DATA_SIZE = 1000

if __name__ == "__main__":

    ## sample the target data
    data = generate_data(DATA_SIZE)

    W = 'every'

    # To use this as a LOTHypothesis, each data point needs an "output" field that is True/False
    # for whether the uttered word was the target word. LOTHypothesis.compute_likelihood then
    # checks whether we match that yes/no judgment, ignoring the other words -- which is why this
    # is a pseudo-likelihood.
    for di in data:
        di.output = (di.word == W)
        #print (di.word == W)

    FBS = FiniteBestSet(max=True, N=100)

    H = LOTHypothesis(grammar, args=['A', 'B', 'S'], ALPHA=ALPHA)
    # Now just run the sampler with a LOTHypothesis
    for s in mh_sample(H, data, SAMPLES, skip=10):
        #print s.lp, "\t", s.prior, "\t", s.likelihood, "\n", s, "\n\n"
        FBS.push(s, s.lp)

    for k in reversed(FBS.get_all(sorted=True)):
        print k.lp, k.prior, k.likelihood, k
Example 21
def make_hypothesis(data=DEFAULT_DATA, **kwargs):
    words, data = load_words_and_data(data)
    L0 = PureLambdaLexicon(**kwargs)
    for w in words:
        L0.set_word(w, LOTHypothesis(grammar, args=[], maxnodes=15))
    return L0
Example 22
def make_my_hypothesis():
    return LOTHypothesis(G.grammar, display='lambda context: %s')
Example 23
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# if_ gets printed specially (see LOTlib.FunctionNode.__str__). COND is a nonterminal name made up
# here for conditional expressions.
grammar.add_rule('EXPR', 'if_', ['COND', 'EXPR', 'EXPR'], 1.0)
grammar.add_rule('COND', 'gt_', ['EXPR', 'EXPR'], 1.0)
grammar.add_rule('COND', 'eq_', ['EXPR', 'EXPR'], 1.0)

# Note that because if_ prints specially in FunctionNode, it is correctly handled (via short circuit evaluation)
# so that we don't eval both branches unnecessarily

if __name__ == "__main__":

    for _ in xrange(1000):

        t = grammar.generate()  # defaults to generating from 'START'; pass another nonterminal to generate from that type

        # We can make this into a function by adding a lambda and a variable name, corresponding to
        # the argument "x" that we built into the grammar. This step is done by default by a LOTHypothesis (see below).

        f = evaluate_expression('lambda x:%s' % t)

        print t  # will call x.__str__ and display as a pythonesque string
        print map(f, range(0, 10))

        # Alternatively, we can just make a LOTHypothesis, which is typically the only place in LOTlib we use trees
        from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
        h = LOTHypothesis(grammar, value=t, args=['x'])
        print map(h, range(0, 10))