def run(data_amount):
    print "Starting chain on %s data points" % data_amount

    data = makeVariableLexiconData(eval(options.word), options.word, the_context,
                                   n=data_amount, s=options.s, alpha=options.alpha,
                                   verbose=True)

    h0 = KinshipLexicon(words=[options.word], alpha=options.alpha)
    h0.set_word(options.word,
                LOTHypothesis(grammar, value=None, display='lambda recurse_, C, X:%s'))

    hyps = TopN(N=options.top_count)

    mhs = MHSampler(h0, data, options.steps,
                    likelihood_temperature=options.llt,
                    prior_temperature=options.prior_temp)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        if samples_yielded % 1000 == 0:
            print h.prior, h.likelihood, h
        hyps.add(h)

    return hyps
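# A hedged driver sketch for run() above: sweep a few data amounts and pool the
# best hypotheses. The specific amounts, the TopN pooling, and the get_all()
# loop are illustrative assumptions, not part of the original script.
if __name__ == "__main__":
    all_hyps = TopN(N=options.top_count)
    for damt in [10, 100, 1000]:
        for h in run(damt).get_all():
            all_hyps.add(h)
    for h in all_hyps.get_all(sorted=True):
        print h.posterior_score, h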
def scheme_generate():
    """
    This generates random scheme code with cons, cdr, and car, and evaluates it on
    some simple list structures. No inference here -- just random sampling from a grammar.
    """
    example_input = [[], [[]], [[], []], [[[]]]]

    ## Generate some and print out unique ones
    seen = set()
    for i in break_ctrlc(xrange(10000)):
        x = grammar.generate('START')

        if x not in seen:
            seen.add(x)

            # make the function node version
            f = LOTHypothesis(grammar, value=x, args=['x'])

            print x.log_probability(), x
            for ei in example_input:
                print "\t", ei, " -> ", f(ei)
def make_hypothesis():
    h = CCGLexicon(alpha=0.9, palpha=0.9, likelihood_temperature=1.0)
    for w in all_words:
        h.set_word(w, LOTHypothesis(grammar, args=['C']))
    return h
def run(damount):
    lexicon, L, hugeData = normalize(damount)
    words = target.all_words()

    def propose(current_state, bag=lexicon, probs=L):
        # Cycle through the words deterministically, resampling one word's
        # hypothesis at a time from the (log-)weighted bag for that word.
        mod = len(current_state.all_words())
        proposal = copy(current_state)
        proposal.value[words[propose.inx % mod]].value = weighted_sample(
            bag[words[propose.inx % mod]],
            probs=probs[words[propose.inx % mod]],
            log=True).value
        propose.inx += 1
        return proposal

    propose.inx = 0
    proposer = lambda x: propose(x)

    h0 = KinshipLexicon(alpha=options.alpha, epsilon=options.epsilon, s=options.s)
    for w in target.all_words():
        h0.set_word(w, LOTHypothesis(my_grammar, display='lambda recurse_, C, X: %s'))

    gs = Gibbs(h0, hugeData, proposer=proposer, steps=options.samples)

    hyps = TopN(N=options.top_count)
    for s, h in enumerate(gs):
        hyps.add(h)
        print h.prior, h.likelihood, h

    return hyps
def make_hypothesis(**kwargs):
    h = EvenOddLexicon(**kwargs)
    for w in WORDS:
        h.set_word(w, LOTHypothesis(grammar, args=['lexicon', 'x']))
    return h
def make_hypothesis(**kwargs):
    h = EvenOddLexicon(**kwargs)
    for w in WORDS:
        h.set_word(w, LOTHypothesis(grammar, display='lambda lexicon, x: %s'))
    return h
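# The two make_hypothesis variants above build the same lexicon: one names the
# wrapping lambda's arguments via args=['lexicon', 'x'], the other via the newer
# display string. A minimal usage sketch, assuming a `data` list is built
# elsewhere in this example (an assumption here), using the same sampler loop
# as the other scripts in this collection:
from LOTlib import break_ctrlc
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler

h0 = make_hypothesis()
for h in break_ctrlc(MHSampler(h0, data, 10000)):
    print h.posterior_score, h.prior, h.likelihood, h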
def run(data_pts):
    print "Start run on ", str(data_pts)

    y = [pt.Y for pt in data_pts]
    filename = "".join(y)

    hyps = TopN(N=options.TOP_COUNT)

    h0 = KinshipLexicon(alpha=options.ALPHA)
    h0.set_word('Word', LOTHypothesis(my_grammar, value=None,
                                      display='lambda recurse_, C, X:%s'))

    mhs = MHSampler(h0, data_pts, options.STEPS, likelihood_temperature=options.llt)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        hyps.add(h)

    with open(options.OUT_PATH + filename + '.pkl', 'w') as f:
        pickle.dump(hyps, f)

    return filename, hyps
def run(hypothesis, data_amount):
    print "Starting chain on %s data points" % data_amount

    data = makeLexiconData(target, four_gen_tree_context, n=data_amount,
                           alpha=options.alpha, verbose=True)

    h0 = KinshipLexicon(alpha=options.alpha)
    for w in target_words:
        h0.set_word(w, LOTHypothesis(grammar=my_grammar,
                                     value=hypothesis.value[w].value,
                                     display='lambda recurse_, C, X: %s'))

    hyps = TopN(N=options.top_count)

    mhs = MHSampler(h0, data, options.steps,
                    likelihood_temperature=options.llt,
                    prior_temperature=options.prior_temp)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        #if samples_yielded % 100 == 0:
        #    print h.likelihood, h.prior, h
        hyps.add(h)

    import pickle
    filename = 'Chains/' + data[0].X + data[0].Y + str(data_amount) + data[0].word + '.pkl'
    print 'Writing ' + filename
    with open(filename, 'w') as f:
        pickle.dump(hyps, f)

    return hyps
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Main running code
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

if __name__ == "__main__":

    from optparse import OptionParser

    from LOTlib import break_ctrlc
    from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler

    parser = OptionParser()
    parser.add_option("--in", dest="IN", type="string", help="Input data file", default=DEFAULT_DATA)
    options, _ = parser.parse_args()

    words, data = load_words_and_data(options.IN)

    L0 = PureLambdaLexicon(likelihood_temperature=1.0)
    for w in words:
        L0.set_word(w, LOTHypothesis(grammar, args=[], maxnodes=15))

    for L in break_ctrlc(MHSampler(L0, data)):
        # print_lexicon_and_data(L, data)  # if you want to see all the output for each data point, use this
        print L.posterior_score, L.prior, L.likelihood
        print L, "\n"
def make_ho(value=None):
    # ALPHA here trades off with the amount of data. Currently assuming no noise,
    # but that's not necessary.
    return LOTHypothesis(grammar, value=value, args=['x', 'y'], ALPHA=0.999)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Standard exports

from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis

def make_ho(value=None):
    # ALPHA here trades off with the amount of data. Currently assuming no noise,
    # but that's not necessary.
    return LOTHypothesis(grammar, value=value, args=['x', 'y'], ALPHA=0.999)

if __name__ == "__main__":

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Run mcmc
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    from LOTlib.Proposals.RegenerationProposal import *

    #mp = MixtureProposal([RegenerationProposal(grammar), InsertDeleteProposal(grammar)])
    mp = RegenerationProposal(grammar)

    h0 = LOTHypothesis(grammar, args=['x', 'y'], ALPHA=0.999, proposal_function=mp)

    from LOTlib.Inference.MetropolisHastings import mh_sample
    for h in mh_sample(h0, data, 4000000, skip=100):
        print h.posterior_score, h.likelihood, h.prior, cleanFunctionNodeString(h)
        print map(lambda d: h(*d.input), data)
        print "\n"
def make_hypothesis():
    return LOTHypothesis(grammar, args=['C'])
    # (tail of add_to_collapsed_trees: resps and tprior are computed above)
    if resps in collapsed_forms:  # update the existing form with the same responses
        collapsed_prob = grammar.log_probability(collapsed_forms[resps])
        collapsed_forms[resps].my_log_probability = logplusexp(collapsed_prob, tprior)

        if tprior > collapsed_forms[resps].display_tree_probability:  # display the most concise form
            collapsed_forms[resps] = t
            collapsed_forms[resps].display_tree_probability = tprior
    else:
        collapsed_forms[resps] = t
        collapsed_forms[resps].display_tree_probability = tprior
        t.my_log_probability = tprior  # FunctionNode uses this value when we call log_probability()
        print ">>", all_tree_count, len(collapsed_forms), t, tprior


############################################
### Now actually enumerate trees

for t in grammar.enumerate(d=DEPTH):
    if 'presup_(False' in str(t):
        continue
    if not check_expansion(t):
        continue

    if t.count_subnodes() <= MAX_NODES:
        add_to_collapsed_trees(t)
        all_tree_count += 1
        print ">", t, grammar.log_probability(t)

## for kinder saving and unsaving:
upq = TopN()
for k in collapsed_forms.values():
    upq.add(LOTHypothesis(grammar, k, display='lambda context: %s'), 0.0)
pickle.dump(upq, open(OUT, 'w'))

print "Total tree count: ", all_tree_count
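# Hedged reload sketch: the TopN pickled above can be read back and inspected
# later. OUT is the path defined earlier in this script; everything else here
# is illustrative, not part of the original.
import pickle
upq = pickle.load(open(OUT))
for h in upq.get_all():
    print h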
    #'9': lambda context: (presup_(cardinalityeq_(context.A, context.B), nonempty_(context.A))),
    #'10': lambda context: (presup_(cardinalitygt_(context.B, context.A), nonempty_(context.A))),

    # 'few': lambda context: presup_(True, cardinalitygt_(3, intersection_(context.A, context.B))),
    # 'many': lambda context: presup_(True, cardinalitygt_(intersection_(context.A, context.B), 3)),
    # 'half': lambda context: presup_(nonempty_(context.A),
    #                                 cardinalityeq_(intersection_(context.A, context.B),
    #                                                setdifference_(context.A, context.B)))
}

target = H.GriceanQuantifierLexicon(make_my_hypothesis, my_weight_function)

for w, f in target_functions.items():
    target.set_word(w, LOTHypothesis(G.grammar, value='SET_IN_TARGET', f=f))

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#~~~ Generate data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

def generate_data(data_size):
    all_words = target.all_words()
    data = []

    for i in break_ctrlc(xrange(data_size)):
        # a context is a set of men, pirates, and everything; functions are applied
        # to it to get truth values
        context = sample_context()
        word = target.sample_utterance(all_words, context)
        # assumed completion (the snippet was cut off here): bundle the sampled
        # word and context as an UtteranceData point
        data.append(UtteranceData(utterance=word, context=context,
                                  possible_utterances=all_words))

    return data
# Or we can make them as hypotheses (functions of S):
#for i in xrange(100):
#    print LOTHypothesis(grammar, args=['S'])

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Or real inference:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DataAndObjects import FunctionData, Obj         # for nicely managing data
from LOTlib.Inference.MetropolisHastings import mh_sample   # for running MCMC

# Make up some data -- here just one set containing {red, red, green} colors
data = [FunctionData(input=[{Obj(color='red'), Obj(color='red'), Obj(color='green')}],
                     output=True)]

# Create an initial hypothesis
h0 = LOTHypothesis(grammar, args=['S'])

# OR if we want to specify and use insert/delete proposals:
#from LOTlib.Proposals import *
#h0 = LOTHypothesis(grammar, proposal_function=MixtureProposal(grammar,
#                   [RegenerationProposal(grammar), InsertDeleteProposal(grammar)]))

if __name__ == "__main__":

    # MCMC! Hypotheses' .prior, .likelihood, and .posterior_score are set in mh_sample.
    for h in mh_sample(h0, data, 4000):             # run the sampler
    #for h in unique(mh_sample(h0, data, 4000)):    # or get unique samples
        print h.likelihood, h.prior, h.posterior_score, h
def make_my_hypothesis():
    return LOTHypothesis(grammar, args=['context'])
def make_hyps():
    return LOTHypothesis(default_grammar, value=None, display='lambda recurse_, C, X:%s')
def updateLexicon(lexicon, grammar=default_grammar, **kwargs):
    h = KinshipLexicon(**kwargs)
    for w in lexicon.all_words():
        hw = lexicon.value[w]
        hw.grammar = grammar
        h.set_word(w, hw)
    return h


if __name__ == "__main__":
    from Model.Givens import english_words, four_gen_tree_context, english
    from Model.Data import makeTreeLexiconData, makeZipfianLexiconData, engFreq
    from Grammar import makeGrammar

    #rgrammar = makeGrammar(['Mira', 'Snow', 'charming', 'rump', 'neal', 'baelfire', 'Emma', 'Regina', 'henry', 'Maryann', 'ego'],
    #                       compositional=True, terms=['X', 'objects', 'all'], nterms=['Tree', 'Set', 'Gender'],
    #                       recursive=True, words=english_words)

    gramm = makeGrammar(four_gen_tree_context.objects,
                        nterms=['Tree', 'Set', 'Gender', 'Generation'])

    h0 = KinshipLexicon(alpha=0.9, epsilon=0.99, s=0.0)
    for w in english_words:
        h0.set_word(w, LOTHypothesis(gramm, display='lambda recurse_, C, X: %s'))

    for _ in xrange(10):
        dat = makeZipfianLexiconData(english, four_gen_tree_context, engFreq, n=10)
        print h0.compute_posterior(dat)
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
from LOTlib.Projects.Quantifier.Model import *

ALPHA = 0.9
SAMPLES = 100000
DATA_SIZE = 1000

if __name__ == "__main__":

    ## sample the target data
    data = generate_data(DATA_SIZE)

    W = 'every'

    # To use a LOTHypothesis, the data needs an "output" field that is true/false for
    # whether the utterance is the target word. LOTHypothesis.compute_likelihood then
    # checks whether we match whether the word was said, ignoring the other words --
    # that's why it's a pseudolikelihood.
    for di in data:
        di.output = (di.utterance == W)
        #print (di.utterance == W)

    FBS = TopN(N=100)

    H = LOTHypothesis(grammar, display='lambda A,B,S: %s', ALPHA=ALPHA)

    # Now just run the sampler with a LOTHypothesis. Unlike the older FiniteBestSet,
    # TopN ranks by the hypothesis' own posterior_score, so we just add().
    for s in MHSampler(H, data, SAMPLES, skip=10):
        #print s.posterior_score, "\t", s.prior, "\t", s.likelihood, "\n", s, "\n\n"
        FBS.add(s)

    for k in reversed(FBS.get_all(sorted=True)):
        print k.posterior_score, k.prior, k.likelihood, k
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Inference.Samplers.MetropolisHastings import mh_sample
from LOTlib.Examples.Quantifier.Model import *

ALPHA = 0.9
SAMPLES = 100000
DATA_SIZE = 1000

if __name__ == "__main__":

    ## sample the target data
    data = generate_data(DATA_SIZE)

    W = 'every'

    # To use a LOTHypothesis, the data needs an "output" field that is true/false for
    # whether the utterance is the target word. LOTHypothesis.compute_likelihood then
    # checks whether we match whether the word was said, ignoring the other words --
    # that's why it's a pseudolikelihood.
    for di in data:
        di.output = (di.word == W)
        #print (di.word == W)

    FBS = FiniteBestSet(max=True, N=100)

    H = LOTHypothesis(grammar, args=['A', 'B', 'S'], ALPHA=ALPHA)

    # Now just run the sampler with a LOTHypothesis
    for s in mh_sample(H, data, SAMPLES, skip=10):
        #print s.lp, "\t", s.prior, "\t", s.likelihood, "\n", s, "\n\n"
        FBS.push(s, s.lp)

    for k in reversed(FBS.get_all(sorted=True)):
        print k.lp, k.prior, k.likelihood, k
def make_hypothesis(data=DEFAULT_DATA, **kwargs):
    # note: load_words_and_data also returns the data, but only the words are
    # used here; callers must load the data separately
    words, data = load_words_and_data(data)

    L0 = PureLambdaLexicon(**kwargs)
    for w in words:
        L0.set_word(w, LOTHypothesis(grammar, args=[], maxnodes=15))

    return L0
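# Hedged usage sketch for make_hypothesis() above, mirroring the __main__ block
# shown earlier in this collection; reloading the data file here is an
# assumption, since the factory discards the data it loads.
from LOTlib import break_ctrlc
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler

_, data = load_words_and_data(DEFAULT_DATA)
h0 = make_hypothesis()
for h in break_ctrlc(MHSampler(h0, data)):
    print h.posterior_score, h.prior, h.likelihood, h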
def make_my_hypothesis():
    return LOTHypothesis(G.grammar, display='lambda context: %s')
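# This factory is what gets handed to GriceanQuantifierLexicon in the
# target-construction snippet earlier. A minimal sketch of that wiring
# (H and my_weight_function come from the Quantifier example's modules;
# the word list is hypothetical):
lex = H.GriceanQuantifierLexicon(make_my_hypothesis, my_weight_function)
for w in ['every', 'some']:
    lex.set_word(w, make_my_hypothesis())  # each word gets a freshly sampled hypothesis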
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# if_ gets printed specially (see LOTlib.FunctionNode.__str__). Here COND is a name
# made up for conditional expressions.
grammar.add_rule('EXPR', 'if_', ['COND', 'EXPR', 'EXPR'], 1.0)
grammar.add_rule('COND', 'gt_', ['EXPR', 'EXPR'], 1.0)
grammar.add_rule('COND', 'eq_', ['EXPR', 'EXPR'], 1.0)

# Note that because if_ prints specially in FunctionNode, it is correctly handled
# (via short-circuit evaluation) so that we don't eval both branches unnecessarily.

if __name__ == "__main__":

    for _ in xrange(1000):
        # Default is to generate from 'START'; pass a different nonterminal to generate from it
        t = grammar.generate()

        # We can make this into a function by adding a lambda and a variable name,
        # corresponding to the argument "x" that we built into the grammar. This step
        # is done by default by a LOTHypothesis (see below).
        f = evaluate_expression('lambda x:%s' % t)

        print t  # will call t.__str__ and display as a pythonesque string
        print map(f, range(0, 10))

        # Alternatively, we can just make a LOTHypothesis, which is typically the only
        # place in LOTlib we use trees
        from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
        h = LOTHypothesis(grammar, value=t, args=['x'])
        print map(h, range(0, 10))