def __init__(self, **kwargs):
    """
    Initialise through LOTHypothesis using the enclosing-scope ``grammar``,
    ``maxnodes=400`` and a display string taking ``from_seq``.

    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    LOTHypothesis.__init__(
        self,
        grammar=grammar,
        maxnodes=400,
        display='lambda from_seq: %s',
        **kwargs
    )
def __init__(self, grammar=None, display="lambda C, lex_, x: %s", **kwargs):
    """
    Initialise through LOTHypothesis with the given grammar and display string.

    :param grammar: passed to ``LOTHypothesis.__init__`` as ``grammar``
    :param display: display template (original note: lexicon, x arg, context)
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    LOTHypothesis.__init__(
        self,
        grammar=grammar,
        display=display,
        **kwargs
    )
def __init__(self, **kwargs):
    """
    Create an empty ``start_counts`` dict, then initialise through LOTHypothesis
    with the enclosing-scope ``grammar``, ``maxnodes=400`` and a zero-argument display.

    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    # Assigned before the parent initialiser runs, as in the original ordering
    self.start_counts = dict()
    LOTHypothesis.__init__(
        self,
        grammar=grammar,
        maxnodes=400,
        display='lambda : %s',
        **kwargs
    )
def __init__(self, constant_sd=1.0, fit_only_once=True, **kwargs):
    """
    Initialise a hypothesis whose display lambda takes ``x`` plus every name
    in ``CONSTANT_NAMES``.

    :param constant_sd: The SD of our constants in the prior
    :param fit_only_once: Do we fit multiple times or just take the first?
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    # Build "lambda x,<name1>,<name2>,...: %s"; the trailing %s is left for the parent to fill
    display = 'lambda x,{0}: %s'.format(','.join(CONSTANT_NAMES))
    LOTHypothesis.__init__(self, grammar, display=display, **kwargs)

    self.constant_sd = constant_sd  # also the prior SD
    self.parameters = self.sample_constants()
    self.fit_only_once = fit_only_once
def __init__(self, **kwargs):
    """
    Initialise through LOTHypothesis and record the optional ``sp`` flag.

    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``; if it
        contains ``sp`` that value is also stored as ``use_size_principle``
        (otherwise ``False`` is stored)
    """
    LOTHypothesis.__init__(
        self,
        grammar=grammar,
        maxnodes=400,
        display="lambda C: %s",
        **kwargs
    )
    self.use_size_principle = kwargs.get('sp', False)
def __init__(self, grammar, value=None, f=None, proposal_function=None, **kwargs):
    """
    Initialise through LOTHypothesis, then set the value explicitly.

    :param grammar: grammar passed to the parent and used to generate a default value
    :param value: value to install; when ``None`` one is generated from ``'WORD'``
    :param f: passed as the second argument of ``set_value``
    :param proposal_function: forwarded to ``LOTHypothesis.__init__``
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    LOTHypothesis.__init__(self, grammar, proposal_function=proposal_function, **kwargs)

    # Only generate from the grammar when the caller supplied no value
    initial_value = grammar.generate('WORD') if value is None else value
    self.set_value(initial_value, f)
def __init__(self, constant_sd=1.0, fit_only_once=True, **kwargs):
    """
    Initialise a hypothesis whose display lambda takes ``x`` followed by the
    names listed in ``CONSTANT_NAMES``.

    :param constant_sd: The SD of our constants in the prior
    :param fit_only_once: Do we fit multiple times or just take the first?
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    constant_args = ','.join(CONSTANT_NAMES)
    # Result is "lambda x,<names>: %s"; "%%s" yields the literal %s placeholder
    display_template = 'lambda x,%s: %%s' % constant_args
    LOTHypothesis.__init__(self, grammar, display=display_template, **kwargs)

    self.constant_sd = constant_sd  # also the prior SD
    self.parameters = self.sample_constants()
    self.fit_only_once = fit_only_once
def __call__(self, *vals):
    """
    Must overwrite call in order to include the constants.

    :param vals: positional arguments supplied by the caller
    :return: result of ``LOTHypothesis.__call__`` on ``vals`` followed by ``self.parameters``
    """
    # Caller arguments first, then the stored constants
    all_args = list(vals) + list(self.parameters)
    return LOTHypothesis.__call__(self, *all_args)
def run(data_amount):
    """
    Run one MH chain for ``options.word`` on ``data_amount`` data points.

    :param data_amount: number of data points requested from ``makeVariableLexiconData``
    :return: the ``TopN`` collection to which every sampled hypothesis was added
    """
    print("Starting chain on %s data points" % data_amount)

    # NOTE(review): eval() runs whatever string is in options.word; presumably this is a
    # trusted command-line option naming a target function -- confirm against the caller.
    target_function = eval(options.word)
    data = makeVariableLexiconData(
        target_function,
        options.word,
        the_context,
        n=data_amount,
        s=options.s,
        alpha=options.alpha,
        verbose=True
    )

    h0 = KinshipLexicon(words=[options.word], alpha=options.alpha)
    h0.set_word(
        options.word,
        LOTHypothesis(grammar, value=None, display='lambda recurse_, C, X:%s')
    )

    hyps = TopN(N=options.top_count)
    sampler = MHSampler(
        h0,
        data,
        options.steps,
        likelihood_temperature=options.llt,
        prior_temperature=options.prior_temp
    )

    for step, h in break_ctrlc(enumerate(sampler)):
        # Progress report every 1000 samples
        if step % 1000 == 0:
            print("%s %s %s" % (h.prior, h.likelihood, h))
        hyps.add(h)

    return hyps
def make_hypothesis():
    """
    Build a ``CCGLexicon`` with one fresh ``LOTHypothesis`` per entry of ``all_words``.

    :return: the populated lexicon
    """
    lexicon = CCGLexicon(alpha=0.9, palpha=0.9, likelihood_temperature=1.0)
    for word in all_words:
        word_hypothesis = LOTHypothesis(grammar, args=['C'])
        lexicon.set_word(word, word_hypothesis)
    return lexicon
def __call__(self, *vals):
    """
    Must overwrite call in order to include the constants.

    :param vals: positional arguments supplied by the caller
    :return: result of ``LOTHypothesis.__call__`` on ``vals`` followed by ``self.CONSTANT_VALUES``
    """
    # Caller arguments first, then the stored constant values
    call_args = list(vals) + list(self.CONSTANT_VALUES)
    return LOTHypothesis.__call__(self, *call_args)
def __call__(self, *args):
    """
    Evaluate via the superclass, mapping a division by zero to NaN.

    :param args: forwarded unchanged to ``LOTHypothesis.__call__``
    :return: the superclass result, or ``float("nan")`` on ``ZeroDivisionError``
    """
    try:
        # try to do it from the superclass
        result = LOTHypothesis.__call__(self, *args)
    except ZeroDivisionError:
        # and if we get an error, return nan
        result = float("nan")
    return result
def run(damount):
    """
    Run a ``Gibbs`` sampler over a ``KinshipLexicon`` and collect what it yields.

    :param damount: passed to ``normalize`` to obtain the per-word hypothesis bags,
        their (log) weights and the data
    :return: the ``TopN`` collection to which every yielded hypothesis was added
    """
    lexicon, L, hugeData = normalize(damount)
    words = target.all_words()

    def propose(current_state, bag=lexicon, probs=L):
        # Cycle through the words: the call counter picks which word is resampled this time
        word = words[propose.inx % len(current_state.all_words())]
        # NOTE(review): whether copy() duplicates the per-word hypotheses depends on the
        # lexicon's copy semantics, which are not visible here -- confirm.
        proposal = copy(current_state)
        sampled = weighted_sample(bag[word], probs=probs[word], log=True)
        proposal.value[word].value = sampled.value
        propose.inx += 1
        return proposal

    propose.inx = 0  # call counter kept as a function attribute

    def proposer(x):
        # One-argument wrapper so bag/probs always keep their defaults
        return propose(x)

    h0 = KinshipLexicon(alpha=options.alpha, epsilon=options.epsilon, s=options.s)
    for w in target.all_words():
        h0.set_word(
            w,
            LOTHypothesis(my_grammar, display='lambda recurse_, C, X: %s')
        )

    gs = Gibbs(h0, hugeData, proposer=proposer, steps=options.samples)
    hyps = TopN(N=options.top_count)
    for h in gs:
        hyps.add(h)
        print("%s %s %s" % (h.prior, h.likelihood, h))
    return hyps
def scheme_generate():
    """
    This generates random scheme code with cons, cdr, and car, and evaluates it on
    some simple list structures. No inference here -- just random sampling from a grammar.
    """
    example_input = [[], [[]], [[], []], [[[]]]]

    # Generate some and print out unique ones
    seen = set()
    for _ in break_ctrlc(xrange(10000)):
        x = grammar.generate('START')

        # Skip anything already reported
        if x in seen:
            continue
        seen.add(x)

        # make the function node version
        f = LOTHypothesis(grammar, value=x, args=['x'])

        print("%s %s" % (x.log_probability(), x))
        for ei in example_input:
            # No space after the tab: the Python 2 print statement emits none after "\t"
            print("\t%s  ->  %s" % (ei, f(ei)))
def to_maximize(fit_params):
    """
    Objective for the optimiser: negative of (likelihood + Gaussian log-prior on constants).

    :param fit_params: array-like with a ``tolist()`` method; stored as ``self.parameters``
    :return: the negated sum of the likelihood on ``data`` and the constant prior
    """
    self.parameters = fit_params.tolist()  # set these

    # Zero-mean normal log-density with SD self.constant_sd, summed over the constants
    constant_prior = sum(
        normlogpdf(constant, 0.0, self.constant_sd) for constant in self.parameters
    )

    # And return the original likelihood, which by get_function_responses above uses this
    likelihood = LOTHypothesis.compute_likelihood(self, data)
    return -(likelihood + constant_prior)
def __call__(self, *args, **kwargs):
    """
    Call the superclass and re-key its result with space-joined keys.

    :return: a new dict mapping ``' '.join(key)`` to the original value
    """
    # we have to mod this to insert the spaces since they aren't part of cons above
    raw = LOTHypothesis.__call__(self, *args, **kwargs)
    return {' '.join(key): value for key, value in raw.items()}
def make_hypothesis(**kwargs):
    """
    Build an ``EvenOddLexicon`` holding one fresh ``LOTHypothesis`` per entry of ``WORDS``.

    :param kwargs: forwarded unchanged to ``EvenOddLexicon``
    :return: the populated lexicon
    """
    lexicon = EvenOddLexicon(**kwargs)
    for word in WORDS:
        word_hypothesis = LOTHypothesis(grammar, args=['lexicon', 'x'])
        lexicon.set_word(word, word_hypothesis)
    return lexicon
def __call__(self, *args, **kwargs):
    """
    Call the superclass and return its mapping with each key joined by spaces.

    :return: a new dict keyed by ``" ".join(key)``
    """
    # we have to mod this to insert the spaces since they aren't part of cons above
    superclass_result = LOTHypothesis.__call__(self, *args, **kwargs)
    return dict(
        (" ".join(key), value) for key, value in superclass_result.items()
    )
def make_hypothesis(**kwargs):
    """
    Build an ``EvenOddLexicon`` holding one fresh ``LOTHypothesis`` per entry of ``WORDS``.

    :param kwargs: forwarded unchanged to ``EvenOddLexicon``
    :return: the populated lexicon
    """
    lexicon = EvenOddLexicon(**kwargs)
    for word in WORDS:
        lexicon.set_word(
            word,
            LOTHypothesis(grammar, display='lambda lexicon, x: %s')
        )
    return lexicon
def prior_sample(h0, data, N):
    """
    Just use the grammar and returntype of h0 to sample from the prior.

    NOTE: Only implemented for LOTHypothesis

    :param h0: a ``LOTHypothesis`` whose grammar and value return type are reused
    :param data: passed to ``compute_posterior`` on each sample
    :param N: maximum number of hypotheses to yield
    """
    assert isinstance(h0, LOTHypothesis)

    # extract from the grammar
    sampling_grammar = h0.grammar
    return_type = h0.value.returntype

    for _ in xrange(N):
        # Stop early once the interrupt flag is set
        if LOTlib.SIG_INTERRUPTED:
            break

        sample = LOTHypothesis(sampling_grammar, start=return_type)
        sample.compute_posterior(data)
        yield sample
def __call__(self, *args, **kwargs):
    """
    Call the superclass (which returns a dictionary) and reformat the keys to have spaces.

    NOTE: requires that terminals are each single chars, see assertion above

    :return: a new dict keyed by ``' '.join(key)``
    """
    original = LOTHypothesis.__call__(self, *args, **kwargs)  # now returns a dictionary

    spaced = {}
    for key in original:
        spaced[' '.join(key)] = original[key]
    return spaced
def __call__(self, *args, **kwargs):
    """
    Return the cached value list, computing it on first use.

    :return: list of the elements of the evaluated set that are ``<= self.domain``;
        an empty list when evaluation did not produce a set
    """
    # Serve from the cache when it has already been filled
    if self.value_set is not None:
        return self.value_set

    evaluated = LOTHypothesis.__call__(self)

    # Restrict our concept to being within our domain; also handle 'None' call values
    if isinstance(evaluated, set):
        restricted = [x for x in evaluated if x <= self.domain]
    else:
        restricted = []

    self.value_set = restricted
    return self.value_set
def __call__(self, *args, **kwargs):
    """
    Evaluate the hypothesis and restrict the result to ``1..self.domain``.

    The result is always a list. Previously a non-set evaluation result produced
    ``set()`` while every other path produced a list.

    :return: list of the elements ``x`` of the evaluated set with ``1 <= x <= self.domain``;
        an empty list when evaluation raised ``TooBigException`` or did not return a set
    """
    # Sometimes self.value has too many nodes
    try:
        value_set = LOTHypothesis.__call__(self)
    except TooBigException:
        return []

    # Sometimes self() returns None
    if not isinstance(value_set, set):
        return []

    # Restrict our concept to being within our domain
    return [x for x in value_set if 1 <= x <= self.domain]
def run(data_pts):
    """
    Run one MH chain on ``data_pts`` and pickle the collected hypotheses.

    The output file is named by concatenating the ``Y`` field of every data point
    and is written to ``options.OUT_PATH + filename + '.pkl'``.

    :param data_pts: data points; each must expose a string ``Y`` attribute
    :return: tuple ``(filename, hyps)`` with the file stem and the ``TopN`` collection
    """
    # Joined with a single space, matching what the print statement emitted
    print(" ".join(["Start run on ", str(data_pts)]))

    filename = "".join([pt.Y for pt in data_pts])

    hyps = TopN(N=options.TOP_COUNT)

    h0 = KinshipLexicon(alpha=options.ALPHA)
    h0.set_word(
        'Word',
        LOTHypothesis(my_grammar, value=None, display='lambda recurse_, C, X:%s')
    )

    mhs = MHSampler(h0, data_pts, options.STEPS, likelihood_temperature=options.llt)

    for _, h in break_ctrlc(enumerate(mhs)):
        hyps.add(h)

    # Pickle output is byte data: open in binary mode so it is written unmodified
    # on every platform (text mode translates newlines on Windows).
    with open(options.OUT_PATH + filename + '.pkl', 'wb') as f:
        pickle.dump(hyps, f)

    return filename, hyps
def run(hypothesis, data_amount):
    """
    Run one MH chain started from ``hypothesis`` and pickle the collected hypotheses.

    The result is written to ``'Chains/' + X + Y + str(data_amount) + word + '.pkl'``,
    where ``X``, ``Y`` and ``word`` are read from the first generated data point.

    :param hypothesis: lexicon whose per-word values seed the initial ``KinshipLexicon``
    :param data_amount: number of data points requested from ``makeLexiconData``
    :return: the ``TopN`` collection to which every sampled hypothesis was added
    """
    import pickle

    print("Starting chain on %s data points" % data_amount)
    data = makeLexiconData(
        target,
        four_gen_tree_context,
        n=data_amount,
        alpha=options.alpha,
        verbose=True
    )

    h0 = KinshipLexicon(alpha=options.alpha)
    for w in target_words:
        h0.set_word(
            w,
            LOTHypothesis(
                grammar=my_grammar,
                value=hypothesis.value[w].value,
                display='lambda recurse_, C, X: %s'
            )
        )

    hyps = TopN(N=options.top_count)
    mhs = MHSampler(
        h0,
        data,
        options.steps,
        likelihood_temperature=options.llt,
        prior_temperature=options.prior_temp
    )

    for _, h in break_ctrlc(enumerate(mhs)):
        hyps.add(h)

    # Build the file name once so the message and the file written cannot drift apart
    first = data[0]
    pickle_name = first.X + first.Y + str(data_amount) + first.word + '.pkl'
    print('Writing ' + pickle_name)

    # Pickle output is byte data: open in binary mode so it is written unmodified
    # on every platform (text mode translates newlines on Windows).
    with open('Chains/' + pickle_name, 'wb') as f:
        pickle.dump(hyps, f)

    return hyps
def __init__(self, *args, **kwargs):
    """
    Initialise through LOTHypothesis with a two-argument display, then run the
    next initialiser in the class hierarchy.

    :param args: forwarded only to ``super(CRHypothesis, self).__init__``
    :param kwargs: forwarded to both initialiser calls
    """
    LOTHypothesis.__init__(
        self,
        grammar,
        display='lambda x,y: %s',
        **kwargs
    )
    # NOTE(review): kwargs are applied a second time here; which class this reaches
    # depends on CRHypothesis's bases, which are not visible -- confirm this
    # second initialisation is intended.
    super(CRHypothesis, self).__init__(*args, **kwargs)
def __init__(self, ALPHA=0.9, **kwargs):
    """
    Initialise through LOTHypothesis with the enclosing-scope ``grammar`` and store ``ALPHA``.

    :param ALPHA: stored on the instance as ``self.ALPHA``
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    LOTHypothesis.__init__(
        self,
        grammar,
        **kwargs
    )
    self.ALPHA = ALPHA
def __init__(self, value=None, alpha=0.99, baserate=0.5):
    """
    Initialise through LOTHypothesis with a display lambda taking ``S`` and ``x``.

    :param value: forwarded to ``LOTHypothesis.__init__`` as ``value``
    :param alpha: forwarded to ``LOTHypothesis.__init__`` as ``alpha``
    :param baserate: forwarded to ``LOTHypothesis.__init__`` as ``baserate``
    """
    LOTHypothesis.__init__(
        self,
        grammar,
        value=value,
        display='lambda S, x: %s',
        alpha=alpha,
        baserate=baserate
    )
def __init__(self, value=None, base_facts="", **kwargs):
    """
    Store ``base_facts`` and initialise through LOTHypothesis with ``args=None``.

    :param value: forwarded to ``LOTHypothesis.__init__`` as ``value``
    :param base_facts: stored on the instance before the parent initialiser runs
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    # must be set before initializer
    self.base_facts = base_facts
    LOTHypothesis.__init__(
        self,
        grammar,
        value=value,
        args=None,
        **kwargs
    )
def compile_function(self):
    """
    Reset ``self.value_set`` to ``None`` and delegate to ``LOTHypothesis.compile_function``.

    :return: whatever ``LOTHypothesis.compile_function`` returns
    """
    # Clear the stored value set before compiling
    self.value_set = None
    compiled = LOTHypothesis.compile_function(self)
    return compiled
def to_maximize(fit_params):
    """
    Objective for the optimiser: negative of (likelihood + Gaussian log-prior on constants).

    :param fit_params: array-like with a ``tolist()`` method; stored as ``self.parameters``
    :return: the negated sum of the likelihood on ``data`` and the constant prior
    """
    self.parameters = fit_params.tolist()  # set these

    # Zero-mean normal log-density with SD self.constant_sd, summed over the constants
    constant_prior = sum([normlogpdf(p, 0.0, self.constant_sd) for p in self.parameters])

    # And return the original likelihood, which by get_function_responses above uses this
    total = LOTHypothesis.compute_likelihood(self, data) + constant_prior
    return -total
def __init__(self, grammar=grammar, **kwargs):
    """
    Initialise through LOTHypothesis with ``maxnodes=200`` and record the alphabet size.

    :param grammar: grammar to use; the default is the ``grammar`` object bound when
        this function was defined
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    LOTHypothesis.__init__(
        self,
        grammar,
        display='lambda C : %s',
        maxnodes=200,
        **kwargs
    )
    # self.outlier = -100 # for MultinomialLikelihoodLog
    self.alphabet_size = len(TERMINALS)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Standard exports

from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis


def make_ho(value=None):
    """
    Build a two-argument (``x``, ``y``) ``LOTHypothesis`` over ``grammar``.

    :param value: forwarded to ``LOTHypothesis`` as ``value``
    :return: the new hypothesis
    """
    # alpha here trades off with the amount of data. Currently assuming no noise,
    # but that's not necessary
    return LOTHypothesis(
        grammar,
        value=value,
        args=['x', 'y'],
        ALPHA=0.999
    )


if __name__ == "__main__":

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Run mcmc
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    from LOTlib.Proposals.RegenerationProposal import *

    #mp = MixtureProposal([RegenerationProposal(grammar), InsertDeleteProposal(grammar)] )
    mp = RegenerationProposal(grammar)

    # alpha here trades off with the amount of data. Currently assuming no noise,
    # but that's not necessary
    h0 = LOTHypothesis(
        grammar,
        args=['x', 'y'],
        ALPHA=0.999,
        proposal_function=mp
    )

    from LOTlib.Inference.MetropolisHastings import mh_sample

    for h in mh_sample(h0, data, 4000000, skip=100):
        # Scores and the cleaned-up expression on one line
        print("%s %s %s %s" % (
            h.posterior_score, h.likelihood, h.prior, cleanFunctionNodeString(h)))
        # The hypothesis's output on every datum's input
        print([h(*d.input) for d in data])
        print("\n")
def __init__(self, grammar=None, display="lambda recurse_: %s", **kwargs):
    """
    Initialise through LOTHypothesis with the given grammar and display string.

    :param grammar: forwarded to ``LOTHypothesis.__init__`` as ``grammar``
    :param display: forwarded to ``LOTHypothesis.__init__`` as ``display``
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    LOTHypothesis.__init__(
        self,
        grammar=grammar,
        display=display,
        **kwargs
    )
def __init__(self, grammar=None, **kwargs):
    """
    Initialise through LOTHypothesis with a zero-argument display and set ``outlier``.

    :param grammar: forwarded positionally to ``LOTHypothesis.__init__``
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    LOTHypothesis.__init__(
        self,
        grammar,
        display="lambda : %s",
        **kwargs
    )
    # for MultinomialLikelihood
    self.outlier = -1000
def __init__(self, grammar=None, value=None, domain=100, **kwargs):
    """
    Initialise through LOTHypothesis with an empty argument list and store ``domain``.

    :param grammar: forwarded to ``LOTHypothesis.__init__`` as ``grammar``
    :param value: forwarded to ``LOTHypothesis.__init__`` as ``value``
    :param domain: stored on the instance as ``self.domain``
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    LOTHypothesis.__init__(
        self,
        grammar=grammar,
        value=value,
        args=[],
        **kwargs
    )
    self.domain = domain
def __init__(self, **kwargs):
    """
    Initialise through LOTHypothesis using the enclosing-scope ``grammar``.

    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    LOTHypothesis.__init__(
        self,
        grammar,
        **kwargs
    )
def __call__(self, *args):
    """
    Evaluate via the superclass, mapping an ``EvaluationException`` to ``None``.

    :param args: forwarded unchanged to ``LOTHypothesis.__call__``
    :return: the superclass result, or ``None`` when ``EvaluationException`` is raised
    """
    try:
        outcome = LOTHypothesis.__call__(self, *args)
    except EvaluationException:
        outcome = None
    return outcome
def __init__(self, grammar, alpha=0.9, domain=100, **kwargs):
    """
    Initialise through LOTHypothesis with an empty argument list; store ``alpha`` and ``domain``.

    :param grammar: forwarded positionally to ``LOTHypothesis.__init__``
    :param alpha: stored on the instance as ``self.alpha``
    :param domain: stored on the instance as ``self.domain``
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    LOTHypothesis.__init__(
        self,
        grammar,
        args=[],
        **kwargs
    )
    self.alpha = alpha
    self.domain = domain
def make_ho(value=None):
    """
    Build a two-argument (``x``, ``y``) ``LOTHypothesis`` over ``grammar``.

    :param value: forwarded to ``LOTHypothesis`` as ``value``
    :return: the new hypothesis
    """
    # alpha here trades off with the amount of data. Currently assuming no noise,
    # but that's not necessary
    hypothesis = LOTHypothesis(grammar, value=value, args=['x', 'y'], ALPHA=0.999)
    return hypothesis
def __init__(self, **kwargs):
    """
    Initialise through LOTHypothesis with arguments ``x`` and ``y``.

    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    LOTHypothesis.__init__(
        self,
        grammar,
        args=['x', 'y'],
        **kwargs
    )
def compute_prior(self):
    """
    Add together the LOT prior and the constant prior, here just a gaussian.

    :return: ``LOTHypothesis.compute_prior(self)`` plus the summed zero-mean normal
        log-density (SD ``self.constant_sd``) of every entry in ``self.parameters``
    """
    lot_prior = LOTHypothesis.compute_prior(self)
    constant_prior = sum(
        normlogpdf(value, 0.0, self.constant_sd) for value in self.parameters
    )
    return lot_prior + constant_prior
def __init__(self, grammar, value=None, f=None, proposal_function=None, **kwargs):
    """
    Initialise through LOTHypothesis, then set the value explicitly.

    :param grammar: grammar passed to the parent and used to generate a default value
    :param value: value to install; when ``None`` one is generated from ``'WORD'``
    :param f: passed as the second argument of ``set_value``
    :param proposal_function: forwarded to ``LOTHypothesis.__init__``
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    LOTHypothesis.__init__(
        self,
        grammar,
        proposal_function=proposal_function,
        **kwargs
    )

    # A caller-supplied value is installed as is
    if value is not None:
        self.set_value(value, f)
        return

    # Otherwise sample a fresh value from the grammar
    self.set_value(grammar.generate('WORD'), f)
def __init__(self, *args, **kwargs):
    """
    Initialise through LOTHypothesis with a two-argument display, then run the
    next initialiser in the class hierarchy.

    :param args: forwarded only to ``super(CRHypothesis, self).__init__``
    :param kwargs: forwarded to both initialiser calls
    """
    display_template = 'lambda x,y: %s'
    LOTHypothesis.__init__(self, grammar, display=display_template, **kwargs)

    # NOTE(review): kwargs are applied a second time here; which class this reaches
    # depends on CRHypothesis's bases, which are not visible -- confirm this
    # second initialisation is intended.
    super(CRHypothesis, self).__init__(*args, **kwargs)
#'9': lambda context: (presup_(cardinalityeq_(context.A, context.B), nonempty_(context.A))), #'10': lambda context: (presup_(cardinalitygt_(context.B, context.A), nonempty_(context.A))), # # 'few': lambda context: presup_( # True, cardinalitygt_(3, intersection_(context.A, context.B))), # 'many': lambda context: presup_( # True, cardinalitygt_(intersection_(context.A, context.B), 3)), # 'half': lambda context: presup_( # nonempty_(context.A), cardinalityeq_(intersection_(context.A, context.B), # setdifference_(context.A, context.B))) } target = H.GriceanQuantifierLexicon(make_my_hypothesis, my_weight_function) for w, f in target_functions.items(): target.set_word(w, LOTHypothesis(G.grammar, value='SET_IN_TARGET', f=f)) #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~# #~~~ Generate data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~# #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~# def generate_data(data_size): all_words = target.all_words() data = [] for i in break_ctrlc(xrange(data_size)): # a context is a set of men, pirates, and everything. functions are applied to this to get truth values context = sample_context() word = target.sample_utterance(all_words, context) data.append(
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Inference.Samplers.MetropolisHastings import mh_sample
from LOTlib.Examples.Quantifier.Model import *

ALPHA = 0.9
SAMPLES = 100000
DATA_SIZE = 1000

if __name__ == "__main__":

    ## sample the target data
    data = generate_data(DATA_SIZE)

    W = 'every'

    # Now to use it as a LOTHypothesis, we need data to have an "output" field which is
    # true/false for whether its the target word. This is then used by
    # LOTHypothesis.compute_likelihood to see if we match or not with whether a word was
    # said (ignoring the other words -- that's why its a pseudolikelihood)
    for datum in data:
        datum.output = (datum.word == W)
        #print (di.word == W)

    FBS = FiniteBestSet(max=True, N=100)

    H = LOTHypothesis(
        grammar,
        args=['A', 'B', 'S'],
        ALPHA=ALPHA
    )

    # Now just run the sampler with a LOTHypothesis
    for sample in mh_sample(H, data, SAMPLES, skip=10):
        #print s.lp, "\t", s.prior, "\t", s.likelihood, "\n", s, "\n\n"
        FBS.push(sample, sample.lp)

    # Print the retained hypotheses in the reverse of get_all's sorted order
    for kept in reversed(FBS.get_all(sorted=True)):
        print("%s %s %s %s" % (kept.lp, kept.prior, kept.likelihood, kept))
def __init__(self, grammar=grammar, **kwargs):
    """
    Initialise through LOTHypothesis with the single argument ``x``.

    :param grammar: grammar to use; the default is the ``grammar`` object bound when
        this function was defined
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    LOTHypothesis.__init__(
        self,
        grammar=grammar,
        args=['x'],
        **kwargs
    )
def __init__(self, grammar, domain=100, noise=0.9, args=('n',), **kwargs):
    """
    Initialise through LOTHypothesis and store ``domain`` and ``noise``.

    The default for ``args`` is an immutable tuple and a fresh list is handed to the
    parent on every call, so no list object is shared between instances (the former
    ``args=['n']`` default was one list shared by every call that used it).

    :param grammar: forwarded positionally to ``LOTHypothesis.__init__``
    :param domain: stored on the instance as ``self.domain``
    :param noise: stored on the instance as ``self.noise``
    :param args: argument names for the hypothesis; copied into a new list before
        being forwarded. An explicit ``None`` is forwarded as ``None``.
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    arg_names = list(args) if args is not None else None
    LOTHypothesis.__init__(self, grammar, args=arg_names, **kwargs)
    self.domain = domain
    self.noise = noise
def make_my_hypothesis():
    """
    Build a ``LOTHypothesis`` over ``G.grammar`` whose display lambda takes ``context``.

    :return: the new hypothesis
    """
    hypothesis = LOTHypothesis(
        G.grammar,
        display='lambda context: %s'
    )
    return hypothesis
def __init__(self, grammar=grammar, **kwargs):
    """
    Initialise through LOTHypothesis with the single argument ``x``.

    :param grammar: grammar to use; the default is the ``grammar`` object bound when
        this function was defined
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    argument_names = ["x"]
    LOTHypothesis.__init__(self, grammar=grammar, args=argument_names, **kwargs)
def __init__(self, grammar, value=None, alpha=0.9, domain=100, **kwargs):
    """
    Initialise through LOTHypothesis with an empty argument list, store ``alpha`` and
    ``domain``, and set ``value_set`` to ``None``.

    :param grammar: forwarded positionally to ``LOTHypothesis.__init__``
    :param value: forwarded to ``LOTHypothesis.__init__`` as ``value``
    :param alpha: stored on the instance as ``self.alpha``
    :param domain: stored on the instance as ``self.domain``
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    LOTHypothesis.__init__(
        self,
        grammar,
        value=value,
        args=[],
        **kwargs
    )
    self.alpha = alpha
    self.domain = domain
    self.value_set = None
def __call__(self):
    """
    Evaluate the hypothesis with no arguments via ``LOTHypothesis.__call__``.

    :return: the superclass result, unmodified
    """
    #if len(out) != MAX: #want to be able to generalize to N-lengthed sequences
    #sout = ""
    return LOTHypothesis.__call__(self)
def __init__(self, grammar=None, **kwargs):
    """
    Initialise through LOTHypothesis with a zero-argument display string.

    :param grammar: forwarded positionally to ``LOTHypothesis.__init__``
    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    LOTHypothesis.__init__(
        self,
        grammar,
        display='lambda : %s',
        **kwargs
    )
def __init__(self, **kwargs):
    """
    Initialise through LOTHypothesis with a display lambda taking ``x`` and ``y``.

    :param kwargs: forwarded unchanged to ``LOTHypothesis.__init__``
    """
    LOTHypothesis.__init__(
        self,
        grammar,
        display="lambda x,y: %s",
        **kwargs
    )