def canIrecurse(self, data, trueset):
    """Decide whether recursive calls between words are currently allowed.

    For every word, the observed data for that word is compared with the
    items proposed for it in ``trueset`` to obtain precision, recall and
    F1.  Recursion is refused as soon as a word whose ``recurse_`` entry in
    the summed ``'SET'`` counts is at least 1 has an F1 at or below
    ``self.alpha * 2/3``.

    Args:
        data: iterable of datums exposing ``word``, ``X`` and ``Y``.
        trueset: iterable of items whose element 0 is the word; items are
            matched against the ``(word, X, Y)`` tuples built from ``data``.

    Returns:
        bool: ``False`` when some recursed-on word fails the F1 threshold,
        ``True`` otherwise.  Also ``True`` when there are no hypotheses,
        when the first hypothesis has no ``grammar`` attribute, or when no
        ``recurse_`` rule is indexed.
    """
    d = [(datum.word, datum.X, datum.Y) for datum in data]
    hyps = [self.value[w] for w in self.all_words()]

    try:
        grammar = hyps[0].grammar
    except (IndexError, AttributeError):
        # Because if it doesn't have a grammar it's a force function
        # (IndexError covers a lexicon with no words at all).
        return True

    counts, inx, _ = create_counts(grammar, hyps)
    relinx = [(k[2], inx[k]) for k in inx.keys() if k[1] == 'recurse_']
    if len(relinx) == 0:
        return True
    counts = np.sum(counts['SET'], axis=0)

    for w in self.all_words():
        wd = [dp for dp in d if dp[0] == w]         # Word Data
        pw = [dp for dp in trueset if dp[0] == w]   # Proposed Word Data
        pId = [dp for dp in wd if dp in pw]         # Proposed Word Data Observed

        # The 1e-6 terms guard against division by zero for unseen words.
        precision = float(len(set(pId))) / float(len(pw) + 1e-6)
        recall = float(len(pId)) / float(len(wd) + 1e-6)
        f1 = (2. * precision * recall) / (precision + recall + 1e-6)

        # ``i`` is a list of column indices, so ``counts[i]`` is an array;
        # reduce it explicitly instead of relying on array truthiness.
        i = [ri[1] for ri in relinx if ri[0] == q(w)]
        if np.any(counts[i] >= 1) and f1 <= self.alpha * 2. / 3.:
            return False

    return True
def canIrecurse(self, data, trueset):
    """Check whether words that are recursed on are learned well enough.

    Precision, recall and F1 are computed per word between the observed
    ``(word, X, Y)`` tuples and the matching items of ``trueset``.  The
    method returns ``False`` for the first word whose ``recurse_`` entry in
    the summed ``'SET'`` counts is at least 1 while its F1 is at or below
    ``self.alpha * 2/3``.

    Args:
        data: iterable of datums exposing ``word``, ``X`` and ``Y``.
        trueset: iterable of items whose element 0 is the word; items are
            matched against the tuples built from ``data``.

    Returns:
        bool: ``True`` if no word blocks recursion (or if there are no
        hypotheses / the first hypothesis has no ``grammar``), else
        ``False``.
    """
    d = [(datum.word, datum.X, datum.Y) for datum in data]
    hyps = [self.value[w] for w in self.all_words()]

    try:
        grammar = hyps[0].grammar
    except (IndexError, AttributeError):
        # Because if it doesn't have a grammar it's a force function
        # (IndexError covers an empty word list).
        return True

    counts, inx, _ = create_counts(grammar, hyps)
    counts = np.sum(counts['SET'], axis=0)
    relinx = [(k[2], inx[k]) for k in inx.keys() if k[1] == 'recurse_']

    for w in self.all_words():
        wd = [dp for dp in d if dp[0] == w]         # Word Data
        pw = [dp for dp in trueset if dp[0] == w]   # Proposed Word Data
        pId = [dp for dp in wd if dp in pw]         # Proposed Word Data Observed

        # Small constants keep the ratios defined when a list is empty.
        precision = float(len(set(pId))) / float(len(pw) + 1e-6)
        recall = float(len(pId)) / float(len(wd) + 1e-6)
        f1 = (2. * precision * recall) / (precision + recall + 1e-6)

        # ``i`` may hold zero, one or several indices; np.any gives a
        # well-defined boolean in every case.
        i = [ri[1] for ri in relinx if ri[0] == q(w)]
        if np.any(counts[i] >= 1) and f1 <= self.alpha * 2. / 3.:
            return False

    return True
def AnBnCnGrammar():
    """Return a new Grammar of list operations over the atoms 'a', 'b', 'c'.

    ``flatten2str`` is registered as a primitive first; the START rule
    wraps a LIST in ``flatten2str(..., sep="")``.
    """
    register_primitive(flatten2str)

    g = Grammar()

    # (nonterminal, name, arguments, weight), added in this exact order
    structural_rules = (
        ('START', 'flatten2str', ['LIST', 'sep=\"\"'], 1.0),
        ('LIST', 'if_', ['BOOL', 'LIST', 'LIST'], 0.09),
        ('BOOL', 'empty_', ['LIST'], 0.56),
        ('BOOL', 'flip_', [''], 0.43),
        ('LIST', 'cons_', ['ATOM', 'LIST'], 0.203),
        ('LIST', 'cdr_', ['LIST'], 0.15),
        ('LIST', 'car_', ['LIST'], 0.15),
        ('LIST', '\'\'', None, 0.23),
    )
    for nonterminal, name, arguments, weight in structural_rules:
        g.add_rule(nonterminal, name, arguments, weight)

    # Terminal atoms, each quoted with q()
    for symbol in ('a', 'b', 'c'):
        g.add_rule('ATOM', q(symbol), None, .33)

    return g
def DyckGrammar():
    """Return a new Grammar of list operations over the atoms '(' and ')'.

    ``flatten2str`` is registered as a primitive first.  Besides the list
    operators the grammar contains a zero-argument ``recurse_`` rule.
    Terminals ('[]' and both atoms) get weight ``TERMINAL_WEIGHT``.
    """
    register_primitive(flatten2str)
    TERMINAL_WEIGHT = 2.

    g = Grammar()

    # (nonterminal, name, arguments, weight), added in this exact order
    structural_rules = (
        ('START', 'flatten2str', ['LIST', 'sep=\"\"'], 1.0),
        ('BOOL', 'empty_', ['LIST'], 1.),
        ('BOOL', 'flip_', [''], 1.0),
        ('LIST', 'if_', ['BOOL', 'LIST', 'LIST'], 1.),
        ('LIST', 'cons_', ['ATOM', 'LIST'], 1.),
        ('LIST', 'cons_', ['LIST', 'LIST'], 1.),
        ('LIST', 'cdr_', ['LIST'], 1.),
        ('LIST', 'car_', ['LIST'], 1.),
        ('LIST', 'recurse_', [], 1.),
        ('LIST', '[]', None, TERMINAL_WEIGHT),
    )
    for nonterminal, name, arguments, weight in structural_rules:
        g.add_rule(nonterminal, name, arguments, weight)

    # The two bracket atoms, quoted with q()
    for bracket in ('(', ')'):
        g.add_rule('ATOM', q(bracket), None, TERMINAL_WEIGHT)

    return g
def make_hypothesis(s, **kwargs):
    """Build a SimpleEnglishHypothesis on one of the two module grammars.

    ``eng_grammar`` is used when ``s == 'SimpleEnglish'``, ``a_grammar``
    otherwise.  An optional ``terminals`` keyword (removed from ``kwargs``
    before they are forwarded) lists extra atoms to add as ATOM rules.

    NOTE: the grammar only has atom a; you need to add other atoms yourself.
    NOTE: the atoms are added to the selected module-level grammar object
    itself, not to a copy.
    """
    extra_atoms = kwargs.pop('terminals', None)

    if s == 'SimpleEnglish':
        chosen = eng_grammar
    else:
        chosen = a_grammar

    if extra_atoms is not None:
        for atom in extra_atoms:
            chosen.add_rule('ATOM', q(atom), None, 2)

    return SimpleEnglishHypothesis(grammar=chosen, **kwargs)
def run_one(iteration, probs=None):
    """Run one multi-chain evaluation with a given proposal mixture.

    A MixtureProposal over regeneration, insert/delete and inverse-inline
    proposals (weighted by ``probs``) is installed on every initial
    hypothesis, and the resulting sampler is passed to evaluate_sampler.

    Args:
        iteration: run index, written into the output prefix.
        probs: mixture weights handed to MixtureProposal; also written
            (quoted) into the output prefix.
    """
    components = [
        RegenerationProposal(grammar),
        InsertDeleteProposal(grammar),
        InverseInlineProposal(grammar),
    ]
    mixture = MixtureProposal(components, probs=probs)

    def wrapped_make_h0():
        # Every chain's start hypothesis gets the mixture proposal.
        start = make_h0()
        start.set_proposal_function(mixture)
        return start

    sampler = MultipleChainMCMC(wrapped_make_h0, data,
                                steps=options.SAMPLES,
                                nchains=options.CHAINS)

    # Tab-separated columns identifying this run in the output files
    columns = [options.MODEL, iteration, q(str(probs))]
    line_prefix = "\t".join(map(str, columns))

    evaluate_sampler(sampler,
                     prefix=line_prefix,
                     out_hypotheses=out_hypotheses,
                     out_aggregate=out_aggregate)
def run(options, ndata):
    """Run incremental-lexicon MCMC for one data amount.

    The language named by ``options.LANG`` is sampled once with
    ``LARGE_SAMPLE`` and its output counts are rescaled to ``ndata``.  The
    lexicon is then grown one word at a time (``options.N`` words); after
    each addition an MHSampler runs for ``options.STEPS`` steps and the
    next round starts from a deep copy of the last sample.

    Args:
        options: parsed options providing LANG, TOP_COUNT, N and STEPS.
        ndata: amount of data the counts are renormalized to.

    Returns:
        tuple: ``(ndata, tn)`` where ``tn`` is the TopN of sampled
        hypotheses, or ``(0, set())`` if LOTlib.SIG_INTERRUPTED is already
        set on entry.
    """
    if LOTlib.SIG_INTERRUPTED:
        return 0, set()

    # NOTE(review): eval of an option string -- only safe while
    # options.LANG comes from a trusted command line.
    language = eval(options.LANG+"()")

    data = language.sample_data(LARGE_SAMPLE)
    assert len(data) == 1

    # renormalize the counts
    for k in data[0].output.keys():
        data[0].output[k] = float(data[0].output[k] * ndata) / LARGE_SAMPLE

    # Now add the rules to the grammar
    grammar = deepcopy(base_grammar)
    for t in language.terminals():  # add in the specifics
        grammar.add_rule('ATOM', q(t), None, 2)

    h0 = IncrementalLexiconHypothesis(grammar=grammar)
    tn = TopN(N=options.TOP_COUNT)

    for outer in xrange(options.N):  # how many do we add?
        # add to the grammar
        grammar.add_rule('SELFF', '%s' % (outer), None, 1.0)

        # Add one more to the number of words here
        h0.set_word(outer, h0.make_hypothesis(grammar=grammar))
        h0.N = outer+1
        assert len(h0.value.keys()) == h0.N == outer+1

        # now run mcmc; remember the last sample explicitly so an empty
        # run (interrupt, zero steps) neither raises NameError nor reuses
        # a sample from the previous round
        last = h0
        for h in break_ctrlc(MHSampler(h0, data, steps=options.STEPS)):
            tn.add(h)
            last = h

        # and start from where we ended
        h0 = deepcopy(last)  # must deepcopy

    return ndata, tn
def run(options, ndata):
    """Run incremental-lexicon MCMC for one data amount, printing samples.

    The language named by ``options.LANG`` is sampled once with
    ``LARGE_SAMPLE`` and its output counts are rescaled to ``ndata``.  The
    lexicon is grown one word at a time (``options.N`` words); after each
    addition an MHSampler runs for ``options.STEPS`` steps, every sample is
    printed, and the next round starts from a deep copy of the last sample.

    Args:
        options: parsed options providing LANG, TOP_COUNT, N and STEPS.
        ndata: amount of data the counts are renormalized to.

    Returns:
        tuple: ``(ndata, tn)`` where ``tn`` is the TopN of sampled
        hypotheses, or ``(0, set())`` if LOTlib.SIG_INTERRUPTED is already
        set on entry.
    """
    if LOTlib.SIG_INTERRUPTED:
        return 0, set()

    # NOTE(review): eval of an option string -- only safe while
    # options.LANG comes from a trusted command line.
    language = eval(options.LANG+"()")

    data = language.sample_data(LARGE_SAMPLE)
    assert len(data) == 1

    # renormalize the counts
    for k in data[0].output.keys():
        data[0].output[k] = float(data[0].output[k] * ndata) / LARGE_SAMPLE

    # Now add the rules to the grammar
    grammar = deepcopy(base_grammar)
    for t in language.terminals():  # add in the specifics
        grammar.add_rule('ATOM', q(t), None, 2)

    h0 = IncrementalLexiconHypothesis(grammar=grammar)
    tn = TopN(N=options.TOP_COUNT)

    for outer in xrange(options.N):  # how many do we add?
        # add to the grammar
        grammar.add_rule('SELFF', '%s' % (outer), None, 1.0)

        # Add one more to the number of words here
        h0.set_word(outer, h0.make_hypothesis(grammar=grammar))
        h0.N = outer+1
        assert len(h0.value.keys()) == h0.N == outer+1

        # now run mcmc; track the last sample explicitly so an empty run
        # neither raises NameError nor reuses a sample from the last round
        last = h0
        for h in break_ctrlc(MHSampler(h0, data, steps=options.STEPS)):
            tn.add(h)
            # One pre-formatted string prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s" % (h.posterior_score, h))
            print(getattr(h, 'll_counts', None))
            last = h

        # and start from where we ended
        h0 = deepcopy(last)  # must deepcopy

    return ndata, tn
def run(options, ndata):
    """Sample AugustHypothesis hypotheses for one data amount and log them.

    The language named by ``options.LANG`` is sampled once with
    ``LARGE_SAMPLE`` and its output counts are rescaled to ``ndata``.
    Every ``options.SKIP``-th sample with a posterior above ``-Infinity``
    is appended to a text file whose name is built from the module-level
    ``prefix``, ``rank`` and ``suffix``.

    Args:
        options: parsed options providing LANG, TOP_COUNT, STEPS and SKIP.
        ndata: amount of data the counts are renormalized to.

    Returns:
        The TopN of all sampled hypotheses, or an empty ``set`` if
        LOTlib.SIG_INTERRUPTED is already set on entry.
    """
    if LOTlib.SIG_INTERRUPTED:
        return set()

    # options.LANG names the language class to instantiate
    language = eval(options.LANG + "()")

    data = language.sample_data(LARGE_SAMPLE)
    assert len(data) == 1

    # renormalize the counts
    outputs = data[0].output
    for key in outputs.keys():
        outputs[key] = float(outputs[key] * ndata) / LARGE_SAMPLE

    # Language-specific terminals go on top of a private copy of the base grammar
    grammar = deepcopy(base_grammar)
    for terminal in language.terminals():
        grammar.add_rule("ATOM", q(terminal), None, 2)

    h0 = AugustHypothesis(grammar=grammar, display="lambda recurse_ :%s")
    # Pre-formatted so the text is the same under Python 2 and Python 3.
    print("%s %s" % ("# Starting on ", h0))

    tn = TopN(N=options.TOP_COUNT)

    out_path = (prefix + "hypotheses_" + options.LANG + "_" + str(rank)
                + "_" + str(ndata) + "_" + suffix + ".txt")
    with open(out_path, "a") as ofile:
        for i, h in enumerate(break_ctrlc(MHSampler(h0, data, steps=options.STEPS))):
            tn.add(h)

            # Only every SKIP-th sample with a usable posterior is logged.
            if i % options.SKIP != 0 or not (h.posterior_score > -Infinity):
                continue

            ofile.write("%s %s %s %s %s %s\n" % (
                i, ndata, h.posterior_score, h.prior, h.likelihood,
                h.likelihood / ndata))
            ofile.write("%s\n" % (getattr(h, "ll_counts", None),))
            ofile.write("%s %s\n" % (h, "\0"))  # must add \0 when not Lexicon

    return tn
def run():
    """Sample with a decayed-likelihood hypothesis and print each sample.

    A version that cares more about recent data, showing how to use
    Hypotheses.DecayedLikelihoodHypothesis.
    """
    G = grammar

    # Create an initial hypothesis.
    # This is where we set a number of relevant variables -- whether to
    # use RR, alpha, etc.
    h0 = MyHypothesis(G, ll_decay=1.0, rrAlpha=1.0, args=['x'])

    # Run the vanilla sampler (without steps, it would run infinitely).
    sampler = MHSampler(h0, data, 10000, skip=100)
    for h in break_ctrlc(sampler):
        # posterior (posterior_score), prior, likelihood, quoted hypothesis;
        # pre-formatted so the text is the same under Python 2 and Python 3
        print("%s %s %s %s" % (h.posterior_score, h.prior, h.likelihood, q(h)))
def run(options, ndata):
    """Run MCMC over AugustHypothesis for ``ndata`` and append samples to a file.

    Counts from a single ``LARGE_SAMPLE`` draw of the language named by
    ``options.LANG`` are rescaled to ``ndata``.  Samples whose index is a
    multiple of ``options.SKIP`` and whose posterior exceeds ``-Infinity``
    are written to a file named from the module-level ``prefix``, ``rank``
    and ``suffix``.

    Args:
        options: parsed options providing LANG, TOP_COUNT, STEPS and SKIP.
        ndata: amount of data the counts are renormalized to.

    Returns:
        The TopN of all sampled hypotheses, or an empty ``set`` if
        LOTlib.SIG_INTERRUPTED is already set on entry.
    """
    if LOTlib.SIG_INTERRUPTED:
        return set()

    # options.LANG names the language class to instantiate
    language = eval(options.LANG+"()")

    data = language.sample_data(LARGE_SAMPLE)
    assert len(data) == 1

    # renormalize the counts
    observed = data[0].output
    for string in observed.keys():
        observed[string] = float(observed[string] * ndata) / LARGE_SAMPLE

    # Now add the rules to the grammar (add in the specifics)
    grammar = deepcopy(base_grammar)
    for symbol in language.terminals():
        grammar.add_rule('ATOM', q(symbol), None, 2)

    h0 = AugustHypothesis(grammar=grammar, display="lambda recurse_ :%s")
    # Pre-formatted so the text is the same under Python 2 and Python 3.
    print("%s %s" % ("# Starting on ", h0))

    tn = TopN(N=options.TOP_COUNT)

    filename = prefix+'hypotheses_'+options.LANG+'_'+str(rank)+'_'+str(ndata)+'_'+suffix+".txt"
    with open(filename, 'a') as ofile:
        samples = break_ctrlc(MHSampler(h0, data, steps=options.STEPS))
        for i, h in enumerate(samples):
            tn.add(h)

            wanted = (i % options.SKIP == 0) and (h.posterior_score > -Infinity)
            if wanted:
                summary = (i, ndata, h.posterior_score, h.prior,
                           h.likelihood, h.likelihood/ndata)
                ofile.write("%s %s %s %s %s %s\n" % summary)
                ofile.write("%s\n" % (getattr(h, 'll_counts', None),))
                ofile.write("%s %s\n" % (h, '\0'))  # must add \0 when not Lexicon

    return tn
def run_mh():
    """Run the MH sampler on the two-concept rational-rules model.

    Adds a TWO_CONCEPT_START rule to the module-level ``grammar`` and then
    prints posterior score, prior, likelihood and the quoted hypothesis for
    each sample.
    """
    # Somewhat weirdly, we make an upper node above "START" for the two
    # concepts and require it to check if concept (an argument below) is 'A'.
    branch_args = ['(concept==\'A\')', 'START', 'START']
    grammar.add_rule('TWO_CONCEPT_START', 'if_', branch_args, 1.0)

    # Create an initial hypothesis.
    # This is where we set a number of relevant variables -- whether to use
    # RR, alpha, etc.  The args are "concept" (used in TWO_CONCEPT_START
    # above) and "x".
    h0 = RationalRulesLOTHypothesis(grammar=grammar,
                                    rrAlpha=1.0,
                                    ALPHA=0.9,
                                    start='TWO_CONCEPT_START',
                                    args=['concept', 'x'])

    # Run the vanilla sampler (without steps, it would run infinitely).
    sampler = MHSampler(h0, data, 10000, skip=100)
    for h in break_ctrlc(sampler):
        # pre-formatted so the text is the same under Python 2 and Python 3
        print("%s %s %s %s" % (h.posterior_score, h.prior, h.likelihood, q(h)))
def run(options, ndata):
    """Sample AugustHypothesis hypotheses for ``ndata`` and print each one.

    Counts from a single ``LARGE_SAMPLE`` draw of the language named by
    ``options.LANG`` are rescaled to ``ndata`` and printed.  Every sample's
    posterior score, hypothesis and ``ll_counts`` attribute (``None`` if
    absent) are printed to stdout.

    NOTE: nothing is added to the returned TopN in this version; the
    file-writing variant that did so is disabled.

    Args:
        options: parsed options providing LANG, TOP_COUNT and STEPS.
        ndata: amount of data the counts are renormalized to.

    Returns:
        The (unfilled) TopN, or an empty ``set`` if LOTlib.SIG_INTERRUPTED
        is already set on entry.
    """
    if LOTlib.SIG_INTERRUPTED:
        return set()

    # options.LANG names the language class to instantiate
    language = eval(options.LANG + "()")

    data = language.sample_data(LARGE_SAMPLE)
    assert len(data) == 1

    # renormalize the counts
    outputs = data[0].output
    for key in outputs.keys():
        outputs[key] = float(outputs[key] * ndata) / LARGE_SAMPLE

    print(data)

    # Now add the rules to the grammar (add in the specifics)
    grammar = deepcopy(base_grammar)
    for terminal in language.terminals():
        grammar.add_rule('ATOM', q(terminal), None, 2)

    h0 = AugustHypothesis(grammar=grammar, display="lambda recurse_ :%s")
    tn = TopN(N=options.TOP_COUNT)

    sampler = MHSampler(h0, data, steps=options.STEPS)
    for i, h in enumerate(break_ctrlc(sampler)):
        # pre-formatted so the text is the same under Python 2 and Python 3
        print("%s %s" % (h.posterior_score, h))
        print(getattr(h, 'll_counts', None))

    return tn
def run(options, ndata):
    """Print an MCMC trace over AugustHypothesis for one data amount.

    The language named by ``options.LANG`` is sampled once with
    ``LARGE_SAMPLE``; its output counts are rescaled to ``ndata`` and the
    data are printed.  For each sample the posterior score, the hypothesis
    and its ``ll_counts`` attribute (``None`` if absent) go to stdout.

    NOTE: samples are not added to the returned TopN here; the variant
    that wrote them to a hypotheses file is disabled.

    Args:
        options: parsed options providing LANG, TOP_COUNT and STEPS.
        ndata: amount of data the counts are renormalized to.

    Returns:
        The (unfilled) TopN, or an empty ``set`` if LOTlib.SIG_INTERRUPTED
        is already set on entry.
    """
    if LOTlib.SIG_INTERRUPTED:
        return set()

    # options.LANG names the language class to instantiate
    language = eval(options.LANG + "()")

    data = language.sample_data(LARGE_SAMPLE)
    assert len(data) == 1

    # renormalize the counts
    counts = data[0].output
    for string in counts.keys():
        counts[string] = float(counts[string] * ndata) / LARGE_SAMPLE

    print(data)

    # Base grammar copy plus this language's terminals
    grammar = deepcopy(base_grammar)
    for symbol in language.terminals():
        grammar.add_rule("ATOM", q(symbol), None, 2)

    h0 = AugustHypothesis(grammar=grammar, display="lambda recurse_ :%s")
    tn = TopN(N=options.TOP_COUNT)

    for i, h in enumerate(break_ctrlc(MHSampler(h0, data, steps=options.STEPS))):
        ll_counts = getattr(h, "ll_counts", None)
        # pre-formatted so the text is the same under Python 2 and Python 3
        print("%s %s" % (h.posterior_score, h))
        print(ll_counts)

    return tn
# flatten2str lives at the top: it takes a cons/cdr/car structure and
# projects it to a string.
_add = grammar.add_rule

_add('START', 'flatten2str', ['EXPR'], 1.0)

# Boolean connectives, conditional, equality and a random coin flip
_add('BOOL', 'and_', ['BOOL', 'BOOL'], 1.)
_add('BOOL', 'or_', ['BOOL', 'BOOL'], 1.)
_add('BOOL', 'not_', ['BOOL'], 1.)
_add('EXPR', 'if_', ['BOOL', 'EXPR', 'EXPR'], 1.)
_add('BOOL', 'equal_', ['EXPR', 'EXPR'], 1.)
_add('BOOL', 'flip_', [''], TERMINAL_WEIGHT)

# List-building operators
_add('EXPR', 'cons_', ['EXPR', 'EXPR'], 1.)
_add('EXPR', 'cdr_', ['EXPR'], 1.)
_add('EXPR', 'car_', ['EXPR'], 1.)

# Terminals: the empty list and the quoted symbols
_add('EXPR', '[]', None, TERMINAL_WEIGHT)
for _symbol in ('D', 'A', 'N', 'V', 'who'):
    _add('EXPR', q(_symbol), None, TERMINAL_WEIGHT)

# Allow lambda abstraction
_add('EXPR', 'apply_', ['LAMBDAARG', 'LAMBDATHUNK'], 1)
_add('LAMBDAARG', 'lambda', ['EXPR'], 1., bv_type='EXPR', bv_args=[])
_add('LAMBDATHUNK', 'lambda', ['EXPR'], 1., bv_type=None, bv_args=None)  # A thunk
# (but the actual RR prior does not care about these probabilities)
grammar = Grammar()

# Start symbol, boolean connectives and boolean constants
for _nt, _name, _args, _weight in (
        ('START', '', ['WORD'], 1.0),
        ('BOOL', 'and_', ['BOOL', 'BOOL'], 1./3.),
        ('BOOL', 'or_', ['BOOL', 'BOOL'], 1./3.),
        ('BOOL', 'not_', ['BOOL'], 1./3.),
        ('BOOL', 'True', None, 1.0/2.),
        ('BOOL', 'False', None, 1.0/2.),
        # note that this can take basically any types for return values
        ('WORD', 'if_', ['BOOL', 'WORD', 'WORD'], 0.5)):
    grammar.add_rule(_nt, _name, _args, _weight)

# The only WORD terminal here; alternatives that return undef whenever the
# condition is not met (a fixed third argument, or an 'ifU_' primitive)
# are not enabled.
grammar.add_rule('WORD', q('undef'), None, 0.5)

# Cardinality tests, word equality and set operations over the context x
for _nt, _name, _args, _weight in (
        ('BOOL', 'cardinality1_', ['SET'], 1.0),
        ('BOOL', 'cardinality2_', ['SET'], 1.0),
        ('BOOL', 'cardinality3_', ['SET'], 1.0),
        ('BOOL', 'equal_', ['WORD', 'WORD'], 1.0),
        ('SET', 'union_', ['SET', 'SET'], 1./3.),
        ('SET', 'intersection_', ['SET', 'SET'], 1./3.),
        ('SET', 'setdifference_', ['SET', 'SET'], 1./3.),
        ('SET', 'select_', ['SET'], 1.0),
        ('SET', 'x', None, 4.0)):
    grammar.add_rule(_nt, _name, _args, _weight)
FEATURE_WEIGHT = 2.0  # Probability of expanding to a terminal

# Set up the grammar.
# Here, we create our own instead of using DefaultGrammars.Nand because
# we don't want a BOOL/PREDICATE distinction.
grammar = Grammar()
grammar.add_rule("START", "", ["BOOL"], 1.0)

# nand_ with a BOOL, a literal True, or a literal False as first argument
for _first in ("BOOL", "True", "False"):
    grammar.add_rule("BOOL", "nand_", [_first, "BOOL"], 1.0 / 3.0)

# And finally, add the primitives: one feature test per shape and per color
for s in SHAPES:
    grammar.add_rule("BOOL", "is_shape_", ["x", q(s)], FEATURE_WEIGHT)
for c in COLORS:
    grammar.add_rule("BOOL", "is_color_", ["x", q(c)], FEATURE_WEIGHT)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
from LOTlib.DataAndObjects import FunctionData, make_all_objects
from LOTlib.Miscellaneous import sample_one

# Every shape/color combination
all_objects = make_all_objects(shape=SHAPES, color=COLORS)

# Generator for data
self.penalty = penalty self.seen = Counter() def internal_sample(self, h): """ Keep track of how many samples we've drawn for h """ self.seen[h] += 1 def compute_posterior(self, h, data): """ Wrap the posterior with a penalty for how often we've seen h. Computes the penalty on the prior """ mypenalty = self.seen[h] * self.penalty np, nl = MHSampler.compute_posterior(self, h, data) return np + mypenalty, nl if __name__ == "__main__": from LOTlib.Examples.Number.Shared import generate_data, NumberExpression, grammar, get_knower_pattern from LOTlib.Miscellaneous import q data = generate_data(500) h0 = NumberExpression(grammar) for h in TabooMCMC(h0, data, steps=10000): print q(get_knower_pattern( h)), h.posterior_score, h.prior, h.likelihood, q(h)
MHSampler.__init__(self, h0, data, **kwargs) self.penalty = penalty self.seen = Counter() def next(self): v = MHSampler.next(self) self.seen[v] += 1 return v def compute_posterior(self, h, data, **kwargs): """ Compute prior & likelihood for `h`, penalizing prior by how many samples have been generated so far. """ return self.seen[h] * self.penalty + h.compute_posterior(data, **kwargs) if __name__ == "__main__": from LOTlib import break_ctrlc from LOTlib.Examples.Number.Model import * from LOTlib.Miscellaneous import q data = make_data(500) h0 = NumberExpression(grammar) tmc = TabooMCMC(h0, data, steps=10000) for h in break_ctrlc(tmc): print tmc.seen[h], h.posterior_score, h.prior, h.likelihood, q(h)
grammar = Grammar()
_add = grammar.add_rule

_add('START', '', ['WORD'], 1.0)

# Boolean connectives and constants
_add('BOOL', 'and_', ['BOOL', 'BOOL'], 1. / 3.)
_add('BOOL', 'or_', ['BOOL', 'BOOL'], 1. / 3.)
_add('BOOL', 'not_', ['BOOL'], 1. / 3.)
_add('BOOL', 'True', None, 1.0 / 2.)
_add('BOOL', 'False', None, 1.0 / 2.)

# A Python conditional expression, so the arguments are ordered
# (value, condition, alternative).
# note that this can take basically any types for return values
_add('WORD', '(%s if %s else %s)', ['WORD', 'BOOL', 'WORD'], 0.5)
_add('WORD', q('undef'), None, 0.5)
# (Variants returning undef whenever the condition is not met -- a fixed
# third argument, or an 'ifU_' primitive -- are not enabled.)

# Cardinality tests and word equality
_add('BOOL', 'cardinality1_', ['SET'], 1.0)
_add('BOOL', 'cardinality2_', ['SET'], 1.0)
_add('BOOL', 'cardinality3_', ['SET'], 1.0)
_add('BOOL', 'equal_', ['WORD', 'WORD'], 1.0)

# Set operations over the context set x
_add('SET', 'union_', ['SET', 'SET'], 1. / 3.)
_add('SET', 'intersection_', ['SET', 'SET'], 1. / 3.)
_add('SET', 'setdifference_', ['SET', 'SET'], 1. / 3.)
_add('SET', 'select_', ['SET'], 1.0)
_add('SET', 'x', None, 4.0)
# -*- coding: utf-8 -*-
"""
A quick script to load some large data and re-run-evaluate it to generate a
file readable by plot_learning_curve.R
"""
import pickle

from LOTlib.Miscellaneous import q
from LOTlib.Examples.Number.Model import *

LARGE_DATA_SIZE = 1000

if __name__ == "__main__":

    # now evaluate on different amounts of data too:
    huge_data = generate_data(LARGE_DATA_SIZE)
    print("# Generated data!")

    # for now, use data from the run on February 10.
    # Open in binary mode and close the handle deterministically.
    # NOTE(review): unpickling can execute arbitrary code -- only load
    # files produced by our own runs.
    with open("mpi-run.pkl", "rb") as pickle_file:
        allfs = pickle.load(pickle_file)
    print("# Loaded!")

    # save this with a huge data set -- eval with average ll
    H = allfs.get_all()
    for h in H:
        h.compute_posterior(huge_data)

    # show the *average* ll for each hypothesis
    for h in H:
        if h.prior > float("-inf"):
            # pre-formatted so the text is the same under Python 2 and 3
            print("%s %s %s %s" % (h.prior,
                                   h.likelihood/float(LARGE_DATA_SIZE),
                                   q(get_knower_pattern(h)),
                                   q(h)))
MHSampler.__init__(self, h0, data, **kwargs) self.penalty=penalty self.seen = Counter() def internal_sample(self, h): """ Keep track of how many samples we've drawn for h """ self.seen[h] += 1 def compute_posterior(self, h, data): """ Wrap the posterior with a penalty for how often we've seen h. Computes the penalty on the prior """ mypenalty = self.seen[h] * self.penalty np, nl = MHSampler.compute_posterior(self, h, data) return np+mypenalty, nl if __name__ == "__main__": from LOTlib.Examples.Number.Shared import generate_data, NumberExpression, grammar, get_knower_pattern from LOTlib.Miscellaneous import q data = generate_data(500) h0 = NumberExpression(grammar) for h in TabooMCMC(h0, data, steps=10000): print q(get_knower_pattern(h)), h.posterior_score, h.prior, h.likelihood, q(h)
def makeBiasedGrammar(objects, nterms=['Tree', 'Set', 'Gender', 'Generation', 'Ancestry', 'Paternity', 'English'],
                      terms=['X', 'objects', 'all'], recursive=False, words=None, compositional=True):
    """Define a grammar for tree relations with non-uniform rule weights.

    Args:
        objects: the individuals; each becomes a singleton-set terminal
            when 'objects' is in ``terms``.
        nterms: names of the rule families to include.
        terms: which terminal kinds to include ('X', 'objects', 'all').
        recursive: if true (and ``words`` is given), add one ``recurse_``
            rule per word, each weighted ``1 / len(words)``.
        words: words that may be recursed to.
        compositional: if true, English-word primitives and terminals
            attach to 'SET'; otherwise they use the separate 'O' type.

    Returns:
        The populated Grammar.
    """
    grammar = Grammar()
    grammar.add_rule('START', '', ['SET'], 1.0)

    def add_set_rules(specs):
        # Every rule family below expands the 'SET' nonterminal.
        for name, arguments, weight in specs:
            grammar.add_rule('SET', name, arguments, weight)

    if 'Tree' in nterms:
        add_set_rules([
            ('parents_of_', ['SET', 'C'], 1.0),
            ('children_of_', ['SET', 'C'], 2.6118861522),
            ('spouses_of_', ['SET', 'C'], 46.1592503413),
        ])

    if 'Set' in nterms:  # set-theoretic operators
        add_set_rules([
            ('union_', ['SET', 'SET'], 82.6253980731),
            ('complement_', ['SET', 'C'], 4.134794019),
            ('intersection_', ['SET', 'SET'], 13.6030444971),
            ('setdifference_', ['SET', 'SET'], 12.1666763444),
        ])

    if 'Gender' in nterms:
        add_set_rules([
            ('female_', ['SET'], 209.5667590174),
            ('male_', ['SET'], 266.749332462),
        ])

    if 'Generation' in nterms:
        add_set_rules([
            ('generation0_', ['SET', 'C'], 4.9008668098),
            ('generation1_', ['SET', 'C'], 1.3398224552),
            ('generation2_', ['SET', 'C'], 1.165400777),
        ])

    if 'Ancestry' in nterms:
        add_set_rules([
            ('ancestors', ['SET', 'C'], 8.0872979353),
            ('descendants', ['SET', 'C'], 3.1124377558),
        ])

    if 'Paternity' in nterms:
        add_set_rules([
            ('maternal_', ['SET', 'C'], 2.2192339232),
            ('paternal_', ['SET', 'C'], 1.3887916971),
        ])

    if 'English' in nterms:
        lhs = 'SET' if compositional else 'O'
        english_words = ('brothers_', 'sisters_', 'moms_', 'dads_',
                         'children_', 'uncles_', 'aunts_', 'grandpas_',
                         'grandmas_', 'cousins_')
        add_set_rules([(word, [lhs, 'C'], 1.0) for word in english_words])

    if recursive and words is not None:
        for w in words:
            grammar.add_rule('SET', 'recurse_', [q(w), 'C', 'SET'], 1.0 / len(words))

    # Terminals go on 'SET' when compositional, on 'O' otherwise.
    if 'objects' in terms:
        object_type = 'SET' if compositional else 'O'
        for o in objects:
            grammar.add_rule(object_type, 'set', ["[\'%s\']" % o], 123.5304511982 / len(objects))

    if 'all' in terms:
        grammar.add_rule('SET', 'all_', ['C'], 3.8903782136)

    if 'X' in terms:
        # Had to give high prob to make pcfg well-defined
        x_type = 'SET' if compositional else 'O'
        grammar.add_rule(x_type, 'X', None, 69.8908794494)

    return grammar
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
from LOTlib.DefaultGrammars import DNF
from LOTlib.Miscellaneous import q

# DNF already includes the logical connectives by default, so we only need
# to add predicates to it.
# NOTE: ``grammar`` is the imported DNF object itself, not a copy.
grammar = DNF

# Two predicates for checking x's color and shape
# Note: per style, functions in the LOT end in _
for _predicate, _feature_type in (('is_color_', 'COLOR'), ('is_shape_', 'SHAPE')):
    grammar.add_rule('PREDICATE', _predicate, ['x', _feature_type], 1.0)

# Some colors/shapes each (for this simple demo)
# These are written in quotes so they can be evaled
for _color in ('red', 'blue', 'green', 'mauve'):
    grammar.add_rule('COLOR', q(_color), None, 1.0)

for _shape in ('square', 'circle', 'triangle', 'diamond'):
    grammar.add_rule('SHAPE', q(_shape), None, 1.0)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Hypothesis
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
from LOTlib.Hypotheses.RationalRulesLOTHypothesis import RationalRulesLOTHypothesis
# START expands to a boolean expression or directly to a constant
for _name, _args, _weight in (('', ['BOOL'], 0.7),
                              ('True', None, 0.2),
                              ('False', None, 0.1)):
    grammar.add_rule('START', _name, _args, _weight)

# Logical connectives, deliberately given very unequal weights
for _name, _args, _weight in (
        ('and_', ['BOOL', 'BOOL'], 0.1),
        ('or_', ['BOOL', 'BOOL'], 0.05),
        ('not_', ['BOOL'], 0.025),
        ('iff_', ['BOOL', 'BOOL'], 0.0249),
        # if we sample hypotheses (below), we will have high uncertainty on this
        ('implies_', ['BOOL', 'BOOL'], 0.0001),
        ('', ['FEATURE'], 0.8)):
    grammar.add_rule('BOOL', _name, _args, _weight)

grammar.add_rule('FEATURE', 'is_shape_', ['x', 'SHAPE'], 0.3)
grammar.add_rule('FEATURE', 'is_color_', ['x', 'COLOR'], 0.7)

# Shape weights grow with position in SHAPES; colors are weighted uniformly
for i, s in enumerate(SHAPES):
    grammar.add_rule('SHAPE', '%s' % q(s), None, 2.0 * (i+1))

for i, c in enumerate(COLORS):
    grammar.add_rule('COLOR', '%s' % q(c), None, 1.0/len(COLORS))

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Hypothesis
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Hypotheses.Likelihoods.BinaryLikelihood import BinaryLikelihood


class MyHypothesis(BinaryLikelihood, LOTHypothesis):
    """LOTHypothesis with a binary likelihood, displayed as ``lambda x : ...``.

    Defaults to the module-level ``grammar`` and caps hypotheses at
    150 nodes; any further keyword arguments go to LOTHypothesis.
    """

    def __init__(self, grammar=grammar, **kwargs):
        LOTHypothesis.__init__(
            self,
            grammar=grammar,
            display="lambda x : %s",
            maxnodes=150,
            **kwargs
        )
def run_one(iteration, model, model2data, probs):
    """Evaluate one proposal-probability setting on one example model.

    The example named by ``model`` (only the part before a ':' is used to
    load it) supplies a hypothesis factory.  Its hypothesis class is mixed
    with MixtureProposer, multiple chains are run on ``model2data[model]``
    and aggregate results are appended to a per-rank file under
    ``options.OUT``.

    Args:
        iteration: run index, written into the output prefix.
        model: example name, optionally of the form "name:tag".
        model2data: mapping from model string to its data.
        probs: proposal probabilities set on every new hypothesis.

    Returns:
        None.  Returns immediately if LOTlib.SIG_INTERRUPTED is set.
    """
    if LOTlib.SIG_INTERRUPTED:
        # do this so we don't create (big) hypotheses
        return

    # Take model and load the function to create hypotheses.
    # Data is passed in to be constant across runs.
    if re.search(r":", model):
        example_name, _data_tag = re.split(r":", model)
    else:
        example_name = model
    make_hypothesis, _ = load_example(example_name)

    htmp = make_hypothesis()  # just use this to get the grammar

    # Make a new class to wrap our mixture in
    class WrappedClass(MixtureProposer, type(htmp)):
        pass

    # define a wrapper to set this proposal
    def wrapped_make_hypothesis(**kwargs):
        h = WrappedClass(**kwargs)
        # pre-formatted so the text is the same under Python 2 and Python 3
        print("%s %s %s %s %s" % (">>", htmp, model, h, kwargs))
        h.set_proposal_probabilities(probs)
        return h

    sampler = MultipleChainMCMC(wrapped_make_hypothesis, model2data[model],
                                steps=options.SAMPLES, nchains=options.CHAINS)

    aggregate_path = options.OUT+"/aggregate.%s" % get_rank()
    with open(aggregate_path, 'a') as out_aggregate:
        line_prefix = "\t".join(map(str, [model, iteration, q(str(probs))]))
        evaluate_sampler(sampler, trace=False, prefix=line_prefix,
                         out_aggregate=out_aggregate,
                         print_every=options.PRINTEVERY)
grammar = Grammar()

# Start symbol, boolean connectives and boolean constants
for _nt, _name, _args, _weight in (
        ("START", "", ["WORD"], 1.0),
        ("BOOL", "and_", ["BOOL", "BOOL"], 1.0 / 3.0),
        ("BOOL", "or_", ["BOOL", "BOOL"], 1.0 / 3.0),
        ("BOOL", "not_", ["BOOL"], 1.0 / 3.0),
        ("BOOL", "True", None, 1.0 / 2.0),
        ("BOOL", "False", None, 1.0 / 2.0),
        # A Python conditional expression: (value, condition, alternative).
        # note that this can take basically any types for return values
        ("WORD", "(%s if %s else %s)", ["WORD", "BOOL", "WORD"], 0.5)):
    grammar.add_rule(_nt, _name, _args, _weight)

# The only WORD terminal here; variants that return undef whenever the
# condition is not met (fixed third argument, or an 'ifU_' primitive) are
# not enabled.
grammar.add_rule("WORD", q("undef"), None, 0.5)

# Cardinality tests, word equality and set operations over the context x
for _nt, _name, _args, _weight in (
        ("BOOL", "cardinality1_", ["SET"], 1.0),
        ("BOOL", "cardinality2_", ["SET"], 1.0),
        ("BOOL", "cardinality3_", ["SET"], 1.0),
        ("BOOL", "equal_", ["WORD", "WORD"], 1.0),
        ("SET", "union_", ["SET", "SET"], 1.0 / 3.0),
        ("SET", "intersection_", ["SET", "SET"], 1.0 / 3.0),
        ("SET", "setdifference_", ["SET", "SET"], 1.0 / 3.0),
        ("SET", "select_", ["SET"], 1.0),
        ("SET", "x", None, 4.0)):
    grammar.add_rule(_nt, _name, _args, _weight)
if __name__ == "__main__":
    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # Main running
    if is_master_process():
        display_option_summary(options)

    huge_data = generate_data(options.LARGE_DATA_SIZE)

    # One single-element argument list per (data amount, chain) pair;
    # built as a real list so it can be permuted below.
    argarray = [[x] for x in options.DATA_AMOUNTS * options.CHAINS]

    seen = set()
    for fs in MPI_unorderedmap(run, numpy.random.permutation(argarray)):
        for h in fs.get_all():
            if h not in seen:
                seen.add(h)

                # Re-score every new hypothesis on the large data set
                h.compute_posterior(huge_data)

                if h.prior > float("-inf"):
                    # prior, average log likelihood, knower pattern and the
                    # hypothesis; pre-formatted so the text is the same
                    # under Python 2 and Python 3
                    print("%s %s %s %s" % (
                        h.prior,
                        h.likelihood / float(options.LARGE_DATA_SIZE),
                        q(get_knower_pattern(h)),
                        qq(h)))

                sys.stdout.flush()

    import pickle
    # Pickle data is bytes: write it in binary mode.
    with open(options.OUT_PATH, 'wb') as f:
        pickle.dump(seen, f)
# (but the actual RR prior does not care about these probabilities)
grammar = Grammar()
_add = grammar.add_rule

_add('START', '', ['WORD'], 1.0)

# Boolean connectives and constants
_add('BOOL', 'and_', ['BOOL', 'BOOL'], 1./3.)
_add('BOOL', 'or_', ['BOOL', 'BOOL'], 1./3.)
_add('BOOL', 'not_', ['BOOL'], 1./3.)
_add('BOOL', 'True', None, 1.0/2.)
_add('BOOL', 'False', None, 1.0/2.)

# note that this can take basically any types for return values
_add('WORD', 'if_', ['BOOL', 'WORD', 'WORD'], 0.5)
_add('WORD', q('undef'), None, 0.5)
# (Variants returning undef whenever the condition is not met -- a fixed
# third argument, or an 'ifU_' primitive -- are not enabled.)

# Cardinality tests and word equality
_add('BOOL', 'cardinality1_', ['SET'], 1.0)
_add('BOOL', 'cardinality2_', ['SET'], 1.0)
_add('BOOL', 'cardinality3_', ['SET'], 1.0)
_add('BOOL', 'equal_', ['WORD', 'WORD'], 1.0)

# Set operations over the context set x
_add('SET', 'union_', ['SET', 'SET'], 1./3.)
_add('SET', 'intersection_', ['SET', 'SET'], 1./3.)
_add('SET', 'setdifference_', ['SET', 'SET'], 1./3.)
_add('SET', 'select_', ['SET'], 1.0)
_add('SET', 'x', None, 4.0)
def makeGrammar(objects, nterms=['Tree', 'Set', 'Gender', 'Generation', 'Ancestry', 'Paternity', 'English'],
                terms=['X', 'objects', 'all'], recursive=False, words=None, compositional=True, abstractP=10.0):
    """
    Define a grammar for tree relations.

    objects: sequence of object names; each one gets a `set(['<name>'])` terminal
    nterms: names of the primitive families to include
    terms: which terminals to include ('X', 'objects', 'all')
    recursive: if true (and words is not None), add one recurse_ rule per word
    words: the words recurse_ may refer to
    compositional: if true, the English kin terms take a 'SET' argument and the
        object / X terminals are added under 'SET'; otherwise both use 'O'
    abstractP: weight divided evenly among the object terminals

    Returns the populated Grammar.
    """
    grammar = Grammar()
    grammar.add_rule('START', '', ['SET'], 1.0)

    # Families that do not depend on `compositional`, in registration order.
    # Each rule is (function name, argument types, weight).
    fixed_families = (
        ('Tree', (  # TREE
            ('parents_of_', ['SET', 'C'], 1.0),
            ('children_of_', ['SET', 'C'], 1.0),
            ('spouses_of_', ['SET', 'C'], 1.0),
        )),
        ('Set', (  # SET THEORETIC
            ('union_', ['SET', 'SET'], 1.0),
            ('complement_', ['SET', 'C'], 1.0),
            ('intersection_', ['SET', 'SET'], 1.0),
            ('setdifference_', ['SET', 'SET'], 1.0),
        )),
        ('Gender', (  # GENDER
            ('female_', ['SET'], 1.0 / 2),
            ('male_', ['SET'], 1.0 / 2),
            ('samegender_', ['SET', 'C'], 1.0),
        )),
        ('Generation', (  # GENERATION
            ('generation0_', ['SET', 'C'], 1.0/3),
            ('generation1_', ['SET', 'C'], 1.0/3),
            ('generation2_', ['SET', 'C'], 1.0/3),
        )),
        ('Ancestry', (  # ANCESTRY
            ('ancestors', ['SET', 'C'], 1.0),
            ('descendants', ['SET', 'C'], 1.0),
        )),
        ('Paternity', (  # PATERNAL / MATERNAL
            ('maternal_', ['SET', 'C'], 1.0),
            ('paternal_', ['SET', 'C'], 1.0),
        )),
    )
    for family, rules in fixed_families:
        if family not in nterms:
            continue
        for name, arg_types, weight in rules:
            grammar.add_rule('SET', name, arg_types, weight)

    # ENGLISH: the kin words always expand from 'SET'; only the type of their
    # first argument depends on `compositional`.
    if 'English' in nterms:
        kin_arg = 'SET' if compositional else 'O'
        for kin_word in ('brothers_', 'sisters_', 'moms_', 'dads_', 'children_',
                         'uncles_', 'aunts_', 'grandpas_', 'grandmas_', 'cousins_'):
            grammar.add_rule('SET', kin_word, [kin_arg, 'C'], 1.0)

    if recursive and words is not None:
        for w in words:
            grammar.add_rule('SET', 'recurse_', [q(w), 'C', 'SET'], 1.0)

    # Terminals are attached to 'SET' when compositional, otherwise to 'O'.
    terminal_lhs = 'SET' if compositional else 'O'

    if 'objects' in terms:
        for o in objects:
            grammar.add_rule(terminal_lhs, 'set', ["[\'%s\']" % o], abstractP/len(objects))

    if 'all' in terms:
        grammar.add_rule('SET', 'all_', ['C'], 1.0)

    if 'X' in terms:
        # Had to give high prob to make pcfg well-defined
        grammar.add_rule(terminal_lhs, 'X', None, 10.0)

    return grammar
# Every production in this section carries weight 1; each entry is
# (nonterminal, function/template, argument types), added in this order.
for _lhs, _template, _arg_types in [
    ('START', '', ['BOOL']),

    ('BOOL', '(%s == %s)', ['NUMBER', 'NUMBER']),
    ('BOOL', '(not %s)', ['BOOL']),

    # and / or are written as templates: use the short_circuit form
    ('BOOL', '(%s and %s)', ['BOOL', 'BOOL']),
    ('BOOL', '(%s or %s)', ['BOOL', 'BOOL']),

    ('NUMBER', 'x', None),
    ('NUMBER', '1', None),
    ('NUMBER', '0', None),
    ('NUMBER', 'plus_', ['NUMBER', 'NUMBER']),
    ('NUMBER', 'minus_', ['NUMBER', 'NUMBER']),
]:
    grammar.add_rule(_lhs, _template, _arg_types, 1.)
del _lhs, _template, _arg_types

# One `lexicon` rule per word, so a hypothesis can refer to any word in WORDS.
for w in WORDS:
    grammar.add_rule('BOOL', 'lexicon', [q(w), 'NUMBER'], 1.)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DataAndObjects import FunctionData

def make_data(n=1, alpha=0.99):
    """
    Build the even/odd training data.

    For each x in 1..9 there is one 'even' datum (output: x % 2 == 0) followed
    by one 'odd' datum (output: x % 2 == 1), each created with the given alpha.
    The whole list is repeated n times.
    """
    one_pass = []
    for x in xrange(1, 10):
        even_datum = FunctionData(input=['even', x], output=(x % 2 == 0), alpha=alpha)
        odd_datum = FunctionData(input=['odd', x], output=(x % 2 == 1), alpha=alpha)
        one_pass.extend([even_datum, odd_datum])
    return one_pass * n

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# -*- coding: utf-8 -*- """ A quick script to load some large data and re-run-evaluate it to generate a file readable by plot_learning_curve.R """ import pickle from LOTlib.Miscellaneous import q from LOTlib.Examples.Number.Model import * LARGE_DATA_SIZE = 1000 if __name__ == "__main__": #now evaluate on different amounts of data too: huge_data = make_data(LARGE_DATA_SIZE) print "# Generated data!" allfs = pickle.load(open("mpi-run.pkl")) # for now, use data from the run on February 10 print "# Loaded!" # save this with a huge data set -- eval with average ll H = allfs.get_all() [h.compute_posterior(huge_data) for h in H] # show the *average* ll for each hypothesis for h in H: if h.prior > float("-inf"): print h.prior, h.likelihood/float(LARGE_DATA_SIZE), q(h.get_knower_pattern()), q(h)
self.ll_decay = ll_decay # needed here def make_hypothesis(**kwargs): return MyHypothesis(grammar=grammar, rrAlpha=1.0, **kwargs) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Main # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if __name__ == "__main__": from LOTlib import break_ctrlc from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler from LOTlib.Miscellaneous import q # Create an initial hypothesis # This is where we set a number of relevant variables -- whether to use RR, alpha, etc.Z h0 = MyHypothesis(grammar, ll_decay=1.0, rrAlpha=1.0, args=['x']) data = make_data() # Run the vanilla sampler. Without steps, it will run infinitely # this prints out posterior (posterior_score), prior, likelihood, for h in break_ctrlc(MHSampler(h0, data, 10000, skip=100, shortcut_likelihood=False)): print h.posterior_score, h.prior, h.likelihood, q(h) # This setup requires the *later* data to be upweighted, meaning that hypotheses that get # later data wrong should be given lower likelhood. But also with the decay, the overall # magnitude of the likelihood decreases.
grammar.add_rule('START', 'False', None, 0.1) grammar.add_rule('BOOL', 'and_', ['BOOL', 'BOOL'], 0.1) grammar.add_rule('BOOL', 'or_', ['BOOL', 'BOOL'], 0.05) grammar.add_rule('BOOL', 'not_', ['BOOL'], 0.025) grammar.add_rule('BOOL', 'iff_', ['BOOL', 'BOOL'], 0.0249) grammar.add_rule( 'BOOL', 'implies_', ['BOOL', 'BOOL'], 0.0001 ) # if we sample hypotheses (below), we will have high uncertainty on this grammar.add_rule('BOOL', '', ['FEATURE'], 0.8) grammar.add_rule('FEATURE', 'is_shape_', ['x', 'SHAPE'], 0.3) grammar.add_rule('FEATURE', 'is_color_', ['x', 'COLOR'], 0.7) for i, s in enumerate(SHAPES): grammar.add_rule('SHAPE', '%s' % q(s), None, 2.0 * (i + 1)) for i, c in enumerate(COLORS): grammar.add_rule('COLOR', '%s' % q(c), None, 1.0 / len(COLORS)) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Hypothesis # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis from LOTlib.Hypotheses.Likelihoods.BinaryLikelihood import BinaryLikelihood class MyHypothesis(BinaryLikelihood, LOTHypothesis): def __init__(self, grammar=grammar, **kwargs): LOTHypothesis.__init__(self,
from copy import deepcopy

from LOTlib.Grammar import Grammar
from LOTlib.Miscellaneous import q

# Shared list-building grammar; the per-language grammars below are copies of
# it that differ only in their ATOM terminals.
base_grammar = Grammar()

# (nonterminal, function name, argument types, weight), added in this order.
_base_rules = [
    ('START', 'flatten2str', ['LIST', 'sep=\"\"'], 1.0),

    ('LIST', 'if_', ['BOOL', 'LIST', 'LIST'], 1.),

    ('LIST', 'cons_', ['ATOM', 'LIST'], 1.),
    ('LIST', 'cons_', ['LIST', 'LIST'], 1.),
    ('LIST', 'cdr_', ['LIST'], 1.),
    ('LIST', 'car_', ['LIST'], 1.),

    ('LIST', '\'\'', None, 2),
    # Disabled: ('LIST', 'recurse_', [], 1.)

    ('BOOL', 'empty_', ['LIST'], 1.),
    ('BOOL', 'flip_', [''], 1.),
]
for _lhs, _fn, _arg_types, _weight in _base_rules:
    base_grammar.add_rule(_lhs, _fn, _arg_types, _weight)
del _base_rules, _lhs, _fn, _arg_types, _weight

# Grammar whose only atom is 'a'.
a_grammar = deepcopy(base_grammar)
for x in 'a':
    a_grammar.add_rule('ATOM', q(x), None, 2)

# Grammar with the atoms d, a, v, t, n.
eng_grammar = deepcopy(base_grammar)
for x in 'davtn':
    eng_grammar.add_rule('ATOM', q(x), None, 2)
def makeBiasedGrammar(objects, bias,
                      nterms=['Tree', 'Set', 'Gender', 'Generation', 'Ancestry', 'Paternity', 'English'],
                      terms=['X', 'objects', 'all'], recursive=False, words=None, compositional=True):
    """
    Define a weighted PCFG for tree relations.

    objects: a python list of strings, one for each person in the context
    bias: a python dictionary, bias[primitive] = weight (float); terminals are
        looked up as 'terminal_<object>' / 'terminal_X' and recursive calls as
        'recurse_<word>'
    nterms: a python list of primitive families to include
    terms: a python list of terminals to include ('X', 'objects', 'all')
    recursive: BOOL, should the grammar be recursive?
    words: a python list of words to recurse on
    compositional: BOOL, can the english primitives be composed?

    returns a LOTlib Grammar object
    """
    grammar = Grammar()
    grammar.add_rule('START', '', ['SET'], 1.0)

    # Families that do not depend on `compositional`, in registration order.
    # Each rule is (function name, argument types); its weight is bias[name].
    fixed_families = (
        ('Tree', (
            ('parents_of_', ['SET', 'C']),
            ('children_of_', ['SET', 'C']),
            ('spouses_of_', ['SET', 'C']),
        )),
        ('Set', (
            ('union_', ['SET', 'SET']),
            ('complement_', ['SET', 'C']),
            ('intersection_', ['SET', 'SET']),
            ('setdifference_', ['SET', 'SET']),
        )),
        ('Gender', (
            ('female_', ['SET']),
            ('male_', ['SET']),
        )),
        ('Generation', (
            ('generation0_', ['SET', 'C']),
            ('generation1_', ['SET', 'C']),
            ('generation2_', ['SET', 'C']),
        )),
        ('Ancestry', (
            ('ancestors', ['SET', 'C']),
            ('descendants', ['SET', 'C']),
        )),
        ('Paternity', (
            ('maternal_', ['SET', 'C']),
            ('paternal_', ['SET', 'C']),
        )),
    )
    for family, rules in fixed_families:
        if family not in nterms:
            continue
        for name, arg_types in rules:
            grammar.add_rule('SET', name, arg_types, bias[name])

    if 'English' in nterms:
        # The kin words always expand from 'SET'; only the type of their first
        # argument depends on `compositional`.
        kin_arg = 'SET' if compositional else 'O'

        # (function name, key into `bias`)
        # NOTE(review): 'childz_' is weighted by bias['children_'] -- the names
        # differ; confirm this is intended before "fixing" either side.
        english = (
            ('brothers_', 'brothers_'),
            ('sisters_', 'sisters_'),
            ('moms_', 'moms_'),
            ('dads_', 'dads_'),
            ('childz_', 'children_'),
            ('uncles_', 'uncles_'),
            ('aunts_', 'aunts_'),
            ('grandpas_', 'grandpas_'),
            ('grandmas_', 'grandmas_'),
            ('cousins_', 'cousins_'),
        )
        for name, bias_key in english:
            grammar.add_rule('SET', name, [kin_arg, 'C'], bias[bias_key])

    if recursive and words is not None:
        for w in words:
            grammar.add_rule('SET', 'recurse_', [q(w), 'C', 'SET'], bias['recurse_' + w])

    # Terminals are attached to 'SET' when compositional, otherwise to 'O'.
    terminal_lhs = 'SET' if compositional else 'O'

    if 'objects' in terms:
        for o in objects:
            grammar.add_rule(terminal_lhs, 'set', ["[\'%s\']" % o], bias['terminal_' + o])

    if 'all' in terms:
        grammar.add_rule('SET', 'all_', ['C'], bias['all_'])

    if 'X' in terms:
        # Had to give high prob to make pcfg well-defined
        grammar.add_rule(terminal_lhs, 'X', None, bias['terminal_X'])

    return grammar
''' from LOTlib.Inference.Proposals.InsertDeleteProposal import InsertDeleteProposal h0 = NumberExpression(grammar, proposal_function=InsertDeleteProposal(grammar)) ''' # store hypotheses we've found allhyp = TopN(N=1000) # ======================================================================================================== # Run the standard RationalRules sampler mh_sampler = MHSampler(h0, data, STEPS, skip=SKIP) for h in lot_iter(mh_sampler): if TRACE: print q(get_knower_pattern(h)), h.posterior_score, h.compute_prior(), h.compute_likelihood(data), qq(h) # add h to our priority queue, with priority of its log probability, h.posterior_score allhyp.add(h) # ======================================================================================================== # now re-evaluate everything we found on new data ''' huge_data = generate_data(LARGE_DATA_SIZE) save this with a huge data set -- eval with average ll H = allhyp.get_sorted() compute the posterior for each hypothesis [ h.compute_posterior(huge_data) for h in H]
# The concepts to be learned, each a predicate over a single object x.
TARGET_CONCEPTS = [
    lambda x: and_(is_shape_(x, 'square'), is_color_(x, 'blue')),
    lambda x: or_(is_shape_(x, 'triangle'), is_color_(x, 'green')),
    lambda x: or_(is_shape_(x, 'square'), is_color_(x, 'red')),
    lambda x: and_(not_(is_shape_(x, 'rectangle')), is_color_(x, 'red')),
    lambda x: and_(not_(is_shape_(x, 'square')), not_(is_color_(x, 'blue'))),
    lambda x: and_(is_shape_(x, 'rectangle'), is_color_(x, 'green')),
    lambda x: or_(not_(is_shape_(x, 'triangle')), is_color_(x, 'red')),
]

# ------------------------------------------------------------------
# Set up the grammar
# Here, we create our own instead of using DefaultGrammars.Nand because
# we don't want a BOOL/PREDICATE distinction
# ------------------------------------------------------------------

# Weight given to each feature-test rule below (a rule weight, not a
# normalized probability).
FEATURE_WEIGHT = 2.

grammar = Grammar()
grammar.add_rule('START', '', ['BOOL'], 1.0)

# Three nand_ forms that differ only in their first argument: a BOOL
# subexpression, the constant True, or the constant False.
for _first_arg in ('BOOL', 'True', 'False'):
    grammar.add_rule('BOOL', 'nand_', [_first_arg, 'BOOL'], 1.0/3.)
del _first_arg

# And finally, add the primitives: one rule per shape and one per color.
for s in SHAPES:
    grammar.add_rule('BOOL', 'is_shape_', ['x', q(s)], FEATURE_WEIGHT)

for c in COLORS:
    grammar.add_rule('BOOL', 'is_color_', ['x', q(c)], FEATURE_WEIGHT)