class MorganNewport(FormalLanguage):
    """
    The finite language of Morgan & Newport 1981 (also studied in Saffran 2001, JML).

    We skip the word learning/categorization part and assume the parts of speech
    are known. Morgan & Newport give both a CFG and an FSM; the language itself
    is finite (18 possible strings).
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        # (lhs, output template, nonterminal args, weight) -- order preserved.
        for lhs, rhs, args, w in [
                ('S',  '%s%s',   ['AP', 'BP'],       1.0),
                ('S',  '%s%s%s', ['AP', 'BP', 'CP'], 1.0),
                ('AP', 'A',      None,               1.0),
                ('AP', 'AD',     None,               1.0),  # two terminals: A, D
                ('BP', 'E',      None,               1.0),
                ('BP', '%sF',    ['CP'],             1.0),
                ('CP', 'C',      None,               1.0),
                ('CP', 'CD',     None,               1.0)]:
            self.grammar.add_rule(lhs, rhs, args, w)

    def terminals(self):
        return list('ADECF')

    def all_strings(self):
        return (str(t) for t in self.grammar.enumerate())
class HudsonKamNewport(FormalLanguage):
    """
    From Hudson Kam & Newport, simplifying out words to only POS
    Goal is to investigate learning of probabilities on N+DET vs N (no det)
    Here, we do not include mass/count subcategories on nouns
    http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.436.7524&rep=rep1&type=pdf
    """

    def __init__(self):
        self.grammar = Grammar(start='S')

        """
        V = transitive verb
        v = intransitive verb
        """

        # NOTE(review): 'v', 'V', and 'NP' are used as nonterminal arguments
        # below, but no expansion rules for them are ever added, even though
        # terminals() lists 'v', 'V', 'n', 'd'. As written, the grammar cannot
        # derive strings containing n/d -- presumably rules such as
        # NP -> dn / NP -> n (DET+N vs. bare N, per the docstring) are missing.
        # Confirm against the original source before relying on enumerate().
        self.grammar.add_rule('S', '%s%s', ['v', 'NP'], 1.0)
        self.grammar.add_rule('S', '%s%s%s', ['V', 'NP', 'NP'], 1.0)
        # The '!'-prefixed variants of the two sentence types above.
        self.grammar.add_rule('S', '!%s%s', ['v', 'NP'], 1.0)
        self.grammar.add_rule('S', '!%s%s%s', ['V', 'NP', 'NP'], 1.0)

    def terminals(self):
        return list('!vVnd')

    def all_strings(self):
        # Yield every derivable string, one per enumerated derivation.
        for g in self.grammar.enumerate():
            yield str(g)
class SimpleEnglish(FormalLanguage):
    """
    A simple English-like language combining a few kinds of recursion at once:
    adjective stacking and sentence embedding ("v that S").
    """

    def __init__(self):
        self.grammar = Grammar(start='S')

        self.grammar.add_rule('S', '%s%s', ['NP', 'VP'], 4.0)

        # Noun phrases: determiner + adjectives + noun, det + noun, or bare noun.
        self.grammar.add_rule('NP', 'd%sn', ['AP'], 1.0)
        self.grammar.add_rule('NP', 'dn', None, 1.0)
        self.grammar.add_rule('NP', 'n', None, 2.0)

        # Adjective phrases recurse to allow stacking: a, aa, aaa, ...
        self.grammar.add_rule('AP', 'a%s', ['AP'], 1.0)
        self.grammar.add_rule('AP', 'a', None, 3.0)

        # Disabled PP-attachment ambiguity rules:
        # self.grammar.add_rule('NP', '%s%s', ['NP', 'PP'], 1.0)
        # self.grammar.add_rule('VP', '%s%s', ['VP', 'PP'], 1.0)
        # self.grammar.add_rule('PP', 'p%s', ['NP'], 1.0)

        self.grammar.add_rule('VP', 'v', None, 2.0)      # intransitive
        self.grammar.add_rule('VP', 'v%s', ['NP'], 1.0)  # transitive
        self.grammar.add_rule('VP', 'vt%s', ['S'], 1.0)  # "v that S"

        # Disabled if-S-then-S recursion -- seems hard, and unnatural to get so many:
        # self.grammar.add_rule('S', 'i%sh%s', ['S', 'S'], 1.0)

    def terminals(self):
        return list('dnavt')

    def all_strings(self):
        return (str(t) for t in self.grammar.enumerate())
class Milne(FormalLanguage):
    """
    Finite-state language from
    https://www.sciencedirect.com/science/article/pii/S0306452217304645#f0025
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        # (lhs, output template, successors); every rule has weight 1.0.
        # The last two states are named X, Y so they are distinct from D, C.
        for lhs, rhs, args in [('S', '%s', ['A']),
                               ('A', 'a%s', ['D']),
                               ('A', 'a%s', ['C']),
                               ('D', 'd%s', ['C']),
                               ('C', 'c%s', ['G']),
                               ('C', 'c%s', ['F']),
                               ('G', 'g%s', ['F']),
                               ('F', 'f', None),
                               ('F', 'f%s', ['X']),
                               ('X', 'c', None),
                               ('X', 'c%s', ['Y']),
                               ('Y', 'g', None)]:
            self.grammar.add_rule(lhs, rhs, args, 1.0)

    def terminals(self):
        return list('acdgf')

    def all_strings(self):
        return (str(t) for t in self.grammar.enumerate())
class Reber(FormalLanguage):
    """
    The artificial grammar from Reber, 1967: a finite-state machine over the
    letters P, S, T, V, X.
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        # (state, emission template, successor); every arc has weight 1.0.
        # Note 'S' is both the start nonterminal and a terminal letter.
        arcs = [('S', 'T%s', ['S1']),
                ('S', 'V%s', ['S3']),
                ('S1', 'P%s', ['S1']),
                ('S1', 'T%s', ['S2']),
                ('S3', 'X%s', ['S3']),
                ('S3', 'V%s', ['S4']),
                ('S2', 'X%s', ['S3']),
                ('S2', 'S', None),
                ('S4', 'P%s', ['S2']),
                ('S4', 'S', None)]
        for state, emit, succ in arcs:
            self.grammar.add_rule(state, emit, succ, 1.0)

    def terminals(self):
        return list('PSTVX')

    def all_strings(self):
        return (str(t) for t in self.grammar.enumerate())
class English(FormalLanguage):
    """
    A fancier English-like language with several kinds of recursion at once:
    adjective stacking, PP attachment (with a little ambiguity), and sentence
    embedding ("v that S").
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        # (lhs, output template, nonterminal args, weight) -- order preserved.
        for lhs, rhs, args, w in [
                ('S',  '%s%s',  ['NP', 'VP'], 5.0),
                ('NP', 'd%sn',  ['AP'],       1.0),
                ('NP', 'dn',    None,         1.0),
                ('NP', 'n',     None,         1.0),
                ('AP', 'a%s',   ['AP'],       1.0),
                ('AP', 'a',     None,         5.0),
                ('NP', '%s%s',  ['NP', 'PP'], 0.50),  # a little ambiguity
                ('VP', '%s%s',  ['VP', 'PP'], 0.50),
                ('PP', 'p%s',   ['NP'],       1.0),
                ('VP', 'v',     None,         1.0),   # intransitive
                ('VP', 'v%s',   ['NP'],       1.0),   # transitive
                ('VP', 'vt%s',  ['S'],        1.0)]:  # "v that S"
            self.grammar.add_rule(lhs, rhs, args, w)

        # Disabled if-S-then-S rule:
        # self.grammar.add_rule('S', 'i%sh%s', ['S', 'S'], 1.0)

    def terminals(self):
        return list('dnavtp')

    def all_strings(self):
        return (str(t) for t in self.grammar.enumerate())
class Braine66(FormalLanguage):
    """
    The artificial language from Braine '66.

    Character coding --
    A phrases:  ob=b  ordem=d  remin=r  gice=g  kivil=k  noot=n  yarmo=f
    PQ phrases: ged=G  mervo=m  yag=y  leck=l  som=s  eena=e  wimp=w
    """

    def __init__(self):
        self.grammar = Grammar(start='S')

        # Four sentence types, with decreasing weights.
        self.grammar.add_rule('S', '%s', ['A'], 6.0)
        self.grammar.add_rule('S', '%s', ['PQ'], 3.0)
        self.grammar.add_rule('S', '%s%s', ['PQ', 'A'], 2.0)
        self.grammar.add_rule('S', '%s%s', ['A', 'PQ'], 1.0)

        # A phrases, e.g. 'fbd' = yarmo ob ordem.
        for phrase in ('fbd', 'frg', 'fk', 'fn'):
            self.grammar.add_rule('A', phrase, None, 1.0)

        # PQ phrases: 'G' (ged), then one of m/y/l, then one of s/e/w.
        for final in 'sew':
            for middle in 'myl':
                self.grammar.add_rule('PQ', 'G' + middle + final, None, 1.0)

    def terminals(self):
        return list('bdrgknfGmylsew')

    def all_strings(self):
        return (str(t) for t in self.grammar.enumerate())
class NewportAslin(FormalLanguage):
    """
    From Newport & Aslin 2004, Learning at a distance I.

    Only the 1-3 syllable word frames (Table 1), reduced to single characters:
    ba=b te=t gu=g do=d pi=p ra=r ke=k du=u lo=l ki=i
    middles: di=1 ku=2 to=3 pa=4

    We put a probability distribution on this so that it can be evaluated like
    everything else (otherwise its top 25 strings are not meaningful since it's
    all uniform).
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        # Word frames: (first syllable, last syllable) around a MID filler.
        for first, last in [('b', 't'), ('g', 'd'), ('p', 'r'),
                            ('k', 'u'), ('l', 'i')]:
            self.grammar.add_rule('S', first + '%s' + last, ['MID'], 1.0)
        # The four possible middle syllables.
        for mid in '1234':
            self.grammar.add_rule('MID', mid, None, 1.0)

    def terminals(self):
        return list('btgdprkuli1234')

    def all_strings(self):
        return (str(t) for t in self.grammar.enumerate())
class Elman(FormalLanguage):
    """
    Sentences are one or more words drawn from {baa, dii, guuu}, concatenated.

    NOTE(review): the original docstring here was copied from the Saffran class
    (tupiro/golabu/bidaku/padoti coded as tpr glb Bdk PDT), which does not match
    the words this grammar actually generates -- replaced with a description of
    the actual rules below.

    We put a probability distribution on the words so that they can be
    evaluated reasonably; otherwise it's hard to score a uniform language.
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        # Recursive word-concatenation rules: longer strings via S -> T S.
        self.grammar.add_rule('S', '%s%s', ['T', 'S'], 2.0)
        self.grammar.add_rule('S', '%s', ['T'], 1.0)
        # The three words.
        self.grammar.add_rule('T', 'baa', None, 1.0)
        self.grammar.add_rule('T', 'dii', None, 1.0)
        self.grammar.add_rule('T', 'guuu', None, 1.0)

    def terminals(self):
        # The distinct characters appearing in the three words.
        return list('badigu')

    def all_strings(self):
        for g in self.grammar.enumerate():
            yield str(g)
class NewportAslin(FormalLanguage):
    """
    From Newport & Aslin 2004, Learning at a distance I.

    Only the 1-3 syllable word frames (Table 1), reduced to single characters:
    ba=b te=t gu=g do=d pi=p ra=r ke=k du=u lo=l ki=i
    middles: di=1 ku=2 to=3 pa=4
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        # Each frame wraps a MID filler between a fixed first and last syllable.
        for frame in ('b%st', 'g%sd', 'p%sr', 'k%su', 'l%si'):
            self.grammar.add_rule('S', frame, ['MID'], 1.0)
        # The four middle syllables, uniformly weighted.
        for middle in ('1', '2', '3', '4'):
            self.grammar.add_rule('MID', middle, None, 1.0)

    def terminals(self):
        return list('btgdprkuli1234')

    def all_strings(self):
        return (str(t) for t in self.grammar.enumerate())
class Gomez(FormalLanguage):
    """
    Gomez (2002) language 1b: the frames a_d and b_e around a filler symbol
    drawn from the first ``X`` entries of OTHER_TERMINALS.
    """

    def __init__(self, X):
        assert X < len(OTHER_TERMINALS)
        self.X = X  # number of distinct filler symbols

        self.grammar = Grammar(start='S')
        # The two frames.
        self.grammar.add_rule('S', 'a%sd', ['X'], 1.0)
        self.grammar.add_rule('S', 'b%se', ['X'], 1.0)
        # One unit rule per allowed filler.
        for sym in OTHER_TERMINALS[:self.X]:
            self.grammar.add_rule('X', '%s' % sym, None, 1.0)

    def terminals(self):
        return list('abde' + OTHER_TERMINALS[:self.X])

    def all_strings(self):
        return (str(t) for t in self.grammar.enumerate())
class Saffran(FormalLanguage):
    """
    From the Saffran, Aslin, & Newport studies: strings are sequences of the
    words tupiro, golabu, bidaku, padoti, coded here with single characters
    as tpr, glb, Bdk, PDT.
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        # One-or-more word concatenation.
        self.grammar.add_rule('S', '%s%s', ['T', 'S'], 1.0)
        self.grammar.add_rule('S', '%s', ['T'], 1.0)
        # The four words, each with probability 0.25.
        for word in ('tpr', 'glb', 'Bdk', 'PDT'):
            self.grammar.add_rule('T', word, None, 0.25)

    def terminals(self):
        return list('tprglbBdkPDT')

    def all_strings(self):
        return (str(t) for t in self.grammar.enumerate())
class SimpleEnglish(FormalLanguage):
    """
    A simple English-like language with a few kinds of recursion all at once.
    Strings are enumerated up to (strictly less than) max_length characters.
    """

    def __init__(self, max_length=6):
        self.grammar = Grammar(start='S')
        # (lhs, rule name, children); all rules weighted 1.0.
        for lhs, name, args in [('S', 'S', ['NP', 'VP']),
                                ('NP', 'NP', ['d', 'AP', 'n']),
                                ('AP', 'AP', ['a', 'AP']),
                                ('AP', 'AP', None),
                                ('VP', 'VP', ['v']),
                                ('VP', 'VP', ['v', 'NP']),
                                ('VP', 'VP', ['v', 't', 'S'])]:
            self.grammar.add_rule(lhs, name, args, 1.0)
        FormalLanguage.__init__(self, max_length)

    def all_strings(self, max_length):
        # Enumerate trees to depth max_length and keep only the short yields.
        for tree in self.grammar.enumerate(d=max_length):
            leaves = ''.join(tree.all_leaves())
            if len(leaves) < max_length:
                yield leaves
class Gomez(FormalLanguage):
    """
    Gomez (2002) language 1b: aXd / bXe frames, where the filler X ranges over
    the first ``X`` symbols of OTHER_TERMINALS.
    """

    def __init__(self, X):
        assert X < len(OTHER_TERMINALS)
        self.X = X
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%sd', ['X'], 1.0)
        self.grammar.add_rule('S', 'b%se', ['X'], 1.0)
        # A unit rule for each filler symbol.
        for filler in OTHER_TERMINALS[:self.X]:
            self.grammar.add_rule('X', '%s' % filler, None, 1.0)

    def terminals(self):
        return list('abde' + OTHER_TERMINALS[:self.X])

    def all_strings(self):
        for tree in self.grammar.enumerate():
            yield str(tree)
class MorganMeierNewport(FormalLanguage):
    """
    From Morgan, Meier, & Newport: the Morgan & Newport phrase-structure
    language with function-word cues (o, u, a, i) marking phrase boundaries.
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        # (lhs, output template, nonterminal args); all weights 1.0.
        for lhs, rhs, args in [('S', '%s%s', ['AP', 'BP']),
                               ('S', '%s%s%s', ['AP', 'BP', 'CP']),
                               ('AP', 'oA', None),
                               ('AP', 'oAD', None),  # two terminals: A, D
                               ('BP', 'uE', None),
                               ('BP', 'a%sF', ['CP']),
                               ('CP', 'iC', None),
                               ('CP', 'iCD', None)]:
            self.grammar.add_rule(lhs, rhs, args, 1.0)

    def terminals(self):
        return list('ADECFouai')

    def all_strings(self):
        return (str(t) for t in self.grammar.enumerate())
class BerwickPilato(FormalLanguage):
    """
    From Figure 3a of Berwick & Pilato 1987. Ignores tense.

    Symbol key:
    J = Judy, g = gives, G = gave, d = does, D = did, e = get, i = is,
    W = was, h = has, H = had, N = given, v = giving, V = give, m = may,
    M = might, j = have, b = being, B = been, E = be, o = bread
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'J%s', ['S1'], 1.0)

        self.grammar.add_rule('S1', 'g%s', ['S4'], 1.0)
        self.grammar.add_rule('S1', 'G%s', ['S4'], 1.0)
        self.grammar.add_rule('S1', 'd%s', ['S3'], 1.0)
        self.grammar.add_rule('S1', 'D%s', ['S3'], 1.0)
        self.grammar.add_rule('S1', 'i%s', ['S6'], 1.0)
        # FIX: was 'w%s', but lowercase 'w' is not in terminals() and the
        # symbol key defines "was" as W (past forms are uppercase: G, D, H, M).
        self.grammar.add_rule('S1', 'W%s', ['S6'], 1.0)
        self.grammar.add_rule('S1', 'h%s', ['S5'], 1.0)
        self.grammar.add_rule('S1', 'H%s', ['S5'], 1.0)
        self.grammar.add_rule('S1', 'm%s', ['S2'], 1.0)
        self.grammar.add_rule('S1', 'M%s', ['S2'], 1.0)

        self.grammar.add_rule('S2', 'j%s', ['S5'], 1.0)
        self.grammar.add_rule('S2', 'E%s', ['S6'], 1.0)
        self.grammar.add_rule('S2', 'V%s', ['S4'], 1.0)

        self.grammar.add_rule('S3', 'e%s', ['S7'], 1.0)
        self.grammar.add_rule('S3', 'V%s', ['S4'], 1.0)

        # FIX: this rule was listed twice; it is the only S4 expansion, so
        # dropping the duplicate leaves the distribution unchanged while
        # avoiding duplicate derivations during enumeration.
        self.grammar.add_rule('S4', 'o', None, 1.0)

        self.grammar.add_rule('S5', 'N%s', ['S4'], 1.0)
        self.grammar.add_rule('S5', 'B%s', ['S6'], 1.0)

        self.grammar.add_rule('S6', 'b%s', ['S7'], 1.0)
        self.grammar.add_rule('S6', 'v%s', ['S4'], 1.0)
        self.grammar.add_rule('S6', 'N%s', ['S4'], 1.0)

        self.grammar.add_rule('S7', 'N%s', ['S4'], 1.0)

    def terminals(self):
        return list('JgGdDeiWhHNvVmMjbBEo')

    def all_strings(self):
        for g in self.grammar.enumerate():
            yield str(g)
# Weight for each TERMINAL rule -- presumably upweights emitting a terminal
# over deeper recursion when sampling/enumerating; confirm with Grammar semantics.
TERMINAL_WEIGHT = 15

grammar = Grammar()

# flattern2str lives at the top, and it takes a cons, cdr, car structure and projects it to a string
grammar.add_rule('START', 'flatten2str', ['EXPR'], 1.0)
grammar.add_rule('EXPR', 'sample_', ['SET'], 1.)
grammar.add_rule('EXPR', 'cons_', ['EXPR', 'EXPR'], 1.0 / 2.0)
grammar.add_rule('SET', '"%s"', ['STRING'], 1.0)

# Build up partitions before we have the terminals and strings
# this way, partitions are mainly structural
partitions = []
for t in grammar.enumerate(7):
    t = deepcopy(t)  # just make sure it's a copy (may not be necessary)
    for n in t:
        setattr(n, "p_propose", 0.0)  # add a tree attribute saying we can't propose
    partitions.append(t)

# NOTE(review): STRING/TERMINAL rules are deliberately added only *after*
# the partitions are enumerated above -- do not reorder these statements.
grammar.add_rule('STRING', '%s%s', ['TERMINAL', 'STRING'], 1.0)
grammar.add_rule('STRING', '%s', ['TERMINAL'], 1.0)

grammar.add_rule('TERMINAL', 'g', None, TERMINAL_WEIGHT)
grammar.add_rule('TERMINAL', 'e', None, TERMINAL_WEIGHT)
grammar.add_rule('TERMINAL', 'k', None, TERMINAL_WEIGHT)
grammar.add_rule('TERMINAL', 's', None, TERMINAL_WEIGHT)
grammar.add_rule('TERMINAL', 'f', None, TERMINAL_WEIGHT)
# NOTE(review): this chunk begins mid-expression -- "self.prior_temperature)"
# is the tail of a statement that starts before this excerpt; left as-is.
self.prior_temperature)

if __name__ == "__main__":
    from LOTlib.SampleStream import *
    from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
    from LOTlib.DataAndObjects import FunctionData
    import time
    import copy

    lst = "00110011"  # the target output string
    stps = 100000

    #print weave_every('000000000', '1', 3)

    # One data point observing `lst` with count len(lst).
    # NOTE(review): 10e-6 equals 1e-5; if alpha was meant to be 1 - 1e-6,
    # this is off by a factor of ten -- confirm intent.
    data = [FunctionData(alpha=1.0 - 10e-6, input=(), output={lst: len(lst)})]

    rules_iter = grammar.enumerate(10)

    start_counts = {}
    #for r in grammar:
    #    if '0' in r.get_rule_signature()[1]:
    #        start_counts[r.get_rule_signature()] = 1

    s = 0
    # Python 2 idioms (xrange, iterator .next()): take the first 50 enumerated
    # trees and wrap each as a hypothesis value.
    for _ in xrange(50):
        h = rules_iter.next()
        h0 = MyHypothesis()
        h0.start_counts = start_counts
        #for h in grammar.get_rule
        #print h0.__dict__.get('rrAlpha', 1.0)
        h0.set_value(value=h)
## TODO: Vary resample_p to make sure that works here! from LOTlib.Grammar import Grammar grammar = Grammar() grammar.add_rule('START', '', ['A'], 1.0) grammar.add_rule('A', 'A', ['A', 'A'], 0.2) grammar.add_rule('A', 'A', ['a'], 0.7) grammar.add_rule('A', 'apply_', ['L', 'A'], 0.10) grammar.add_rule('L', 'lambda', ['A'], 0.11, bv_p=0.07, bv_type='A') grammar.add_rule('A', 'apply_', ['LF', 'A'], 0.10) grammar.add_rule('LF', 'lambda', ['A'], 0.11, bv_p=0.07, bv_type='A', bv_args=['A'], bv_prefix='F') ## NOTE: DOES NTO HANDLE THE CASE WITH TWO A->APPLY, L->LAMBDAS if __name__ == "__main__": from LOTlib import break_ctrlc for t in break_ctrlc(grammar.enumerate()): print t
grammar.add_rule('EXPR', 'sample_', ['SET'], 1.0)
grammar.add_rule('EXPR', 'cons_', ['EXPR', 'EXPR'], 1.0/1.5)  # downweight the recursion
grammar.add_rule('SET', '"%s"', ['STRING'], 1.0)
grammar.add_rule('EXPR', 'if_', ['BOOL', 'EXPR', 'EXPR'], 1./5)  # downweight the recursion
grammar.add_rule('BOOL', 'flip_', [''], 1.)

# Build up partitions before we have the terminals and strings
# this way, partitions are mainly structural (and we search for the terminals in the templates)
partitions = []
for t in grammar.enumerate(6):
    t = deepcopy(t)  # just make sure it's a copy (may not be necessary)
    for n in t:
        setattr(n, "p_propose", 0.0)  # add a tree attribute saying we can't propose
    partitions.append(t)

# Python 2 print statements: show each partition template.
for part in partitions:
    print part
    print "\n"

# NOTE(review): terminal rules are deliberately added only after partitions
# are built, so the partitions contain STRING/TERMINAL holes, not letters.
grammar.add_rule('STRING', '%s%s', ['TERMINAL', 'STRING'], 1.0)
grammar.add_rule('STRING', '%s', ['TERMINAL'], 1.0)

grammar.add_rule('TERMINAL', 'g', None, TERMINAL_WEIGHT)
grammar.add_rule('TERMINAL', 'a', None, TERMINAL_WEIGHT)
grammar.add_rule('TERMINAL', 'i', None, TERMINAL_WEIGHT)
## TODO: Vary resample_p to make sure that works here! from LOTlib.Grammar import Grammar grammar = Grammar() grammar.add_rule('START', '', ['A'], 1.0) grammar.add_rule('A', 'A', ['A', 'A'], 0.2) grammar.add_rule('A', 'A', ['a'], 0.7) grammar.add_rule('A', 'apply_', ['L', 'A'], 0.10) grammar.add_rule('L', 'lambda', ['A'], 0.11, bv_p=0.07, bv_type='A') grammar.add_rule('A', 'apply_', ['LF', 'A'], 0.10) grammar.add_rule('LF', 'lambda', ['A'], 0.11, bv_p=0.07, bv_type='A', bv_args=['A'], bv_prefix='F') ## NOTE: DOES NTO HANDLE THE CASE WITH TWO A->APPLY, L->LAMBDAS if __name__ == "__main__": from LOTlib import lot_iter for t in lot_iter(grammar.enumerate()): print t
# flattern2str lives at the top, and it takes a cons, cdr, car structure and projects it to a string grammar.add_rule('START', '', ['EXPR'], 1.0) grammar.add_rule('EXPR', 'sample_uniform_d', ['SET'], 1.) # this requires its own set for terminals grammar.add_rule('SET', '%s', ['DETTERMINAL'], 1.0) grammar.add_rule('SET', '%s+%s', ['DETTERMINAL', 'SET'], 1.0) grammar.add_rule('PROB','prob_()', None,1.0) # Build up partitions before we have the terminals and strings # this way, partitions are mainly structural partitions = [] for t in grammar.enumerate(7): t = deepcopy(t) # just make sure it's a copy (may not be necessary) for n in t: setattr(n, "p_propose", 0.0) # add a tree attribute saying we can't propose partitions.append(t) for part in partitions: print part for t in TERMINALS: grammar.add_rule('DETTERMINAL', "'%s'"%t, None, TERMINAL_WEIGHT) # deterministic terminals participate in sets grammar.add_rule('EXPR', 'if_d', ['PROB','EXPR','EXPR'],1.0) grammar.add_rule('EXPR', 'cons_d', ['EXPR', 'EXPR'], 1.0/2.0)