def __init__(self, camp, npc, pc, start, visualizer=None):
    self.camp = camp
    self.npc = npc
    self.pc = pc
    if not visualizer:
        visualizer = SimpleVisualizer()
    self.visualizer = visualizer
    self.root = None
    self.npc_offers = list()
    self.npc_grammar = grammar.Grammar()
    self.pc_grammar = grammar.Grammar()
    #self._get_dialogue_data()
    self.build(start)
def test_empty_words(self):
    grammar = gmr.Grammar(gmr.Rule('N', ['Nothing'], preterminal=True))
    words = []
    parser = psr.EarleyParser(grammar)
    trees = parser.parse(words)
    self.assertEqual(0, len(trees))
    self.assertEqual([], trees)
def test_empty_grammar(self):
    grammar = gmr.Grammar()
    words = ['Something']
    parser = psr.EarleyParser(grammar)
    trees = parser.parse(words)
    self.assertEqual(0, len(trees))
    self.assertEqual([], trees)
def __init__(self):
    self.generation = 0
    evolver.TIME = time.time()
    evolver.SAVE_BEST = True
    evolver.CODON_SIZE = 100
    evolver.ELITE_SIZE = 1
    evolver.POPULATION_SIZE = 35
    evolver.GENERATION_SIZE = 35
    evolver.FRONT_FOLDER = "frontData"
    evolver.GENERATIONS = 5
    evolver.DEFAULT_FIT = 100000000000000
    evolver.MUTATION_PROBABILITY = 0.015
    evolver.CROSSOVER_PROBABILITY = 0.7
    evolver.GRAMMAR_FILE = "grammars/jon_pylon10.bnf"
    evolver.FITNESS_FUNCTION = evolver.StructuralFitness()
    evolver.IMG_COUNTER = 0
    self.pop_size = evolver.POPULATION_SIZE
    self.grammar = grammar.Grammar(evolver.GRAMMAR_FILE)
    self.individuals = evolver.initialise_population(evolver.POPULATION_SIZE)
    for idx, indiv in enumerate(self.individuals):
        indiv.uid = idx
    self.selection = lambda x: evolver.tournament_selection(x, evolver.POPULATION_SIZE)
    evolver.evaluate_fitness(self.individuals, self.grammar,
                             evolver.FITNESS_FUNCTION)
    self.best_ever = min(self.individuals)
    self.fronts = []
    self.individuals.sort()
    print "creating meshes"
    evolver.create_meshes(self.individuals)
    evolver.print_stats(1, self.individuals)
def main():
    # Exactly one gram1 definition must be active for grammar.Grammar(gram1)
    # below; the first alternative is left uncommented, the rest are kept as
    # commented-out variants.
    gram1 = {"A": not_terminal.Not_terminal("A", [rule.Rule("B C"), rule.Rule("bad")]),
             "B": not_terminal.Not_terminal("B", [rule.Rule("big C boss"), rule.Rule("epsilon")]),
             "C": not_terminal.Not_terminal("C", [rule.Rule("cat"), rule.Rule("cow")])}
    # gram1 = {"S": not_terminal.Not_terminal("S", [rule.Rule("A uno B C"), rule.Rule("S dos")]),
    #          "A": not_terminal.Not_terminal("A", [rule.Rule("B C D"), rule.Rule("A tres"), rule.Rule("epsilon")]),
    #          "B": not_terminal.Not_terminal("B", [rule.Rule("D cuatro C tres"), rule.Rule("epsilon")]),
    #          "C": not_terminal.Not_terminal("C", [rule.Rule("cinco D B"), rule.Rule("epsilon")]),
    #          "D": not_terminal.Not_terminal("D", [rule.Rule("seis"), rule.Rule("epsilon")])}
    # gram1 = {"A": not_terminal.Not_terminal("A", [rule.Rule("ant C"), rule.Rule("B")]),
    #          "B": not_terminal.Not_terminal("B", [rule.Rule("cat C"), rule.Rule("C")]),
    #          "C": not_terminal.Not_terminal("C", [rule.Rule("D fat"), rule.Rule("D")]),
    #          "D": not_terminal.Not_terminal("D", [rule.Rule("B")])}
    # gram1 = {"A": not_terminal.Not_terminal("A", [rule.Rule("ant C"), rule.Rule("B")]),
    #          "B": not_terminal.Not_terminal("B", [rule.Rule("cat C"), rule.Rule("C")]),
    #          "C": not_terminal.Not_terminal("C", [rule.Rule("fat D"), rule.Rule("D")]),
    #          "D": not_terminal.Not_terminal("D", [rule.Rule("B")])}
    gramar = grammar.Grammar(gram1)
    # gramar = first(gramar)  IMPORTANT
    # print(first_aux(gramar, "S"))
    first(gramar)
def main(fname):
    with open(fname, "r") as infile:
        tokens = scanner.scan(infile, rules)
    tokens.append(scanner.Symbol("$", "EOF", -1, -1, -1))
    #print(tokens)
    g = grammar.Grammar(grammar_dict)
    lr_parse_common.augment_grammar(g)
    for rule in g.rules:
        if rule.to_node is None:
            rule.to_node = lambda rule, children: ast.ASTNode(rule.lhs, children)
    kernel = slr1.LR0Item(g.rules[-1], 0)
    first_set = first_follow.get_first(g)
    follow = first_follow.get_follow(g, first_set)
    dfa = lr_parse_common.make_dfa(g, slr1.closure, kernel, first_set)
    action, goto_table = slr1.make_parse_table(dfa, follow, g)
    ast_root = lr_parse_common.parse(dfa, action, goto_table, tokens, g)
    print(ast.gen_ast_digraph(ast_root))
    gen_code = gen_ir.CodeGenVisitor(ast_root)
    gen_code.accept()
    with open(fname + ".ll", "w") as outfile:
        outfile.write(gen_code.get_code())
def test_initializer(self):
    grammar = gmr.Grammar(gmr.Rule('S', ['VP']),
                          gmr.Rule('VP', ['V']),
                          gmr.Rule('V', ['initialize'], preterminal=True))
    self.assertIn(gmr.Rule('S', ['VP']), grammar)
    self.assertIn(gmr.Rule('VP', ['V']), grammar)
    self.assertIn(gmr.Rule('V', ['initialize'], preterminal=True), grammar)
    self.assertEqual(3, len(grammar))
def test_regex_rule(self):
    grammar = gmr.Grammar(
        gmr.Rule('S', [gmr.Regex(r'[a-z]')], preterminal=True))
    words = ['hello']
    parser = psr.EarleyParser(grammar)
    trees = parser.parse(words)
    self.assertEqual(1, len(trees))
    self.assertEqual([['S', 'hello']], trees)
def grammarEnumeration(grammarFile, number, debug=False):
    g = grammar.Grammar(grammarFile)
    classification = g.classifyFirstNStrings(number, debug=debug)
    notInLang = sorted([k for k, v in classification.items() if not v],
                       key=lambda x: (len(x), x))
    isInLang = sorted([k for k, v in classification.items() if v],
                      key=lambda x: (len(x), x))
    print("In Language:")
    print('\t' + str(isInLang))
    print("Not In Language:")
    print('\t' + str(notInLang))
def isInGrammar(grammarFile, testString, debug=False):
    g = grammar.Grammar(grammarFile)
    alphabet = g.getAlphabet()
    tokens = tokenizer.Tokenizer(tokenizer.getTTLForAlphabet(alphabet), True)
    tokens.tokenize(testString)
    if g.isInLanguage(tokens, debug):
        print("Test String in Language!")
    else:
        print("Test String NOT in Language!")
def lastgen(resultsfolder):
    PATHNAME = "results/" + resultsfolder
    print "pathname", PATHNAME
    filename = get_last_gen(PATHNAME)
    # NOTE: rstrip/lstrip strip character *sets*, not affixes; this works for
    # names like "gen42.dat" only because digits appear in neither set.
    lastgen = int(filename.rstrip('.dat').lstrip('gen'))
    print "the last generation was:", lastgen
    parsed_pop = parse_pop(PATHNAME + '/' + filename)
    sorted_pop = sort_pop(parsed_pop)
    BNF_GRAMMAR = GRAMMAR.Grammar(GRAMMAR_FILE)
    INDIVIDUALS = reinitialise_pop(sorted_pop)
    FITNESS_FUNCTION = bwbfitness.CFD_Fitness(debug=True, foampng=False)
    evaluate_fitness(INDIVIDUALS, BNF_GRAMMAR, FITNESS_FUNCTION)
def load(self, gramfile):
    self.grammar = grammar.Grammar(gramfile)
    # take basename of the gramfile as the gramlist entry
    self.gramlist = [os.path.basename(gramfile)]
    # drop any stale pickled grammars from the runtime dir
    for f in os.listdir(self.runtimedir):
        name, ext = os.path.splitext(f)
        if ext in ['.bgr']:
            os.unlink(os.path.join(self.runtimedir, f))
    with open(
            os.path.join(
                self.runtimedir,
                os.path.extsep.join([os.path.basename(gramfile), 'bgr'])),
            'wb') as o:
        cPickle.dump(self.grammar, o)
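# Reading the cached .bgr back is the mirror image of the dump above. A
# minimal sketch; the load_cached helper name is hypothetical, and cPickle
# matches the Python 2 style of the snippet.
import os
import cPickle

def load_cached(runtimedir, gramfile):
    """Return the grammar pickled by load(), or None if no cache exists."""
    path = os.path.join(runtimedir,
                        os.path.extsep.join([os.path.basename(gramfile), 'bgr']))
    if not os.path.exists(path):
        return None
    with open(path, 'rb') as i:
        return cPickle.load(i)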
def constraint_test():
    """Verify constraint checking methods."""
    import itertools, sys
    show_analysis = False

    # Generated via grammar
    gr = grammar.Grammar('grammars/test_constraints.bnf')
    inputs = ([1 for _ in range(100)], [i % 3 for i in range(100)])
    for _input in inputs:
        output = gr.generate(_input)
        azr = analyser.Analyser('test', output['phenotype'], True)
        try:
            azr.create_graph()
        except ValueError as e:
            print(__name__, "ERROR", _input, e)
            continue
        azr.parse_graph()
        azr.apply_stresses()
        azr.create_slf_file()
        azr.test_slf_file()
        azr.parse_results()
        azr.print_stresses()
        if show_analysis:
            azr.show_analysis()

    # Fixed generated
    lengths = (1000, 10000)
    levels = (5, 10)
    for length_idx, level_idx in itertools.permutations([0, 1]):
        try:
            GRAPH = constrained_offset_graph(lengths[length_idx],
                                             levels[level_idx])
        except ValueError as e:
            print(__name__, "ERROR", lengths[length_idx], levels[level_idx], e)
            continue
        GRAPH.save_graph("pylon")
        print "nodes:", GRAPH.number_of_nodes()
        print "edges", GRAPH.number_of_edges()
        # will it blend?
        azr = analyser.Analyser('test', "moo", True)
        azr.my_graph = GRAPH
        azr.parse_graph()
        azr.apply_stresses()
        azr.create_slf_file()
        azr.test_slf_file()
        azr.parse_results()
        azr.print_stresses()
        if show_analysis:
            azr.show_analysis()
def test_ambiguity(self):
    grammar = gmr.Grammar(
        gmr.Rule('S', ['NP', 'VP']),
        gmr.Rule('NP', ['Det', 'Nominal']),
        gmr.Rule('NP', ['Det', 'Nominal', 'PP']),
        gmr.Rule('NP', ['Nominal']),
        gmr.Rule('VP', ['VP', 'PP']),
        gmr.Rule('VP', ['V', 'NP']),
        gmr.Rule('PP', ['Prep', 'NP']),
        gmr.Rule('Det', ['a'], preterminal=True),
        gmr.Rule('Nominal', ['I'], preterminal=True),
        gmr.Rule('Nominal', ['man'], preterminal=True),
        gmr.Rule('Nominal', ['telescope'], preterminal=True),
        gmr.Rule('V', ['saw'], preterminal=True),
        gmr.Rule('Prep', ['with'], preterminal=True))
    words = ['I', 'saw', 'a', 'man', 'with', 'a', 'telescope']
    parser = psr.EarleyParser(grammar)
    trees = parser.parse(words)
    self.assertEqual(2, len(trees))
    self.assertEqual(
        [
            # ... saw ... with a telescope
            ['S',
             ['NP', ['Nominal', 'I']],
             ['VP',
              ['VP', ['V', 'saw'],
               ['NP', ['Det', 'a'], ['Nominal', 'man']]],
              ['PP', ['Prep', 'with'],
               ['NP', ['Det', 'a'], ['Nominal', 'telescope']]]]],
            # ... man with a telescope
            ['S',
             ['NP', ['Nominal', 'I']],
             ['VP', ['V', 'saw'],
              ['NP', ['Det', 'a'], ['Nominal', 'man'],
               ['PP', ['Prep', 'with'],
                ['NP', ['Det', 'a'], ['Nominal', 'telescope']]]]]]
        ],
        trees)
def __init__(self):
    rospy.init_node("grammar_lu")
    with open("prohibited_words.txt", "r") as f:
        self.prohibited_words = [line.strip() for line in f.readlines()]
    self.pub_results = rospy.Publisher('grammar_lu/results', String,
                                       queue_size=10)
    rospy.Subscriber("google_speech/recres_nbest", String, self.recog_callback)
    rospy.Subscriber("grammar_lu/grammar", String, self.set_gram)
    self.gram = grammar.Grammar()
    self.gram.load("grammar_sample.txt")
    rospy.spin()
def test_multiple_parses(self):
    grammar = gmr.Grammar(gmr.Rule('N', ['I'], preterminal=True),
                          gmr.Rule('V', ['made'], preterminal=True),
                          gmr.Rule('N', ['her'], preterminal=True),
                          gmr.Rule('V', ['duck'], preterminal=True),
                          gmr.Rule('N', ['duck'], preterminal=True),
                          gmr.Rule('S', ['N', 'V', 'N', 'V']),
                          gmr.Rule('S', ['N', 'V', 'N', 'N']))
    words = ['I', 'made', 'her', 'duck']
    parser = psr.EarleyParser(grammar)
    trees = parser.parse(words)
    self.assertEqual(2, len(trees))
    self.assertEqual(
        [['S', ['N', 'I'], ['V', 'made'], ['N', 'her'], ['V', 'duck']],
         ['S', ['N', 'I'], ['V', 'made'], ['N', 'her'], ['N', 'duck']]],
        trees)
def main():
    gram_dict = {'start': 'S->CC', 'other': ['C->cC|d']}
    gram = grammar.Grammar(gram_dict['start'], gram_dict['other'])
    gram.normalize()
    all_items, raw_goto = get_lr1_relation(gram)
    action_dict, goto_dict = get_parse_table(gram, all_items, raw_goto)
    print 'action_dict'
    for from_set, edges in action_dict.iteritems():
        for token, to_set in edges.iteritems():
            print from_set, token, to_set
    print 'goto_dict'
    for from_set, edges in goto_dict.iteritems():
        for token, to_set in edges.iteritems():
            print from_set, '-------', token, '-----', to_set
    return
    # dead code: never reached because of the return above
    for itm in get_lr1_relation(gram):
        print itm
def test_programming_language_parsing(self):
    grammar = gmr.Grammar(gmr.Rule('program', ['variable', 'operator', 'value']),
                          gmr.Rule('variable', [gmr.Regex(r'x')],
                                   preterminal=True),
                          gmr.Rule('operator', [gmr.Regex(r'[+\-=*/]')],
                                   preterminal=True),
                          gmr.Rule('value', [gmr.Regex(r'\d+')],
                                   preterminal=True),
                          distinguished_symbol='program')
    words = ['x', '=', '599993949']
    parser = psr.EarleyParser(grammar)
    trees = parser.parse(words)
    self.assertEqual([[
        'program', ['variable', 'x'], ['operator', '='], ['value', '599993949']
    ]], trees)
def main():
    if len(sys.argv) < 2:
        print "Please set a random seed"
        exit()
    else:
        random.seed(sys.argv[1])
    PATHNAME = "results/run" + str(sys.argv[1])
    if os.path.exists(PATHNAME):
        print "path already exists! refusing to overwrite results"
        exit()
    else:
        os.makedirs(PATHNAME)
    BNF_GRAMMAR = GRAMMAR.Grammar(GRAMMAR_FILE)
    INDIVIDUALS = initialise_population(POPULATION_SIZE)
    LAST_POP = search_loop(GENERATIONS, INDIVIDUALS, BNF_GRAMMAR,
                           tournament_selection, FITNESS_FUNCTION, PATHNAME)
def main():
    cwd = os.getcwd()
    if cwd.startswith('/ichec/home'):
        print "Run it from the work dir!"
        exit()
    runstarttime = time.time()
    if os.path.exists(PATHNAME):
        print "path already exists! refusing to overwrite results"
        exit()
    else:
        os.makedirs(PATHNAME)
    BNF_GRAMMAR = GRAMMAR.Grammar(GRAMMAR_FILE)
    INDIVIDUALS = initialise_population(POPULATION_SIZE)
    LAST_POP = search_loop(GENERATIONS, INDIVIDUALS, BNF_GRAMMAR,
                           tournament_selection, FITNESS_FUNCTION, PATHNAME)
    endtime = time.time() - runstarttime
    print "total run took", endtime, "seconds"
def test_parse(self):
    grammar = gmr.Grammar(
        gmr.Rule('S', ['VP']),
        gmr.Rule('VP', ['V', 'NP']),
        gmr.Rule('NP', ['Det', 'Nominal']),
        gmr.Rule('Det', ['that'], preterminal=True),
        gmr.Rule('Nominal', ['flight'], preterminal=True),
        gmr.Rule('V', ['Book'], preterminal=True))
    words = ['Book', 'that', 'flight']
    parser = psr.EarleyParser(grammar)
    trees = parser.parse(words)
    self.assertEqual([[
        'S',
        ['VP', ['V', 'Book'],
         ['NP', ['Det', 'that'], ['Nominal', 'flight']]]
    ]], trees)
def create_grammar(self, _input: str):
    lines = _input.split('\n')
    non_terminals = []
    terminals = set()
    productions = {}
    for line in lines:
        line = line.split('->')
        head = line[0][:-1]   # strip the space before '->'
        body = line[1][1:]    # strip the space after '->'
        non_terminals.append(head)
        productions[head] = body.split(' | ')
    for body in productions.values():
        for production in body:
            symbols = production.split(' ')
            for symbol in symbols:
                if symbol not in non_terminals:
                    terminals.add(symbol)
    start = non_terminals[0]
    non_terminals = set(non_terminals)
    return grammar.Grammar(non_terminals, terminals, start, productions)
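# Hypothetical usage sketch: create_grammar assumes one production per line,
# in the form "S -> A B | a", with single spaces around "->" and "|".
# ("builder" stands in for whatever object defines create_grammar.)
text = "S -> A B | a\nA -> a\nB -> b"
g = builder.create_grammar(text)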
def set_cpp():
    # "программа" is Russian for "program" (the start symbol)
    return gr.Grammar(set_rules(), set_nonterm(), set_term(),
                      gr.Term("программа"))
# This fragment lost its preamble; it presumably iterates the files in
# `subfolder` and accumulates `rules` (both reconstructed here).
rules = []
for file_ in os.listdir(subfolder):
    print(file_)
    if file_ in [".DS_Store", ".DS_Storebinarized.txt"]:
        continue
    f = codecs.open(subfolder + file_, encoding="utf-8")
    for line in f:
        #print(line[:-1])
        try:
            t = tree(string=line[:-1])
            sent = t.sentence
            if "=" in sent:
                print(sent)
                continue
            # need to add the other level of "binarization"
            t.binarize()
            t.normalize()
        except Exception as e:
            print(file_, line)
            print(e)  # e.with_traceback() requires a traceback argument
            continue  # skip lines that failed to parse
        rules.extend([grammar.fromTreetoRule(x) for x in t.allRules()])
    print("")

G = grammar.Grammar(rules)
print(len(G.nonterminalrules))
print(len(G.terminalrules))
print(len(G.symbols))
with open("grammarPennTree5.txt", "wb") as out:
    pickle.dump(G, out)
def grammarTest(inFile='../ParserTongue/ebnf.ebnf'):
    ebnf = grammar.Grammar(inFile)
    for i, rule in enumerate(ebnf.getRuleList()):
        print("Rule " + str(i) + ":\n")
        print(str(rule))
        print()
def grammarFileGenIters(grammarFile, iters, _debug=False):
    g = grammar.Grammar(grammarFile)
    grammarGenIters(g, iters, _debug)
def grammarFileGen(grammarFile, _debug=False):
    g = grammar.Grammar(grammarFile)
    grammarGen(g, _debug)
def parse(self, input):
    """Read and parse automat."""
    self.index = 0
    self.str = input
    self._line = 1
    self._pos = 0
    self._charLine = 1
    self._charPos = 0
    while True:
        # wait for keyword
        token = self._getToken()
        self._tShould(token, ['id', ''])
        keyword = token.string
        if token.type == '':
            break
        token = self._getToken()
        self._tShould(token, ['='])
        if keyword == 'grammar':
            if self.grammar is not False:
                raise ValueError("Grammar is defined twice in this file.", 3)
            # new empty grammar
            self.grammar = grammar.Grammar()
            # wait for opening brackets
            token = self._getToken()
            self._tShould(token, ['('])
            token = self._getToken()
            self._tShould(token, ['{'])
            # load non-terminals
            self._loadIdsArr(self.grammar.addNonTerminal)
            # comma and opening bracket
            token = self._getToken()
            self._tShould(token, [','])
            token = self._getToken()
            self._tShould(token, ['{'])
            # load terminals
            self._loadCharArr(self.grammar.addTerminal)
            # comma and opening bracket
            token = self._getToken()
            self._tShould(token, [','])
            token = self._getToken()
            self._tShould(token, ['{'])
            # load rules
            self._loadGrammarRules()
            # comma and one character
            token = self._getToken()
            self._tShould(token, [','])
            token = self._getToken()
            self._tShould(token, ['id'])
            self.grammar.setStartSymbol(token.string)
            # closing bracket and comma - end of grammar
            token = self._getToken()
            self._tShould(token, [')'])
        elif keyword == 'automaton':
            if self.grammar is False:
                raise ValueError("Automaton must be defined after grammar.", 3)
            if self.automaton is not False:
                raise ValueError("Automaton is defined twice.", 3)
            self.automaton = True
            # new empty automat
            aut = automat.Automat()
            # automat alphabet are terminals and nonterminals from grammar
            for symbol in self.grammar.nonterminals:
                aut.addAlpha(symbol)
            for symbol in self.grammar.terminals:
                aut.addAlpha(symbol)
            # wait for opening brackets
            token = self._getToken()
            self._tShould(token, ['('])
            token = self._getToken()
            self._tShould(token, ['{'])
            # load states
            self._loadIdsArr(aut.addState)
            # comma and opening bracket
            token = self._getToken()
            self._tShould(token, [','])
            token = self._getToken()
            self._tShould(token, ['{'])
            # load rules
            self._loadAutomatRules(aut)
            # comma and start state
            token = self._getToken()
            self._tShould(token, [','])
            token = self._getToken()
            if token.type != 'id':
                raise ValueError("Missing automat start state", 3)
            else:
                aut.setStart(token.string)
            # comma and opening bracket
            token = self._getToken()
            self._tShould(token, [','])
            token = self._getToken()
            self._tShould(token, ['{'])
            self._loadIdsArr(aut.setTerminating)
            # closing bracket and nothing
            token = self._getToken()
            self._tShould(token, [')'])
            if not self.aut:
                self.aut = aut
            else:
                aut.join(self.aut)
                self.aut = aut
        elif keyword == 'precedence':
            if self.grammar is False:
                raise ValueError("Precedence must be defined after grammar.", 3)
            if self.prec is not False:
                raise ValueError("Precedence is defined twice.", 3)
            self.prec = PrecedenceTable()
            token = self._getToken()
            self._tShould(token, ['('])
            while self.loadPrecedenceRules():
                pass
        elif keyword == 'levels':
            if self.grammar is False:
                raise ValueError("Levels must be defined after grammar.", 3)
            if self.levels is not False:
                raise ValueError("Levels are defined twice.", 3)
            self.levels = True
            token = self._getToken()
            self._tShould(token, ['{'])
            aut = automat.Automat()
            # automat alphabet are terminals and nonterminals from grammar
            for symbol in self.grammar.nonterminals:
                aut.addAlpha(symbol)
            for symbol in self.grammar.terminals:
                aut.addAlpha(symbol)
            start = '0'
            aut.addState(start)
            aut.setStart(start)
            stringNum = 0
            charNum = 0
            while True:
                lastState = start
                token = self._getToken()
                self._tShould(token, ['}', ';', 'str'])
                if token.type == '}':
                    break
                else:
                    while True:
                        if token.type == ';':
                            aut.setTerminating(lastState)
                            break
                        newState = str(stringNum) + "-" + str(charNum)
                        aut.addState(newState)
                        aut.addRule(lastState, token.string, newState)
                        lastState = newState
                        token = self._getToken()
                        self._tShould(token, ['str', ';'])
                        charNum += 1
                    stringNum += 1
            if not self.aut:
                self.aut = aut
            else:
                self.aut.join(aut)
        else:
            raise ValueError("Undefined keyword '" + keyword + "'", 3)
    if self.grammar is False:
        raise ValueError("No grammar specified in grammar input file.", 3)
def main():
    # extract args
    p = args.pickle
    verbose = args.very_verbose or args.verbose
    check = args.check
    ambiguous = args.ambiguous
    mlps = args.most_likely_productions
    lower_case = args.lower_case
    test = args.test
    non_terms_for_ml = (mlps.split() if mlps and mlps.__class__ != bool
                        else ['VP', 'S', 'NP', 'SBAR', 'PP'])
    max_word_length = 15

    # loading grammar
    if p:
        if verbose:
            util.log_g("Loading grammar from pickle file %s" % (p))
        pkl_file = open(p, 'rb')
        G = pickle.load(pkl_file)
        pkl_file.close()
    else:
        if verbose:
            util.log_g("Loading grammar from treebank %s" % (args.treebank))
        f = open(args.treebank, 'r')
        G = grammar.Grammar(f, args.grammar_limit, verbose, lower_case)
        f.close()
        if args.save:
            output = open(args.save + '.pkl', 'wb')
            pickle.dump(G, output)
            output.close()
    if verbose:
        util.log_g("Grammar loaded.")

    # running checks and statistics
    if check:
        util.log_g("Testing probability consistencies.")
        util.log_g("Greatest divergence from unity: %0.20f." %
                   max([abs(1 - i) for i in G.check_pcfg_sums()]))
    if check or ambiguous:
        util.log_g("Ambiguous word tests.")
        ambig = G.ambiguous()
        ambig_words = zip(*ambig)[0] if ambig else []
        if ambiguous and not ambiguous.__class__ == bool:
            for word in ambiguous.split():
                if word in ambig_words:
                    util.log_g("'%s' is ambiguous." % (word))
                    pprint.pprint(ambig[ambig_words.index(word)])
                else:
                    util.log_g("'%s' is not ambiguous." % (word))
        else:
            util.log_g("4 randomly chosen syntactically ambiguous terminals:")
            pprint.pprint(ambig[0:4])
    if check or mlps:
        util.log_g("Most likely production for non-terminals %s:" %
                   non_terms_for_ml)
        mlps = G.most_likely_productions(non_terms_for_ml)
        pprint.pprint(mlps)

    # running CYK
    if args.cyk:
        if args.cyk.__class__ == bool:
            util.log_p("Enter new line to exit.")
            while True:
                s = raw_input('Enter a sentence to parse: ')
                if len(s):
                    if verbose:
                        util.log_p("Start CYK")
                    parse = cyk.CYK(G, s, verbose, lower_case)
                    if verbose > 1:
                        util.log_p("Covering productions:")
                        pprint.pprint(parse.covering_productions())
                    util.log_p("Covering productions string: %s" %
                               parse.covering_productions_str())
                    util.log_p("Viterbi Parse: %s" % parse.viterbi_parse())
                else:
                    break
        else:
            f = open(args.cyk)
            limit = args.parser_test_limit
            start = args.parser_test_start
            i = 0
            if test:
                f_vit = open('viterbi_sentences.txt', 'w')
            else:
                f_cov = open('covering_productions.txt', 'w')
            for line in f:
                if limit and i >= limit:
                    break
                i += 1
                if start and i < start:
                    continue
                if max_word_length and len(line.split()) > max_word_length:
                    out = "\n"
                    if test:
                        f_vit.write(out)
                    else:
                        f_cov.write(out)
                else:
                    util.log_p("Sentence %d, parsing sentence: << %s >>" %
                               (i, line.strip()))
                    parse = cyk.CYK(G, line, verbose)
                    # write parse results to output file
                    if test:
                        out = parse.viterbi_parse()
                        if out == util.NOT_IN_GRAMMAR_ERROR:
                            out = "\n"
                        else:
                            out += "\n"
                        f_vit.write(out)
                    else:
                        out = parse.covering_productions_str()
                        f_cov.write(out + "\n")
                        if verbose:
                            util.log_p("Wrote line: %s" % out)
                gc.collect()  # collect cyk object
            f.close()
            if test:
                f_vit.close()
            else:
                f_cov.close()
def parse (spec, filename='stdin'):
    '''Construct a new Grammar from the specification SPEC.'''

    def error (msg):
        '''Prints MSG to stderr and exits with a non-zero error code.'''
        print >>sys.stderr, 'Error: %s' % (msg)
        sys.exit (1)

    def checkpoint ():
        '''Create a parser checkpoint.'''
        return (lexer.checkpoint (), len (stack))

    def restore (checkpoint):
        '''Restore a parser checkpoint.'''
        lexer.restore (checkpoint[0])
        stack.__setslice__ (0, len (stack), stack[0:checkpoint[1]])
        return True

    lexer = Tokenizer (spec, r'[ \n\r\t\v\f]+', r'//[^\n\r]*?(?:[\n\r]|$)')
    stack = []                  # semantic stack
    g = grammar.Grammar ()      # the grammar to build

    def G ():
        while Declaration ():
            pass
        if not lexer.token ('%%'):
            error ('"%%" must separate declarations from rules')
        if not R ():
            error ('must have at least one rule')
        rule = stack.pop ()
        g.setStartSymbol (rule.lhs)
        g.addRule (rule)
        while R ():
            rule = stack.pop ()
            g.addRule (rule)
        return lexer.token ('$') != None

    def Declaration ():
        if AssocDecl ():
            pass
        elif ImportDecl ():
            pass
        elif IgnoreDecl ():
            pass
        elif OptDecl ():
            pass
        else:
            return False
        return True

    def AssocDecl ():
        if lexer.token ('%right'):
            assoc = grammar.Grammar.RIGHT_ASSOCIATIVE
        elif lexer.token ('%left'):
            assoc = grammar.Grammar.LEFT_ASSOCIATIVE
        else:
            return False
        if not Terminal ():
            error ('"associativity" decls require at least one operator')
        ops = [stack.pop ()]
        while Terminal ():
            ops.append (stack.pop ())
        g.declareOperatorAssocs (ops, assoc)
        return True

    def ImportDecl ():
        if not lexer.token ('%import'):
            return False
        if not PyModuleName ():
            error ('"import" decls require a module name')
        module = stack.pop ()
        g.declareImport (module)
        return True

    def IgnoreDecl ():
        if not lexer.token ('%ignore'):
            return False
        if not Terminal ():
            error ('"ignore" decls require a terminal symbol')
        term = stack.pop ()
        g.declareIgnore (term)
        return True

    def OptDecl ():
        if not lexer.token ('%optional'):
            return False
        if not (Nonterminal () and Terminal ()):
            error ('invalid %optional decl')
        regex = stack.pop ()
        lhs = stack.pop ()
        g.declareOptional (lhs, regex)
        return True

    def R ():
        if not Nonterminal ():
            return False
        if not lexer.token (r'\->'):
            error ('rules LHSs must be followed by "->"')
        rule = grammar.Rule (stack.pop ())
        if not Production ():
            error ('rule "{0}" has no productions'.format (rule.lhs))
        (rhs, actions, prec, assoc, subsym) = stack.pop ()
        rule.addProduction (rhs=rhs, actions=actions, prec=prec,
                            assoc=assoc, subsym=subsym)
        while lexer.token (r'\|'):
            if not Production ():
                error ('(%s) "|" must be followed by a production' % (rule.lhs))
            (rhs, actions, prec, assoc, subsym) = stack.pop ()
            rule.addProduction (rhs=rhs, actions=actions, prec=prec,
                                assoc=assoc, subsym=subsym)
        if not lexer.token (';'):
            error ('(%s) rules must be ended by ";"' % (rule.lhs))
        stack.append (rule)
        return True

    def Production ():
        if not (EmptyProd () or NonEmptyProd ()):
            return False
        (rhs, prec, assoc, actions, subsym) = stack.pop ()
        action = None
        if Action ():
            action = stack.pop ()
        actions.append (action)
        if subsym and len (rhs) > 1:
            # can't subparse a multi-nonterminal RHS...
            error ('"subparse" requires a one-element RHS, but you\'ve got %s'
                   % rhs)
        stack.append ((rhs, actions, prec, assoc, subsym))
        return True

    def EmptyProd ():
        if not Epsilon ():
            return False
        stack.append (([stack.pop ()], -1, None, [None], False))
        return True

    def NonEmptyProd ():
        if not ActionSymbol ():
            return False
        sym, action = stack.pop ()
        rhs = [sym]
        actions = [action]
        prec = -1
        assoc = None
        subsym = False
        while ActionSymbol ():
            sym, action = stack.pop ()
            rhs.append (sym)
            actions.append (action)
        if PrecDecl ():
            prec = stack.pop ()
        elif TempAssocDecl ():
            assoc = stack.pop ()
        elif SubParse ():
            subsym = stack.pop ()
        stack.append ((rhs, prec, assoc, actions, subsym))
        return True

    def ActionSymbol ():
        cp = checkpoint ()
        action = None
        if Action ():
            action = stack.pop ()
        if not Symbol ():
            restore (cp)
            return False
        stack.append ((stack.pop (), action))
        return True

    def PrecDecl ():
        if not lexer.token ('%dprec'):
            return False
        if not Number ():
            error ('"dprec" decls require a numeric precedence')
        return True

    def TempAssocDecl ():
        if not lexer.token ('%prec'):
            return False
        if not Terminal ():
            error ('"prec" decls require a terminal')
        return True

    def SubParse ():
        if not lexer.token ('%subparse'):
            return False
        stack.append (True)
        return True

    def Symbol ():
        return Terminal () or Nonterminal ()

    def Terminal ():
        return String () or Regex ()

    def Nonterminal ():
        match = lexer.token (r'[a-zA-Z][a-zA-Z0-9_]*')
        if not match:
            return False
        stack.append (match)
        return True

    def String ():
        match = lexer.token (r'\'.*?\'')
        if not match:
            return False
        stack.append (re.compile (re.escape (match[1:-1])))
        return True

    def Regex ():
        match = lexer.token (r'/ (?: \\\\ | \\/ | [^/])* /')
        if not match:
            return False
        try:
            stack.append (re.compile (match[1:-1]))
        except:
            error ('invalid regular expression')
        return True

    def Epsilon ():
        if not lexer.token ('_'):
            return False
        stack.append (grammar.Grammar.EPSILON)
        return True

    def PyModuleName ():
        match = lexer.token (
            r'[a-zA-Z_][a-zA-Z0-9_]* (?: \. [a-zA-Z_][a-zA-Z0-9_]*)*')
        if not match:
            return False
        stack.append (match)
        return True

    def Action ():
        match = lexer.token (r'%\{ (?: . | [\n\r])*? %\}')
        if not match:
            return False
        stack.append (match[2:-2])
        return True

    def Number ():
        match = lexer.token (r'[0-9]+')
        if not match:
            return False
        try:
            stack.append (int (match))
        except:
            error ('number too large')
        return True

    # And finally, build and return a Grammar
    if not G ():
        error ('invalid grammar')
    return g