def buildgrammar(self):
    """Build a right-linear grammar from this automaton's states and transitions."""
    g = Grammar()
    g.nonterminals = self.states
    g.terminals = self.symbols
    g.startsymbol = str(self.initialstate)

    # Each transition (state, symbol, next_state) becomes a production
    # state -> symbol next_state; if next_state is final, also add
    # state -> symbol so the derivation can terminate there.
    for t in self.transitions:
        if len(t) == 3:
            g.add_production(t[0], t[1] + t[2])
            if t[2] in self.finalstates:
                g.add_production(t[0], t[1])

    # If the start state itself is final, the grammar must also derive the
    # empty sentence via an epsilon ('e') production.
    if g.startsymbol in self.finalstates:
        g.add_production(g.startsymbol, 'e')

    self.grammar = g
    print('Nonterminals: ', self.grammar.nonterminals)
    print('Terminals: ', self.grammar.terminals)
    print('Start symbol: ', self.grammar.startsymbol)
    print('Productions: ', self.grammar.productions)
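
# A minimal usage sketch for buildgrammar, assuming the enclosing class is
# importable as FiniteAutomaton and exposes the attributes used above; the
# module path and attribute assignments here are illustrative assumptions,
# not taken from the surrounding code.
from automaton import FiniteAutomaton  # hypothetical import

fa = FiniteAutomaton()
fa.states = {'S', 'A'}               # become the grammar's nonterminals
fa.symbols = {'a', 'b'}              # become the grammar's terminals
fa.initialstate = 'S'
fa.finalstates = {'A'}
fa.transitions = [('S', 'a', 'S'), ('S', 'b', 'A')]
fa.buildgrammar()                    # prints the grammar S -> aS | bA | b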
import unittest

# Grammar and Production are assumed to live in the project's grammar
# module; adjust this import to the actual package layout.
from grammar import Grammar, Production


class TestGrammar(unittest.TestCase):
    def setUp(self):
        self.grammar = Grammar()

    def test_create_empty_grammar(self):
        grammar = Grammar()
        self.assertSetEqual(set(), grammar.productions())

    def test_create_production(self):
        production = Production('S', 'aS')
        self.assertEqual('S', production.left())
        self.assertEqual('aS', production.right())

    def test_add_production(self):
        self.grammar.add_production(Production("S", "aS"))
        self.assertEqual(1, self.grammar.productions_quantity())

    def test_grammar_conversion_ndfa_fa_aaab(self):
        # S -> aS | b
        self.grammar.add_production(Production('S', 'aS'))
        self.grammar.add_production(Production('S', 'b'))
        fa = self.grammar.to_finite_automaton()
        # Should accept
        self.assertTrue(fa.recognize_sentence('b'))
        self.assertTrue(fa.recognize_sentence('ab'))
        self.assertTrue(fa.recognize_sentence('aab'))
        self.assertTrue(fa.recognize_sentence('aaaaaaaaaaaaab'))
        # Shouldn't accept
        self.assertFalse(fa.recognize_sentence(''))
        self.assertFalse(fa.recognize_sentence('a'))
        self.assertFalse(fa.recognize_sentence('aa'))
        self.assertFalse(fa.recognize_sentence('aaaaaaaaaaaaa'))
        self.assertFalse(fa.recognize_sentence('ba'))
        self.assertFalse(fa.recognize_sentence('abb'))
        self.assertFalse(fa.recognize_sentence('abaaaaaaab'))

    def test_grammar_conversion_ndfa_fa_aabbccd(self):
        # S -> aS | bB
        # B -> bB | cC
        # C -> cC | d
        self.grammar.add_production(Production('S', 'aS'))
        self.grammar.add_production(Production('S', 'bB'))
        self.grammar.add_production(Production('B', 'bB'))
        self.grammar.add_production(Production('B', 'cC'))
        self.grammar.add_production(Production('C', 'cC'))
        self.grammar.add_production(Production('C', 'd'))
        fa = self.grammar.to_finite_automaton()
        # Should accept
        self.assertTrue(fa.recognize_sentence('abcd'))
        self.assertTrue(fa.recognize_sentence('bcd'))
        self.assertTrue(fa.recognize_sentence('bbbcccd'))
        self.assertTrue(fa.recognize_sentence('aaabbbcccd'))
        self.assertTrue(fa.recognize_sentence('aaaabccccd'))
        self.assertTrue(fa.recognize_sentence('aaaabcd'))
        # Shouldn't accept
        self.assertFalse(fa.recognize_sentence(''))
        self.assertFalse(fa.recognize_sentence('abc'))
        self.assertFalse(fa.recognize_sentence('acd'))
        self.assertFalse(fa.recognize_sentence('abd'))
        self.assertFalse(fa.recognize_sentence('aaaaabbbbbcccc'))
        self.assertFalse(fa.recognize_sentence('dabc'))
        self.assertFalse(fa.recognize_sentence('abdc'))
        self.assertFalse(fa.recognize_sentence('adbc'))
        self.assertFalse(fa.recognize_sentence('aadbbccd'))
        self.assertFalse(fa.recognize_sentence('dabcd'))
        self.assertFalse(fa.recognize_sentence('abcdd'))

    def test_grammar_conversion_ndfa_fa_ccababba(self):
        # S -> cS | cA
        # A -> aA | bA | a | b
        self.grammar.add_production(Production('S', 'cS'))
        self.grammar.add_production(Production('S', 'cA'))
        self.grammar.add_production(Production('A', 'aA'))
        self.grammar.add_production(Production('A', 'bA'))
        self.grammar.add_production(Production('A', 'a'))
        self.grammar.add_production(Production('A', 'b'))
        fa = self.grammar.to_finite_automaton()
        # Should accept
        self.assertTrue(fa.recognize_sentence('ca'))
        self.assertTrue(fa.recognize_sentence('cb'))
        self.assertTrue(fa.recognize_sentence('ccccca'))
        self.assertTrue(fa.recognize_sentence('cccccb'))
        self.assertTrue(fa.recognize_sentence('cab'))
        self.assertTrue(fa.recognize_sentence('cba'))
        self.assertTrue(fa.recognize_sentence('cbababba'))
        self.assertTrue(fa.recognize_sentence('ccccababaaaabbbbbbabaabaabbb'))
        # Shouldn't accept
        self.assertFalse(fa.recognize_sentence(''))
        self.assertFalse(fa.recognize_sentence('c'))
        self.assertFalse(fa.recognize_sentence('cccccc'))
        self.assertFalse(fa.recognize_sentence('a'))
        self.assertFalse(fa.recognize_sentence('b'))
        self.assertFalse(fa.recognize_sentence('babaaab'))
        self.assertFalse(fa.recognize_sentence('babababaabc'))
        self.assertFalse(fa.recognize_sentence('bababcabaab'))

    def test_text_to_grammar(self):
        text = "S -> aA | a | bS\nA -> aS | bA | b"
        grammar = Grammar.text_to_grammar(text)
        fa = grammar.to_finite_automaton()
        self.assertTrue(fa.recognize_sentence("babababbbbaa"))
        self.assertFalse(fa.recognize_sentence("abbbbaabaabbba"))

    def test_text_to_grammar_epsilon(self):
        text = "S -> aA\nA -> aS | bB\nB->bB | &"
        grammar = Grammar.text_to_grammar(text)
        fa = grammar.to_finite_automaton()
        self.assertTrue(fa.recognize_sentence("aaaaabbb"))
        self.assertFalse(fa.recognize_sentence("aaaabbb"))

    def test_text_to_grammar_epsilon_2(self):
        text = "S -> aS | a | bS | b"
        grammar = Grammar.text_to_grammar(text)
        fa = grammar.to_finite_automaton()
        self.assertTrue(fa.recognize_sentence("abbabaaababbabab"))
        self.assertFalse(fa.recognize_sentence("babbababcabab"))

    def test_text_to_grammar_2(self):
        text = "S->aA\nA->b|&"
        grammar = Grammar.text_to_grammar(text)
        fa = grammar.to_finite_automaton()
        fa.rename_states()
        self.assertTrue(fa.recognize_sentence("ab"))
        self.assertFalse(fa.recognize_sentence("b"))
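
# Standard unittest entry point so the suite above can be run directly as a
# script as well as via `python -m unittest`.
if __name__ == '__main__':
    unittest.main()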
# Requires json, os and warnings, plus Grammar, LRTable, LRParser and
# ParserGeneratorWarning from the surrounding module; this follows rply's
# ParserGenerator.build, with the table cache redirected to a local file.
def build(self):
    g = Grammar(self.tokens)

    # Register operator precedence: each level of self.precedence is a
    # (associativity, terminals) pair, numbered from 1 upward.
    for level, (assoc, terms) in enumerate(self.precedence, 1):
        for term in terms:
            g.set_precedence(term, assoc, level)

    for prod_name, syms, func, precedence in self.productions:
        g.add_production(prod_name, syms, func, precedence)

    g.set_start()

    for unused_term in g.unused_terminals():
        warnings.warn(
            "Token %r is unused" % unused_term,
            ParserGeneratorWarning,
            stacklevel=2
        )
    for unused_prod in g.unused_productions():
        warnings.warn(
            "Production %r is not reachable" % unused_prod,
            ParserGeneratorWarning,
            stacklevel=2
        )

    g.build_lritems()
    g.compute_first()
    g.compute_follow()

    # Cache the parse table on disk. Originally this used
    # AppDirs("rply").user_cache_dir; here a local file is used instead.
    cache_file = 'zgrammar.txt'
    table = None
    if os.path.exists(cache_file):
        with open(cache_file, 'r') as f:
            data = json.load(f)
        if self.data_is_valid(g, data):
            table = LRTable.from_cache(g, data)
    if table is None:
        table = LRTable.from_grammar(g)
        serial = self.serialize_table(table)
        try:
            with open(cache_file, "w") as f:
                json.dump(serial, f)
        except IOError as e:
            print(e)

    if table.sr_conflicts:
        warnings.warn(
            "%d shift/reduce conflict%s" % (
                len(table.sr_conflicts),
                "s" if len(table.sr_conflicts) > 1 else ""
            ),
            ParserGeneratorWarning,
            stacklevel=2,
        )
    if table.rr_conflicts:
        warnings.warn(
            "%d reduce/reduce conflict%s" % (
                len(table.rr_conflicts),
                "s" if len(table.rr_conflicts) > 1 else ""
            ),
            ParserGeneratorWarning,
            stacklevel=2,
        )
    return LRParser(table, self.error_handler)
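
# Usage sketch for build(), modeled on rply's ParserGenerator API (the
# decorator-based production registration is real rply usage; the token
# names and grammar below are illustrative).
from rply import ParserGenerator

pg = ParserGenerator(['NUMBER', 'PLUS'], precedence=[('left', ['PLUS'])])

@pg.production('expr : expr PLUS expr')
def expr_plus(p):
    return p[0] + p[2]

@pg.production('expr : NUMBER')
def expr_number(p):
    return int(p[0].getstr())

parser = pg.build()  # triggers the table construction and caching shown above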
import re
from enum import Enum


class _BNFParser:
    '''
    bnf  := prod end | prod bnf
    prod := nterm ':=' rhs
    syms := sym | sym syms
    rhs  := syms | syms '|' rhs
    '''

    class Token(Enum):
        END = 1
        SYM = 2
        KEYWORD = 3

    def __init__(self, buf: str):
        self.pos = 0
        self.tokens = self.tokenize(buf)
        self.grammar = Grammar()
        self.parse_bnf()
        self.fix_grammar()

    def peek(self):
        if self.pos < len(self.tokens):
            return self.tokens[self.pos]
        return (self.Token.END, None)

    def get(self, n=1):
        result = self.peek()
        self.pos += n
        return result

    def unget(self, n=1):
        self.pos -= n

    def fix_grammar(self):
        # Every symbol that never appears on a left-hand side is a terminal;
        # '@' is reserved and excluded.
        all_syms = set()
        for prodlist in self.grammar.prods.values():
            for prod in prodlist:
                all_syms.update(prod.syms)
        self.grammar.terms = all_syms - self.grammar.prods.keys() - set('@')

    def parse_rhs(self):
        result = list()
        while True:
            token = self.get()
            if token[0] == self.Token.END:
                self.unget()
                return result
            if token[0] == self.Token.KEYWORD:
                if token[1] == '|':
                    return result
                else:
                    raise SyntaxError("Unexpected token " + str(token))
            result.append(token[1])

    def parse_prod(self):
        token = self.get()
        if token[0] != self.Token.SYM:
            raise SyntaxError("Nonterminal expected, got " + str(token))
        nterm = token[1]
        if not self.grammar.start:
            self.grammar.start = nterm
        token = self.get()
        if token[0] != self.Token.KEYWORD or token[1] != ':=':
            raise SyntaxError("Keyword ':=' expected, got " + str(token))
        while True:
            if self.peek()[0] == self.Token.END:
                return
            prod_list = self.parse_rhs()
            if not len(prod_list):
                raise SyntaxError("Empty right hand side of production "
                                  "for nonterminal " + nterm)
            self.grammar.add_production(nterm, prod_list)

    def parse_bnf(self):
        while True:
            token = self.peek()
            if token[0] == self.Token.END:
                if token[1] is None:
                    return      # real end of input
                self.get()      # skip a newline END and keep parsing
            else:
                self.parse_prod()

    def tokenize(self, buf: str) -> list:
        result = list()
        regexp_space = re.compile(r"[ \t]+")
        # KEYWORD is tried before SYM so that '|' and ':=' are not
        # swallowed by the catch-all symbol pattern.
        regexps = (
            (self.Token.KEYWORD, re.compile(r"\||:=")),
            (self.Token.SYM, re.compile(r"[^ \t\r\n]+")),
            (self.Token.END, re.compile(r"[\r\n]+")),
        )
        i = 0
        while i < len(buf):
            # Skip spaces
            m = regexp_space.match(buf, i)
            if m:
                i = m.end()
                if i == len(buf):
                    break
            for token, regexp in regexps:
                m = regexp.match(buf, i)
                if m:
                    if token == self.Token.SYM and m.group() == r"'\''":
                        # The escaped quote literal '\'' denotes a plain '
                        result.append((self.Token.SYM, "'"))
                    else:
                        result.append((token, m.group()))
                    i = m.end()
                    break
            else:
                raise SyntaxError("Unknown token at pos {} ({})".format(
                    i, buf[i:i + 10]))
        return result
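
# A minimal driving example for _BNFParser (assuming Grammar, used by the
# parser above, is defined in the same module; the grammar text follows
# the bnf syntax given in the class docstring).
bnf = "expr := expr '+' term | term\nterm := NUM\n"
p = _BNFParser(bnf)
print(p.grammar.start)   # 'expr' (the first nonterminal encountered)
print(p.grammar.terms)   # symbols that never appear on a left-hand side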
class Parser:
    def __init__(self, source):
        self.token = ''
        self.rule = []
        self.rule_list = []
        self.terminal_name = {}
        self.terminal_group = {}
        self.start_token = ''
        self.grammar = None
        self.l = Lexer(source)
        self.parse()

    def match(self, m):
        if m == self.token[0]:
            self.token = self.l.next()
        else:
            print('match error', m)

    def parse(self):
        self.grammar = Grammar()
        self.token = self.l.next()
        self.terminals()
        self.start()
        self.productions()

    def terminals(self):
        # Each TOKEN keyword opens a group of terminals; the group number
        # is passed along with every terminal in that group.
        group = 0
        while self.token[0] == 'TOKEN':
            self.match('TOKEN')
            while self.token[0] == 'TERM':
                self.grammar.add_terminal(self.token[1], group)
                self.match('TERM')
            group += 1

    def start(self):
        self.match('START')
        self.start_token = self.token[1]
        self.match('NONTERM')

    def productions(self):
        self.match('BLOCK')
        while self.token[0] != 'BLOCK':
            self.left_side()
        self.grammar.start_token = self.start_token

    def left_side(self):
        self.rule_list = []
        production_name = self.token[1]
        self.match('NONTERM')
        self.match('COLON')
        self.right_side()
        self.grammar.add_production(production_name, self.rule_list)
        self.match('SEMI')

    def right_side(self):
        rule = []
        first = True
        while self.token[0] != 'SEMI':
            if self.token[0] in ('TERM', 'NONTERM', 'CHAR'):
                first = False
                rule.append(self.token[1])
                self.match(self.token[0])
            elif self.token[0] == 'PIPE':
                if first:
                    # A leading pipe denotes an empty (epsilon) alternative.
                    self.rule_list.append(Rule(['']))
                    first = False
                    self.match('PIPE')
                else:
                    self.rule_list.append(Rule(rule))
                    rule = []
                    self.match('PIPE')
                    if self.token[0] == 'PIPE':
                        # Two pipes in a row: an empty alternative between them.
                        self.rule_list.append(Rule(['']))
                        self.match(self.token[0])
                    elif self.token[0] == 'SEMI':
                        # A trailing pipe: the last alternative is empty.
                        rule = ['']
            else:
                print('right_side:', self.token)
        self.rule_list.append(Rule(rule))

    def printer(self, fo=None):
        pp = PrettyPrint(self.grammar)
        pp.printer(fo)
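
# Hypothetical driver for Parser: the concrete input syntax is defined by
# Lexer (not shown); parse() expects a TOKEN section, a START declaration,
# and BLOCK-delimited productions ending in SEMI, so the file name below
# is only a placeholder.
source = open('grammar.def').read()  # placeholder input file
p = Parser(source)
p.printer()  # pretty-print the parsed grammar via PrettyPrint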