def parse_bnf(text, epsilon='ε', eof='$'):
    """
    Parse BNF from text.

    :param text: grammar specification
    :param epsilon: empty symbol
    :param eof: EOF symbol
    :return: a grammar
    :raises InvalidGrammar: if any rule cannot be split into head/body

    Productions use the following format:

    Start -> A
    A -> ( A ) | Two
    Two -> a
    Two -> b
    """
    try:
        # Keep only meaningful lines: drop '#' comments and blank lines
        # (blank lines would otherwise fail the head/body unpacking below).
        rules = [
            line for line in text.strip().split('\n')
            if line.strip() and not line.startswith('#')
        ]
        # The head of the first rule is the starting symbol.
        start = rules[0].split('->')[0].strip()
        g = Grammar(start=start, epsilon=epsilon, eof=eof)
        for rule in rules:
            # Raises ValueError when the line has no (or several) '->'.
            head, body = [x.strip() for x in rule.split('->')]
            # Name distinct from `rules`: the original code shadowed the
            # very list it was iterating over.
            alternatives = [alt.strip() for alt in body.split('|')]
            for alternative in alternatives:
                g.add_rule(Rule(head, tuple(alternative.split())))
        return g
    except ValueError as err:
        # Chain the cause so the malformed-line details are not lost.
        raise InvalidGrammar("Invalid grammar", text) from err
def __remove_left_factoring(grammar):
    """Return an equivalent grammar with common left factors removed.

    For each nonterminal with several alternatives, alternatives are grouped
    by common prefix (via ``get_prefixes``); each shared prefix is kept on the
    original nonterminal and the differing suffixes are moved to a freshly
    generated nonterminal.

    :param grammar: input grammar
    :return: normalized equivalent grammar without left factoring
    """
    new_grammar = Grammar(start=grammar.start,
                          epsilon=grammar.epsilon,
                          eof=grammar.eof)
    new_productions = []
    for nonterminal in grammar.nonterminals:
        productions = grammar.productions_for(nonterminal)
        if len(productions) > 1:
            # Map prefix -> list of alternative suffixes (helper defined
            # elsewhere in this module).
            prefixes = get_prefixes(productions)
            for prefix, v in prefixes.items():
                if (len(v) == 1):
                    # Only one alternative under this prefix: keep it as-is.
                    new_productions.append(Rule(nonterminal, tuple(v[0])))
                    continue
                # Several alternatives share `prefix`: introduce a fresh
                # nonterminal X and emit A -> prefix X.
                # NOTE(review): assumes `prefix` is a single body element
                # here — confirm against get_prefixes.
                new_x = __generate_key(grammar, nonterminal)
                body = [prefix] + [new_x]
                new_productions.append(Rule(nonterminal, tuple(body)))
                for prod in v:
                    if not prod:
                        # Empty suffix: the new nonterminal derives epsilon.
                        new_productions.append(
                            Rule(new_x, tuple([grammar.epsilon])))
                    else:
                        new_productions.append(Rule(new_x, tuple(prod)))
        else:
            # Single production: copy it unchanged.
            new_productions.append(Rule(nonterminal, tuple(productions[0])))
    for prod in new_productions:
        new_grammar.add_rule(prod)
    return __normalize_productions(new_grammar)
def test_grammar(self):
    """Nonterminals/terminals extraction and regularity on small grammars."""
    grammar = Grammar.from_string('S -> "a" A \n A -> "b"')
    self.assertSetEqual(grammar.get_non_terminals(), {'S', 'A'})
    self.assertSetEqual(grammar.get_terminals(), {'"a"', '"b"'})
    self.assertTrue(grammar.is_regular())
    self.assertIn(Production("S", ('"a"', 'A')), grammar.get_productions())

    grammar = Grammar.from_string(
        'Sassas -> "salutarea lume" ABSOLUT "bine" | "ana" \n ABSO_aLUT -> "basfsaffas"'
    )
    self.assertSetEqual(grammar.get_non_terminals(), {'Sassas', 'ABSO_aLUT'})
    expected_terminals = {
        '"salutarea lume"', '"basfsaffas"', '"bine"', '"ana"'
    }
    self.assertSetEqual(grammar.get_terminals(), expected_terminals)
def remove_left_recursion(g):
    """
    Remove all left recursions from grammar

    Follows the classic elimination algorithm: for nonterminals in a fixed
    order A1..An, substitute earlier nonterminals appearing at the front of
    later productions, then eliminate immediate left recursion on each Ai.

    :param g: input grammar
    :return: equivalent grammar with no left-recursions
    """
    temp_grammar = copy(g)
    new_grammar = Grammar(start=temp_grammar.start,
                          epsilon=temp_grammar.epsilon,
                          eof=temp_grammar.eof)
    # Fixed ordering of nonterminals used by the substitution step below.
    nonterminals = nonterminal_ordering(temp_grammar)
    for i in range(0, len(nonterminals)):
        ai = nonterminals[i]
        for j in range(0, i):
            aj = nonterminals[j]
            # NOTE(review): temp_grammar is mutated (remove_rule/add_rule)
            # while iterating temp_grammar.productions[ai]; this is safe only
            # if `productions[ai]` yields a copy — confirm in Grammar.
            for p_ai in temp_grammar.productions[ai]:
                # For each production of the form Ai -> Aj y
                if p_ai.body and aj == p_ai.body[0]:
                    # Substitute Aj by each of its bodies: Ai -> delta y.
                    replaced_productions = [
                        Rule(ai, p_aj.body + p_ai.body[1:])
                        for p_aj in temp_grammar.productions[aj]
                    ]
                    can_remove_productions = any(
                        map(lambda x: x.is_left_recursive(),
                            replaced_productions))
                    # Replace productions only if there were left-recursive ones
                    if can_remove_productions:
                        temp_grammar.remove_rule(p_ai)
                        for p in replaced_productions:
                            temp_grammar.add_rule(p)
        # Eliminate direct left recursion on Ai (helper defined elsewhere).
        new_productions = remove_immediate_left_recursion(temp_grammar, ai)
        for p in new_productions:
            new_grammar.add_rule(p)
    return __normalize_productions(new_grammar)
def test_state_creation(self):
    """Automaton and LR-table sizes for a small expression grammar."""
    grammar = Grammar.from_string('E -> E "+" T | T \n T -> "(" E ")" | "id"')

    states, transitions = state_automaton_from_grammar(grammar, 'E')
    self.assertEqual(len(states), 9)
    self.assertEqual(sum(len(t) for t in transitions.values()), 14)

    actions, gotos = table_from_grammar(grammar, 'E')
    flat_actions = [act for row in actions.values() for act in row.values()]
    # Count each action kind across the whole table.
    for kind, count in (('SHIFT', 9), ('ACCEPT', 1), ('REDUCE', 20)):
        self.assertEqual(
            sum(1 for act in flat_actions if act['action'] == kind), count)

    flat_gotos = [entry for row in gotos.values() for entry in row.values()]
    self.assertEqual(len(flat_gotos), 5)
def setUp(self):
    """Build fixtures: `a` and `b` share rules (different insertion order);
    `c` is a structurally different grammar."""
    self.a = Grammar(start='X')
    self.a.add_rule(Rule('X', ('hello', 'Y')))
    self.a.add_rule(Rule('Y', ('world Z', )))
    self.a.add_rule(Rule('Z', ('?', )))
    self.a.add_rule(Rule('Z', ('!', )))

    # Same rules as `a`, inserted in a different order.
    self.b = Grammar(start='X')
    self.b.add_rule(Rule('Y', ('world Z', )))
    self.b.add_rule(Rule('Z', ('!', )))
    self.b.add_rule(Rule('Z', ('?', )))
    self.b.add_rule(Rule('X', ('hello', 'Y')))

    # A grammar with different productions altogether.
    self.c = Grammar(start='X')
    self.c.add_rule(Rule('X', ('bye', 'Y')))
    self.c.add_rule(Rule('Y', ('cruel world', )))
def test_parsing(self):
    """Parse valid and invalid token streams; check recorded derivations."""
    grammar = Grammar.from_string('E -> E "+" T | T \n T -> "(" E ")" | "id"')
    parser = parser_from_grammar(grammar, 'E')

    # id + ( id ): derivations are recorded in reverse (reduction) order.
    parser.parse(['"id"', '"+"', '"("', '"id"', '")"'])
    expected = [
        Production(non_terminal='E', symbols=('E', '"+"', 'T')),
        Production(non_terminal='T', symbols=('"("', 'E', '")"')),
        Production(non_terminal='E', symbols=('T',)),
        Production(non_terminal='T', symbols=('"id"',)),
        Production(non_terminal='E', symbols=('T',)),
        Production(non_terminal='T', symbols=('"id"',)),
    ]
    self.assertTrue(parser.is_parsed)
    self.assertListEqual(parser.derivations, expected)

    # A single id also parses.
    parser.parse(['"id"'])
    expected = [
        Production(non_terminal='E', symbols=('T',)),
        Production(non_terminal='T', symbols=('"id"',)),
    ]
    self.assertTrue(parser.is_parsed)
    self.assertListEqual(parser.derivations, expected)

    # A trailing operator must fail to parse.
    parser.parse(['"id"', '"+"'])
    self.assertFalse(parser.is_parsed)
class TestGrammarEquality(unittest.TestCase):
    """Test Grammar equality."""

    def setUp(self):
        """`a` and `b` share rules (different insertion order); `c` differs."""
        self.a = Grammar(start='X')
        self.a.add_rule(Rule('X', ('hello', 'Y')))
        self.a.add_rule(Rule('Y', ('world Z', )))
        self.a.add_rule(Rule('Z', ('?', )))
        self.a.add_rule(Rule('Z', ('!', )))

        # B will have productions in different order
        self.b = Grammar(start='X')
        self.b.add_rule(Rule('Y', ('world Z', )))
        self.b.add_rule(Rule('Z', ('!', )))
        self.b.add_rule(Rule('Z', ('?', )))
        self.b.add_rule(Rule('X', ('hello', 'Y')))

        # Different productions: must compare unequal to both `a` and `b`.
        self.c = Grammar(start='X')
        self.c.add_rule(Rule('X', ('bye', 'Y')))
        self.c.add_rule(Rule('Y', ('cruel world', )))

    def test_equal(self):
        """Equality ignores rule insertion order and is symmetric."""
        self.assertEqual(self.a, self.b)
        self.assertEqual(self.b, self.a)

    def test_unequal(self):
        """Grammars with different rules compare unequal."""
        self.assertNotEqual(self.a, self.c)
        self.assertNotEqual(self.b, self.c)
def test_simple(self):
    """Round-trip: str(grammar) fed back through parse_bnf reproduces it."""
    expected = Grammar(start='E')
    for head, body in (
        ('E', ('E', '+', 'T')),
        ('E', ('T', )),
        ('T', ('T', '*', 'F')),
        ('T', ('F', )),
        ('F', ('(', 'E', ')')),
        ('F', ('id', )),
    ):
        expected.add_rule(Rule(head, body))
    parsed = f.parse_bnf(str(expected))
    self.assertEqual(expected, parsed)
def test_parsing(self):
    """self.g (built in the fixture) matches this hand-written grammar."""
    expected = Grammar(start='P')
    for head, body in (
        ('P', ('D', )),
        ('D', ('T', ':', 'id', ';', 'D')),
        ('D', ('ε', )),
        ('T', ('real', )),
        ('T', ('int', )),
    ):
        expected.add_rule(Rule(head, body))
    self.assertEqual(self.g, expected)
def make_minilang_parser():
    """Initializes an LR0 parser for the minilanguage"""
    # Build the grammar from the module-level GRAMMAR string and wrap it.
    return parser_from_grammar(Grammar.from_string(GRAMMAR), 'program')