예제 #1
0
def parse_bnf(text, epsilon='ε', eof='$'):
    """
    Parse a BNF grammar specification into a Grammar.

    :param text: grammar specification
    :param epsilon: empty symbol
    :param eof: EOF symbol
    :return: a grammar
    :raises InvalidGrammar: if the text contains no productions or a line
        does not have exactly one ``->`` separator

    Productions use the following format:

    Start -> A
    A -> ( A ) | Two
    Two -> a
    Two -> b

    The head of the first production is used as the start symbol. Lines that
    are blank or whose first non-blank character is ``#`` are ignored.
    """
    # Normalise every line first so indented comments, blank separator lines
    # and stray '\r' characters do not get mistaken for productions.
    stripped_lines = (line.strip() for line in text.strip().split('\n'))
    rule_lines = [
        line for line in stripped_lines
        if line and not line.startswith('#')
    ]

    # Without this guard an input made only of comments would fail with an
    # IndexError below instead of the documented InvalidGrammar.
    if not rule_lines:
        raise InvalidGrammar("Invalid grammar", text)

    try:
        # First rule as starting symbol
        start = rule_lines[0].split('->')[0].strip()
        g = Grammar(start=start, epsilon=epsilon, eof=eof)

        for line in rule_lines:
            # Unpacking fails with ValueError unless there is exactly one '->'.
            head, body = [part.strip() for part in line.split('->')]
            for alternative in body.split('|'):
                # str.split() with no argument also discards the whitespace
                # surrounding each alternative.
                g.add_rule(Rule(head, tuple(alternative.split())))

        return g
    except ValueError as err:
        # Keep the original error as the cause to ease debugging.
        raise InvalidGrammar("Invalid grammar", text) from err
예제 #2
0
def __remove_left_factoring(grammar):
    """
    Build a new grammar from ``grammar`` with its productions regrouped by
    the prefixes reported by ``get_prefixes``.

    :param grammar: input grammar
    :return: result of ``__normalize_productions`` applied to the new grammar

    Nonterminals with at most one production are copied over unchanged. For
    the others, every prefix group with a single entry is kept as a rule of
    the original nonterminal, while larger groups are moved under a new
    nonterminal obtained from ``__generate_key``.
    """
    result = Grammar(start=grammar.start,
                     epsilon=grammar.epsilon,
                     eof=grammar.eof)

    collected_rules = []

    for head in grammar.nonterminals:
        bodies = grammar.productions_for(head)

        if len(bodies) <= 1:
            # Nothing to factor: copy the first (presumably only) production.
            collected_rules.append(Rule(head, tuple(bodies[0])))
            continue

        for prefix, group in get_prefixes(bodies).items():
            if len(group) == 1:
                # A prefix used by a single production needs no new symbol.
                collected_rules.append(Rule(head, tuple(group[0])))
                continue

            fresh_symbol = __generate_key(grammar, head)
            collected_rules.append(Rule(head, (prefix, fresh_symbol)))
            for remainder in group:
                # An empty remainder is represented by the epsilon symbol.
                symbols = tuple(remainder) if remainder else (grammar.epsilon,)
                collected_rules.append(Rule(fresh_symbol, symbols))

    for rule in collected_rules:
        result.add_rule(rule)
    return __normalize_productions(result)
예제 #3
0
    def test_grammar(self):
        """Check symbol sets, regularity and productions of parsed grammars."""
        # Small grammar with one production per nonterminal.
        simple = Grammar.from_string('S -> "a" A \n A -> "b"')

        expected_non_terminals = {'S', 'A'}
        expected_terminals = {'"a"', '"b"'}
        self.assertSetEqual(simple.get_non_terminals(), expected_non_terminals)
        self.assertSetEqual(simple.get_terminals(), expected_terminals)
        self.assertTrue(simple.is_regular())

        self.assertIn(Production("S", ('"a"', 'A')), simple.get_productions())

        # Longer names, an underscore, alternatives and a terminal with a space.
        source = 'Sassas -> "salutarea lume" ABSOLUT  "bine" | "ana" \n ABSO_aLUT -> "basfsaffas"'
        longer = Grammar.from_string(source)

        self.assertSetEqual(longer.get_non_terminals(), {'Sassas', 'ABSO_aLUT'})
        expected_terminals = {
            '"salutarea lume"', '"basfsaffas"', '"bine"', '"ana"'
        }
        self.assertSetEqual(longer.get_terminals(), expected_terminals)
예제 #4
0
def remove_left_recursion(g):
    """
    Remove all left recursions from grammar
    :param g: input grammar
    :return: equivalent grammar with no left-recursions

    Works on a copy of ``g``. Nonterminals are visited in the order given by
    ``nonterminal_ordering``; for each one, productions that start with an
    earlier nonterminal are expanded when the expansion exposes a left
    recursion, and the rules returned by ``remove_immediate_left_recursion``
    are added to the output grammar.
    """
    working = copy(g)
    result = Grammar(start=working.start,
                     epsilon=working.epsilon,
                     eof=working.eof)
    ordering = nonterminal_ordering(working)

    for i, ai in enumerate(ordering):
        for j in range(i):
            aj = ordering[j]
            # NOTE(review): rules of ``ai`` may be removed/added below while
            # this collection is being iterated -- confirm that is safe for
            # the container type used by Grammar.productions.
            for rule in working.productions[ai]:
                # Only productions of the form Ai -> Aj y are candidates.
                if not (rule.body and aj == rule.body[0]):
                    continue

                expansions = [
                    Rule(ai, aj_rule.body + rule.body[1:])
                    for aj_rule in working.productions[aj]
                ]
                # Substitute only if at least one expansion is left-recursive.
                if any(expanded.is_left_recursive() for expanded in expansions):
                    working.remove_rule(rule)
                    for expanded in expansions:
                        working.add_rule(expanded)

        for rule in remove_immediate_left_recursion(working, ai):
            result.add_rule(rule)

    return __normalize_productions(result)
예제 #5
0
    def test_state_creation(self):
        """Check the sizes of the automaton and tables built for a grammar."""
        gramm = Grammar.from_string('E -> E "+" T | T \n T -> "(" E ")" | "id"')

        config_sets, transitions = state_automaton_from_grammar(gramm, 'E')
        self.assertEqual(len(config_sets), 9)
        transition_count = sum(len(targets) for targets in transitions.values())
        self.assertEqual(transition_count, 14)

        actions, gotos = table_from_grammar(gramm, 'E')

        # Flatten the two-level action table into a single list of entries.
        flat_actions = []
        for row in actions.values():
            flat_actions.extend(row.values())

        # get the number of actions of each kind
        for kind, expected in (('SHIFT', 9), ('ACCEPT', 1), ('REDUCE', 20)):
            found = sum(1 for entry in flat_actions if entry['action'] == kind)
            self.assertEqual(found, expected)

        flat_gotos = []
        for row in gotos.values():
            flat_gotos.extend(row.values())
        self.assertEqual(len(flat_gotos), 5)
예제 #6
0
    def setUp(self):
        """Create three grammars: ``a`` and ``b`` share rules, ``c`` differs."""
        self.a = Grammar(start='X')
        self.b = Grammar(start='X')
        self.c = Grammar(start='X')

        rules_a = [
            ('X', ('hello', 'Y')),
            ('Y', ('world Z', )),
            ('Z', ('?', )),
            ('Z', ('!', )),
        ]
        for head, body in rules_a:
            self.a.add_rule(Rule(head, body))

        # B will have productions in different order
        rules_b = [
            ('Y', ('world Z', )),
            ('Z', ('!', )),
            ('Z', ('?', )),
            ('X', ('hello', 'Y')),
        ]
        for head, body in rules_b:
            self.b.add_rule(Rule(head, body))

        rules_c = [
            ('X', ('bye', 'Y')),
            ('Y', ('cruel world', )),
        ]
        for head, body in rules_c:
            self.c.add_rule(Rule(head, body))
예제 #7
0
    def test_parsing(self):
        """Parse valid and invalid token lists and check the derivations."""
        gramm = Grammar.from_string('E -> E "+" T | T \n T -> "(" E ")" | "id"')
        parser = parser_from_grammar(gramm, 'E')

        # Productions used by the expectations below.
        t_from_id = Production(non_terminal='T', symbols=('"id"',))
        e_from_t = Production(non_terminal='E', symbols=('T',))
        t_from_parens = Production(non_terminal='T', symbols=('"("', 'E', '")"'))
        e_from_sum = Production(non_terminal='E', symbols=('E', '"+"', 'T'))

        # Valid input: id + ( id )
        parser.parse(['"id"', '"+"', '"("', '"id"', '")"'])
        derivs = parser.derivations
        # Listed in reduction order, then reversed for the comparison.
        expected_derivs = [
            t_from_id, e_from_t,
            t_from_id, e_from_t,
            t_from_parens,
            e_from_sum,
        ]
        expected_derivs.reverse()
        self.assertTrue(parser.is_parsed)
        self.assertListEqual(derivs, expected_derivs)

        # Valid input: a single id
        parser.parse(['"id"'])
        derivs = parser.derivations
        expected_derivs = [e_from_t, t_from_id]
        self.assertTrue(parser.is_parsed)
        self.assertListEqual(derivs, expected_derivs)

        # Invalid input: dangling operator
        parser.parse(['"id"', '"+"'])
        derivs = parser.derivations
        self.assertFalse(parser.is_parsed)
예제 #8
0
class TestGrammarEquality(unittest.TestCase):
    """Test Grammar equality."""

    def setUp(self):
        """Create three grammars: ``a`` and ``b`` share rules, ``c`` differs."""
        self.a = Grammar(start='X')
        self.b = Grammar(start='X')
        self.c = Grammar(start='X')

        rules_a = [
            ('X', ('hello', 'Y')),
            ('Y', ('world Z', )),
            ('Z', ('?', )),
            ('Z', ('!', )),
        ]
        for head, body in rules_a:
            self.a.add_rule(Rule(head, body))

        # B will have productions in different order
        rules_b = [
            ('Y', ('world Z', )),
            ('Z', ('!', )),
            ('Z', ('?', )),
            ('X', ('hello', 'Y')),
        ]
        for head, body in rules_b:
            self.b.add_rule(Rule(head, body))

        rules_c = [
            ('X', ('bye', 'Y')),
            ('Y', ('cruel world', )),
        ]
        for head, body in rules_c:
            self.c.add_rule(Rule(head, body))

    def test_equal(self):
        """Grammars with the same rules compare equal in both directions."""
        first, second = self.a, self.b
        self.assertEqual(first, second)
        self.assertEqual(second, first)

    def test_unequal(self):
        """Grammars with different rules compare unequal."""
        different = self.c
        self.assertNotEqual(self.a, different)
        self.assertNotEqual(self.b, different)
예제 #9
0
 def test_simple(self):
     """Round-trip a grammar through ``str`` and ``f.parse_bnf``."""
     expected = Grammar(start='E')
     rules = [
         ('E', ('E', '+', 'T')),
         ('E', ('T', )),
         ('T', ('T', '*', 'F')),
         ('T', ('F', )),
         ('F', ('(', 'E', ')')),
         ('F', ('id', )),
     ]
     for head, body in rules:
         expected.add_rule(Rule(head, body))

     # Parsing the textual form should give back an equal grammar.
     parsed = f.parse_bnf(str(expected))
     self.assertEqual(expected, parsed)
예제 #10
0
 def test_parsing(self):
     """Compare ``self.g`` with a grammar assembled rule by rule."""
     expected = Grammar(start='P')
     rules = [
         ('P', ('D', )),
         ('D', ('T', ':', 'id', ';', 'D')),
         ('D', ('ε', )),
         ('T', ('real', )),
         ('T', ('int', )),
     ]
     for head, body in rules:
         expected.add_rule(Rule(head, body))
     self.assertEqual(self.g, expected)
예제 #11
0
def make_minilang_parser():
    """Build the minilanguage parser (described as LR0) from ``GRAMMAR``.

    The grammar text is parsed with ``Grammar.from_string`` and the parser
    is created with ``'program'`` as the second argument.
    """
    return parser_from_grammar(Grammar.from_string(GRAMMAR), 'program')