Ejemplo n.º 1
0
 def test_given_reference_followed_by_reference_then_lookahead_is_first_set(
         self) -> None:
     """A reference followed by another reference gets that one's first set.

     Rule 0 is ``A -> B X``. Following the unprocessed ``B`` must yield the
     initial state of ``B``'s rule whose lookaheads are the terminals ``X``
     can start with (``a`` and ``b``); the parent lookahead ``x`` is unused.
     """
     rules = [
         Rule('A', ['B', 'X']),
         Rule('B', ['x']),
         Rule('X', ['a']),
         Rule('X', ['b']),
     ]
     grammar = Grammar(*rules)
     origin = RuleState(0, 0, {'x'})
     expected = {RuleState(1, 0, {'a', 'b'})}
     self.assertEqual(expected, origin.follow_states(grammar))
Ejemplo n.º 2
0
 def test_given_non_lr1_when_collision_then_error(self) -> None:
     """Building a table for ``X -> 0 X 0 | 1 X 1 | 0 | 1`` raises ``ConflictError``."""
     from cmaj.parser.grammar import augment
     from cmaj.parser.graph import graph_for
     rules = [
         Rule('X', ['0', 'X', '0']),
         Rule('X', ['1', 'X', '1']),
         Rule('X', ['0']),
         Rule('X', ['1']),
     ]
     augmented = augment(Grammar(*rules), 'X')
     graph = graph_for(augmented)
     with self.assertRaises(ConflictError):
         table_for(augmented, graph)
Ejemplo n.º 3
0
 def test_given_last_unprocessed_reference_then_lookaheads_of_parent(
         self) -> None:
     """A reference in last position inherits the parent's lookaheads.

     Rule 0 is ``A -> x B`` with one symbol processed, so ``B`` is the final
     symbol. The expected follow state is the start of rule 2 (``B -> x``)
     carrying the parent lookahead ``a``.
     """
     rules = [
         Rule('A', ['x', 'B']),
         Rule('A', ['B', 'x']),
         Rule('B', ['x']),
     ]
     grammar = Grammar(*rules)
     parent = RuleState(0, 1, {'a'})
     expected = {RuleState(2, 0, {'a'})}
     self.assertEqual(expected, parent.follow_states(grammar))
Ejemplo n.º 4
0
 def test_given_single_rule_then_initial_and_final_state(self) -> None:
     """The graph for the single rule ``A -> a`` has three closures and two edges.

     NOTE(review): rule index 1 in the expected closures is presumably the
     rule added when the helper augments the grammar -- confirm against
     ``_given_grammar_then_correct_graph``.
     """
     from cmaj.testing.closure import closure
     grammar = Grammar(Rule('A', ['a']))
     initial = closure((1, 0, '$'), (0, 0, '$'))
     after_rule_a = closure((1, 1, '$'))
     after_token_a = closure((0, 1, '$'))
     vertices = [initial, after_rule_a, after_token_a]
     # Each edge is (source vertex index, symbol, target vertex index).
     edges = [(0, 'A', 1), (0, 'a', 2)]
     self._given_grammar_then_correct_graph(grammar, 'A', vertices, edges)
Ejemplo n.º 5
0
 def test_given_states_followed_by_terminals_then_complete_closure(
         self) -> None:
     """States with no reference as next symbol form their own closure.

     ``RuleState(0, 1, ...)`` sits before the terminal ``a`` of ``A -> X a``
     and ``RuleState(1, 2, ...)`` has processed both symbols of ``A -> A X``,
     so the closure must contain exactly these two states.
     """
     rules = [
         Rule('A', ['X', 'a']),
         Rule('A', ['A', 'X']),
         Rule('X', ['x']),
     ]
     grammar = Grammar(*rules)
     actual = closure_for(grammar,
                          RuleState(0, 1, {'$'}),
                          RuleState(1, 2, {'$'}))
     # Fresh instances on purpose: the expectation must not share objects
     # with the seed states handed to ``closure_for``.
     expected = {RuleState(0, 1, {'$'}), RuleState(1, 2, {'$'})}
     self.assertEqual(expected, actual)
Ejemplo n.º 6
0
 def test_given_grammar_with_terminal_and_valid_token_then_ast(
         self) -> None:
     """Parsing ``a`` with ``A -> a`` yields an ``A`` tree; ``aa`` is rejected."""
     grammar = augment(Grammar(Rule('A', ['a'])), 'A')
     table = table_for(grammar, graph_for(grammar))

     root = parse(tokens('a'), grammar, table)
     self._assert_correct_tree(('A', ('a',)), root)

     # Tokenise outside the ``with`` so that only ``parse`` may satisfy the
     # expected ``ParserError``.
     surplus_input = tokens('aa')
     with self.assertRaises(ParserError):
         parse(surplus_input, grammar, table)
Ejemplo n.º 7
0
def _rule_state_to_string(rule_state: RuleState,
                          grammar: Grammar) -> Tuple[str, str, str]:
    """Render a rule state as three display strings.

    Args:
        rule_state: State providing ``rule_index``, ``num_processed`` and
            ``lookaheads``.
        grammar: Grammar used to resolve ``rule_state.rule_index`` through
            ``rule_at``.

    Returns:
        A tuple ``(key, body, lookaheads)``: the rule's key, its symbols
        joined by spaces with ``⬤`` marking the boundary between processed
        and unprocessed symbols, and the lookaheads joined by spaces in
        sorted order.
    """
    rule = grammar.rule_at(rule_state.rule_index)
    position = rule_state.num_processed
    processed_str = ' '.join(rule.symbols[:position])
    unprocessed_str = ' '.join(rule.symbols[position:])
    # Lookaheads may be an unordered collection (e.g. a set); sorting their
    # string forms keeps the rendered text identical from run to run.
    lookaheads_str = ' '.join(
        sorted(str(symbol) for symbol in rule_state.lookaheads))
    return f'{rule.key}', f'{processed_str}⬤{unprocessed_str}', lookaheads_str
Ejemplo n.º 8
0
 def test_given_count_grammar_when_00001111_then_ast(self) -> None:
     """``00001111`` parses into four nested ``X`` nodes; ``001`` is rejected."""
     rules = [Rule('X', ['0', 'X', '1']), Rule('X', ['0', '1'])]
     grammar = augment(Grammar(*rules), 'X')
     table = table_for(grammar, graph_for(grammar))
     root = parse(tokens('00001111'), grammar, table)

     # Start with the innermost ``0 1`` pair, then wrap it three more times.
     expected = ('X', '0', '1')
     for _ in range(3):
         expected = ('X', '0', expected, '1')
     self._assert_correct_tree(expected, root)

     # Tokenise first so that only ``parse`` can raise the expected error.
     unbalanced = tokens('001')
     with self.assertRaises(ParserError):
         parse(unbalanced, grammar, table)
Ejemplo n.º 9
0
 def test_given_states_followed_by_recursion_then_closure_includes_all_lookaheads(
         self) -> None:
     """Recursive references contribute their own first set to the lookaheads.

     Starting from ``A -> a A`` with ``a`` processed, both ``A`` rules are
     expected at position 0 with the lookaheads ``$`` and ``a``.
     """
     grammar = Grammar(Rule('A', ['a', 'A']), Rule('A', ['A', 'A']))
     seed = RuleState(0, 1, {'$'})
     actual = closure_for(grammar, seed)

     expected = {RuleState(0, 1, {'$'})}
     expected.add(RuleState(1, 0, {'$', 'a'}))
     expected.add(RuleState(0, 0, {'$', 'a'}))
     self.assertEqual(expected, actual)
Ejemplo n.º 10
0
 def test_given_states_followed_by_references_then_closure_includes_references(
         self) -> None:
     """The closure follows chained references (``B -> C``, ``C -> x``).

     Every state reached this way is expected to keep the ``$`` lookahead
     of the seed state.
     """
     rules = [
         Rule('A', ['x', 'B']),
         Rule('B', ['C']),
         Rule('C', ['x']),
     ]
     grammar = Grammar(*rules)
     actual = closure_for(grammar, RuleState(0, 1, {'$'}))

     expected = {RuleState(0, 1, {'$'})}
     expected |= {RuleState(rule_index, 0, {'$'}) for rule_index in (1, 2)}
     self.assertEqual(expected, actual)
Ejemplo n.º 11
0
def compile_grammar(grammar_node: Node) -> Grammar:
    """Build a ``Grammar`` from the syntax tree of a grammar definition.

    The tree is first passed through ``squash`` (keys ``GRAMMAR``,
    ``OPTION``, ``SEQUENCE``), ``prune`` (keys ``comment``, ``=``, ``|``,
    ``eol``) and ``skip`` (keys ``LINE``, ``ANCHOR``). Every child of the
    resulting node must then have exactly two children: an identifier node,
    whose token value names the rule, and an option node that
    ``compile_rules`` expands into rules.

    Args:
        grammar_node: Root node of the parsed grammar definition.

    Returns:
        A ``Grammar`` holding all compiled rules in definition order.
    """
    from cmaj.ast.simplify import squash, prune, skip
    simplified = squash(grammar_node, 'GRAMMAR', 'OPTION', 'SEQUENCE')
    simplified = prune(simplified, 'comment', '=', '|', 'eol')
    simplified = skip(simplified, 'LINE', 'ANCHOR')

    rules: List[Rule] = []
    for definition in simplified.children:
        # Unpacking fails loudly if a definition is not an (identifier, option) pair.
        identifier, option = definition.children
        rules.extend(compile_rules(identifier.token.value, option))
    return Grammar(*rules)
Ejemplo n.º 12
0
 def test_given_lr0_grammar_then_correct_graph(self) -> None:
     """The graph for ``A -> 1 | A B``, ``B -> 0 | 1`` has six closures and five edges."""
     from cmaj.testing.closure import closure
     rules = [
         Rule('A', ['1']),
         Rule('A', ['A', 'B']),
         Rule('B', ['0']),
         Rule('B', ['1']),
     ]
     grammar = Grammar(*rules)
     vertices = [
         closure((4, 0, '$'), (0, 0, '01$'), (1, 0, '01$')),
         closure((4, 1, '$'), (1, 1, '01$'), (2, 0, '01$'), (3, 0, '01$')),
     ]
     # The remaining closures each hold a single fully processed rule.
     vertices.append(closure((0, 1, '01$')))
     vertices.append(closure((1, 2, '01$')))
     vertices.append(closure((2, 1, '01$')))
     vertices.append(closure((3, 1, '01$')))
     # Each edge is (source vertex index, symbol, target vertex index).
     edges = [
         (0, 'A', 1),
         (0, '1', 2),
         (1, 'B', 3),
         (1, '0', 4),
         (1, '1', 5),
     ]
     self._given_grammar_then_correct_graph(grammar, 'A', vertices, edges)
Ejemplo n.º 13
0
 def test_given_lookaheads_then_closure_simplifies_lookaheads(self) -> None:
     """Each rule position appears once in the closure with merged lookaheads.

     The ``B`` rules are expected with ``x y b a`` and the ``A`` rules at
     position 0 with ``b a`` only.
     """
     rules = [
         Rule('A', ['a']),
         Rule('A', ['B', 'B']),
         Rule('B', ['b']),
         Rule('B', ['A', 'B']),
     ]
     grammar = Grammar(*rules)
     actual = closure_for(grammar, RuleState(1, 1, {'x', 'y'}))

     expected = {RuleState(1, 1, {'x', 'y'})}
     expected |= {RuleState(rule_index, 0, {'x', 'y', 'b', 'a'})
                  for rule_index in (2, 3)}
     expected |= {RuleState(rule_index, 0, {'b', 'a'})
                  for rule_index in (0, 1)}
     self.assertEqual(expected, actual)
Ejemplo n.º 14
0
    def test_given_slr_grammar_then_correct_table(self) -> None:
        """Check every cell of the table for ``S -> X X``, ``X -> a X | b``.

        Ten rows are looked up through ``row_of``. Across rows 0-9 and the
        columns ``S``, ``X``, ``a``, ``b`` and the augmented EOF, 29 cells
        must be empty; the other 21 are asserted one by one below.
        """
        from cmaj.parser.grammar import augment
        from cmaj.parser.graph import graph_for
        rules = [Rule('S', ['X', 'X']), Rule('X', ['a', 'X']), Rule('X', ['b'])]
        grammar = augment(Grammar(*rules), 'S')
        graph = graph_for(grammar)
        table = table_for(grammar, graph)

        eof = Grammar.AUGMENTED_EOF
        # Arguments passed to ``row_of`` after the graph, one tuple per row.
        row_keys = [
            (0, 0, eof),
            (3, 1, eof),
            (0, 1, eof),
            (1, 1, 'a'),
            (2, 1, 'a'),
            (0, 2, eof),
            (1, 1, eof),
            (2, 1, eof),
            (1, 2, 'a'),
            (1, 2, eof),
        ]
        rows = [row_of(graph, *key) for key in row_keys]

        empty_cells = 0
        for row in range(10):
            for column in ['S', 'X', 'a', 'b', eof]:
                if table.action(row, column) is None:
                    empty_cells += 1
        self.assertEqual(29, empty_cells)

        def expect(action, row_number: int, symbol: str) -> None:
            # Compare one table cell with the expected action.
            self.assertEqual(action, table.action(rows[row_number], symbol))

        expect(Action.shift(rows[3]), 0, 'a')
        expect(Action.shift(rows[4]), 0, 'b')
        expect(Action.goto(rows[1]), 0, 'S')
        expect(Action.goto(rows[2]), 0, 'X')

        expect(Action.accept(3), 1, eof)

        expect(Action.shift(rows[6]), 2, 'a')
        expect(Action.shift(rows[7]), 2, 'b')
        expect(Action.goto(rows[5]), 2, 'X')

        expect(Action.shift(rows[3]), 3, 'a')
        expect(Action.shift(rows[4]), 3, 'b')
        expect(Action.goto(rows[8]), 3, 'X')

        expect(Action.reduce(2), 4, 'a')
        expect(Action.reduce(2), 4, 'b')

        expect(Action.reduce(0), 5, eof)

        expect(Action.shift(rows[6]), 6, 'a')
        expect(Action.shift(rows[7]), 6, 'b')
        expect(Action.goto(rows[9]), 6, 'X')

        expect(Action.reduce(2), 7, eof)

        expect(Action.reduce(1), 8, 'a')
        expect(Action.reduce(1), 8, 'b')

        expect(Action.reduce(1), 9, eof)
Ejemplo n.º 15
0
def meta_grammar() -> Grammar:
    """Return the augmented grammar for grammar-definition text.

    The productions cover lines (a definition, a comment or an empty line,
    each ended by ``eol``), definitions of the form ``identifier = OPTION``,
    options separated by ``|``, and sequences of anchors, where an anchor is
    a ``string`` or an ``identifier``. The grammar is augmented with
    ``GRAMMAR`` as the start symbol.
    """
    from cmaj.parser.grammar import Rule, augment
    productions = [
        ('GRAMMAR', ['LINE', 'GRAMMAR']),
        ('GRAMMAR', ['LINE']),
        ('LINE', ['DEFINITION', 'eol']),
        ('LINE', ['comment', 'eol']),
        ('LINE', ['eol']),
        ('DEFINITION', ['identifier', '=', 'OPTION']),
        ('OPTION', ['SEQUENCE', '|', 'OPTION']),
        ('OPTION', ['SEQUENCE']),
        ('SEQUENCE', ['ANCHOR', 'SEQUENCE']),
        ('SEQUENCE', ['ANCHOR']),
        ('ANCHOR', ['string']),
        ('ANCHOR', ['identifier']),
    ]
    rules = [Rule(key, symbols) for key, symbols in productions]
    return augment(Grammar(*rules), 'GRAMMAR')
Ejemplo n.º 16
0
 def test_given_arithmetic_grammar_when_1add1add1mul1add1_then_ast(
         self) -> None:
     """``1+1+1*1+1`` parses left-associatively with ``*`` nested below ``+``.

     The input ``11+1`` must be rejected with a ``ParserError``.
     """
     rules = [
         Rule('ADD', ['ADD', '+', 'MUL']),
         Rule('ADD', ['MUL']),
         Rule('MUL', ['MUL', '*', '1']),
         Rule('MUL', ['1']),
     ]
     grammar = augment(Grammar(*rules), 'ADD')
     table = table_for(grammar, graph_for(grammar))
     root = parse(tokens('1+1+1*1+1'), grammar, table)

     # Assemble the expected tree from the leftmost sum outwards.
     mul_one = ('MUL', '1')
     add_one = ('ADD', mul_one)
     first_sum = ('ADD', add_one, '+', mul_one)
     product = ('MUL', mul_one, '*', '1')
     second_sum = ('ADD', first_sum, '+', product)
     expected = ('ADD', second_sum, '+', mul_one)
     self._assert_correct_tree(expected, root)

     # Tokenise first so that only ``parse`` can raise the expected error.
     malformed = tokens('11+1')
     with self.assertRaises(ParserError):
         parse(malformed, grammar, table)
Ejemplo n.º 17
0
    def test_given_grammar_and_graph_then_table_with_num_closures_rows_and_num_symbols_columns(self) -> None:
        """A hand-built graph with two closures gives a 2-row, 5-column table.

        NOTE(review): the five columns presumably correspond to the symbols
        of the augmented grammar -- confirm against ``table_for``.
        """
        from cmaj.parser.grammar import augment
        from cmaj.testing.closure import closure
        rules = [Rule('A', ['a']), Rule('B', ['b'])]
        grammar = augment(Grammar(*rules), 'A')

        graph = ClosureGraph()
        first = closure((0, 0, '$'))
        second = closure((1, 0, '$'))
        # Connect the two closures in both directions.
        for source, symbol, target in ((first, 'a', second),
                                       (second, 'b', first)):
            graph.add_edge(source, symbol, target)

        table = table_for(grammar, graph)
        self.assertEqual(2, table.num_rows)
        self.assertEqual(5, table.num_columns)
Ejemplo n.º 18
0
 def test_given_states_in_cycles_then_closure_includes_all_lookaheads(
         self) -> None:
     """Cyclic references ``C -> A -> B B -> C`` spread every lookahead.

     Besides the seed state, all six rules are expected at position 0 with
     the lookaheads ``$``, ``c``, ``a`` and ``b``.
     """
     rules = [
         Rule('C', ['A']),
         Rule('A', ['B', 'B']),
         Rule('B', ['C']),
         Rule('C', ['c']),
         Rule('A', ['a']),
         Rule('B', ['b']),
     ]
     grammar = Grammar(*rules)
     actual = closure_for(grammar, RuleState(1, 1, {'$'}))

     expected = {RuleState(1, 1, {'$'})}
     expected |= {RuleState(rule_index, 0, {'$', 'c', 'a', 'b'})
                  for rule_index in range(6)}
     self.assertEqual(expected, actual)
Ejemplo n.º 19
0
 def test_given_slr_grammar_then_correct_graph(self) -> None:
     """The graph for ``S -> X X``, ``X -> a X | b`` has ten closures and 13 edges."""
     from cmaj.testing.closure import closure
     rules = [Rule('S', ['X', 'X']), Rule('X', ['a', 'X']), Rule('X', ['b'])]
     grammar = Grammar(*rules)
     vertices = [
         closure((3, 0, '$'), (0, 0, '$'), (1, 0, 'ab'), (2, 0, 'ab')),
         closure((3, 1, '$')),
         closure((0, 1, '$'), (1, 0, '$'), (2, 0, '$')),
         closure((1, 1, 'ab'), (1, 0, 'ab'), (2, 0, 'ab')),
         closure((2, 1, 'ab')),
         closure((0, 2, '$')),
         closure((1, 1, '$'), (1, 0, '$'), (2, 0, '$')),
         closure((2, 1, '$')),
         closure((1, 2, 'ab')),
         closure((1, 2, '$')),
     ]
     # Outgoing transitions per source vertex, flattened below into
     # (source, symbol, target) triples in the same order.
     transitions = {
         0: [('S', 1), ('X', 2), ('a', 3), ('b', 4)],
         2: [('X', 5), ('a', 6), ('b', 7)],
         3: [('X', 8), ('a', 3), ('b', 4)],
         6: [('X', 9), ('a', 6), ('b', 7)],
     }
     edges = [(source, symbol, target)
              for source, moves in transitions.items()
              for symbol, target in moves]
     self._given_grammar_then_correct_graph(grammar, 'S', vertices, edges)
Ejemplo n.º 20
0
def parse(tokens: List[Node], grammar: Grammar, table: ParseTable) -> Node:
    """Drive the parse table over ``tokens`` and return the resulting root node.

    A node keyed ``Grammar.AUGMENTED_EOF`` is appended to a copy of the
    input. Starting in row 0, the action for the current row and the current
    token's key is looked up and applied until an accept action is found:

    * shift pushes ``(row, token)`` and moves on to the next token,
    * goto only switches the row,
    * reduce collapses the stack with ``_reduce_stack``/``_reduce_nodes``,
      pushes the new node and follows the goto action for that node's key.

    Args:
        tokens: Token nodes to parse; the list itself is not modified.
        grammar: Grammar whose rules are referenced by reduce actions.
        table: Parse table; must have at least one row.

    Returns:
        The single node left on the stack once the input is accepted.

    Raises:
        ParserError: If no action exists for a token, the action kind is
            unknown, or more than one entry remains on the stack at the end.
    """
    from cmaj.parser.table import Action
    assert table.num_rows > 0
    tokens = tokens + [Node(Grammar.AUGMENTED_EOF)]
    stack: Stack = []
    state = 0
    position = 0
    while True:
        lookahead = tokens[position]
        action = table.action(state, lookahead.key)
        if action is None:
            raise ParserError(f'Unexpected token: {lookahead!r}')
        if action.key == Action.ACCEPT:
            break
        if action.key == Action.SHIFT:
            # Remember the row we came from together with the consumed token.
            stack.append((state, lookahead))
            state = action.index
            position += 1
            continue
        if action.key == Action.GOTO:
            state = action.index
            continue
        if action.key == Action.REDUCE:
            rule = grammar.rule_at(action.index)

            stack, state, children = _reduce_stack(stack, rule)
            reduced = _reduce_nodes(children, rule)
            stack.append((state, reduced))

            # A reduction must always be followed by a goto on the new node.
            goto_action = table.action(state, reduced.key)
            assert goto_action.key == Action.GOTO
            state = goto_action.index
            continue
        raise ParserError(f'Unexpected parser action {action!r} for token: {lookahead!r}')

    if len(stack) != 1:
        raise ParserError(f'Found unprocessed tokens: {_to_symbols(stack)!r}')
    _, root = stack[0]
    return root
Ejemplo n.º 21
0
 def test_given_cycle_then_first_cycle_breaking_terminals(self) -> None:
     """``first`` of mutually recursive ``A``/``B`` is the terminals ``a`` and ``b``."""
     rules = [
         Rule('A', ['B']),
         Rule('A', ['a']),
         Rule('B', ['A']),
         Rule('B', ['b']),
     ]
     grammar = Grammar(*rules)
     self.assertEqual({'a', 'b'}, grammar.first(['A']))
Ejemplo n.º 22
0
 def test_given_unprocessed_reference_then_initial_reference_states(
         self) -> None:
     """Following the reference in ``A -> B`` yields the start of ``B -> b``."""
     grammar = Grammar(*[Rule('A', ['B']), Rule('B', ['b'])])
     origin = RuleState(0, 0, {'b'})
     expected = {RuleState(1, 0, {'b'})}
     self.assertEqual(expected, origin.follow_states(grammar))
Ejemplo n.º 23
0
 def test_given_unprocessed_terminal_then_no_follow_states(self) -> None:
     """A state whose next symbol is a terminal has no follow states."""
     grammar = Grammar(Rule('A', ['a']))
     follow_states = RuleState(0, 0, {'a'}).follow_states(grammar)
     self.assertEqual(set(), follow_states)
Ejemplo n.º 24
0
 def test_given_unprocessed_reference_then_not_reducible_and_next_symbol_is_reference(
         self) -> None:
     """With ``a`` of ``A -> a A`` processed, the state is not reducible and ``A`` is next."""
     grammar = Grammar(Rule('A', ['a', 'A']))
     unresolved = RuleState(0, 1, {'$'})
     resolved = resolve(unresolved, grammar)
     self.assertFalse(resolved.reducible)
     self.assertEqual('A', resolved.next_symbol)
Ejemplo n.º 25
0
 def test_given_grammar_with_single_rule_then_complete_closure(
         self) -> None:
     """For the lone rule ``X -> x`` the closure is just the seed state."""
     grammar = Grammar(Rule('X', ['x']))
     actual = closure_for(grammar, RuleState(0, 0, {'$'}))
     expected = {RuleState(0, 0, {'$'})}
     self.assertEqual(expected, actual)
Ejemplo n.º 26
0
 def test_given_state_followed_by_two_references_then_lookahead_of_reference(
         self) -> None:
     """In ``A -> B B`` the first ``B`` is closed with lookahead ``b``, not ``$``."""
     grammar = Grammar(*[Rule('A', ['B', 'B']), Rule('B', ['b'])])
     actual = closure_for(grammar, RuleState(0, 0, {'$'}))
     expected = {
         RuleState(0, 0, {'$'}),
         RuleState(1, 0, {'b'}),
     }
     self.assertEqual(expected, actual)
Ejemplo n.º 27
0
 def test_given_reference_followed_by_recursion_then_lookahead_is_first_set(
         self) -> None:
     """A reference followed by a recursive reference gets that one's first set.

     In ``X -> b X X`` with ``b`` processed, both ``X`` rules are expected
     at position 0 with the lookaheads ``a`` and ``b``.
     """
     grammar = Grammar(*[Rule('X', ['a']), Rule('X', ['b', 'X', 'X'])])
     origin = RuleState(1, 1, {'x'})
     expected = {RuleState(rule_index, 0, {'a', 'b'})
                 for rule_index in (0, 1)}
     self.assertEqual(expected, origin.follow_states(grammar))
Ejemplo n.º 28
0
 def test_given_start_is_terminal_when_augment_then_error(self) -> None:
     """Augmenting an empty grammar with start symbol ``S`` fails an assertion."""
     from cmaj.parser.grammar import augment
     empty_grammar = Grammar()
     with self.assertRaises(AssertionError):
         augment(empty_grammar, 'S')
Ejemplo n.º 29
0
 def test_given_processed_rule_then_reducible(self) -> None:
     """A state that has processed both symbols of ``A -> a A`` is reducible."""
     grammar = Grammar(Rule('A', ['a', 'A']))
     finished = RuleState(0, 2, {'$'})
     resolved = resolve(finished, grammar)
     self.assertTrue(resolved.reducible)
Ejemplo n.º 30
0
 def test_given_augmented_eof_in_grammar_when_augment_then_error(self) -> None:
     """A grammar that already uses the augmented EOF symbol cannot be augmented."""
     from cmaj.parser.grammar import augment
     reserved_symbol_rule = Rule('S', [Grammar.AUGMENTED_EOF])
     grammar = Grammar(reserved_symbol_rule)
     with self.assertRaises(AssertionError):
         augment(grammar, 'S')