def test_given_reference_followed_by_reference_then_lookahead_is_first_set(
        self) -> None:
    """Check follow states when a reference is directly followed by another reference.

    With ``A -> B X`` and ``X -> a | b``, the follow states of
    ``RuleState(0, 0, {'x'})`` must be exactly ``RuleState(1, 0, {'a', 'b'})``.
    """
    rules = [
        Rule('A', ['B', 'X']),
        Rule('B', ['x']),
        Rule('X', ['a']),
        Rule('X', ['b']),
    ]
    grammar = Grammar(*rules)
    start = RuleState(0, 0, {'x'})

    expected = {RuleState(1, 0, {'a', 'b'})}
    self.assertEqual(expected, start.follow_states(grammar))
def test_given_non_lr1_when_collision_then_error(self) -> None:
    """Check that ``table_for`` raises ``ConflictError`` for this grammar.

    The grammar is ``X -> 0 X 0 | 1 X 1 | 0 | 1``, augmented with start
    symbol ``X``.
    """
    from cmaj.parser.grammar import augment
    from cmaj.parser.graph import graph_for

    rules = [
        Rule('X', ['0', 'X', '0']),
        Rule('X', ['1', 'X', '1']),
        Rule('X', ['0']),
        Rule('X', ['1']),
    ]
    augmented = augment(Grammar(*rules), 'X')
    graph = graph_for(augmented)

    with self.assertRaises(ConflictError):
        table_for(augmented, graph)
def test_given_last_unprocessed_reference_then_lookaheads_of_parent(
        self) -> None:
    """Check follow states when the remaining symbol is a trailing reference.

    For ``A -> x B`` the follow states of ``RuleState(0, 1, {'a'})`` must be
    exactly ``RuleState(2, 0, {'a'})``, i.e. the same lookahead set ``{'a'}``.
    """
    rules = [
        Rule('A', ['x', 'B']),
        Rule('A', ['B', 'x']),
        Rule('B', ['x']),
    ]
    grammar = Grammar(*rules)
    parent = RuleState(0, 1, {'a'})

    expected = {RuleState(2, 0, {'a'})}
    self.assertEqual(expected, parent.follow_states(grammar))
def test_given_single_rule_then_initial_and_final_state(self) -> None:
    """Check the graph built for the single-rule grammar ``A -> a``.

    The expected graph has three closures and two edges leaving closure 0,
    labelled ``'A'`` and ``'a'``.
    """
    from cmaj.testing.closure import closure

    grammar = Grammar(Rule('A', ['a']))
    vertices = [
        closure((1, 0, '$'), (0, 0, '$')),
        closure((1, 1, '$')),
        closure((0, 1, '$')),
    ]
    edges = [
        (0, 'A', 1),
        (0, 'a', 2),
    ]
    self._given_grammar_then_correct_graph(grammar, 'A', vertices, edges)
def test_given_states_followed_by_terminals_then_complete_closure(
        self) -> None:
    """Check that ``closure_for`` returns only the two states it was given.

    The input states are ``RuleState(0, 1, {'$'})`` and
    ``RuleState(1, 2, {'$'})``; the result must equal that same pair.
    """
    rules = [
        Rule('A', ['X', 'a']),
        Rule('A', ['A', 'X']),
        Rule('X', ['x']),
    ]
    grammar = Grammar(*rules)

    actual = closure_for(grammar,
                         RuleState(0, 1, {'$'}),
                         RuleState(1, 2, {'$'}))

    unchanged = {
        RuleState(0, 1, {'$'}),
        RuleState(1, 2, {'$'}),
    }
    self.assertEqual(unchanged, actual)
def test_given_grammar_with_terminal_and_valid_token_then_ast(
        self) -> None:
    """Parse ``'a'`` with the grammar ``A -> a`` and check the resulting tree.

    Also checks that parsing ``'aa'`` raises ``ParserError``.
    """
    base = Grammar(Rule('A', ['a']))
    grammar = augment(base, 'A')
    table = table_for(grammar, graph_for(grammar))

    root = parse(tokens('a'), grammar, table)
    self._assert_correct_tree(('A', ('a', )), root)

    # Tokenise outside the context manager so only parse() is guarded.
    invalid_input = tokens('aa')
    with self.assertRaises(ParserError):
        parse(invalid_input, grammar, table)
def _rule_state_to_string(rule_state: RuleState,
                          grammar: Grammar) -> Tuple[str, str, str]:
    """Render a rule state as three display strings.

    Args:
        rule_state: State whose ``rule_index``, ``num_processed`` and
            ``lookaheads`` attributes are read.
        grammar: Grammar used to look up the rule via ``rule_at``.

    Returns:
        A tuple ``(key, body, lookaheads)``: the rule key, the rule symbols
        joined by spaces with a ``⬤`` marker between the processed and
        unprocessed parts, and the lookaheads joined by spaces.
    """
    rule = grammar.rule_at(rule_state.rule_index)
    key = rule.key

    split_at = rule_state.num_processed
    done = ' '.join(rule.symbols[:split_at])
    pending = ' '.join(rule.symbols[split_at:])
    lookaheads = ' '.join(str(symbol) for symbol in rule_state.lookaheads)

    return f'{key}', f'{done}⬤{pending}', f'{lookaheads}'
def test_given_count_grammar_when_00001111_then_ast(self) -> None:
    """Parse ``'00001111'`` with ``X -> 0 X 1 | 0 1`` and check the nested tree.

    Also checks that the unbalanced input ``'001'`` raises ``ParserError``.
    """
    rules = [
        Rule('X', ['0', 'X', '1']),
        Rule('X', ['0', '1']),
    ]
    grammar = augment(Grammar(*rules), 'X')
    table = table_for(grammar, graph_for(grammar))

    root = parse(tokens('00001111'), grammar, table)

    # Innermost pair first, then wrap it three times in '0' ... '1'.
    expected = ('X', '0', '1')
    for _ in range(3):
        expected = ('X', '0', expected, '1')
    self._assert_correct_tree(expected, root)

    # Tokenise outside the context manager so only parse() is guarded.
    unbalanced = tokens('001')
    with self.assertRaises(ParserError):
        parse(unbalanced, grammar, table)
def test_given_states_followed_by_recursion_then_closure_includes_all_lookaheads(
        self) -> None:
    """Check the closure of ``RuleState(0, 1, {'$'})`` for a recursive grammar.

    With ``A -> a A | A A`` the closure must contain the seed state plus the
    initial states of both rules, each with lookaheads ``{'$', 'a'}``.
    """
    rules = [
        Rule('A', ['a', 'A']),
        Rule('A', ['A', 'A']),
    ]
    grammar = Grammar(*rules)

    actual = closure_for(grammar, RuleState(0, 1, {'$'}))

    expected = {
        RuleState(0, 1, {'$'}),
        RuleState(1, 0, {'$', 'a'}),
        RuleState(0, 0, {'$', 'a'}),
    }
    self.assertEqual(expected, actual)
def test_given_states_followed_by_references_then_closure_includes_references(
        self) -> None:
    """Check that the closure follows a chain of references.

    With ``A -> x B``, ``B -> C`` and ``C -> x``, the closure of
    ``RuleState(0, 1, {'$'})`` must also contain the initial states of rules
    1 and 2, each with lookahead ``{'$'}``.
    """
    rules = [
        Rule('A', ['x', 'B']),
        Rule('B', ['C']),
        Rule('C', ['x']),
    ]
    grammar = Grammar(*rules)

    actual = closure_for(grammar, RuleState(0, 1, {'$'}))

    expected = {
        RuleState(0, 1, {'$'}),
        RuleState(1, 0, {'$'}),
        RuleState(2, 0, {'$'}),
    }
    self.assertEqual(expected, actual)
def compile_grammar(grammar_node: Node) -> Grammar:
    """Build a ``Grammar`` from a parsed grammar tree.

    The tree is first simplified with ``squash``, ``prune`` and ``skip`` (in
    that order). Each remaining child is then unpacked into an identifier
    node and an option node, which are passed to ``compile_rules``.

    Args:
        grammar_node: Root node of the parsed grammar definition.

    Returns:
        A ``Grammar`` holding the rules of all definitions, in child order.
    """
    from cmaj.ast.simplify import squash, prune, skip

    simplified = squash(grammar_node, 'GRAMMAR', 'OPTION', 'SEQUENCE')
    simplified = prune(simplified, 'comment', '=', '|', 'eol')
    simplified = skip(simplified, 'LINE', 'ANCHOR')

    rules: List[Rule] = []
    for definition in simplified.children:
        # Each definition is expected to have exactly two children here.
        identifier, option = definition.children
        rules.extend(compile_rules(identifier.token.value, option))

    return Grammar(*rules)
def test_given_lr0_grammar_then_correct_graph(self) -> None:
    """Check the graph for ``A -> 1 | A B`` with ``B -> 0 | 1``.

    The expected graph has six closures and five edges.
    """
    from cmaj.testing.closure import closure

    rules = [
        Rule('A', ['1']),
        Rule('A', ['A', 'B']),
        Rule('B', ['0']),
        Rule('B', ['1']),
    ]
    grammar = Grammar(*rules)

    vertices = [
        closure((4, 0, '$'), (0, 0, '01$'), (1, 0, '01$')),
        closure((4, 1, '$'), (1, 1, '01$'), (2, 0, '01$'), (3, 0, '01$')),
        closure((0, 1, '01$')),
        closure((1, 2, '01$')),
        closure((2, 1, '01$')),
        closure((3, 1, '01$')),
    ]
    edges = [
        (0, 'A', 1),
        (0, '1', 2),
        (1, 'B', 3),
        (1, '0', 4),
        (1, '1', 5),
    ]
    self._given_grammar_then_correct_graph(grammar, 'A', vertices, edges)
def test_given_lookaheads_then_closure_simplifies_lookaheads(self) -> None:
    """Check that the closure has one state per rule position.

    For the seed ``RuleState(1, 1, {'x', 'y'})`` the expected closure holds
    exactly five states, each with a single combined lookahead set.
    """
    rules = [
        Rule('A', ['a']),
        Rule('A', ['B', 'B']),
        Rule('B', ['b']),
        Rule('B', ['A', 'B']),
    ]
    grammar = Grammar(*rules)

    actual = closure_for(grammar, RuleState(1, 1, {'x', 'y'}))

    expected = {
        RuleState(1, 1, {'x', 'y'}),
        RuleState(2, 0, {'x', 'y', 'b', 'a'}),
        RuleState(3, 0, {'x', 'y', 'b', 'a'}),
        RuleState(0, 0, {'b', 'a'}),
        RuleState(1, 0, {'b', 'a'}),
    }
    self.assertEqual(expected, actual)
def test_given_slr_grammar_then_correct_table(self) -> None:
    """Check every cell of the table for ``S -> X X`` with ``X -> a X | b``.

    Counts the empty cells over rows 0-9 and the five columns (29 expected),
    then checks each of the 21 populated cells.
    """
    from cmaj.parser.grammar import augment
    from cmaj.parser.graph import graph_for

    eof = Grammar.AUGMENTED_EOF
    rules = [
        Rule('S', ['X', 'X']),
        Rule('X', ['a', 'X']),
        Rule('X', ['b']),
    ]
    grammar = augment(Grammar(*rules), 'S')
    graph = graph_for(grammar)
    table = table_for(grammar, graph)

    # Arguments forwarded to row_of for each state; presumably
    # (rule index, processed count, lookahead) -- confirm against row_of.
    row_keys = [
        (0, 0, eof),
        (3, 1, eof),
        (0, 1, eof),
        (1, 1, 'a'),
        (2, 1, 'a'),
        (0, 2, eof),
        (1, 1, eof),
        (2, 1, eof),
        (1, 2, 'a'),
        (1, 2, eof),
    ]
    s = [row_of(graph, *key) for key in row_keys]

    columns = ['S', 'X', 'a', 'b', eof]
    empty_cells = [(row, column)
                   for row in range(10)
                   for column in columns
                   if table.action(row, column) is None]
    self.assertEqual(29, len(empty_cells))

    # (expected action, row, column), checked in this order.
    populated_cells = [
        (Action.shift(s[3]), s[0], 'a'),
        (Action.shift(s[4]), s[0], 'b'),
        (Action.goto(s[1]), s[0], 'S'),
        (Action.goto(s[2]), s[0], 'X'),
        (Action.accept(3), s[1], eof),
        (Action.shift(s[6]), s[2], 'a'),
        (Action.shift(s[7]), s[2], 'b'),
        (Action.goto(s[5]), s[2], 'X'),
        (Action.shift(s[3]), s[3], 'a'),
        (Action.shift(s[4]), s[3], 'b'),
        (Action.goto(s[8]), s[3], 'X'),
        (Action.reduce(2), s[4], 'a'),
        (Action.reduce(2), s[4], 'b'),
        (Action.reduce(0), s[5], eof),
        (Action.shift(s[6]), s[6], 'a'),
        (Action.shift(s[7]), s[6], 'b'),
        (Action.goto(s[9]), s[6], 'X'),
        (Action.reduce(2), s[7], eof),
        (Action.reduce(1), s[8], 'a'),
        (Action.reduce(1), s[8], 'b'),
        (Action.reduce(1), s[9], eof),
    ]
    for expected, row, column in populated_cells:
        self.assertEqual(expected, table.action(row, column))
def meta_grammar() -> Grammar:
    """Return the augmented grammar used to parse grammar definitions.

    The rules cover ``GRAMMAR``, ``LINE``, ``DEFINITION``, ``OPTION``,
    ``SEQUENCE`` and ``ANCHOR``. The result is augmented with start symbol
    ``'GRAMMAR'``.
    """
    from cmaj.parser.grammar import Rule, augment

    rules = [
        # A grammar is one or more lines.
        Rule('GRAMMAR', ['LINE', 'GRAMMAR']),
        Rule('GRAMMAR', ['LINE']),
        # A line is a definition, a comment, or empty.
        Rule('LINE', ['DEFINITION', 'eol']),
        Rule('LINE', ['comment', 'eol']),
        Rule('LINE', ['eol']),
        Rule('DEFINITION', ['identifier', '=', 'OPTION']),
        # Options are '|'-separated sequences.
        Rule('OPTION', ['SEQUENCE', '|', 'OPTION']),
        Rule('OPTION', ['SEQUENCE']),
        # A sequence is one or more anchors.
        Rule('SEQUENCE', ['ANCHOR', 'SEQUENCE']),
        Rule('SEQUENCE', ['ANCHOR']),
        Rule('ANCHOR', ['string']),
        Rule('ANCHOR', ['identifier']),
    ]
    return augment(Grammar(*rules), 'GRAMMAR')
def test_given_arithmetic_grammar_when_1add1add1mul1add1_then_ast(
        self) -> None:
    """Parse ``'1+1+1*1+1'`` and check the expected tree.

    In the expected tree the ``ADD`` nodes nest to the left and the ``*``
    term is a ``MUL`` subtree. Also checks that ``'11+1'`` raises
    ``ParserError``.
    """
    rules = [
        Rule('ADD', ['ADD', '+', 'MUL']),
        Rule('ADD', ['MUL']),
        Rule('MUL', ['MUL', '*', '1']),
        Rule('MUL', ['1']),
    ]
    grammar = augment(Grammar(*rules), 'ADD')
    table = table_for(grammar, graph_for(grammar))

    root = parse(tokens('1+1+1*1+1'), grammar, table)

    # Build the expected tree bottom-up.
    mul_one = ('MUL', '1')
    add_one = ('ADD', mul_one)
    first_sum = ('ADD', add_one, '+', mul_one)
    product = ('MUL', mul_one, '*', '1')
    second_sum = ('ADD', first_sum, '+', product)
    expected = ('ADD', second_sum, '+', mul_one)
    self._assert_correct_tree(expected, root)

    # Tokenise outside the context manager so only parse() is guarded.
    malformed = tokens('11+1')
    with self.assertRaises(ParserError):
        parse(malformed, grammar, table)
def test_given_grammar_and_graph_then_table_with_num_closures_rows_and_num_symbols_columns(self) -> None:
    """Check the dimensions of the table built from a hand-made graph.

    The graph has two closures joined by two edges; the table must report
    2 rows and 5 columns.
    """
    from cmaj.parser.grammar import augment
    from cmaj.testing.closure import closure

    rules = [
        Rule('A', ['a']),
        Rule('B', ['b']),
    ]
    grammar = augment(Grammar(*rules), 'A')

    graph = ClosureGraph()
    first = closure((0, 0, '$'))
    second = closure((1, 0, '$'))
    graph.add_edge(first, 'a', second)
    graph.add_edge(second, 'b', first)

    table = table_for(grammar, graph)

    self.assertEqual(2, table.num_rows)
    self.assertEqual(5, table.num_columns)
def test_given_states_in_cycles_then_closure_includes_all_lookaheads(
        self) -> None:
    """Check the closure for a grammar whose rules reference each other in a cycle.

    For the seed ``RuleState(1, 1, {'$'})`` the initial state of every rule
    must appear with lookaheads ``{'$', 'c', 'a', 'b'}``.
    """
    rules = [
        Rule('C', ['A']),
        Rule('A', ['B', 'B']),
        Rule('B', ['C']),
        Rule('C', ['c']),
        Rule('A', ['a']),
        Rule('B', ['b']),
    ]
    grammar = Grammar(*rules)

    actual = closure_for(grammar, RuleState(1, 1, {'$'}))

    expected = {RuleState(1, 1, {'$'})}
    for rule_index in (2, 5, 0, 3, 1, 4):
        # A fresh set literal per state, as in a hand-written expectation.
        expected.add(RuleState(rule_index, 0, {'$', 'c', 'a', 'b'}))
    self.assertEqual(expected, actual)
def test_given_slr_grammar_then_correct_graph(self) -> None:
    """Check the graph for ``S -> X X`` with ``X -> a X | b``.

    The expected graph has ten closures and thirteen edges.
    """
    from cmaj.testing.closure import closure

    rules = [
        Rule('S', ['X', 'X']),
        Rule('X', ['a', 'X']),
        Rule('X', ['b']),
    ]
    grammar = Grammar(*rules)

    vertices = [
        closure((3, 0, '$'), (0, 0, '$'), (1, 0, 'ab'), (2, 0, 'ab')),
        closure((3, 1, '$')),
        closure((0, 1, '$'), (1, 0, '$'), (2, 0, '$')),
        closure((1, 1, 'ab'), (1, 0, 'ab'), (2, 0, 'ab')),
        closure((2, 1, 'ab')),
        closure((0, 2, '$')),
        closure((1, 1, '$'), (1, 0, '$'), (2, 0, '$')),
        closure((2, 1, '$')),
        closure((1, 2, 'ab')),
        closure((1, 2, '$')),
    ]
    edges = [
        (0, 'S', 1), (0, 'X', 2), (0, 'a', 3), (0, 'b', 4),
        (2, 'X', 5), (2, 'a', 6), (2, 'b', 7),
        (3, 'X', 8), (3, 'a', 3), (3, 'b', 4),
        (6, 'X', 9), (6, 'a', 6), (6, 'b', 7),
    ]
    self._given_grammar_then_correct_graph(grammar, 'S', vertices, edges)
def parse(tokens: List[Node], grammar: Grammar, table: ParseTable) -> Node:
    """Run the table-driven parse loop over ``tokens`` and return the root node.

    A node keyed ``Grammar.AUGMENTED_EOF`` is appended to a copy of the
    token list. The loop then looks up ``table.action(row, token.key)`` and
    acts on the action kind until an accept action is found.

    Args:
        tokens: Input nodes; the caller's list is not modified.
        grammar: Grammar used to look up rules for reduce actions.
        table: Parse table; must have at least one row.

    Returns:
        The node of the single entry left on the stack after accept.

    Raises:
        ParserError: If no action exists for the current token, the action
            kind is not recognised, or the stack does not hold exactly one
            entry after accept.
    """
    from cmaj.parser.table import Action

    assert table.num_rows > 0

    stack: Stack = []
    row = 0
    stream = tokens + [Node(Grammar.AUGMENTED_EOF)]
    position = 0

    while True:
        current = stream[position]
        action = table.action(row, current.key)
        if action is None:
            raise ParserError(f'Unexpected token: {current!r}')

        if action.key == Action.ACCEPT:
            break

        if action.key == Action.SHIFT:
            # Remember the row we came from, then consume the token.
            stack.append((row, current))
            row = action.index
            position += 1
            continue

        if action.key == Action.GOTO:
            row = action.index
            continue

        if action.key != Action.REDUCE:
            raise ParserError(f'Unexpected parser action {action!r} for token: {current!r}')

        # Reduce: collapse the stack entries for the rule into one node,
        # then follow the goto entry for that node's key.
        rule = grammar.rule_at(action.index)
        stack, row, nodes = _reduce_stack(stack, rule)
        node = _reduce_nodes(nodes, rule)
        stack.append((row, node))
        goto_action = table.action(row, node.key)
        assert goto_action.key == Action.GOTO
        row = goto_action.index

    if len(stack) != 1:
        raise ParserError(f'Found unprocessed tokens: {_to_symbols(stack)!r}')
    _, root = stack[0]
    return root
def test_given_cycle_then_first_cycle_breaking_terminals(self) -> None:
    """Check ``Grammar.first`` on rules that reference each other.

    With ``A -> B | a`` and ``B -> A | b``, ``first(['A'])`` must be
    ``{'a', 'b'}``.
    """
    rules = [
        Rule('A', ['B']),
        Rule('A', ['a']),
        Rule('B', ['A']),
        Rule('B', ['b']),
    ]
    grammar = Grammar(*rules)

    terminals = grammar.first(['A'])

    self.assertEqual({'a', 'b'}, terminals)
def test_given_unprocessed_reference_then_initial_reference_states(
        self) -> None:
    """Check follow states when the state sits in front of a reference.

    With ``A -> B`` and ``B -> b``, the follow states of
    ``RuleState(0, 0, {'b'})`` must be exactly ``RuleState(1, 0, {'b'})``.
    """
    rules = [
        Rule('A', ['B']),
        Rule('B', ['b']),
    ]
    grammar = Grammar(*rules)
    start = RuleState(0, 0, {'b'})

    expected = {RuleState(1, 0, {'b'})}
    self.assertEqual(expected, start.follow_states(grammar))
def test_given_unprocessed_terminal_then_no_follow_states(self) -> None:
    """Check that a state at the start of ``A -> a`` has no follow states."""
    grammar = Grammar(Rule('A', ['a']))
    before_terminal = RuleState(0, 0, {'a'})

    no_states = set()
    self.assertEqual(no_states, before_terminal.follow_states(grammar))
def test_given_unprocessed_reference_then_not_reducible_and_next_symbol_is_reference(
        self) -> None:
    """Check a resolved state of ``A -> a A`` with one symbol processed.

    The resolved state must not be reducible and its next symbol must be
    ``'A'``.
    """
    grammar = Grammar(Rule('A', ['a', 'A']))
    unresolved = RuleState(0, 1, {'$'})

    resolved = resolve(unresolved, grammar)

    self.assertFalse(resolved.reducible)
    self.assertEqual('A', resolved.next_symbol)
def test_given_grammar_with_single_rule_then_complete_closure(
        self) -> None:
    """Check that the closure over ``X -> x`` contains only the seed state."""
    grammar = Grammar(Rule('X', ['x']))

    actual = closure_for(grammar, RuleState(0, 0, {'$'}))

    expected = {RuleState(0, 0, {'$'})}
    self.assertEqual(expected, actual)
def test_given_state_followed_by_two_references_then_lookahead_of_reference(
        self) -> None:
    """Check the closure lookahead when a reference is followed by a reference.

    With ``A -> B B`` and ``B -> b``, the closure of
    ``RuleState(0, 0, {'$'})`` must add ``RuleState(1, 0, {'b'})``.
    """
    rules = [
        Rule('A', ['B', 'B']),
        Rule('B', ['b']),
    ]
    grammar = Grammar(*rules)

    actual = closure_for(grammar, RuleState(0, 0, {'$'}))

    expected = {
        RuleState(0, 0, {'$'}),
        RuleState(1, 0, {'b'}),
    }
    self.assertEqual(expected, actual)
def test_given_reference_followed_by_recursion_then_lookahead_is_first_set(
        self) -> None:
    """Check follow states inside the recursive rule ``X -> b X X``.

    The follow states of ``RuleState(1, 1, {'x'})`` must be the initial
    states of both ``X`` rules, each with lookaheads ``{'a', 'b'}``.
    """
    rules = [
        Rule('X', ['a']),
        Rule('X', ['b', 'X', 'X']),
    ]
    grammar = Grammar(*rules)
    inside_recursion = RuleState(1, 1, {'x'})

    expected = {
        RuleState(0, 0, {'a', 'b'}),
        RuleState(1, 0, {'a', 'b'}),
    }
    self.assertEqual(expected, inside_recursion.follow_states(grammar))
def test_given_start_is_terminal_when_augment_then_error(self) -> None:
    """Check that augmenting an empty grammar with ``'S'`` raises ``AssertionError``."""
    from cmaj.parser.grammar import augment

    empty_grammar = Grammar()

    with self.assertRaises(AssertionError):
        augment(empty_grammar, 'S')
def test_given_processed_rule_then_reducible(self) -> None:
    """Check that the state of ``A -> a A`` with both symbols processed is reducible."""
    grammar = Grammar(Rule('A', ['a', 'A']))
    fully_processed = RuleState(0, 2, {'$'})

    resolved = resolve(fully_processed, grammar)

    self.assertTrue(resolved.reducible)
def test_given_augmented_eof_in_grammar_when_augment_then_error(self) -> None:
    """Check that augmenting fails when a rule already uses ``Grammar.AUGMENTED_EOF``.

    Augmenting ``S -> <AUGMENTED_EOF>`` with start ``'S'`` must raise
    ``AssertionError``.
    """
    from cmaj.parser.grammar import augment

    rule_with_eof = Rule('S', [Grammar.AUGMENTED_EOF])
    grammar = Grammar(rule_with_eof)

    with self.assertRaises(AssertionError):
        augment(grammar, 'S')