def test_add_incorrect_pratt_parser():
    grammar = Grammar()
    stmt_id = grammar.add_parselet('stmt', kind=ParseletKind.Pratt, result_type=SyntaxToken)
    expr_id = grammar.add_parselet('expr', kind=ParseletKind.Pratt, result_type=SyntaxToken)
    integer_id = grammar.add_token('Integer')

    with pytest.raises(GrammarError):
        grammar.add_parser(expr_id, make_optional(integer_id))
    with pytest.raises(GrammarError):
        grammar.add_parser(expr_id, make_sequence(stmt_id))
    with pytest.raises(GrammarError):
        grammar.add_parser(expr_id, make_sequence(expr_id, stmt_id))
    with pytest.raises(GrammarError):
        grammar.add_parser(expr_id, make_sequence(expr_id, make_optional(stmt_id)))
    with pytest.raises(GrammarError):
        grammar.add_parser(expr_id, make_sequence(expr_id, expr_id))

def test_variables():
    grammar = Grammar()
    name_id = grammar.add_token('Name')

    # name: Name
    comb = make_named('name', name_id)
    assert 'name' in comb.variables
    assert comb.variables['name'] == SyntaxToken

    # names: Name names: Name
    comb = make_sequence(make_named('names', name_id), make_named('names', name_id))
    assert 'names' in comb.variables
    assert comb.variables['names'] == Sequence[SyntaxToken]

    # [ name: Name ]
    comb = make_optional(make_named('name', name_id))
    assert 'name' in comb.variables
    assert comb.variables['name'] == Optional[SyntaxToken]

    # { names: Name }
    comb = make_repeat(make_named('names', name_id))
    assert comb.variables['names'] == Sequence[SyntaxToken]

    # names: Name { names: Name }
    comb = make_sequence(make_named('names', name_id), make_repeat(make_named('names', name_id)))
    assert 'names' in comb.variables
    assert comb.variables['names'] == Sequence[SyntaxToken]

    # names: { Name }
    comb = make_named('names', make_repeat(name_id))
    assert 'names' in comb.variables
    assert comb.variables['names'] == Sequence[SyntaxToken]

def test_add_packrat_parser():
    grammar = Grammar()
    stmt_id = grammar.add_parselet('stmt', kind=ParseletKind.Packrat, result_type=SyntaxToken)
    star_id = grammar.add_implicit('*')

    # stmt := '(' stmt ')'
    assert grammar.add_parser(
        stmt_id, make_sequence(grammar.add_implicit('('), stmt_id, grammar.add_implicit(')')))
    # stmt := '('
    assert grammar.add_parser(stmt_id, make_sequence(grammar.add_implicit('(')))
    # stmt := '*'
    assert grammar.add_parser(stmt_id, star_id)
    # stmt := stmt
    assert grammar.add_parser(stmt_id, stmt_id)

def convert_node(grammar: Grammar, node: CombinatorNode, location: Location) -> Combinator:
    if isinstance(node, SequenceNode):
        return make_sequence(*(convert_node(grammar, child, location) for child in node.combinators))
    if isinstance(node, RepeatNode):
        return make_repeat(convert_node(grammar, node.combinator, location))
    if isinstance(node, OptionalNode):
        return make_optional(convert_node(grammar, node.combinator, location))
    if isinstance(node, NamedNode):
        return make_named(node.name.value, convert_node(grammar, node.combinator, location))
    if isinstance(node, ImplicitNode):
        token_id = grammar.add_implicit(ast.literal_eval(node.value.value), location=location)
        return make_token(token_id)
    if isinstance(node, ReferenceNode):
        name = node.name.value
        if name in grammar.tokens:
            if node.priority:
                raise DiagnosticError(location, 'Token combinator cannot have a priority')
            return make_token(grammar.tokens[name])
        elif name in grammar.parselets:
            priority = node.priority and ast.literal_eval(node.priority.value)
            return make_parselet(grammar.parselets[name], priority)
        else:
            raise DiagnosticError(location, f'Symbol {name!r} is not found in grammar')

    raise NotImplementedError(f'Conversion from node to combinator is not implemented: {type(node).__name__}')

def test_make_sequence():
    grammar = Grammar()
    name_id = grammar.add_token('Name')
    expr_id = grammar.add_parselet('expr')

    comb = make_sequence(name_id, expr_id)
    assert isinstance(comb, SequenceCombinator)
    assert len(comb) == 2
    assert isinstance(comb[0], TokenCombinator)
    assert isinstance(comb[1], ParseletCombinator)
    assert comb.result_type == SyntaxNode

def test_add_pratt_parser():
    grammar = Grammar()
    expr_id = grammar.add_parselet('expr', kind=ParseletKind.Pratt, result_type=SyntaxToken)
    integer_id = grammar.add_token('Integer')
    string_id = grammar.add_token('String')
    plus_id = grammar.add_implicit('+')
    star_id = grammar.add_implicit('*')

    table = cast(PrattTable, grammar.tables[expr_id])
    assert table.prefix_tokens == set()

    # prefix rules: expr := Integer and expr := value: String
    assert grammar.add_parser(expr_id, integer_id)
    assert integer_id in table.prefix_tokens, "Cleanup of Pratt table prefix tokens did not work"
    assert grammar.add_parser(expr_id, make_named('value', string_id))
    assert string_id in table.prefix_tokens, "Cleanup of Pratt table prefix tokens did not work"

    # infix rules: expr := expr '+' expr and expr := lhs: expr op: '*' expr
    assert grammar.add_parser(expr_id, make_sequence(expr_id, plus_id, expr_id))
    assert grammar.add_parser(
        expr_id, make_sequence(make_named('lhs', expr_id), make_named('op', star_id), expr_id))

def test_make_sequence_with_single_element():
    grammar = Grammar()
    name_id = grammar.add_token('Name')

    comb = make_sequence(name_id)
    assert isinstance(comb, TokenCombinator)

def test_make_empty_sequence():
    with pytest.raises(ValueError):
        make_sequence()

def create_combinator_grammar() -> Grammar:
    """
    Create a grammar for parsing combinator definitions.

    This grammar is used in the bootstrap process of the initial grammar,
    e.g. for the definition of combinators in a grammar.
    """
    grammar = Grammar()
    grammar.extend(create_core_grammar())

    # tokens
    name_id = grammar.tokens['Name']
    string_id = grammar.tokens['String']
    number_id = grammar.tokens['Integer']
    colon_id = grammar.add_implicit(':')
    parent_open_id = grammar.tokens['(']
    parent_close_id = grammar.tokens[')']
    square_open_id = grammar.tokens['[']
    square_close_id = grammar.tokens[']']
    curly_open_id = grammar.tokens['{']
    curly_close_id = grammar.tokens['}']
    less_id = grammar.tokens['<']
    great_id = grammar.tokens['>']

    # parse combinator definition
    comb_id = grammar.add_parselet('combinator', result_type=CombinatorNode)
    seq_id = grammar.add_parselet('combinator_sequence', result_type=SequenceNode)

    # combinator := name: Name ":" combinator: combinator      ; named variable
    grammar.add_parser(
        comb_id,
        make_sequence(make_named('name', name_id), colon_id, make_named('combinator', comb_id)),
        make_ctor(NamedNode)
    )

    # combinator := name: Name [ '<' priority: Integer '>' ]   ; reference to parselet or token
    grammar.add_parser(
        comb_id,
        make_sequence(
            make_named('name', name_id),
            make_optional(make_sequence(less_id, make_named('priority', number_id), great_id))),
        make_ctor(ReferenceNode)
    )

    # combinator := value: String                               ; reference to implicit token
    grammar.add_parser(comb_id, make_named('value', string_id), make_ctor(ImplicitNode))

    # combinator := '[' combinator: combinator_sequence ']'     ; optional combinator
    grammar.add_parser(
        comb_id,
        make_sequence(square_open_id, make_named('combinator', seq_id), square_close_id),
        make_ctor(OptionalNode)
    )

    # combinator := '{' combinator: combinator_sequence '}'     ; repeat combinator
    grammar.add_parser(
        comb_id,
        make_sequence(curly_open_id, make_named('combinator', seq_id), curly_close_id),
        make_ctor(RepeatNode)
    )

    # combinator := '(' combinator: combinator_sequence ')'     ; parenthesized combinator
    grammar.add_parser(
        comb_id,
        make_sequence(parent_open_id, make_named('combinator', seq_id), parent_close_id),
        make_return_variable('combinator')
    )

    # combinator_sequence := combinators: combinator combinators: { combinator }   ; sequence combinator
    grammar.add_parser(
        seq_id,
        make_sequence(make_named('combinators', comb_id), make_named('combinators', make_repeat(comb_id))),
        make_ctor(SequenceNode)
    )

    return grammar
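

# A minimal sketch of a test for create_combinator_grammar (not part of the original
# suite). It relies only on attributes already used above: Grammar.parselets and
# Grammar.tokens as name-to-id mappings. The registration behaviour of add_implicit
# is not asserted here, since it is not documented by the surrounding code.
def test_create_combinator_grammar():
    grammar = create_combinator_grammar()

    # the two parselets registered during construction
    assert 'combinator' in grammar.parselets
    assert 'combinator_sequence' in grammar.parselets

    # tokens inherited from the core grammar and looked up by the builder
    assert 'Name' in grammar.tokens
    assert 'String' in grammar.tokens
    assert 'Integer' in grammar.tokens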