def test_extend_implicit_grammar():
    """Extending a grammar must copy implicit-token patterns verbatim."""
    source = Grammar()
    source.add_implicit('(')

    target = Grammar()
    target.extend(source)

    # The copied pattern keeps the implicit flag and the length-based priority.
    pattern = target.patterns[0]
    assert pattern.token_id == target.tokens['(']
    assert pattern.priority == -len('(')
    assert pattern.is_implicit
def test_add_brackets():
    """Registering a bracket pair populates every bracket-related grammar set."""
    grammar = Grammar()
    open_id = grammar.add_implicit('(')
    close_id = grammar.add_implicit(')')

    # No bracket information exists before registration.
    assert grammar.brackets == set()
    assert grammar.open_brackets == set()
    assert grammar.close_brackets == set()

    grammar.add_brackets(open_id, close_id)

    assert grammar.brackets == {(open_id, close_id)}
    assert grammar.open_brackets == {open_id}
    assert grammar.close_brackets == {close_id}
    assert grammar.bracket_pairs[open_id] == close_id
def test_extend_brackets_grammar():
    """Merging two grammars unions their bracket pairs."""
    first = Grammar()
    first.add_brackets(first.add_implicit('('), first.add_implicit(')'))

    second = Grammar()
    second.add_brackets(second.add_implicit('('), second.add_implicit(')'))
    second.add_brackets(second.add_implicit('['), second.add_implicit(']'))

    merged = Grammar.merge(first, second)
    expected = {
        (merged.tokens['('], merged.tokens[')']),
        (merged.tokens['['], merged.tokens[']']),
    }
    assert merged.brackets == expected
def create_core_grammar() -> Grammar:
    """
    Initialize the default grammar: core token patterns, implicit
    punctuation tokens, trivia tokens and bracket pairs.
    """
    grammar = Grammar()

    # Named token patterns; one token name may carry several alternative
    # patterns (e.g. String, Integer, Float). Registration order is kept.
    for name, pattern in (
            ('Comment', RE_COMMENT),
            ('Whitespace', RE_WHITESPACE),
            ('Name', RE_NAME),
            ('NewLine', RE_NEWLINE),
            ('String', RE_STRING_SINGLE),
            ('String', RE_STRING_DOUBLE),
            ('Integer', RE_NUMBER_BINARY),
            ('Integer', RE_NUMBER_OCTAL),
            ('Integer', RE_NUMBER_DECIMAL),
            ('Integer', RE_NUMBER_HEXADECIMAL),
            ('Float', RE_FLOAT_POINT),
            ('Float', RE_FLOAT_EXPONENT)):
        grammar.add_pattern(grammar.add_token(name), pattern)

    # Implicit punctuation tokens, in the original registration order.
    for char in '()[]{}<>':
        grammar.add_implicit(char)

    # Comments and whitespace are trivia: skipped between significant tokens.
    grammar.add_trivia(grammar.tokens['Comment'])
    grammar.add_trivia(grammar.tokens['Whitespace'])

    # Bracket pairs; note '<' / '>' are deliberately not registered as brackets.
    for open_char, close_char in (('(', ')'), ('[', ']'), ('{', '}')):
        grammar.add_brackets(grammar.tokens[open_char], grammar.tokens[close_char])

    return grammar
def test_add_packrat_parser():
    """A packrat parselet accepts sequences, single tokens and self-references."""
    grammar = Grammar()
    stmt_id = grammar.add_parselet('stmt', kind=ParseletKind.Packrat, result_type=SyntaxToken)
    star_id = grammar.add_implicit('*')

    parenthesized = make_sequence(
        grammar.add_implicit('('), stmt_id, grammar.add_implicit(')'))
    assert grammar.add_parser(stmt_id, parenthesized)
    assert grammar.add_parser(stmt_id, make_sequence(grammar.add_implicit('(')))
    assert grammar.add_parser(stmt_id, star_id)
    assert grammar.add_parser(stmt_id, stmt_id)
def convert_node(grammar: Grammar, node: CombinatorNode, location: Location) -> Combinator:
    """
    Recursively translate a combinator AST node into an executable combinator.

    Raises DiagnosticError for unknown references or a priority attached to a
    token reference, and NotImplementedError for unhandled node types.
    """
    if isinstance(node, SequenceNode):
        children = (convert_node(grammar, item, location) for item in node.combinators)
        return make_sequence(*children)

    if isinstance(node, RepeatNode):
        return make_repeat(convert_node(grammar, node.combinator, location))

    if isinstance(node, OptionalNode):
        return make_optional(convert_node(grammar, node.combinator, location))

    if isinstance(node, NamedNode):
        inner = convert_node(grammar, node.combinator, location)
        return make_named(node.name.value, inner)

    if isinstance(node, ImplicitNode):
        # The node stores a quoted literal; unquote it before registration.
        token_id = grammar.add_implicit(ast.literal_eval(node.value.value), location=location)
        return make_token(token_id)

    if isinstance(node, ReferenceNode):
        name = node.name.value
        if name in grammar.tokens:
            # Priorities are meaningful only on parselet references.
            if node.priority:
                raise DiagnosticError(location, f'Token combinator can not have priority')
            return make_token(grammar.tokens[name])
        if name in grammar.parselets:
            priority = node.priority and ast.literal_eval(node.priority.value)
            return make_parselet(grammar.parselets[name], priority)
        raise DiagnosticError(location, f"Not found symbol {name} in grammar")

    raise NotImplementedError(f'Not implemented conversion from node to combinator: {type(node).__name__}')
def test_add_implicit_token():
    """An implicit token gets an escaped-literal pattern with negative priority."""
    grammar = Grammar()
    token_id = grammar.add_implicit('+')

    assert token_id.name == '+'
    assert token_id.description == '+'
    assert token_id.is_implicit
    assert '+' in grammar.tokens

    # Exactly one pattern is registered, matching the escaped literal.
    assert len(grammar.patterns) == 1
    pattern = grammar.patterns[0]
    assert pattern.token_id == token_id
    assert pattern.pattern == re.compile(re.escape('+'))
    assert pattern.priority < 0
    assert pattern.is_implicit
def grammar() -> Grammar:
    """
    Build a small fixture grammar: trivia whitespace, numbers, identifiers
    and a few implicit keyword/operator tokens.
    """
    grammar = Grammar()
    whitespace_id = grammar.add_pattern(grammar.add_token('Whitespace'), r'\s+')
    grammar.add_trivia(whitespace_id)
    grammar.add_pattern(grammar.add_token('Number'), r'[0-9]+')
    # Identifier: letter or underscore, then letters/digits/underscores.
    # Fixed: the previous pattern `[a-zA-Z_][a-zA-Z0-9]+` rejected
    # single-character names (the `+` requires a second character) and
    # disallowed underscores after the first character.
    grammar.add_pattern(grammar.add_token('Name'), r'[a-zA-Z_][a-zA-Z0-9_]*')
    grammar.add_implicit("for")
    grammar.add_implicit("while")
    grammar.add_implicit("+")
    grammar.add_implicit("-")
    return grammar
def test_add_pratt_parser():
    """Adding parsers to a Pratt parselet maintains its prefix-token table."""
    grammar = Grammar()
    expr_id = grammar.add_parselet('expr', kind=ParseletKind.Pratt, result_type=SyntaxToken)
    integer_id = grammar.add_token('Integer')
    string_id = grammar.add_token('String')
    plus_id = grammar.add_implicit('+')
    star_id = grammar.add_implicit('*')

    table = cast(PrattTable, grammar.tables[expr_id])
    assert table.prefix_tokens == set()

    # Prefix parsers must register their leading token in the table.
    assert grammar.add_parser(expr_id, integer_id)
    assert integer_id in table.prefix_tokens, "Cleanup of pratt table prefix tokens is not worked"

    assert grammar.add_parser(expr_id, make_named('value', string_id))
    assert string_id in table.prefix_tokens, "Cleanup of pratt table prefix tokens is not worked"

    # Infix (left-recursive) parsers are also accepted.
    assert grammar.add_parser(expr_id, make_sequence(expr_id, plus_id, expr_id))
    assert grammar.add_parser(
        expr_id,
        make_sequence(make_named('lhs', expr_id), make_named('op', star_id), expr_id))
def create_combinator_grammar() -> Grammar:
    """
    Build the grammar that parses combinator definitions.

    This grammar bootstraps the initial grammar: it is what reads the
    textual definition of combinators themselves.
    """
    grammar = Grammar()
    grammar.extend(create_core_grammar())

    # --- token handles from the core grammar ---
    name_id = grammar.tokens['Name']
    string_id = grammar.tokens['String']
    number_id = grammar.tokens['Integer']
    colon_id = grammar.add_implicit(':')
    lparen_id = grammar.tokens['(']
    rparen_id = grammar.tokens[')']
    lsquare_id = grammar.tokens['[']
    rsquare_id = grammar.tokens[']']
    lcurly_id = grammar.tokens['{']
    rcurly_id = grammar.tokens['}']
    less_id = grammar.tokens['<']
    great_id = grammar.tokens['>']

    # --- parselets for combinator definitions ---
    comb_id = grammar.add_parselet('combinator', result_type=CombinatorNode)
    seq_id = grammar.add_parselet('combinator_sequence', result_type=SequenceNode)

    # combinator := name: Name ":" combinator=combinator ; named variable
    grammar.add_parser(
        comb_id,
        make_sequence(make_named('name', name_id), colon_id, make_named('combinator', comb_id)),
        make_ctor(NamedNode)
    )

    # combinator := name: Name [ '<' priority: Number '>' ] ; reference to parselet or token
    grammar.add_parser(
        comb_id,
        make_sequence(
            make_named('name', name_id),
            make_optional(less_id, make_named('priority', number_id), great_id)),
        make_ctor(ReferenceNode)
    )

    # combinator := value: String ; reference to implicit token
    grammar.add_parser(comb_id, make_named('value', string_id), make_ctor(ImplicitNode))

    # combinator := '[' combinator: combinator_sequence ']' ; optional combinator
    grammar.add_parser(
        comb_id,
        make_sequence(lsquare_id, make_named('combinator', seq_id), rsquare_id),
        make_ctor(OptionalNode)
    )

    # combinator := '{' combinator: combinator_sequence '}' ; repeat combinator
    grammar.add_parser(
        comb_id,
        make_sequence(lcurly_id, make_named('combinator', seq_id), rcurly_id),
        make_ctor(RepeatNode)
    )

    # combinator := '(' combinator: combinator_sequence ')' ; parenthesized combinator
    grammar.add_parser(
        comb_id,
        make_sequence(lparen_id, make_named('combinator', seq_id), rparen_id),
        make_return_variable('combinator')
    )

    # combinator_sequence := combinators:combinator combinators:{ combinator } ; sequence combinator
    grammar.add_parser(
        seq_id,
        make_sequence(
            make_named('combinators', comb_id),
            make_named('combinators', make_repeat(comb_id))),
        make_ctor(SequenceNode)
    )

    return grammar