def __init__(self, grammar: Grammar, filename: str, content: str):
    """Initialize the scanner and register the layout tokens on the grammar.

    :param grammar:  grammar that receives the layout token declarations
    :param filename: name of the scanned file
    :param content:  source text to scan
    """
    super().__init__(grammar, filename, content)
    self.newline_id = grammar.add_token('NewLine')
    self.whitespace_id = grammar.add_token('Whitespace')
    self.indent_id = grammar.add_token('Indent')
    # BUG FIX: the token was registered under the misspelled name 'Dedend'
    # while the attribute is `dedent_id`; register it as 'Dedent'.
    # NOTE(review): confirm no other code looks the token up by the old name.
    self.dedent_id = grammar.add_token('Dedent')
def test_variables():
    """Combinator variable tables map bound names to their inferred types."""
    grammar = Grammar()
    token_id = grammar.add_token('Name')

    # name: Name  -> single token
    combinator = make_named('name', token_id)
    assert 'name' in combinator.variables
    assert combinator.variables['name'] == SyntaxToken

    # names: Name names: Name  -> duplicated name collapses to a sequence
    combinator = make_sequence(make_named('names', token_id), make_named('names', token_id))
    assert 'names' in combinator.variables
    assert combinator.variables['names'] == Sequence[SyntaxToken]

    # [ name: Name ]  -> optional token
    combinator = make_optional(make_named('name', token_id))
    assert 'name' in combinator.variables
    assert combinator.variables['name'] == Optional[SyntaxToken]

    # { name: Name }  -> repeated token
    combinator = make_repeat(make_named('names', token_id))
    assert combinator.variables['names'] == Sequence[SyntaxToken]

    # names: Name { names: Name }  -> sequence of tokens
    combinator = make_sequence(make_named('names', token_id),
                               make_repeat(make_named('names', token_id)))
    assert 'names' in combinator.variables
    assert combinator.variables['names'] == Sequence[SyntaxToken]

    # names: { Name }  -> named repetition
    combinator = make_named('names', make_repeat(token_id))
    assert 'names' in combinator.variables
    assert combinator.variables['names'] == Sequence[SyntaxToken]
def test_make_optional():
    """``make_optional`` wraps a token id in an OptionalCombinator."""
    grammar = Grammar()
    token_id = grammar.add_token('Name')
    optional = make_optional(token_id)
    assert isinstance(optional, OptionalCombinator)
    assert isinstance(optional.combinator, TokenCombinator)
    assert optional.result_type == Optional[SyntaxToken]
def test_make_repeat():
    """``make_repeat`` wraps a token id in a RepeatCombinator."""
    grammar = Grammar()
    token_id = grammar.add_token('Name')
    repeated = make_repeat(token_id)
    assert isinstance(repeated, RepeatCombinator)
    assert isinstance(repeated.combinator, TokenCombinator)
    assert repeated.result_type == Sequence[SyntaxToken]
def test_add_parselet():
    """A new parselet defaults to Packrat kind and adds exactly one symbol."""
    grammar = Grammar()
    initial_symbols = len(grammar.symbols)
    expr_id = grammar.add_parselet('expr')
    assert expr_id.kind == ParseletKind.Packrat
    assert len(grammar.parselets) == 1
    assert len(grammar.symbols) == initial_symbols + 1
def test_extend_fail_grammar():
    """Merging same-named parselets of different kinds must raise GrammarError."""
    pratt_grammar = Grammar()
    pratt_grammar.add_parselet('expr', kind=ParseletKind.Pratt)
    packrat_grammar = Grammar()
    packrat_grammar.add_parselet('expr', kind=ParseletKind.Packrat)
    with pytest.raises(GrammarError):
        Grammar.merge(pratt_grammar, packrat_grammar)
def test_make_token():
    """``make_token`` builds a TokenCombinator with no bound variables."""
    grammar = Grammar()
    token_id = grammar.add_token('Name')
    token_comb = make_token(token_id)
    assert isinstance(token_comb, TokenCombinator)
    assert token_comb.token_id == token_id
    assert token_comb.result_type == SyntaxToken
    assert token_comb.variables == {}
def test_make_parselet():
    """``make_parselet`` builds a ParseletCombinator with no bound variables."""
    grammar = Grammar()
    parselet_id = grammar.add_parselet('name')
    parselet_comb = make_parselet(parselet_id)
    assert isinstance(parselet_comb, ParseletCombinator)
    assert parselet_comb.parser_id == parselet_id
    assert parselet_comb.result_type == SyntaxNode
    assert parselet_comb.variables == {}
def test_add_token():
    """Registering a token exposes it in ``grammar.tokens`` without a pattern."""
    grammar = Grammar()
    token_id = grammar.add_token('Name')
    assert 'Name' in grammar.tokens
    assert len(grammar.patterns) == 0
    # BUG FIX: the original asserted ``token_id == token_id`` which is always
    # true; compare the returned id against the registry lookup instead.
    assert token_id == grammar.tokens['Name']
    assert token_id.name == 'Name'
    assert token_id.description == 'name'
    assert not token_id.is_implicit
def test_make_sequence():
    """``make_sequence`` converts ids to combinators and keeps their order."""
    grammar = Grammar()
    token_id = grammar.add_token('Name')
    parselet_id = grammar.add_parselet('expr')
    sequence = make_sequence(token_id, parselet_id)
    assert isinstance(sequence, SequenceCombinator)
    assert len(sequence) == 2
    assert isinstance(sequence[0], TokenCombinator)
    assert isinstance(sequence[1], ParseletCombinator)
    assert sequence.result_type == SyntaxNode
def test_add_pattern():
    """``add_pattern`` registers a compiled pattern for an existing token."""
    grammar = Grammar()
    name_id = grammar.add_token('Name')
    returned_id = grammar.add_pattern(name_id, r'[a-zA-Z]*')
    assert returned_id is name_id, "add_pattern must return token id"
    assert len(grammar.patterns) == 1
    registered = grammar.patterns[0]
    assert registered.token_id == name_id
    assert registered.pattern == re.compile(r'[a-zA-Z]*')
    assert registered.priority == PRIORITY_MAX
    assert not registered.is_implicit
def test_add_incorrect_token():
    """Invalid token names must raise and leave the grammar untouched."""
    grammar = Grammar()
    # BUG FIX: the original snapshotted only len(grammar.symbols) and compared
    # the *token* count against the *symbol* count (copy-paste); snapshot and
    # check each collection against its own baseline.
    token_count = len(grammar.tokens)
    symbol_count = len(grammar.symbols)
    for name in {'+', 'name'}:
        with pytest.raises(GrammarError):
            grammar.add_token(name)
    assert len(grammar.tokens) == token_count, \
        "Count of tokens in grammar is changed after failed call"
    assert len(grammar.symbols) == symbol_count, \
        "Count of symbols in grammar is changed after failed call"
def test_add_implicit_token():
    """Implicit tokens get an escaped pattern with a negative priority."""
    grammar = Grammar()
    plus_id = grammar.add_implicit('+')
    assert plus_id.name == '+'
    assert plus_id.description == '+'
    assert plus_id.is_implicit
    assert '+' in grammar.tokens
    assert len(grammar.patterns) == 1
    registered = grammar.patterns[0]
    assert registered.token_id == plus_id
    assert registered.pattern == re.compile(re.escape('+'))
    assert registered.priority < 0
    assert registered.is_implicit
def test_extend_implicit_grammar():
    """Extending a grammar copies implicit tokens together with their patterns."""
    source = Grammar()
    source.add_implicit('(')
    extended = Grammar()
    extended.extend(source)
    first_pattern = extended.patterns[0]
    assert first_pattern.token_id == extended.tokens['(']
    assert first_pattern.priority == -len('(')
    assert first_pattern.is_implicit
def convert_node(grammar: Grammar, node: CombinatorNode, location: Location) -> Combinator:
    """Translate a parsed combinator AST node into an executable combinator.

    :param grammar:  grammar used to resolve token/parselet references and to
                     register implicit tokens
    :param node:     AST node produced by the combinator parser
    :param location: source location used for diagnostics
    :raises DiagnosticError:    unknown reference, or a priority on a token
    :raises NotImplementedError: node type without a conversion rule
    """
    if isinstance(node, SequenceNode):
        children = (convert_node(grammar, item, location) for item in node.combinators)
        return make_sequence(*children)
    if isinstance(node, RepeatNode):
        return make_repeat(convert_node(grammar, node.combinator, location))
    if isinstance(node, OptionalNode):
        return make_optional(convert_node(grammar, node.combinator, location))
    if isinstance(node, NamedNode):
        inner = convert_node(grammar, node.combinator, location)
        return make_named(node.name.value, inner)
    if isinstance(node, ImplicitNode):
        # the node holds a quoted literal; literal_eval strips the quoting
        token_id = grammar.add_implicit(ast.literal_eval(node.value.value), location=location)
        return make_token(token_id)
    if isinstance(node, ReferenceNode):
        name = node.name.value
        if name in grammar.tokens:
            # priorities only make sense on parselet references
            if node.priority:
                raise DiagnosticError(location, 'Token combinator can not have priority')
            return make_token(grammar.tokens[name])
        if name in grammar.parselets:
            # missing priority node yields a falsy value -> no priority
            priority = node.priority and ast.literal_eval(node.priority.value)
            return make_parselet(grammar.parselets[name], priority)
        raise DiagnosticError(location, f"Not found symbol {name} in grammar")
    raise NotImplementedError(f'Not implemented conversion from node to combinator: {type(node).__name__}')
def test_add_packrat_parser():
    """Packrat parselets accept sequence, token, and self-referential parsers."""
    grammar = Grammar()
    stmt_id = grammar.add_parselet('stmt', kind=ParseletKind.Packrat, result_type=SyntaxToken)
    star_id = grammar.add_implicit('*')
    parenthesized = make_sequence(grammar.add_implicit('('), stmt_id, grammar.add_implicit(')'))
    assert grammar.add_parser(stmt_id, parenthesized)
    assert grammar.add_parser(stmt_id, make_sequence(grammar.add_implicit('(')))
    assert grammar.add_parser(stmt_id, star_id)
    assert grammar.add_parser(stmt_id, stmt_id)
def test_flat_sequence():
    """``flat_sequence`` splices nested SequenceCombinators into one level."""
    grammar = Grammar()
    token_id = grammar.add_token('Name')
    parselet_id = grammar.add_parselet('expr')
    nested = SequenceCombinator((
        TokenCombinator(token_id),
        ParseletCombinator(parselet_id),
    ))
    flattened = tuple(flat_sequence(TokenCombinator(token_id),
                                    ParseletCombinator(parselet_id),
                                    nested,
                                    kind=SequenceCombinator))
    assert len(flattened) == 4
    assert isinstance(flattened[0], TokenCombinator)
    assert isinstance(flattened[1], ParseletCombinator)
    assert isinstance(flattened[2], TokenCombinator)
    assert isinstance(flattened[3], ParseletCombinator)
def test_flat_combinator():
    """``flat_combinator`` converts ids and passes combinators through."""
    grammar = Grammar()
    token_id = grammar.add_token('Name')
    parselet_id = grammar.add_parselet('expr')

    # a token id becomes a token combinator
    converted = flat_combinator(token_id)
    assert isinstance(converted, TokenCombinator)
    assert converted.token_id is token_id

    # a parselet id becomes a parselet combinator without priority
    converted = flat_combinator(parselet_id)
    assert isinstance(converted, ParseletCombinator)
    assert converted.parser_id is parselet_id
    assert converted.priority is None

    # an existing combinator is returned unchanged
    existing = TokenCombinator(token_id)
    assert flat_combinator(existing) is existing
def test_add_pratt_parser():
    """Adding parsers to a Pratt parselet maintains its prefix-token table."""
    grammar = Grammar()
    expr_id = grammar.add_parselet('expr', kind=ParseletKind.Pratt, result_type=SyntaxToken)
    integer_id = grammar.add_token('Integer')
    string_id = grammar.add_token('String')
    plus_id = grammar.add_implicit('+')
    star_id = grammar.add_implicit('*')
    table = cast(PrattTable, grammar.tables[expr_id])
    assert table.prefix_tokens == set()

    # plain token parser registers its token as a prefix
    assert grammar.add_parser(expr_id, integer_id)
    assert integer_id in table.prefix_tokens, "Cleanup of pratt table prefix tokens is not worked"

    # named token parser registers the inner token as a prefix too
    assert grammar.add_parser(expr_id, make_named('value', string_id))
    assert string_id in table.prefix_tokens, "Cleanup of pratt table prefix tokens is not worked"

    # infix parsers (starting with the parselet itself) are also accepted
    assert grammar.add_parser(expr_id, make_sequence(expr_id, plus_id, expr_id))
    assert grammar.add_parser(
        expr_id, make_sequence(make_named('lhs', expr_id), make_named('op', star_id), expr_id))
def test_add_brackets():
    """``add_brackets`` records the pair in every bracket view."""
    grammar = Grammar()
    open_id = grammar.add_implicit('(')
    close_id = grammar.add_implicit(')')
    # every bracket collection starts out empty
    assert grammar.brackets == set()
    assert grammar.open_brackets == set()
    assert grammar.close_brackets == set()
    grammar.add_brackets(open_id, close_id)
    assert grammar.brackets == {(open_id, close_id)}
    assert grammar.open_brackets == {open_id}
    assert grammar.close_brackets == {close_id}
    assert grammar.bracket_pairs[open_id] == close_id
def grammar() -> Grammar:
    """Build an arithmetic-expression Pratt grammar used by the parser tests."""
    grammar = Grammar()

    # lexer: whitespace is trivia; names and numbers are value tokens
    whitespace_id = grammar.add_pattern(grammar.add_token('Whitespace'), r'\s+')
    grammar.add_trivia(whitespace_id)
    grammar.add_pattern(grammar.add_token('Name'), r'[a-zA-Z_][a-zA-Z0-9]*')
    grammar.add_pattern(grammar.add_token('Number'), r'[0-9]+')

    expr_id = grammar.add_parselet('expr', kind=ParseletKind.Pratt, result_type=object)

    # expr := value:Number
    grammar.add_parser(expr_id, "value:Number", make_call(lambda value: value.value, object))
    # expr := lhs:expr '**' rhs:expr
    # (inner binding 899 vs priority 900 — presumably right-associative; confirm)
    grammar.add_parser(expr_id, 'lhs:expr "**" rhs:expr <899>',
                       make_call(lambda lhs, rhs: (lhs, '**', rhs), object), priority=900)
    # expr := lhs:expr '+' rhs:expr
    grammar.add_parser(expr_id, 'lhs:expr "+" rhs:expr <600>',
                       make_call(lambda lhs, rhs: (lhs, '+', rhs), object), priority=600)
    # expr := lhs:expr '-' rhs:expr
    grammar.add_parser(expr_id, 'lhs:expr "-" rhs:expr <600>',
                       make_call(lambda lhs, rhs: (lhs, '-', rhs), object), priority=600)
    # expr := lhs:expr '*' rhs:expr
    grammar.add_parser(expr_id, 'lhs:expr "*" rhs:expr <700>',
                       make_call(lambda lhs, rhs: (lhs, '*', rhs), object), priority=700)
    # expr := lhs:expr '/' rhs:expr
    grammar.add_parser(expr_id, 'lhs:expr "/" rhs:expr <700>',
                       make_call(lambda lhs, rhs: (lhs, '/', rhs), object), priority=700)
    # expr := '-' value:expr   (unary minus)
    grammar.add_parser(expr_id, '"-" value:expr <800>',
                       make_call(lambda value: ('-', value), object))
    # expr := '+' value:expr   (unary plus)
    grammar.add_parser(expr_id, '"+" value:expr <800>',
                       make_call(lambda value: ('+', value), object))
    # expr := '(' value:expr ')'
    grammar.add_parser(expr_id, '"(" value:expr ")"', make_return_variable('value'))
    return grammar
def test_make_sequence_with_single_element():
    """A one-element sequence collapses to the element's own combinator."""
    grammar = Grammar()
    token_id = grammar.add_token('Name')
    collapsed = make_sequence(token_id)
    assert isinstance(collapsed, TokenCombinator)
def test_extend_brackets_grammar():
    """Merging two grammars unions their bracket pairs."""
    first = Grammar()
    first.add_brackets(first.add_implicit('('), first.add_implicit(')'))
    second = Grammar()
    second.add_brackets(second.add_implicit('('), second.add_implicit(')'))
    second.add_brackets(second.add_implicit('['), second.add_implicit(']'))
    merged = Grammar.merge(first, second)
    expected = {
        (merged.tokens['['], merged.tokens[']']),
        (merged.tokens['('], merged.tokens[')']),
    }
    assert merged.brackets == expected
def test_extend_packrat_grammar():
    """Merging same-named packrat parselets combines their parsers."""
    first = Grammar()
    first.add_token('Number')
    first.add_token('String')
    first_expr = first.add_parselet('expr', result_type=object)
    first.add_parser(first_expr, 'Number')
    first.add_parser(first_expr, 'String')

    second = Grammar()
    second.add_token('Number')
    second.add_token('String')
    second_expr = second.add_parselet('expr', result_type=object)
    second.add_parser(second_expr, 'Number')
    second.add_parser(second_expr, 'String')

    merged = Grammar.merge(first, second)
    merged_expr = merged.parselets['expr']
    assert merged_expr in merged.tables
    # two parsers contributed by each source grammar
    assert len(cast(PackratTable, merged.tables[merged_expr]).parselets) == 4
def test_add_idempotent_token():
    """Registering the same token name twice yields the same id object."""
    grammar = Grammar()
    first_id = grammar.add_token('Name')
    second_id = grammar.add_token('Name')
    assert first_id is second_id and first_id == second_id
def test_add_trivia():
    """``add_trivia`` records the token id in the trivia set."""
    grammar = Grammar()
    whitespace_id = grammar.add_token('Whitespace')
    assert grammar.trivia == set()
    grammar.add_trivia(whitespace_id)
    assert grammar.trivia == {whitespace_id}
def test_add_idempotent_trivia():
    """Repeated ``add_trivia`` calls do not duplicate the trivia entry."""
    grammar = Grammar()
    whitespace_id = grammar.add_token('Whitespace')
    for _ in range(3):
        grammar.add_trivia(whitespace_id)
    assert grammar.trivia == {whitespace_id}
def grammar() -> Grammar:
    """Build a small lexer-grammar fixture: whitespace trivia, numbers, names
    and a few implicit keyword/operator tokens."""
    grammar = Grammar()
    whitespace_id = grammar.add_pattern(grammar.add_token('Whitespace'), r'\s+')
    grammar.add_trivia(whitespace_id)
    grammar.add_pattern(grammar.add_token('Number'), r'[0-9]+')
    # BUG FIX: the pattern used `[a-zA-Z0-9]+` for the tail, requiring at
    # least two characters, so single-letter names never matched; use `*`
    # like the other grammar fixture in this file.
    grammar.add_pattern(grammar.add_token('Name'), r'[a-zA-Z_][a-zA-Z0-9]*')
    grammar.add_implicit("for")
    grammar.add_implicit("while")
    grammar.add_implicit("+")
    grammar.add_implicit("-")
    return grammar
def create_core_grammar() -> Grammar:
    """Initialize the default core grammar: lexical patterns, implicit
    punctuation tokens, trivia, and bracket pairs."""
    grammar = Grammar()

    # lexical patterns (one token may carry several patterns, e.g. String)
    grammar.add_pattern(grammar.add_token('Comment'), RE_COMMENT)
    grammar.add_pattern(grammar.add_token('Whitespace'), RE_WHITESPACE)
    grammar.add_pattern(grammar.add_token('Name'), RE_NAME)
    grammar.add_pattern(grammar.add_token('NewLine'), RE_NEWLINE)
    grammar.add_pattern(grammar.add_token('String'), RE_STRING_SINGLE)
    grammar.add_pattern(grammar.add_token('String'), RE_STRING_DOUBLE)
    grammar.add_pattern(grammar.add_token('Integer'), RE_NUMBER_BINARY)
    grammar.add_pattern(grammar.add_token('Integer'), RE_NUMBER_OCTAL)
    grammar.add_pattern(grammar.add_token('Integer'), RE_NUMBER_DECIMAL)
    grammar.add_pattern(grammar.add_token('Integer'), RE_NUMBER_HEXADECIMAL)
    grammar.add_pattern(grammar.add_token('Float'), RE_FLOAT_POINT)
    grammar.add_pattern(grammar.add_token('Float'), RE_FLOAT_EXPONENT)

    # implicit punctuation tokens
    for symbol in ('(', ')', '[', ']', '{', '}', '<', '>'):
        grammar.add_implicit(symbol)

    # comments and whitespace are skipped as trivia
    grammar.add_trivia(grammar.tokens['Comment'])
    grammar.add_trivia(grammar.tokens['Whitespace'])

    # bracket pairs (note: '<'/'>' are not registered as brackets)
    grammar.add_brackets(grammar.tokens['('], grammar.tokens[')'])
    grammar.add_brackets(grammar.tokens['['], grammar.tokens[']'])
    grammar.add_brackets(grammar.tokens['{'], grammar.tokens['}'])
    return grammar
def create_combinator_grammar() -> Grammar:
    """Create the grammar that parses combinator definitions.

    P.S. This grammar is used for the bootstrap process of the initial
    grammar, i.e. for the string definitions of combinators in a grammar.
    """
    grammar = Grammar()
    grammar.extend(create_core_grammar())

    # token ids inherited from the core grammar
    name_id = grammar.tokens['Name']
    string_id = grammar.tokens['String']
    number_id = grammar.tokens['Integer']
    colon_id = grammar.add_implicit(':')
    lparen_id = grammar.tokens['(']
    rparen_id = grammar.tokens[')']
    lsquare_id = grammar.tokens['[']
    rsquare_id = grammar.tokens[']']
    lcurly_id = grammar.tokens['{']
    rcurly_id = grammar.tokens['}']
    less_id = grammar.tokens['<']
    great_id = grammar.tokens['>']

    # parselets for combinator definitions
    comb_id = grammar.add_parselet('combinator', result_type=CombinatorNode)
    seq_id = grammar.add_parselet('combinator_sequence', result_type=SequenceNode)

    # combinator := name: Name ":" combinator: combinator   ; named variable
    grammar.add_parser(
        comb_id,
        make_sequence(make_named('name', name_id), colon_id, make_named('combinator', comb_id)),
        make_ctor(NamedNode))
    # combinator := name: Name [ '<' priority: Number '>' ] ; parselet/token reference
    grammar.add_parser(
        comb_id,
        make_sequence(make_named('name', name_id),
                      make_optional(less_id, make_named('priority', number_id), great_id)),
        make_ctor(ReferenceNode))
    # combinator := value: String                           ; implicit token
    grammar.add_parser(comb_id, make_named('value', string_id), make_ctor(ImplicitNode))
    # combinator := '[' combinator: combinator_sequence ']' ; optional combinator
    grammar.add_parser(
        comb_id,
        make_sequence(lsquare_id, make_named('combinator', seq_id), rsquare_id),
        make_ctor(OptionalNode))
    # combinator := '{' combinator: combinator_sequence '}' ; repeat combinator
    grammar.add_parser(
        comb_id,
        make_sequence(lcurly_id, make_named('combinator', seq_id), rcurly_id),
        make_ctor(RepeatNode))
    # combinator := '(' combinator: combinator_sequence ')' ; parenthesis combinator
    grammar.add_parser(
        comb_id,
        make_sequence(lparen_id, make_named('combinator', seq_id), rparen_id),
        make_return_variable('combinator'))
    # combinator_sequence := combinators:combinator combinators:{ combinator }
    grammar.add_parser(
        seq_id,
        make_sequence(make_named('combinators', comb_id),
                      make_named('combinators', make_repeat(comb_id))),
        make_ctor(SequenceNode))
    return grammar