Beispiel #1
0
    def __init__(self, grammar: Grammar, filename: str, content: str) -> None:
        """Initialise the instance and register the layout tokens it refers to.

        All three arguments are forwarded unchanged to the base-class
        initialiser; ``grammar`` is additionally used to obtain the token ids
        stored on the instance.
        """
        super().__init__(grammar, filename, content)

        # Each id is whatever `grammar.add_token` returns for the given name.
        self.newline_id = grammar.add_token('NewLine')
        self.whitespace_id = grammar.add_token('Whitespace')
        self.indent_id = grammar.add_token('Indent')
        # NOTE(review): 'Dedend' looks like a misspelling of 'Dedent'. Confirm
        # how the name is referenced elsewhere before renaming it.
        self.dedent_id = grammar.add_token('Dedend')
Beispiel #2
0
def test_variables():
    """Check the variable types exposed by differently shaped combinators."""
    grammar = Grammar()
    token = grammar.add_token('Name')

    def named(label):
        # Shorthand for a variable bound to the `Name` token.
        return make_named(label, token)

    def check(combinator, key, expected, *, membership=True):
        # Optionally verify the key is present, then compare its type.
        if membership:
            assert key in combinator.variables
        assert combinator.variables[key] == expected

    # name: Name
    check(named('name'), 'name', SyntaxToken)

    # names: Name names: Name
    check(make_sequence(named('names'), named('names')),
          'names', Sequence[SyntaxToken])

    # [ name: Name ]
    check(make_optional(named('name')), 'name', Optional[SyntaxToken])

    # { name: Name }
    check(make_repeat(named('names')), 'names', Sequence[SyntaxToken],
          membership=False)

    # names: Name { names: Name }
    check(make_sequence(named('names'), make_repeat(named('names'))),
          'names', Sequence[SyntaxToken])

    # names: { Name }
    check(make_named('names', make_repeat(token)),
          'names', Sequence[SyntaxToken])
Beispiel #3
0
def test_make_optional():
    """`make_optional` on a token id yields an optional token combinator."""
    grammar = Grammar()
    token = grammar.add_token('Name')

    optional = make_optional(token)

    assert isinstance(optional, OptionalCombinator)
    wrapped = optional.combinator
    assert isinstance(wrapped, TokenCombinator)
    assert optional.result_type == Optional[SyntaxToken]
Beispiel #4
0
def test_make_repeat():
    """`make_repeat` on a token id yields a repeat combinator over tokens."""
    grammar = Grammar()
    token = grammar.add_token('Name')

    repeated = make_repeat(token)

    assert isinstance(repeated, RepeatCombinator)
    wrapped = repeated.combinator
    assert isinstance(wrapped, TokenCombinator)
    assert repeated.result_type == Sequence[SyntaxToken]
Beispiel #5
0
def test_add_parselet():
    """Adding a parselet registers one Packrat parselet and one new symbol."""
    grammar = Grammar()
    symbols_before = len(grammar.symbols)

    parselet = grammar.add_parselet('expr')

    assert parselet.kind == ParseletKind.Packrat
    assert len(grammar.parselets) == 1
    symbols_after = len(grammar.symbols)
    assert symbols_after == symbols_before + 1
Beispiel #6
0
def test_extend_fail_grammar():
    """Merging grammars whose `expr` parselets differ in kind must fail."""
    pratt_grammar = Grammar()
    pratt_grammar.add_parselet('expr', kind=ParseletKind.Pratt)

    packrat_grammar = Grammar()
    packrat_grammar.add_parselet('expr', kind=ParseletKind.Packrat)

    with pytest.raises(GrammarError):
        Grammar.merge(pratt_grammar, packrat_grammar)
Beispiel #7
0
def test_make_token():
    """`make_token` builds a token combinator that exposes no variables."""
    grammar = Grammar()
    token = grammar.add_token('Name')

    combinator = make_token(token)

    assert isinstance(combinator, TokenCombinator)
    assert combinator.token_id == token
    assert combinator.result_type == SyntaxToken
    assert combinator.variables == {}
Beispiel #8
0
def test_make_parselet():
    """`make_parselet` builds a parselet combinator that exposes no variables."""
    grammar = Grammar()
    parselet = grammar.add_parselet('name')

    combinator = make_parselet(parselet)

    assert isinstance(combinator, ParseletCombinator)
    assert combinator.parser_id == parselet
    assert combinator.result_type == SyntaxNode
    assert combinator.variables == {}
Beispiel #9
0
def test_add_token():
    """A named token is registered under its name and creates no pattern."""
    grammar = Grammar()
    token_id = grammar.add_token('Name')

    assert 'Name' in grammar.tokens
    # Compare against the registry entry; comparing the id with itself
    # could never fail.
    assert grammar.tokens['Name'] == token_id
    assert len(grammar.patterns) == 0
    assert token_id.name == 'Name'
    assert token_id.description == 'name'
    assert not token_id.is_implicit
Beispiel #10
0
def test_make_sequence():
    """`make_sequence` converts ids into combinators and keeps their order."""
    grammar = Grammar()
    token = grammar.add_token('Name')
    parselet = grammar.add_parselet('expr')

    sequence = make_sequence(token, parselet)

    assert isinstance(sequence, SequenceCombinator)
    assert len(sequence) == 2
    first = sequence[0]
    assert isinstance(first, TokenCombinator)
    second = sequence[1]
    assert isinstance(second, ParseletCombinator)
    assert sequence.result_type == SyntaxNode
Beispiel #11
0
def test_add_pattern():
    """`add_pattern` returns the token id and stores one explicit pattern."""
    grammar = Grammar()
    token = grammar.add_token('Name')

    returned = grammar.add_pattern(token, r'[a-zA-Z]*')
    assert returned is token, "add_pattern must return token id"

    assert len(grammar.patterns) == 1
    registered = grammar.patterns[0]
    assert registered.token_id == token
    assert registered.pattern == re.compile(r'[a-zA-Z]*')
    assert registered.priority == PRIORITY_MAX
    assert not registered.is_implicit
Beispiel #12
0
def test_add_incorrect_token():
    """Rejected token names raise `GrammarError` and leave the grammar untouched."""
    grammar = Grammar()
    # Record both baselines separately: the token registry and the symbol
    # registry are different collections and need not have the same size.
    token_count = len(grammar.tokens)
    symbol_count = len(grammar.symbols)

    # A tuple (not a set) keeps the order of attempts deterministic.
    for name in ('+', 'name'):
        with pytest.raises(GrammarError):
            grammar.add_token(name)

    assert len(
        grammar.tokens
    ) == token_count, "Count of tokens in grammar is changed after failed call"
    assert len(
        grammar.symbols
    ) == symbol_count, "Count of symbols in grammar is changed after failed call"
Beispiel #13
0
def test_add_implicit_token():
    """An implicit token registers itself together with an escaped pattern."""
    grammar = Grammar()
    plus = grammar.add_implicit('+')

    # The token itself.
    assert plus.name == '+'
    assert plus.description == '+'
    assert plus.is_implicit
    assert '+' in grammar.tokens

    # The pattern created for it.
    assert len(grammar.patterns) == 1
    registered = grammar.patterns[0]
    assert registered.token_id == plus
    expected_regex = re.compile(re.escape('+'))
    assert registered.pattern == expected_regex
    assert registered.priority < 0
    assert registered.is_implicit
Beispiel #14
0
def test_extend_implicit_grammar():
    """Extending a grammar carries over implicit tokens and their patterns."""
    source = Grammar()
    source.add_implicit('(')

    target = Grammar()
    target.extend(source)

    assert target.patterns[0].token_id == target.tokens['(']
    assert target.patterns[0].priority == -len('(')
    assert target.patterns[0].is_implicit
Beispiel #15
0
def convert_node(grammar: Grammar, node: CombinatorNode, location: Location) -> Combinator:
    """Convert a parsed combinator node into a combinator, recursively.

    Args:
        grammar: Grammar used to resolve referenced tokens and parselets.
            Implicit tokens found in the node tree are added to it.
        node: Root of the node tree to convert.
        location: Location passed to `add_implicit` and attached to raised
            diagnostics.

    Returns:
        The combinator built for ``node``.

    Raises:
        DiagnosticError: If a token reference carries a priority, or the
            referenced name is neither a token nor a parselet of ``grammar``.
        NotImplementedError: If ``node`` is of an unsupported node type.
    """
    if isinstance(node, SequenceNode):
        return make_sequence(*(convert_node(grammar, child, location) for child in node.combinators))
    if isinstance(node, RepeatNode):
        return make_repeat(convert_node(grammar, node.combinator, location))
    if isinstance(node, OptionalNode):
        return make_optional(convert_node(grammar, node.combinator, location))
    if isinstance(node, NamedNode):
        return make_named(node.name.value, convert_node(grammar, node.combinator, location))
    if isinstance(node, ImplicitNode):
        # The node stores the literal's source text; evaluate it to get the value.
        token_id = grammar.add_implicit(ast.literal_eval(node.value.value), location=location)
        return make_token(token_id)
    if isinstance(node, ReferenceNode):
        name = node.name.value
        # Tokens are looked up before parselets.
        if name in grammar.tokens:
            if node.priority:
                raise DiagnosticError(location, 'Token combinator can not have priority')
            return make_token(grammar.tokens[name])
        if name in grammar.parselets:
            # Passes a falsy `node.priority` through unchanged, otherwise the
            # evaluated literal.
            priority = node.priority and ast.literal_eval(node.priority.value)
            return make_parselet(grammar.parselets[name], priority)
        raise DiagnosticError(location, f"Not found symbol {name} in grammar")

    raise NotImplementedError(f'Not implemented conversion from node to combinator: {type(node).__name__}')
Beispiel #16
0
def test_add_packrat_parser():
    """A Packrat parselet accepts sequences, bare tokens and itself as parsers."""
    grammar = Grammar()
    stmt = grammar.add_parselet('stmt',
                                kind=ParseletKind.Packrat,
                                result_type=SyntaxToken)
    star = grammar.add_implicit('*')

    # '(' stmt ')'
    parenthesised = make_sequence(grammar.add_implicit('('), stmt,
                                  grammar.add_implicit(')'))
    assert grammar.add_parser(stmt, parenthesised)

    # '('
    lone_paren = make_sequence(grammar.add_implicit('('))
    assert grammar.add_parser(stmt, lone_paren)

    # bare token id, then the parselet id itself
    for combinator in (star, stmt):
        assert grammar.add_parser(stmt, combinator)
Beispiel #17
0
def test_flat_sequence():
    """`flat_sequence` inlines the items of a nested sequence combinator."""
    grammar = Grammar()
    token = grammar.add_token('Name')
    parselet = grammar.add_parselet('expr')

    nested = SequenceCombinator((
        TokenCombinator(token),
        ParseletCombinator(parselet),
    ))
    flattened = tuple(
        flat_sequence(TokenCombinator(token),
                      ParseletCombinator(parselet),
                      nested,
                      kind=SequenceCombinator))

    assert len(flattened) == 4
    expected_types = (TokenCombinator, ParseletCombinator,
                      TokenCombinator, ParseletCombinator)
    for item, expected in zip(flattened, expected_types):
        assert isinstance(item, expected)
Beispiel #18
0
def test_flat_combinator():
    """`flat_combinator` wraps ids and returns existing combinators as-is."""
    grammar = Grammar()
    token = grammar.add_token('Name')
    parselet = grammar.add_parselet('expr')

    # A token id becomes a token combinator.
    from_token = flat_combinator(token)
    assert isinstance(from_token, TokenCombinator)
    assert from_token.token_id is token

    # A parselet id becomes a parselet combinator without priority.
    from_parselet = flat_combinator(parselet)
    assert isinstance(from_parselet, ParseletCombinator)
    assert from_parselet.parser_id is parselet
    assert from_parselet.priority is None

    # A combinator is handed back as the very same object.
    existing = TokenCombinator(token)
    returned = flat_combinator(existing)
    assert existing is returned
Beispiel #19
0
def test_add_pratt_parser():
    """Parsers can be added to a Pratt parselet; prefix tokens are recorded."""
    grammar = Grammar()
    expr = grammar.add_parselet('expr',
                                kind=ParseletKind.Pratt,
                                result_type=SyntaxToken)
    integer_tok = grammar.add_token('Integer')
    string_tok = grammar.add_token('String')
    plus_tok = grammar.add_implicit('+')
    star_tok = grammar.add_implicit('*')

    pratt_table = cast(PrattTable, grammar.tables[expr])
    assert pratt_table.prefix_tokens == set()

    # Parsers that start with a token: bare id, then a named token.
    assert grammar.add_parser(expr, integer_tok)
    assert integer_tok in pratt_table.prefix_tokens, "Cleanup of pratt table prefix tokens is not worked"
    named_string = make_named('value', string_tok)
    assert grammar.add_parser(expr, named_string)
    assert string_tok in pratt_table.prefix_tokens, "Cleanup of pratt table prefix tokens is not worked"

    # Parsers that start with the parselet itself.
    addition = make_sequence(expr, plus_tok, expr)
    assert grammar.add_parser(expr, addition)
    multiplication = make_sequence(make_named('lhs', expr),
                                   make_named('op', star_tok),
                                   expr)
    assert grammar.add_parser(expr, multiplication)
Beispiel #20
0
def test_add_brackets():
    """`add_brackets` records the pair in every bracket-related collection."""
    grammar = Grammar()
    opener = grammar.add_implicit('(')
    closer = grammar.add_implicit(')')

    # Nothing is registered before the call.
    assert grammar.brackets == set()
    assert grammar.open_brackets == set()
    assert grammar.close_brackets == set()

    grammar.add_brackets(opener, closer)

    expected_pairs = {(opener, closer)}
    assert grammar.brackets == expected_pairs
    assert grammar.open_brackets == {opener}
    assert grammar.close_brackets == {closer}
    assert grammar.bracket_pairs[opener] == closer
Beispiel #21
0
def grammar() -> Grammar:
    """Build a small arithmetic-expression grammar.

    Registers ``Whitespace`` (marked as trivia), ``Name`` and ``Number``
    token patterns and a Pratt parselet ``expr`` with parsers for number
    literals, binary ``**``, ``+``, ``-``, ``*``, ``/``, unary ``-`` and
    ``+``, and parenthesised expressions.

    Returns:
        The populated grammar.
    """
    grammar = Grammar()

    whitespace_id = grammar.add_pattern(grammar.add_token('Whitespace'),
                                        r'\s+')
    grammar.add_trivia(whitespace_id)

    grammar.add_pattern(grammar.add_token('Name'), r'[a-zA-Z_][a-zA-Z0-9]*')
    grammar.add_pattern(grammar.add_token('Number'), r'[0-9]+')

    expr_id = grammar.add_parselet('expr',
                                   kind=ParseletKind.Pratt,
                                   result_type=object)

    # expr := value:Number
    grammar.add_parser(expr_id, "value:Number",
                       make_call(lambda value: value.value, object))

    # expr := lhs:expr "**" rhs:expr<899>
    grammar.add_parser(expr_id,
                       'lhs:expr "**" rhs:expr <899>',
                       make_call(lambda lhs, rhs: (lhs, '**', rhs), object),
                       priority=900)

    # expr := lhs:expr "+" rhs:expr<600>
    grammar.add_parser(expr_id,
                       'lhs:expr "+" rhs:expr <600>',
                       make_call(lambda lhs, rhs: (lhs, '+', rhs), object),
                       priority=600)

    # expr := lhs:expr "-" rhs:expr<600>
    grammar.add_parser(expr_id,
                       'lhs:expr "-" rhs:expr <600>',
                       make_call(lambda lhs, rhs: (lhs, '-', rhs), object),
                       priority=600)

    # expr := lhs:expr "*" rhs:expr<700>
    grammar.add_parser(expr_id,
                       'lhs:expr "*" rhs:expr <700>',
                       make_call(lambda lhs, rhs: (lhs, '*', rhs), object),
                       priority=700)

    # expr := lhs:expr "/" rhs:expr<700>
    grammar.add_parser(expr_id,
                       'lhs:expr "/" rhs:expr <700>',
                       make_call(lambda lhs, rhs: (lhs, '/', rhs), object),
                       priority=700)

    # expr := "-" value:expr<800>
    grammar.add_parser(expr_id, '"-" value:expr <800>',
                       make_call(lambda value: ('-', value), object))

    # expr := "+" value:expr<800>
    grammar.add_parser(expr_id, '"+" value:expr <800>',
                       make_call(lambda value: ('+', value), object))

    # expr := "(" value:expr ")"
    grammar.add_parser(expr_id, '"(" value:expr ")"',
                       make_return_variable('value'))

    return grammar
Beispiel #22
0
def test_make_sequence_with_single_element():
    """A one-element sequence collapses to the element's own combinator."""
    grammar = Grammar()
    token = grammar.add_token('Name')

    result = make_sequence(token)

    assert isinstance(result, TokenCombinator)
Beispiel #23
0
def test_extend_brackets_grammar():
    """Merging grammars keeps each bracket pair once in the result."""

    def with_brackets(*pairs):
        # Build a grammar that registers every (open, close) pair in order.
        built = Grammar()
        for open_text, close_text in pairs:
            built.add_brackets(built.add_implicit(open_text),
                               built.add_implicit(close_text))
        return built

    first = with_brackets(('(', ')'))
    second = with_brackets(('(', ')'), ('[', ']'))

    merged = Grammar.merge(first, second)

    tokens = merged.tokens
    assert merged.brackets == {(tokens['['], tokens[']']),
                               (tokens['('], tokens[')'])}
Beispiel #24
0
def test_extend_packrat_grammar():
    """Merging two Packrat grammars keeps the parsers of both `expr` parselets."""

    def number_or_string_grammar():
        # Grammar with an `expr` parselet that accepts Number or String.
        built = Grammar()
        for token_name in ('Number', 'String'):
            built.add_token(token_name)
        parselet = built.add_parselet('expr', result_type=object)
        for definition in ('Number', 'String'):
            built.add_parser(parselet, definition)
        return built

    first = number_or_string_grammar()
    second = number_or_string_grammar()

    merged = Grammar.merge(first, second)

    merged_expr = merged.parselets['expr']
    assert merged_expr in merged.tables
    table = cast(PackratTable, merged.tables[merged_expr])
    assert len(table.parselets) == 4
Beispiel #25
0
def test_add_idempotent_token():
    """Adding the same token name twice returns the same token id object."""
    grammar = Grammar()
    first = grammar.add_token('Name')
    second = grammar.add_token('Name')

    assert first is second
    assert first == second
Beispiel #26
0
def test_add_trivia():
    """`add_trivia` puts the token into the grammar's trivia set."""
    grammar = Grammar()
    whitespace = grammar.add_token('Whitespace')

    # Empty before the call, exactly the one token afterwards.
    assert grammar.trivia == set()
    grammar.add_trivia(whitespace)
    assert grammar.trivia == {whitespace}
Beispiel #27
0
def test_add_idempotent_trivia():
    """Marking the same token as trivia repeatedly leaves a single entry."""
    grammar = Grammar()
    whitespace = grammar.add_token('Whitespace')
    expected = {whitespace}

    attempts = 3
    for _attempt in range(attempts):
        grammar.add_trivia(whitespace)
        assert grammar.trivia == expected
Beispiel #28
0
def grammar() -> Grammar:
    """Build a grammar with whitespace trivia, two patterns and four implicit tokens.

    Returns:
        The populated grammar.
    """
    result = Grammar()

    # Whitespace is matched and then marked as trivia.
    whitespace = result.add_token('Whitespace')
    result.add_trivia(result.add_pattern(whitespace, r'\s+'))

    number = result.add_token('Number')
    result.add_pattern(number, r'[0-9]+')
    # NOTE(review): the trailing `+` means this pattern needs at least two
    # characters to match; confirm that is intended.
    name = result.add_token('Name')
    result.add_pattern(name, r'[a-zA-Z_][a-zA-Z0-9]+')

    for literal in ("for", "while", "+", "-"):
        result.add_implicit(literal)

    return result
Beispiel #29
0
def create_core_grammar() -> Grammar:
    """Create the default grammar: token patterns, brackets and trivia.

    Returns:
        A new grammar with the core tokens, implicit bracket tokens, trivia
        and bracket pairs registered.
    """
    core = Grammar()

    # (token name, pattern) pairs, registered in this exact order; a token
    # name may appear several times to receive several patterns.
    token_patterns = (
        ('Comment', RE_COMMENT),
        ('Whitespace', RE_WHITESPACE),
        ('Name', RE_NAME),
        ('NewLine', RE_NEWLINE),
        ('String', RE_STRING_SINGLE),
        ('String', RE_STRING_DOUBLE),
        ('Integer', RE_NUMBER_BINARY),
        ('Integer', RE_NUMBER_OCTAL),
        ('Integer', RE_NUMBER_DECIMAL),
        ('Integer', RE_NUMBER_HEXADECIMAL),
        ('Float', RE_FLOAT_POINT),
        ('Float', RE_FLOAT_EXPONENT),
    )
    for token_name, regex in token_patterns:
        core.add_pattern(core.add_token(token_name), regex)

    for literal in ('(', ')', '[', ']', '{', '}', '<', '>'):
        core.add_implicit(literal)

    for trivia_name in ('Comment', 'Whitespace'):
        core.add_trivia(core.tokens[trivia_name])

    # '<' and '>' are registered as tokens above but not as a bracket pair.
    for opener, closer in (('(', ')'), ('[', ']'), ('{', '}')):
        core.add_brackets(core.tokens[opener], core.tokens[closer])

    return core
Beispiel #30
0
def create_combinator_grammar() -> Grammar:
    """
    Build the grammar that parses combinator definitions.

    The core grammar is extended with a ``combinator`` parselet and a
    ``combinator_sequence`` parselet.

    P.S. This grammar is used to bootstrap the initial grammar, e.g. for the
    definition of combinators inside a grammar.
    """
    result = Grammar()
    result.extend(create_core_grammar())

    # tokens
    tokens = result.tokens
    name_tok = tokens['Name']
    string_tok = tokens['String']
    integer_tok = tokens['Integer']
    colon_tok = result.add_implicit(':')
    round_open, round_close = tokens['('], tokens[')']
    square_open, square_close = tokens['['], tokens[']']
    curly_open, curly_close = tokens['{'], tokens['}']
    less_tok, greater_tok = tokens['<'], tokens['>']

    # parselets for a single combinator and for a sequence of combinators
    combinator_rule = result.add_parselet('combinator', result_type=CombinatorNode)
    sequence_rule = result.add_parselet('combinator_sequence', result_type=SequenceNode)

    def bracketed(opener, closer):
        # opener combinator: combinator_sequence closer
        return make_sequence(opener, make_named('combinator', sequence_rule), closer)

    # combinator := name: Name ":" combinator: combinator           ; named variable
    named_variable = make_sequence(
        make_named('name', name_tok),
        colon_tok,
        make_named('combinator', combinator_rule),
    )
    result.add_parser(combinator_rule, named_variable, make_ctor(NamedNode))

    # combinator := name: Name  [ '<' priority: Integer '>' ]       ; reference to parselet or token
    reference = make_sequence(
        make_named('name', name_tok),
        make_optional(less_tok, make_named('priority', integer_tok), greater_tok),
    )
    result.add_parser(combinator_rule, reference, make_ctor(ReferenceNode))

    # combinator := value: String                                   ; reference to implicit token
    result.add_parser(combinator_rule, make_named('value', string_tok), make_ctor(ImplicitNode))

    # combinator := '[' combinator: combinator_sequence ']'         ; optional combinator
    result.add_parser(combinator_rule, bracketed(square_open, square_close), make_ctor(OptionalNode))

    # combinator := '{' combinator: combinator_sequence '}'         ; repeat combinator
    result.add_parser(combinator_rule, bracketed(curly_open, curly_close), make_ctor(RepeatNode))

    # combinator := '(' combinator: combinator_sequence ')'         ; parenthesis combinator
    result.add_parser(
        combinator_rule,
        bracketed(round_open, round_close),
        make_return_variable('combinator'),
    )

    # combinator_sequence := combinators:combinator combinators:{ combinator }              ; sequence combinator
    one_or_more = make_sequence(
        make_named('combinators', combinator_rule),
        make_named('combinators', make_repeat(combinator_rule)),
    )
    result.add_parser(sequence_rule, one_or_more, make_ctor(SequenceNode))

    return result