Example no. 1
 def __init__(
     self,
     source: typing.Union[str, typing.TextIO],
     path: Path,
     indentation: typing.Optional[str] = None,
 ):
     """Build the token source: wrap *source* (text or stream) in a Lexer."""
     lexer_options = {"source": source, "path": path, "indentation": indentation}
     self.lexer = Lexer(**lexer_options)
Example no. 2
def simple_test(
    token_type: Type[tokens.Token],
    lex_method: str,
    src: str,
    expected_value: Any,
) -> None:
    """Lex *src* and assert that the first real token (the one after the
    leading Nodent) equals ``token_type(expected_value)``.

    The previous docstring claimed this *returns* the first token; it does
    not — it returns ``None`` and asserts.

    NOTE(review): ``lex_method`` is accepted but never used; kept for
    interface compatibility — confirm whether call sites still pass it.
    """
    filepath = "test"
    lexer = Lexer(source=StringIO(src), path=filepath)
    lexer.next()  # skip the leading Nodent token
    token = lexer.next()
    assert token == token_type(expected_value)
Example no. 3
def test_lex_program() -> None:
    """End-to-end test: lex a multi-line snippet and compare the complete
    token stream — including Indent/Nodent/Dedent bookkeeping tokens —
    against an explicit expected list.
    """

    # The snippet exercises punctuation, binary/octal/hex integer literals,
    # a comment line, signed ints/floats in several notations, nested
    # indentation and a string literal.
    src = dedent("""
        ()[]{}:`,;
            0b10 0o51 0Xa8
            # comment
        12 -12 -12.01 -12e3
        -12.01e30 12. 12.E30
        0 -0
            -
                --wow abc
        "wow"
        """)
    filepath = "test"

    expected = [
        tokens.Nodent(),
        # line "()[]{}:`,;" — one token per punctuation character
        tokens.LParen(),
        tokens.RParen(),
        tokens.LBracket(),
        tokens.RBracket(),
        tokens.LCurly(),
        tokens.RCurly(),
        tokens.Colon(),
        tokens.Tick(),
        tokens.Comma(),
        tokens.Semicolon(),
        # indented line with non-decimal integer literals; note that "0Xa8"
        # lexes case-insensitively and the "# comment" line emits no token
        tokens.Indent(),
        tokens.Integer(0b10),
        tokens.Integer(0o51),
        tokens.Integer(0xA8),
        tokens.Dedent(),
        # signed integers and floats
        tokens.Integer(12),
        tokens.Integer(-12),
        tokens.Float(-12.01),
        tokens.Float(-12e3),
        tokens.Nodent(),
        # "12." and "12.E30" are valid float spellings
        tokens.Float(-12.01e30),
        tokens.Float(12.0),
        tokens.Float(12.0e30),
        tokens.Nodent(),
        tokens.Integer(0),
        tokens.Integer(-0),
        # a bare "-" and "--wow" lex as identifiers, not numbers
        tokens.Indent(),
        tokens.Identifier("-"),
        tokens.Indent(),
        tokens.Identifier("--wow"),
        tokens.Identifier("abc"),
        # two dedents: back from two nested indentation levels
        tokens.Dedent(),
        tokens.Dedent(),
        tokens.String("wow"),
    ]

    actual = list(Lexer(source=src, path=filepath))

    assert actual == expected
Example no. 4
def test_lexer_token_iter() -> None:
    """Exercise the next()/peek() interplay of the lexer's token cursor."""
    lexer = Lexer(source="123 abc", path="")
    assert lexer.next() == tokens.Nodent()
    assert lexer.next() == tokens.Integer(123)
    # peek() inspects the upcoming token without consuming it...
    assert lexer.peek() == tokens.Identifier("abc")
    # ...and peek(2) looks two tokens ahead (here: past the last real token)
    assert lexer.peek(2) == tokens.End()
    # next() still yields the peeked token — peeking consumed nothing
    assert lexer.next() == tokens.Identifier("abc")
    # once exhausted, next() keeps returning End rather than raising
    assert lexer.next() == tokens.End()
    assert lexer.next() == tokens.End()
Example no. 5
class Parser:
    """Recursive-descent parser turning the Lexer's token stream into an AST.

    Entry points: :meth:`parse_module`, :meth:`parse_block`,
    :meth:`parse_expr`.  All raise :class:`ParseError` on malformed input.
    """

    lexer: Lexer

    def __init__(
        self,
        source: typing.Union[str, typing.TextIO],
        path: Path,
        indentation: typing.Optional[str] = None,
    ):
        self.lexer = Lexer(source=source, path=path, indentation=indentation)

    def parse_module(self) -> ast.Module:
        """Parse an entire module: Nodent-separated blocks until End."""
        self._assert(tokens.Start)
        lines: typing.List[ast.Node] = []
        # BUG FIX: this previously compared the current token *instance*
        # against the token *class* (`self.lexer.curr != tokens.End`),
        # unlike every other check in this class; use the exact-type test.
        while type(self.lexer.curr) is not tokens.End:
            self._assert(tokens.Nodent)
            lines.append(self._parse_block())
        return ast.Module(lines)

    def _parse_block(self) -> ast.Node:
        """Parse one logical line: a lone expression, a Line of 2+
        expressions, or a Paragraph (head line plus indented sub-blocks)."""
        exprs: typing.List[ast.Node] = []
        exprs.append(self._parse_expr())
        if type(self.lexer.curr) in (
            tokens.Indent,
            tokens.Nodent,
            tokens.Dedent,
            tokens.End,
        ):
            # a single expression on its own line — unwrap it
            return exprs[0]
        while type(self.lexer.curr) not in (
            tokens.Semicolon,
            tokens.Indent,
            tokens.Nodent,
            tokens.Dedent,
            tokens.End,
        ):
            exprs.append(self._parse_expr())
        self._optional(tokens.Semicolon)
        if type(self.lexer.curr) in (tokens.Nodent, tokens.Dedent, tokens.End):
            if len(exprs) < 2:
                self._error("line must contain at least 2 sub-expressions")
            return ast.Line(exprs)
        if type(self.lexer.curr) is tokens.Colon:
            self.lexer.next()
        if type(self.lexer.curr) is tokens.Indent:
            # indented body: collect one block per Nodent until the Dedent
            self.lexer.next()
            exprs.append(self._parse_block())
            while type(self.lexer.curr) is not tokens.Dedent:
                self._assert(tokens.Nodent)
                exprs.append(self._parse_block())
            self.lexer.next()  # consume the Dedent
            return ast.Paragraph(exprs)
        self._fail()

    def parse_block(self) -> ast.Node:
        """Public wrapper: parse exactly one block from a fresh stream."""
        self._assert(tokens.Start)
        self._assert(tokens.Nodent)
        return self._parse_block()

    def _parse_expr(self) -> ast.Node:
        if type(self.lexer.curr) is tokens.LParen:
            return self._parse_list_expr()
        return self._parse_value()

    def parse_expr(self) -> ast.Node:
        """Public wrapper: parse exactly one expression from a fresh stream."""
        self._assert(tokens.Start)
        self._assert(tokens.Nodent)
        return self._parse_expr()

    def _parse_list_expr(self) -> ast.Expression:
        """Parse "( expr [, ] expr ... )" — commas between items optional."""
        self._assert(tokens.LParen)
        exprs: typing.List[ast.Node] = []
        while type(self.lexer.curr) is not tokens.RParen:
            exprs.append(self._parse_expr())
            self._optional(tokens.Comma)
        self._assert(tokens.RParen)
        return ast.Expression(exprs)

    def _parse_value(self) -> ast.Value:
        if type(self.lexer.curr) in (tokens.String, tokens.Integer, tokens.Float):
            res = ast.Literal(self.lexer.curr.value)
            self.lexer.next()
            return res
        # NOTE(review): this matches tokens.Name, while the lexer tests in
        # this file produce tokens.Identifier — confirm Name is the token the
        # lexer actually emits for identifiers.
        if type(self.lexer.curr) is tokens.Name:
            res2 = ast.Name(typing.cast(str, self.lexer.curr.value))
            self.lexer.next()
            return res2
        if type(self.lexer.curr) is tokens.LBracket:
            return self._parse_list()
        if type(self.lexer.curr) is tokens.LCurly:
            return self._parse_set_or_map()
        self._fail()

    def _parse_list(self) -> ast.List:
        """Parse "[ expr , expr , ... ]" — a comma after every element."""
        self._assert(tokens.LBracket)
        elems: typing.List[ast.Node] = []
        while type(self.lexer.curr) is not tokens.RBracket:
            elems.append(self._parse_expr())
            self._assert(tokens.Comma)
        self.lexer.next()  # consume the RBracket
        return ast.List(elems)

    def _parse_set_or_map(self) -> typing.Union[ast.Set, ast.Map]:
        """Parse a "{...}" literal: a Map if the first element is followed by
        ":", a Set if followed by ","; "{}" is the empty map."""
        self._assert(tokens.LCurly)
        if type(self.lexer.curr) is tokens.RCurly:
            # empty map literal "{}"
            self.lexer.next()  # BUG FIX: the closing "}" was never consumed
            return ast.Map(())
        first = self._parse_expr()
        if type(self.lexer.curr) is tokens.Colon:
            # parse as map
            pairs: typing.List[typing.Tuple[ast.Node, ast.Node]] = []
            self.lexer.next()  # consume the ":"
            pairs.append((first, self._parse_expr()))
            # BUG FIX: the separator comma after the first pair was never
            # consumed, so any map with more than one entry failed to parse
            self._optional(tokens.Comma)
            while type(self.lexer.curr) is not tokens.RCurly:
                key = self._parse_expr()
                self._assert(tokens.Colon)
                value = self._parse_expr()
                # BUG FIX: pairs after the first were parsed but discarded
                pairs.append((key, value))
                self._assert(tokens.Comma)
            self.lexer.next()  # BUG FIX: consume the closing "}"
            return ast.Map(pairs)
        if type(self.lexer.curr) is tokens.Comma:
            # parse as set
            elems: typing.List[ast.Node] = [first]
            self.lexer.next()  # consume the ","
            while type(self.lexer.curr) is not tokens.RCurly:
                elems.append(self._parse_expr())
                self._assert(tokens.Comma)
            self.lexer.next()  # consume the closing "}"
            # BUG FIX: the set branch previously fell through to _fail()
            # instead of returning the parsed set
            return ast.Set(elems)
        self._fail()

    def _assert(self, expected: typing.Type[tokens.Token]) -> None:
        """Check that the current token has exactly type *expected* and
        consume it; raise ParseError (via _error) on mismatch.

        (The old docstring described a None/no-argument default that the
        signature never had.)
        """
        if type(self.lexer.curr) is not expected:
            self._error(
                f"expected token of type: {expected.__name__}, found: {self.lexer.curr}"
            )
        self.lexer.next()

    def _fail(self) -> typing.NoReturn:
        """Raise a ParseError naming the unexpected current token."""
        self._error(f"unexpected token: {self.lexer.curr}")

    def _optional(self, expected: typing.Type[tokens.Token]) -> None:
        """
        Check if the current token is the expected one and consumes it only if it is
        """
        if type(self.lexer.curr) is expected:
            self.lexer.next()

    def _error(self, msg: str) -> typing.NoReturn:
        """Formats and raises a ParseError"""
        raise ParseError(msg)
Example no. 6
def test_lexer_peek_at_end() -> None:
    """Lexing a single integer yields exactly a Nodent followed by the int."""
    lexer = Lexer(source="0", path="")
    assert [tokens.Nodent(), tokens.Integer(0)] == list(lexer)
Example no. 7
def test_lexer_bad_indentation() -> None:
    """Mixing tabs and spaces in the indentation must raise LexError."""
    for bad_source in ("  a\n\tb", " \tstuff"):
        with pytest.raises(LexError):
            list(Lexer(source=bad_source, path=""))
Example no. 8
def test_lexer_unexpected_char() -> None:
    """A NUL character is not legal input and must raise LexError.

    (Added the ``-> None`` return annotation for consistency with the other
    tests in this file.)
    """
    with pytest.raises(LexError):
        list(Lexer(source="\0", path=""))