Example #1
    def comp_op(self):
        # '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
        # <> isn't actually a valid comparison operator in Python. It's here for the
        # sake of a __future__ import described in PEP 401 (which really works :-)
        # so, '<>' has been removed here
        if self.current_token.type == COMP_OP:
            token = self.current_token
            self.eat(COMP_OP)
            return token

        if self.current_token.type == IN:
            token = self.current_token
            self.eat(IN)
            return Token(IN, 'in')

        if self.current_token.type == NOT:
            self.eat(NOT)
            self.eat(IN)
            return Token('NOTIN', 'not in')

        self.eat(IS)
        if self.current_token.type == NOT:
            self.eat(NOT)
            return Token(COMP_OP, 'is not')
        return Token(COMP_OP, 'is')
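In the grammar this method mirrors, comp_op is consumed by a comparison rule of the form expr (comp_op expr)*. The sketch below shows one assumed way to drive it; expr(), BinOp, and the token-type constants are reused from the surrounding methods, not taken from the original project.

    def comparison(self):
        # comparison: expr (comp_op expr)*  -- hypothetical driver for comp_op()
        node = self.expr()
        while self.current_token.type in (COMP_OP, IN, NOT, IS):
            # comp_op() returns the operator token; build a left-nested BinOp chain
            node = BinOp(node, self.comp_op(), self.expr())
        return node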
Example #2
 def bcmpr(self):
     if self.current_token.type == '¬':
         self.eat('¬')
         b = self.b_or()
         return Neg(b)
     if self.current_token.type == LPAREN:
         self.eat(LPAREN)
         node = self.b_or()
         self.eat(RPAREN)
         return node
     if self.current_token.type == BOOLEAN:
         b = Bool(self.current_token)
         self.eat(BOOLEAN)
         return b
     node = self.aexp()
     # every relational operator builds the same BinOp over two arithmetic expressions
     for op in ('=', '<', '>', '<=', '>='):
         if self.current_token.type == op:
             self.eat(op)
             return BinOp(left=node, op=Token(op, op), right=self.aexp())
Example #3
    def test_float_missing_integer_missing_fractional_normal(self):
        tokens = self.token_list('''
            0.
            0000.
            00432141.
            341314.
            .0
            .5
            .000005432524234124
            4312431.43124214
            1.2
            1.0
            ''')

        self.assertEqual(tokens, [
            Token('Float Literal', '0.', False, False, 1, 1),
            Token('Float Literal', '0000.', False, False, 2, 1),
            Token('Float Literal', '00432141.', False, False, 3, 1),
            Token('Float Literal', '341314.', False, False, 4, 1),
            Token('Float Literal', '.0', False, False, 5, 1),
            Token('Float Literal', '.5', False, False, 6, 1),
            Token('Float Literal', '.000005432524234124', False, False, 7, 1),
            Token('Float Literal', '4312431.43124214', False, False, 8, 1),
            Token('Float Literal', '1.2', False, False, 9, 1),
            Token('Float Literal', '1.0', False, False, 10, 1)
        ])
Example #4
    def _next_tok(self):
        if self.pos + 1 < len(self.list):
            self.last_token = self.list[self.pos + 1]
            return self.list[self.pos + 1]

        tok = Token(Token.EOL, "")
        tok.pos = self.pos
        return tok
Example #5
def test_basic_expression():
    token_list: TokenList = deque([
        Token(TokenType.LEFT_PARENTHESIS, '('),
        Token(TokenType.OPERATOR, '+'),
        Token(TokenType.NUMBER, '1'),
        Token(TokenType.VARIABLE, '$var'),
        Token(TokenType.RIGHT_PARENTHESIS, ')'),
    ])
    parse(token_list)
Example #6
    def next_tok(self):
        self.pos += 1
        if self.pos < len(self.list):
            self.last_token = self.list[self.pos]
            return self.list[self.pos]

        tok = Token(Token.EOL, "")
        tok.pos = self.pos
        return tok
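Example #4's _next_tok peeks one token ahead while next_tok above consumes it. A small helper built on the pair might look like this; expect and the .type attribute are illustrative assumptions, not part of the original code.

    def expect(self, token_type):
        # Hypothetical helper (not in the original): peek with _next_tok and only
        # consume with next_tok when the upcoming token has the expected type.
        # The .type attribute is an assumption about this Token class.
        if self._next_tok().type != token_type:
            raise SyntaxError(f"unexpected token at position {self.pos + 1}")
        return self.next_tok()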
Example #7
def test_incomplete_expression():
    token_list: TokenList = deque([
        Token(TokenType.LEFT_PARENTHESIS, '('),
        Token(TokenType.OPERATOR, '+'),
        Token(TokenType.NUMBER, '1'),
        Token(TokenType.VARIABLE, '$var'),
    ])
    with pytest.raises(NoMoreTokensException):
        parse(token_list)
Example #8
def test_bad_expression():
    token_list: TokenList = deque([
        Token(TokenType.LEFT_PARENTHESIS, '('),
        Token(TokenType.NUMBER, '1'),
        Token(TokenType.NUMBER, '1'),
        Token(TokenType.VARIABLE, '$var'),
        Token(TokenType.RIGHT_PARENTHESIS, ')'),
    ])
    with pytest.raises(IncorrectTokenException):
        parse(token_list)
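parse, NoMoreTokensException, and IncorrectTokenException come from the module under test and are not shown here. The following is only a sketch of a parser consistent with the three tests above; the .type attribute on Token is an assumption.

def parse(tokens: TokenList) -> None:
    # Sketch only: a prefix expression is '(' OPERATOR operand* ')'.
    def next_token() -> Token:
        if not tokens:
            raise NoMoreTokensException()
        return tokens.popleft()

    def expect(expected: TokenType) -> Token:
        token = next_token()
        if token.type != expected:
            raise IncorrectTokenException(token)
        return token

    expect(TokenType.LEFT_PARENTHESIS)
    expect(TokenType.OPERATOR)
    while tokens and tokens[0].type != TokenType.RIGHT_PARENTHESIS:
        next_token()  # operands: numbers or variables
    expect(TokenType.RIGHT_PARENTHESIS)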
Example #9
 def lambdef(self):
     #  'lambda' [varargslist] ':' test
     self.eat(LAMBDA)
     if self.current_token.type == COLON:
         self.eat(COLON)
         node = self.test()
         return UnaryOp(Token(LAMBDA, LAMBDA), node)
     node = self.varargslist()
     self.eat(COLON)
     return BinOp(node, Token(LAMBDA, LAMBDA), self.test())
Example #10
def test_nested_expression():
    input_string: str = "(* (/ 4 2) 1)"
    token_list: List[Token] = [
        Token(TokenType.LEFT_PARENTHESIS, '('),
        Token(TokenType.OPERATOR, '*'),
        Token(TokenType.LEFT_PARENTHESIS, '('),
        Token(TokenType.OPERATOR, '/'),
        Token(TokenType.NUMBER, '4'),
        Token(TokenType.NUMBER, '2'),
        Token(TokenType.RIGHT_PARENTHESIS, ')'),
        Token(TokenType.NUMBER, '1'),
        Token(TokenType.RIGHT_PARENTHESIS, ')'),
    ]
    assert_tokenize(token_list, input_string)
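assert_tokenize is not shown in these examples. A plausible implementation, assuming the project exposes a tokenize entry point and Token defines equality, would be:

def assert_tokenize(expected_tokens: List[Token], input_string: str) -> None:
    # Hypothetical helper: 'tokenize' stands in for whatever lexer entry point
    # the project actually provides.
    assert list(tokenize(input_string)) == expected_tokens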
Example #11
 def lambdef_nocond(self):
     # 'lambda' [varargslist] ':' test_nocond
     self.eat(LAMBDA)
     node = None
     if self.current_token.type != COLON:
         node = self.varargslist()
     self.eat(COLON)
     stmt = self.test_nocond()
     if node:
         return BinOp(node, Token(LAMBDA, LAMBDA), stmt)
     return UnaryOp(Token(LAMBDA, LAMBDA), stmt)
Example #12
    def __init__(self, lexer: Lexer) -> None:
        self.errors: List[str] = []
        self._lexer = lexer

        # Acts like _position and _peek_char within the lexer, but instead of
        # pointing to characters in the source they point to current and next
        # tokens. We need _current_token, the current token under examination,
        # to decide what to do next, and we need _peek_token to guide the
        # decision in case _current_token doesn't provide us with enough
        # information, e.g., with source "5;", _current_token is Int and we
        # require _peek_token to decide if we're at the end of the line or at
        # the start of an arithmetic expression. This implements a parser with
        # one token lookahead.
        #
        # We initialize both with a dummy token rather than define their type as
        # Optional[Token] which would cause lots of unnecessary changes to
        # satisfy the type checker. In practice, these tokens are only None
        # between declaration and the first calls to _next_token().
        self._current_token: Token = Token(TokenType.ILLEGAL, "")
        self._peek_token: Token = Token(TokenType.ILLEGAL, "")

        # Functions based on token type called as part of Pratt parsing.
        self._prefix_parse_fns: Dict[TokenType, PrefixParseFn] = {}
        self._infix_parse_fns: Dict[TokenType, InfixParseFn] = {}

        # Read two tokens so _current_token and _peek_token are both set.
        self._next_token()
        self._next_token()

        self._register_prefix(TokenType.IDENT, self._parse_identifier)
        self._register_prefix(TokenType.INT, self._parse_integer_literal)
        self._register_prefix(TokenType.BANG, self._parse_prefix_expression)
        self._register_prefix(TokenType.MINUS, self._parse_prefix_expression)
        self._register_prefix(TokenType.TRUE, self._parse_boolean)
        self._register_prefix(TokenType.FALSE, self._parse_boolean)
        self._register_prefix(TokenType.LPAREN, self._parse_group_expression)
        self._register_prefix(TokenType.IF, self._parse_if_expression)
        self._register_prefix(TokenType.FUNCTION, self._parse_function_literal)
        self._register_prefix(TokenType.STRING, self._parse_string_literal)
        self._register_prefix(TokenType.LBRACKET, self._parse_array_literal)
        self._register_prefix(TokenType.LBRACE, self._parse_hash_literal)

        self._register_infix(TokenType.PLUS, self._parse_infix_expression)
        self._register_infix(TokenType.MINUS, self._parse_infix_expression)
        self._register_infix(TokenType.SLASH, self._parse_infix_expression)
        self._register_infix(TokenType.ASTERISK, self._parse_infix_expression)
        self._register_infix(TokenType.EQ, self._parse_infix_expression)
        self._register_infix(TokenType.NOT_EQ, self._parse_infix_expression)
        self._register_infix(TokenType.LT, self._parse_infix_expression)
        self._register_infix(TokenType.GT, self._parse_infix_expression)
        self._register_infix(TokenType.LPAREN, self._parse_call_expression)
        self._register_infix(TokenType.LBRACKET, self._parse_index_expression)
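The helpers the constructor calls are not included in this excerpt. The bodies below are assumptions consistent with the comments above; the lexer is assumed to expose a next_token() method.

    def _register_prefix(self, token_type: TokenType, fn: PrefixParseFn) -> None:
        self._prefix_parse_fns[token_type] = fn

    def _register_infix(self, token_type: TokenType, fn: InfixParseFn) -> None:
        self._infix_parse_fns[token_type] = fn

    def _next_token(self) -> None:
        # Slide the one-token lookahead window forward.
        self._current_token = self._peek_token
        self._peek_token = self._lexer.next_token()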
Example #13
    def anonFunctionExpr(self):  # anon function declaration expression
        # TODO: the code below still needs error checking of the user program
        if (self.peek().tipe == TokenType.WALL):
            function_token = self.advance()  # consume the wall token
            #this is an anonymous function declaration
            function_identifier_token = Token(TokenType.IDENTIFIER,
                                              lexeme='',
                                              literal='',
                                              line=function_token.line)
            function_identifier_token.literal = f"@{hash(function_identifier_token)}"  # generate a unique id for the anonymous function
            function_identifier_expression = LiteralExpression(
                function_identifier_token)

            params_list = []
            if (self.peek().tipe == TokenType.IDENTIFIER):
                # first parameter; the guard lets a zero-parameter function fall through
                arg = self.advance()
                params_list.append(LiteralExpression(arg))

            while (self.peek().tipe != TokenType.WALL):
                if (self.peek().tipe == TokenType.EOF):
                    raise Exception(
                        f"parenthesis is not terminated by matching parenthesis at line # {left_paren.line}"
                    )

                if (self.peek().tipe == TokenType.COMMA):
                    self.advance()  # consume comma
                    if (self.peek().tipe == TokenType.IDENTIFIER):
                        arg = self.advance()
                        params_list.append(LiteralExpression(arg))
                    else:
                        raise Exception(
                            'function parameter must be identifier')
                else:
                    raise Exception(
                        f"function argument must be separated by comma at line # {self.peek().line}"
                    )

            self.advance()  # consume the closing wall token

            block_statement = self.blockStatement()

            # self.AST.append(FunctionStatement(function_identifier_token, params_list, block_statement))
            #side effect
            function_statement = FunctionStatement(
                function_identifier_expression, params_list, block_statement)

            self.getCurrentAST().append(function_statement)
            #end side effect

            return function_identifier_expression
Example #14
    def test_mixed(self):
        s = '''
        {
            "name": "Brent Pappas",
            "age": 22,
            "interests": ["juggling", "programming", "reading"]
        }

        '''
        self.assertEqual(lex(s), [
            Token(Tag.LEFT_BRACE),
            ObjectKey("name"),
            Literal("Brent Pappas"),
            Token(Tag.COMMA),
            ObjectKey("age"),
            Number(22),
            Token(Tag.COMMA),
            ObjectKey("interests"),
            Token(Tag.LEFT_BRACKET),
            Literal("juggling"),
            Token(Tag.COMMA),
            Literal("programming"),
            Token(Tag.COMMA),
            Literal("reading"),
            Token(Tag.RIGHT_BRACKET),
            Token(Tag.RIGHT_BRACE)
        ])
Example #15
def fixup_token(tok: Token) -> None:
    """Adjust the token to make it easier to work with.

        1. If the token is a keyword that is aliased to another one,
           change it to be the real one.
        2. If it's an 'INT', store the integer value in the token.
        3. If it's an 'STR', strip off the quotes at either end of the string.
    """
    if tok.type in aliases:
        tok.type = aliases[tok.type]
    if tok.type == 'INT':
        tok.val_int = int(tok.val)
    if tok.type == 'STR':
        # Strip off the quotes
        tok.val = tok.val[1:-1]
Example #16
 def decorator(self):
     # '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
     self.eat(DEC)
     name = self.dotted_name()
     node = None
     if self.current_token.type == LB:
         self.eat(LB)
         if self.current_token.type != RB:
             node = self.arglist()
         self.eat(RB)
     self.eat(NEWLINE)
     if node:
         return UnaryOp(Token(DEC, DEC), BinOp(name, Token('()', '()'),
                                               node))
     return UnaryOp(Token(DEC, DEC), name)
Example #17
    def test_parse_toy_grammar(self):
        expansions = parse_grammar([
            'x -> y | "A" "B"',
            'y -> "C"'
        ])

        self.assertEqual({
            NonTerminal("x"): [
                Expansion([NonTerminal("y")]),
                Expansion([Terminal(Token("A")), Terminal(Token("B"))])
            ],
            NonTerminal("y"): [
                Expansion([Terminal(Token("C"))])
            ]
        }, expansions)
Example #18
 def eat(self):
     if self._tok_idx >= len(self.tokens):
         return Token.fromEOF()
     else:
         tok = self.tokens[self._tok_idx]
         self._tok_idx += 1
         return tok
Example #19
 def with_item(self):
     # test ['as' expr]
     node = self.test()
     if self.current_token.type == AS:
         self.eat(AS)
         return BinOp(node, Token(AS, AS), self.expr())
     return node
Example #20
    def test_bool_none_literals_whitespace(self):
        tokens = self.token_list('''
            True

                False

            None       True           None
            ''')

        self.assertEqual(tokens, [
            Token('Bool Literal', 'True', False, False, 1, 1),
            Token('Bool Literal', 'False', False, False, 3, 5),
            Token('None Literal', 'None', False, False, 5, 1),
            Token('Bool Literal', 'True', False, False, 5, 12),
            Token('None Literal', 'None', False, False, 5, 27),
        ])
Example #21
    def test_zeros_signed_unsigned_single_multiple(self):
        tokens = self.token_list('''
            0
            +0
            -0
            00
            0000000000000
            ''')

        self.assertEqual(tokens, [
            Token('Single Zero Literal', '0', False, False, 1, 1),
            Token('Single Zero Literal', '+0', False, False, 2, 1),
            Token('Single Zero Literal', '-0', False, False, 3, 1),
            Token('Zero Literal', '00', False, False, 4, 1),
            Token('Zero Literal', '0000000000000', False, False, 5, 1)
        ])
Example #22
    def __init__(self, token=None, token_type=None, token_text=None, token_name=None, artificial=False, name=None):
        self.artificial = artificial
        self.name = name
        self.token = token  # From which token did we create node?
        self.children = []  # normalized list of AST nodes

        if token_type:
            self.token = Token(token_type, token_text, token_name)
Example #23
    def test_if_else_whitespace_error(self):
        tokens = self.token_list('''
            if else if

              if ? else
            !
            ''')

        self.assertEqual(tokens, [
            # type, value, is_recovered, is_error, line, column
            Token('Decisional Keyword', 'if', False, False, 1, 1),
            Token('Decisional Keyword', 'else', False, False, 1, 4),
            Token('Decisional Keyword', 'if', False, False, 1, 9),
            Token('Decisional Keyword', 'if', False, False, 3, 3),
            Token('*Error*', '? els...', False, True, 3, 6)
        ])
Example #24
def test_white_spaces():
    input_string: str = "    $var1       "
    token_list: List[Token] = [
        Token(TokenType.VARIABLE, '$var1'),
    ]
    assert_tokenize(token_list, input_string)
Example #25
    def format_file_(self, token_list):
        # deprecated
        previous_token = None
        formatted_token_list = []
        whitespace_stack = []
        for i in range(len(token_list)):
            assert (type(token_list[i]) == Token)
            if token_list[i].is_whitespace():
                whitespace_stack.append(token_list[i])
                if token_list[i].content() == '\n':
                    formatted_token_list.append(
                        Token("\n", TokenType.whitespace))
                continue
            if token_list[i].content() == '(':
                if self.space_before_parentheses(previous_token):
                    formatted_token_list.append(
                        Token(" ", TokenType.whitespace))
                    formatted_token_list.append(Token("(",
                                                      TokenType.separator))
            elif token_list[i].is_operator():
                if self.space_around_operators(token_list[i]):
                    formatted_token_list.append(
                        Token(" ", TokenType.whitespace))
                    formatted_token_list.append(
                        Token(token_list[i].content(), TokenType.separator))
                    formatted_token_list.append(
                        Token(" ", TokenType.whitespace))
            else:
                formatted_token_list.append(
                    Token(token_list[i].content(), token_list[i].get_type()))

            previous_token = token_list[i]

        return formatted_token_list
Example #26
 def _read(self):
     """
 (Somente) lê o próximo token da lista
 """
     if self.tokens:
         self._current_line = self.tokens[0].line
         return self.tokens[0]
     else:
         return Token(False, False, False, False)
Example #27
 def Next(self):
     """Move to the next token."""
     try:
         t = self.lexer.__next__()
         if t.kind in ['OPER', 'SYNT']:
             t.kind = t.lexem
     except StopIteration:
         t = Token('eof', 'eof')
     self.token = t
Example #28
 def annassign(self):
     # ':' test ['=' test]
     self.eat(COLON)
     node = self.test()
     if self.current_token.type == ASSIGN:
         self.eat(ASSIGN)
         value = self.test()
         return BinOp(node, Token(ASSIGN, ASSIGN), value)
     return node
Example #29
 def vfpdef(self):
     #  NAME [':' test]
     name = self.current_token
     self.eat(name.type)
     if self.current_token.type == COLON:
         self.eat(COLON)
         node = self.test()
         return BinOp(name, Token(COLON, COLON), node)
     return name
Example #30
 def dotted_name(self):
     #  dotted_name: NAME ('.' NAME)*
     node = self.current_token
     self.eat(ID)
     while self.current_token.type == DOT:
         self.eat(DOT)
         node = BinOp(node, Token(DOT, DOT), self.current_token)
         self.eat(ID)
     return node
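For example, applied to the token stream for a.b.c this method returns a left-nested tree, BinOp(BinOp(a, '.', b), '.', c), where a, b, and c stand for the corresponding ID tokens.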