Example #1
    def __init__(self, lexer: Lexer):
        self.lexer = lexer
        self.current_whitespace: List[Token] = []
        self.current_token: Token = Token(token.ILLEGAL, "")
        self.peek_whitespace: List[Token] = []
        self.peek_token: Token = Token(token.ILLEGAL, "")
        # Fill current and peek values
        self.next_token()
        self.next_token()
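This constructor primes a one-token lookahead: calling `next_token()` twice fills both `current_token` and `peek_token` before parsing starts. A minimal sketch of a matching parser-side `next_token`, assuming `Token` exposes its type as `token_type` and that whitespace is buffered in the `*_whitespace` fields rather than handed to the parser (both assumptions suggested by the fields above, not confirmed by the project):

    def next_token(self) -> None:
        # Shift the peek slots into the current slots, then read
        # ahead, collecting WHITESPACE tokens separately so the
        # parser only has to reason about significant tokens.
        # (Assumes Token exposes its type as .token_type.)
        self.current_whitespace = self.peek_whitespace
        self.current_token = self.peek_token
        self.peek_whitespace = []
        tok = self.lexer.next_token()
        while tok.token_type == token.WHITESPACE:
            self.peek_whitespace.append(tok)
            tok = self.lexer.next_token()
        self.peek_token = tok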
Example #2
    def test_identifier(self):
        query = """some_schema."some_table".some_field"""
        lexer = Lexer(query)
        tokens = read_all_tokens(lexer)
        self.assertEqual(
            tokens,
            [
                Token(token.IDENTIFIER, 'some_schema."some_table".some_field'),
                Token(token.EOF, ""),
            ],
        )
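Every test here drains the lexer through a `read_all_tokens` helper that is not shown in the listing. A plausible definition (a sketch, not the project's actual helper):

    from typing import List

    def read_all_tokens(lexer: Lexer) -> List[Token]:
        # Pull tokens until EOF, keeping the EOF token itself so
        # the tests can assert on the complete stream.
        tokens = []
        while True:
            tok = lexer.next_token()
            tokens.append(tok)
            if tok == Token(token.EOF, ""):
                break
        return tokens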
Example #3
    def test_simple(self):
        query = "+()"
        lexer = Lexer(query)
        tokens = read_all_tokens(lexer)
        self.assertEqual(
            tokens,
            [
                Token(token.PLUS, "+"),
                Token(token.LPAREN, "("),
                Token(token.RPAREN, ")"),
                Token(token.EOF, ""),
            ],
        )
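For `assertEqual` to compare token lists by value, `Token` must define equality. A frozen dataclass is the obvious shape (a sketch; the field names `token_type` and `literal`, and the string-typed constants, are assumptions):

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class Token:
        token_type: str  # one of the constants in the token module
        literal: str     # the exact source text of the token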
Example #4
    def test_keyword_vs_function_identifier(self):
        # LEFT is a keyword as well as a function
        query = "LEFT JOIN"
        lexer = Lexer(query)
        tokens = read_all_tokens(lexer)
        self.assertEqual(
            tokens,
            [
                Token(token.LEFT, "LEFT"),
                Token(token.WHITESPACE, " "),
                Token(token.JOIN, "JOIN"),
                Token(token.EOF, ""),
            ],
        )

        query = "LEFT()"
        lexer = Lexer(query)
        tokens = read_all_tokens(lexer)
        self.assertEqual(
            tokens,
            [
                Token(token.IDENTIFIER, "LEFT"),
                Token(token.LPAREN, "("),
                Token(token.RPAREN, ")"),
                Token(token.EOF, ""),
            ],
        )
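The disambiguation happens in `next_token` (Example #8): after reading an identifier, the lexer only classifies it as a keyword when the upper-cased text is in `keyword_tokens` and the character that follows is not `(`. A plausible shape for that table, limited to the keywords seen in these tests (a sketch; the real table will be larger):

    keyword_tokens = {
        "SELECT": token.SELECT,
        "FROM": token.FROM,
        "AS": token.AS,
        "IS": token.IS,
        "NOT": token.NOT,
        "NULL": token.NULL,
        "LEFT": token.LEFT,
        "JOIN": token.JOIN,
    }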
Example #5
    def test_is_not_null(self):
        query = "x IS NOT NULL"
        lexer = Lexer(query)
        tokens = read_all_tokens(lexer)
        self.assertEqual(
            tokens,
            [
                Token(token.IDENTIFIER, "x"),
                Token(token.WHITESPACE, " "),
                Token(token.IS, "IS"),
                Token(token.WHITESPACE, " "),
                Token(token.NOT, "NOT"),
                Token(token.WHITESPACE, " "),
                Token(token.NULL, "NULL"),
                Token(token.EOF, ""),
            ],
        )
Example #6
    def test_words(self):
        query = '''select 'hello' AS world, "select"'''
        lexer = Lexer(query)
        tokens = read_all_tokens(lexer)
        self.assertEqual(
            tokens,
            [
                Token(token.SELECT, "select"),
                Token(token.WHITESPACE, " "),
                Token(token.STRING, "'hello'"),
                Token(token.WHITESPACE, " "),
                Token(token.AS, "AS"),
                Token(token.WHITESPACE, " "),
                Token(token.IDENTIFIER, "world"),
                Token(token.COMMA, ","),
                Token(token.WHITESPACE, " "),
                Token(token.IDENTIFIER, '"select"'),
                Token(token.EOF, ""),
            ],
        )
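Note that STRING tokens keep their surrounding quotes ('hello' lexes to "'hello'"). A minimal sketch of a `read_string` that behaves this way, assuming the lexer stores its source as `self.input` and a cursor as `self.position` (both assumptions):

    def read_string(self) -> str:
        # Consume a single-quoted string, including both quote
        # characters, so the literal matches the source text.
        # (No escape handling in this sketch.)
        start = self.position
        self.read_char()  # step past the opening quote
        while self.char != "'" and self.char != "":
            self.read_char()
        self.read_char()  # step past the closing quote
        return self.input[start:self.position]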
Example #7
    def test_query(self):
        query = """
SELECT u.id, u.first_name || ' ' || u.last_name AS user_name
FROM users u
"""
        lexer = Lexer(query)
        tokens = read_all_tokens(lexer)
        self.assertEqual(
            tokens,
            [
                Token(token.WHITESPACE, "\n"),
                Token(token.SELECT, "SELECT"),
                Token(token.WHITESPACE, " "),
                Token(token.IDENTIFIER, "u.id"),
                Token(token.COMMA, ","),
                Token(token.WHITESPACE, " "),
                Token(token.IDENTIFIER, "u.first_name"),
                Token(token.WHITESPACE, " "),
                Token(token.PIPEPIPE, "||"),
                Token(token.WHITESPACE, " "),
                Token(token.STRING, "' '"),
                Token(token.WHITESPACE, " "),
                Token(token.PIPEPIPE, "||"),
                Token(token.WHITESPACE, " "),
                Token(token.IDENTIFIER, "u.last_name"),
                Token(token.WHITESPACE, " "),
                Token(token.AS, "AS"),
                Token(token.WHITESPACE, " "),
                Token(token.IDENTIFIER, "user_name"),
                Token(token.WHITESPACE, "\n"),
                Token(token.FROM, "FROM"),
                Token(token.WHITESPACE, " "),
                Token(token.IDENTIFIER, "users"),
                Token(token.WHITESPACE, " "),
                Token(token.IDENTIFIER, "u"),
                Token(token.WHITESPACE, "\n"),
                Token(token.EOF, ""),
            ],
        )
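Qualified names such as `u.first_name` come out as single IDENTIFIER tokens, as does `some_schema."some_table".some_field` in Example #2, so `read_identifier` must consume dots and double-quoted segments as well as plain word characters. A sketch under the same `self.input`/`self.position` assumptions as above:

    def read_identifier(self) -> str:
        # Consume word characters, dots, and double-quoted segments
        # so a qualified name forms a single IDENTIFIER token.
        # (No escape handling; a sketch, not the project's code.)
        start = self.position
        while True:
            if self.char == '"':
                self.read_char()  # opening quote
                while self.char != '"' and self.char != "":
                    self.read_char()
                self.read_char()  # closing quote
            elif is_letter(self.char) or is_number(self.char) or self.char in ("_", "."):
                self.read_char()
            else:
                break
        return self.input[start:self.position]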
Example #8
File: lexer.py Project: asavoy/husky-whale
    def next_token(self) -> Token:
        c = self.char
        t = Token(token.ILLEGAL, c)

        if is_whitespace(c):
            return Token(token.WHITESPACE, self.read_whitespace())
        elif c == '"':
            return Token(token.IDENTIFIER, self.read_identifier())
        elif c == "'":
            return Token(token.STRING, self.read_string())
        elif c == "+":
            t = Token(token.PLUS, c)
        elif c == "-":
            t = Token(token.SUBTRACT, c)
        elif c == "*":
            t = Token(token.ASTERISK, c)
        elif c == "/":
            t = Token(token.SLASH, c)
        elif c == "|":
            if self.peek_char() == "|":
                self.read_char()
                t = Token(token.PIPEPIPE, c + self.char)
            else:
                t = Token(token.PIPE, c)
        elif c == "=":
            t = Token(token.EQUAL, c)
        elif c == "!":
            if self.peek_char() == "=":
                self.read_char()
                t = Token(token.BANGEQUAL, c + self.char)
            else:
                t = Token(token.BANG, c)
        elif c == "<":
            pc = self.peek_char()
            if pc == "=":
                self.read_char()
                t = Token(token.LTEQUAL, c + self.char)
            elif pc == ">":
                self.read_char()
                t = Token(token.LTGT, c + self.char)
            else:
                t = Token(token.LT, c)
        elif c == ">":
            if self.peek_char() == "=":
                self.read_char()
                t = Token(token.GTEQUAL, c + self.char)
            else:
                t = Token(token.GT, c)
        elif c == ",":
            t = Token(token.COMMA, c)
        elif c == ".":
            t = Token(token.FULLSTOP, c)
        elif c == ":":
            if self.peek_char() == ":":
                self.read_char()
                t = Token(token.COLONCOLON, c + self.char)
            else:
                t = Token(token.COLON, c)
        elif c == "(":
            t = Token(token.LPAREN, c)
        elif c == ")":
            t = Token(token.RPAREN, c)
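        # An empty string from read_char marks end of input.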
        elif c == "":
            t = Token(token.EOF, c)
        else:
            if is_letter(c) or c == "_":
                identifier = self.read_identifier()
                identifier_upper = identifier.upper()
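                # A keyword followed directly by "(" is being used as a
                # function name (see Example #4: LEFT JOIN vs LEFT()).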
                if identifier_upper in keyword_tokens and self.char != "(":
                    return Token(keyword_tokens[identifier_upper], identifier)
                else:
                    return Token(token.IDENTIFIER, identifier)
            elif is_number(c):
                integer = self.read_integer()
                return Token(token.INTEGER, integer)
            else:
                pass
        self.read_char()
        return t
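`next_token` leans on a few low-level helpers that are not in the listing. Plausible definitions, under the same assumptions as above (`self.input` holds the query, `self.position` is a cursor, and `self.char == ""` signals end of input):

    # Methods on Lexer (sketch):
    def read_char(self) -> None:
        # Advance the cursor; past the end, char becomes "" (EOF).
        self.position += 1
        self.char = self.input[self.position] if self.position < len(self.input) else ""

    def peek_char(self) -> str:
        # Look one character ahead without consuming anything.
        nxt = self.position + 1
        return self.input[nxt] if nxt < len(self.input) else ""

    # Module-level character predicates (sketch):
    def is_whitespace(c: str) -> bool:
        return c in (" ", "\t", "\n", "\r")

    def is_letter(c: str) -> bool:
        return c.isalpha()

    def is_number(c: str) -> bool:
        return c.isdigit()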