def __init__(self, lexer: Lexer):
    self.lexer = lexer
    self.current_whitespace: List[Token] = []
    self.current_token: Token = Token(token.ILLEGAL, "")
    self.peek_whitespace: List[Token] = []
    self.peek_token: Token = Token(token.ILLEGAL, "")
    # Fill current and peek values
    self.next_token()
    self.next_token()
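The two next_token() calls in the constructor refer to the parser's own token-advancing method, which isn't shown in this section. A minimal sketch of what it plausibly looks like, inferred from the field names above (the whitespace-buffering behavior and the token_type attribute name are assumptions, not confirmed by the source):

def next_token(self) -> None:
    # Shift the peeked token (and the whitespace before it) into "current".
    self.current_whitespace = self.peek_whitespace
    self.current_token = self.peek_token
    # Buffer any whitespace tokens, then store the next real token as "peek".
    self.peek_whitespace = []
    tok = self.lexer.next_token()
    while tok.token_type == token.WHITESPACE:  # token_type is an assumed attribute name
        self.peek_whitespace.append(tok)
        tok = self.lexer.next_token()
    self.peek_token = tok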
def test_identifier(self):
    query = """some_schema."some_table".some_field"""
    lexer = Lexer(query)
    tokens = read_all_tokens(lexer)
    self.assertEqual(
        tokens,
        [
            Token(token.IDENTIFIER, 'some_schema."some_table".some_field'),
            Token(token.EOF, ""),
        ],
    )
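The tests in this section all drain the lexer through a read_all_tokens helper that isn't shown here. A minimal sketch consistent with the expected outputs, which include the trailing EOF token (the token_type attribute name is an assumption):

def read_all_tokens(lexer: Lexer) -> List[Token]:
    # Pull tokens until EOF, keeping the EOF token itself so tests can assert on it.
    tokens = []
    while True:
        tok = lexer.next_token()
        tokens.append(tok)
        if tok.token_type == token.EOF:  # token_type is an assumed attribute name
            return tokens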
def test_simple(self):
    query = "+()"
    lexer = Lexer(query)
    tokens = read_all_tokens(lexer)
    self.assertEqual(
        tokens,
        [
            Token(token.PLUS, "+"),
            Token(token.LPAREN, "("),
            Token(token.RPAREN, ")"),
            Token(token.EOF, ""),
        ],
    )
def test_keyword_vs_function_identifier(self):
    # LEFT is a keyword as well as a function
    query = "LEFT JOIN"
    lexer = Lexer(query)
    tokens = read_all_tokens(lexer)
    self.assertEqual(
        tokens,
        [
            Token(token.LEFT, "LEFT"),
            Token(token.WHITESPACE, " "),
            Token(token.JOIN, "JOIN"),
            Token(token.EOF, ""),
        ],
    )

    query = "LEFT()"
    lexer = Lexer(query)
    tokens = read_all_tokens(lexer)
    self.assertEqual(
        tokens,
        [
            Token(token.IDENTIFIER, "LEFT"),
            Token(token.LPAREN, "("),
            Token(token.RPAREN, ")"),
            Token(token.EOF, ""),
        ],
    )
def test_is_not_null(self):
    query = "x IS NOT NULL"
    lexer = Lexer(query)
    tokens = read_all_tokens(lexer)
    self.assertEqual(
        tokens,
        [
            Token(token.IDENTIFIER, "x"),
            Token(token.WHITESPACE, " "),
            Token(token.IS, "IS"),
            Token(token.WHITESPACE, " "),
            Token(token.NOT, "NOT"),
            Token(token.WHITESPACE, " "),
            Token(token.NULL, "NULL"),
            Token(token.EOF, ""),
        ],
    )
def test_words(self):
    query = '''select 'hello' AS world, "select"'''
    lexer = Lexer(query)
    tokens = read_all_tokens(lexer)
    self.assertEqual(
        tokens,
        [
            Token(token.SELECT, "select"),
            Token(token.WHITESPACE, " "),
            Token(token.STRING, "'hello'"),
            Token(token.WHITESPACE, " "),
            Token(token.AS, "AS"),
            Token(token.WHITESPACE, " "),
            Token(token.IDENTIFIER, "world"),
            Token(token.COMMA, ","),
            Token(token.WHITESPACE, " "),
            Token(token.IDENTIFIER, '"select"'),
            Token(token.EOF, ""),
        ],
    )
def test_query(self):
    query = """
SELECT u.id, u.first_name || ' ' || u.last_name AS user_name
FROM users u
"""
    lexer = Lexer(query)
    tokens = read_all_tokens(lexer)
    self.assertEqual(
        tokens,
        [
            Token(token.WHITESPACE, "\n"),
            Token(token.SELECT, "SELECT"),
            Token(token.WHITESPACE, " "),
            Token(token.IDENTIFIER, "u.id"),
            Token(token.COMMA, ","),
            Token(token.WHITESPACE, " "),
            Token(token.IDENTIFIER, "u.first_name"),
            Token(token.WHITESPACE, " "),
            Token(token.PIPEPIPE, "||"),
            Token(token.WHITESPACE, " "),
            Token(token.STRING, "' '"),
            Token(token.WHITESPACE, " "),
            Token(token.PIPEPIPE, "||"),
            Token(token.WHITESPACE, " "),
            Token(token.IDENTIFIER, "u.last_name"),
            Token(token.WHITESPACE, " "),
            Token(token.AS, "AS"),
            Token(token.WHITESPACE, " "),
            Token(token.IDENTIFIER, "user_name"),
            Token(token.WHITESPACE, "\n"),
            Token(token.FROM, "FROM"),
            Token(token.WHITESPACE, " "),
            Token(token.IDENTIFIER, "users"),
            Token(token.WHITESPACE, " "),
            Token(token.IDENTIFIER, "u"),
            Token(token.WHITESPACE, "\n"),
            Token(token.EOF, ""),
        ],
    )
def next_token(self) -> Token:
    c = self.char
    t = Token(token.ILLEGAL, c)
    # Multi-character tokens consume their own input and return early;
    # single-character tokens fall through to the read_char() at the end.
    if is_whitespace(c):
        return Token(token.WHITESPACE, self.read_whitespace())
    elif c == '"':
        return Token(token.IDENTIFIER, self.read_identifier())
    elif c == "'":
        return Token(token.STRING, self.read_string())
    elif c == "+":
        t = Token(token.PLUS, c)
    elif c == "-":
        t = Token(token.SUBTRACT, c)
    elif c == "*":
        t = Token(token.ASTERISK, c)
    elif c == "/":
        t = Token(token.SLASH, c)
    elif c == "|":
        if self.peek_char() == "|":
            self.read_char()
            t = Token(token.PIPEPIPE, c + self.char)
        else:
            t = Token(token.PIPE, c)
    elif c == "=":
        t = Token(token.EQUAL, c)
    elif c == "!":
        if self.peek_char() == "=":
            self.read_char()
            t = Token(token.BANGEQUAL, c + self.char)
        else:
            t = Token(token.BANG, c)
    elif c == "<":
        pc = self.peek_char()
        if pc == "=":
            self.read_char()
            t = Token(token.LTEQUAL, c + self.char)
        elif pc == ">":
            self.read_char()
            t = Token(token.LTGT, c + self.char)
        else:
            t = Token(token.LT, c)
    elif c == ">":
        if self.peek_char() == "=":
            self.read_char()
            t = Token(token.GTEQUAL, c + self.char)
        else:
            t = Token(token.GT, c)
    elif c == ",":
        t = Token(token.COMMA, c)
    elif c == ".":
        t = Token(token.FULLSTOP, c)
    elif c == ":":
        if self.peek_char() == ":":
            self.read_char()
            t = Token(token.COLONCOLON, c + self.char)
        else:
            t = Token(token.COLON, c)
    elif c == "(":
        t = Token(token.LPAREN, c)
    elif c == ")":
        t = Token(token.RPAREN, c)
    elif c == "":
        t = Token(token.EOF, c)
    else:
        if is_letter(c) or c == "_":
            identifier = self.read_identifier()
            identifier_upper = identifier.upper()
            # A keyword immediately followed by "(" is a function call,
            # so treat it as an identifier (e.g. LEFT(...) vs LEFT JOIN).
            if identifier_upper in keyword_tokens and self.char != "(":
                return Token(keyword_tokens[identifier_upper], identifier)
            else:
                return Token(token.IDENTIFIER, identifier)
        elif is_number(c):
            integer = self.read_integer()
            return Token(token.INTEGER, integer)
    self.read_char()
    return t
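next_token leans on a few character-class helpers and a keyword table defined elsewhere. Plausible sketches under stated assumptions: the exact definitions are not shown in this section, and the keyword list below is abbreviated to the keywords the tests exercise.

def is_whitespace(c: str) -> bool:
    # The tests only exercise " " and "\n"; tab and carriage return are assumed.
    return c in (" ", "\t", "\n", "\r")

def is_letter(c: str) -> bool:
    return c.isalpha()

def is_number(c: str) -> bool:
    return c.isdigit()

# Keyword lookup keyed on the upper-cased identifier; abbreviated here.
keyword_tokens = {
    "SELECT": token.SELECT,
    "FROM": token.FROM,
    "AS": token.AS,
    "LEFT": token.LEFT,
    "JOIN": token.JOIN,
    "IS": token.IS,
    "NOT": token.NOT,
    "NULL": token.NULL,
    # ...
}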