def tokens(self) -> List[SQLToken]:
    """
    Tokenizes the query
    """
    if self._tokens is not None:
        return self._tokens

    parsed = sqlparse.parse(self._query)
    tokens = []
    # handle empty queries (#12)
    if not parsed:
        return tokens
    self._get_sqlparse_tokens(parsed)
    last_keyword = None
    combine_flag = False
    for index, tok in enumerate(self.non_empty_tokens):
        # combine dot-separated identifiers into a single qualified name
        if self._is_token_part_of_complex_identifier(token=tok, index=index):
            combine_flag = True
            continue
        token = SQLToken(
            tok=tok,
            index=index,
            subquery_level=self._subquery_level,
            last_keyword=last_keyword,
        )
        if combine_flag:
            self._combine_qualified_names(index=index, token=token)
            combine_flag = False

        # link consecutive tokens into a doubly linked list, using
        # EmptyToken as the sentinel before the first token
        previous_token = tokens[-1] if index > 0 else EmptyToken
        token.previous_token = previous_token
        previous_token.next_token = token if index > 0 else None

        if token.is_left_parenthesis:
            token.token_type = TokenType.PARENTHESIS
            self._determine_opening_parenthesis_type(token=token)
        elif token.is_right_parenthesis:
            token.token_type = TokenType.PARENTHESIS
            self._determine_closing_parenthesis_type(token=token)

        last_keyword = self._determine_last_relevant_keyword(
            token=token, last_keyword=last_keyword
        )
        token.is_in_nested_function = self._is_in_nested_function
        token.parenthesis_level = self._parenthesis_level
        tokens.append(token)

    self._tokens = tokens
    # tokens are used by all methods that require parsing (i.e. everything
    # except generalization), so we resolve the query type here rather than
    # in __init__; this keeps generalization working for unsupported queries
    # while any other usage fails fast instead of returning wrong results
    _ = self.query_type
    return tokens
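
# --- Usage sketch (illustrative addition, not from the original source) ---
# A minimal sketch assuming the method above is exposed as the `tokens`
# property of sql-metadata's Parser class, and that SQLToken carries `value`
# and `last_keyword` attributes as in sql-metadata 2.x; the query string is
# a made-up example. Each token remembers the last relevant keyword seen
# before it, which downstream methods use to classify columns and tables.
from sql_metadata import Parser

parser = Parser("SELECT u.id, u.name FROM users u WHERE u.active = 1")
for token in parser.tokens:
    print(token.value, token.last_keyword)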
def tokens(self) -> List[SQLToken]:
    """
    Tokenizes the query
    """
    if self._tokens is not None:
        return self._tokens

    parsed = sqlparse.parse(self._query)
    tokens = []
    # handle empty queries (#12)
    if not parsed:
        return tokens
    self.sqlparse_tokens = parsed[0].tokens
    sqlparse_tokens = self._flatten_sqlparse()
    # drop whitespace tokens, including newlines, whose ttype is a child
    # of Whitespace rather than Whitespace itself
    non_empty_tokens = [
        token
        for token in sqlparse_tokens
        if token.ttype is not Whitespace and token.ttype.parent is not Whitespace
    ]
    last_keyword = None
    for index, tok in enumerate(non_empty_tokens):
        token = SQLToken(
            tok=tok,
            index=index,
            subquery_level=self._subquery_level,
            last_keyword=last_keyword,
        )
        if index > 0:
            # create links between consecutive tokens
            token.previous_token = tokens[index - 1]
            tokens[index - 1].next_token = token

        if token.is_left_parenthesis:
            self._determine_opening_parenthesis_type(token=token)
        elif token.is_right_parenthesis:
            self._determine_closing_parenthesis_type(token=token)

        # strip internal whitespace so multi-word keywords (e.g. GROUP BY)
        # match the entries in RELEVANT_KEYWORDS
        if tok.is_keyword and "".join(tok.normalized.split()) in RELEVANT_KEYWORDS:
            last_keyword = tok.normalized
        token.is_in_nested_function = self._is_in_nested_function
        token.parenthesis_level = self._parenthesis_level
        tokens.append(token)

    self._tokens = tokens
    # tokens are used by all methods that require parsing (i.e. everything
    # except generalization), so we resolve the query type here rather than
    # in __init__; this keeps generalization working for unsupported queries
    # while any other usage fails fast instead of returning wrong results
    _ = self.query_type
    return tokens
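
# --- Standalone sketch (illustrative addition, not from the original source) ---
# The inline whitespace filter above can be reproduced directly with sqlparse;
# this shows why both checks are needed: plain spaces have ttype Whitespace,
# while newlines have ttype Whitespace.Newline, whose parent is Whitespace.
import sqlparse
from sqlparse.tokens import Whitespace

parsed = sqlparse.parse("SELECT id\nFROM users")
non_empty = [
    tok
    for tok in parsed[0].flatten()
    if tok.ttype is not Whitespace and tok.ttype.parent is not Whitespace
]
print([tok.normalized for tok in non_empty])  # ['SELECT', 'id', 'FROM', 'users']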