Example 1
 def _handle_column_save(self, token: SQLToken, columns: List[str]):
     # expand the token into a table-prefixed column name, resolving aliases
     column = token.table_prefixed_column(self.tables_aliases)
     if self._is_with_query_already_resolved(column):
         # the name refers to an already-resolved WITH query, so record it
         # as a column alias rather than a real column
         self._add_to_columns_aliases_subsection(token=token, left_expand=False)
         token.token_type = TokenType.COLUMN_ALIAS
         return
     # replace sub-query references with the columns they expose
     column = self._resolve_sub_queries(column)
     self._add_to_columns_with_tables(token, column)
     self._add_to_columns_subsection(
         keyword=token.last_keyword_normalized, column=column
     )
     token.token_type = TokenType.COLUMN
     columns.extend(column)
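
The method names here match the internals of the sql_metadata package, a SQL parser built on top of sqlparse. Assuming that context, a minimal usage sketch of the public Parser API that this column handling ultimately feeds (expected outputs are hedged in the comments):

    # minimal sketch, assuming these snippets come from sql_metadata's Parser
    from sql_metadata import Parser

    parser = Parser("SELECT u.id, u.name AS login FROM users u")
    print(parser.columns)          # expected: ['users.id', 'users.name']
    print(parser.columns_aliases)  # expected: {'login': 'users.name'}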
Example 2
    def tokens(self) -> List[SQLToken]:
        """
        Tokenizes the query
        """
        if self._tokens is not None:
            return self._tokens

        parsed = sqlparse.parse(self._query)
        tokens = []
        # handle empty queries (#12)
        if not parsed:
            return tokens

        self.sqlparse_tokens = parsed[0].tokens
        sqlparse_tokens = self._flatten_sqlparse()
        non_empty_tokens = [
            token
            for token in sqlparse_tokens
            if token.ttype is not Whitespace and token.ttype.parent is not Whitespace
        ]
        last_keyword = None
        for index, tok in enumerate(non_empty_tokens):
            token = SQLToken(
                tok=tok,
                index=index,
                subquery_level=self._subquery_level,
                last_keyword=last_keyword,
            )
            if index > 0:
                # create links between consecutive tokens
                token.previous_token = tokens[index - 1]
                tokens[index - 1].next_token = token

            if token.is_left_parenthesis:
                self._determine_opening_parenthesis_type(token=token)
            elif token.is_right_parenthesis:
                self._determine_closing_parenthesis_type(token=token)

            if tok.is_keyword and "".join(tok.normalized.split()) in RELEVANT_KEYWORDS:
                last_keyword = tok.normalized
            token.is_in_nested_function = self._is_in_nested_function
            token.parenthesis_level = self._parenthesis_level
            tokens.append(token)

        self._tokens = tokens
        # tokens are needed by every parsing method (but not by generalization),
        # so the query type is resolved here rather than in __init__: this keeps
        # generalization working while making any other use of an unsupported
        # query fail early instead of returning misleading results
        _ = self.query_type
        return tokens
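
The tokenization above delegates lexing to sqlparse. A self-contained sketch of the underlying calls (parse, flatten to leaf tokens, drop whitespace), independent of the surrounding class:

    # standalone sketch of the sqlparse calls used by tokens()
    import sqlparse
    from sqlparse.tokens import Whitespace

    parsed = sqlparse.parse("SELECT id FROM users")
    non_empty_tokens = [
        tok
        for tok in parsed[0].flatten()
        if tok.ttype is not Whitespace and tok.ttype.parent is not Whitespace
    ]
    for tok in non_empty_tokens:
        print(tok.ttype, repr(tok.normalized))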
Example 3
 def _determine_opening_parenthesis_type(self, token: SQLToken):
     """
     Determines the type of a left parenthesis in the query
     """
     if token.previous_token.normalized in SUBQUERY_PRECEDING_KEYWORDS:
         # inside subquery / derived table
         token.is_subquery_start = True
         self._subquery_level += 1
         token.subquery_level = self._subquery_level
     elif token.previous_token.normalized in KEYWORDS_BEFORE_COLUMNS.union({","}):
         # we are in columns and in a column subquery definition
         token.is_column_definition_start = True
     elif token.previous_token.normalized == "AS":
         token.is_with_query_start = True
     elif (
         token.get_nth_previous(2).normalized == "TABLE"
         and token.get_nth_previous(3).normalized == "CREATE"
     ):
         token.is_create_table_columns_declaration_start = True
     else:
         # nested function
         token.is_nested_function_start = True
         self._nested_level += 1
         self._is_in_nested_function = True
     self._open_parentheses.append(token)
     self._parenthesis_level += 1
Example 4
 def _determine_opening_parenthesis_type(self, token: SQLToken):
     """
     Determines the type of a left parenthesis in the query
     """
     if token.previous_token.normalized in SUBQUERY_PRECEDING_KEYWORDS:
         # inside subquery / derived table
         token.is_subquery_start = True
         self._subquery_level += 1
         token.subquery_level = self._subquery_level
     elif token.previous_token.normalized in KEYWORDS_BEFORE_COLUMNS.union({","}):
         # we are in columns and in a column subquery definition
         token.is_column_definition_start = True
     elif (
         token.previous_token.is_as_keyword
         and token.last_keyword_normalized != "WINDOW"
     ):
         # window clause also contains AS keyword, but it is not a query
         token.is_with_query_start = True
     elif (
         token.last_keyword_normalized == "TABLE"
         and token.find_nearest_token("(") is EmptyToken
     ):
         token.is_create_table_columns_declaration_start = True
     elif token.previous_token.normalized == "OVER":
         token.is_partition_clause_start = True
     else:
         # nested function
         token.is_nested_function_start = True
         self._nested_level += 1
         self._is_in_nested_function = True
     self._open_parentheses.append(token)
     self._parenthesis_level += 1
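
Each branch above corresponds to an easily reproduced query shape. A hedged mapping of illustrative SQL fragments to the flag the relevant opening parenthesis should receive (this paraphrases the branches, it is not a public API):

    # illustrative SQL fragment -> flag expected on the construct's "("
    examples = {
        "SELECT * FROM (SELECT a FROM t) sub": "is_subquery_start",
        "SELECT (SELECT max(a) FROM t) AS m, b FROM x": "is_column_definition_start",
        "WITH cte AS (SELECT a FROM t) SELECT * FROM cte": "is_with_query_start",
        "CREATE TABLE t (a INT, b INT)": "is_create_table_columns_declaration_start",
        "SELECT row_number() OVER (PARTITION BY a) FROM t": "is_partition_clause_start",
        "SELECT count(a) FROM t": "is_nested_function_start",
    }
    for query, flag in examples.items():
        print(f"{flag:45} {query}")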
Example 5
 def _handle_with_name_save(token: SQLToken, with_names: List[str]) -> None:
     if token.is_right_parenthesis:
         # inside columns of with statement
         # like: with (col1, col2) as (subquery)
         token.is_with_columns_end = True
         token.is_nested_function_end = False
         start_token = token.find_nearest_token("(")
         start_token.is_with_columns_start = True
         start_token.is_nested_function_start = False
         prev_token = start_token.previous_token
         prev_token.token_type = TokenType.WITH_NAME
         with_names.append(prev_token.value)
     else:
         token.token_type = TokenType.WITH_NAME
         with_names.append(token.value)
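
For reference, the two WITH shapes this handler distinguishes, checked through the public API (a hedged sketch, assuming sql_metadata's Parser):

    from sql_metadata import Parser

    plain = "WITH cte AS (SELECT a FROM t) SELECT * FROM cte"
    with_columns = "WITH cte (col1, col2) AS (SELECT a, b FROM t) SELECT * FROM cte"
    print(Parser(plain).with_names)         # expected: ['cte']
    print(Parser(with_columns).with_names)  # expected: ['cte']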
Example 6
 def _handle_column_alias_subquery_level_update(self, token: SQLToken) -> None:
     token.token_type = TokenType.COLUMN_ALIAS
     self._add_to_columns_aliases_subsection(token=token)
     # track the deepest subquery level at which this alias appears
     current_level = self._column_aliases_max_subquery_level.setdefault(
         token.value, 0
     )
     if token.subquery_level > current_level:
         self._column_aliases_max_subquery_level[token.value] = token.subquery_level
Example 7
 def _determine_closing_parenthesis_type(self, token: SQLToken):
     """
     Determines the type of a right parenthesis in the query
     """
     last_open_parenthesis = self._open_parentheses.pop(-1)
     if last_open_parenthesis.is_subquery_start:
         token.is_subquery_end = True
         self._subquery_level -= 1
     elif last_open_parenthesis.is_column_definition_start:
         token.is_column_definition_end = True
     elif last_open_parenthesis.is_with_query_start:
         token.is_with_query_end = True
     else:
         token.is_nested_function_end = True
         self._nested_level -= 1
         if self._nested_level == 0:
             self._is_in_nested_function = False
Example 8
 def _combine_qualified_names(self, index: int, token: SQLToken) -> None:
     """
     Combines names like <schema>.<table>.<column> or <table/sub_query>.<column>
     """
     value = token.value
     is_complex = True
     while is_complex:
         value, is_complex = self._combine_tokens(index=index, value=value)
         # step two tokens back, over the "<name>." pair just consumed
         index = index - 2
     token.value = value
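
A self-contained sketch of the same right-to-left pairing over a flat token list; combine_qualified_name and the token list below are illustrative stand-ins, not the library's helpers:

    def combine_qualified_name(tokens, index):
        """Glue <name>.<name> pairs leftwards into one dotted identifier."""
        value = tokens[index]
        while index >= 2 and tokens[index - 1] == ".":
            value = f"{tokens[index - 2]}.{value}"
            index -= 2
        return value

    tokens = ["SELECT", "db", ".", "schema", ".", "tbl", ".", "col", "FROM", "x"]
    print(combine_qualified_name(tokens, 7))  # db.schema.tbl.col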
Example 9
    def tokens(self) -> List[SQLToken]:
        """
        Tokenizes the query
        """
        if self._tokens is not None:
            return self._tokens

        parsed = sqlparse.parse(self.query)
        tokens = []
        # handle empty queries (#12)
        if not parsed:
            return tokens

        sqlparse_tokens = TokenList(parsed[0].tokens).flatten()
        non_empty_tokens = [
            token for token in sqlparse_tokens if token.ttype is not Whitespace
        ]
        last_keyword = None
        for index, tok in enumerate(non_empty_tokens):
            token = SQLToken(
                tok=tok,
                index=index,
                subquery_level=self._subquery_level,
                last_keyword=last_keyword,
            )
            if index > 0:
                # create links between consecutive tokens
                token.previous_token = tokens[index - 1]
                tokens[index - 1].next_token = token

            if token.is_left_parenthesis:
                self._determine_opening_parenthesis_type(token=token)
            elif token.is_right_parenthesis:
                self._determine_closing_parenthesis_type(token=token)

            if tok.is_keyword and tok.normalized not in KEYWORDS_IGNORED:
                last_keyword = tok.normalized
            token.is_in_nested_function = self._is_in_nested_function
            tokens.append(token)

        self._tokens = tokens
        return tokens
Example 10
 def _determine_opening_parenthesis_type(self, token: SQLToken):
     """
     Determines the type of a left parenthesis in the query
     """
     if token.previous_token.normalized in SUBQUERY_PRECEDING_KEYWORDS:
         # inside subquery / derived table
         token.is_subquery_start = True
         self._subquery_level += 1
         token.subquery_level = self._subquery_level
     elif token.previous_token.normalized in KEYWORDS_BEFORE_COLUMNS + [","]:
         # we are in columns and in a column subquery definition
         token.is_column_definition_start = True
     elif token.previous_token.normalized == "AS":
         token.is_with_query_start = True
     else:
         # nested function
         token.is_nested_function_start = True
         self._nested_level += 1
         self._is_in_nested_function = True
     self._open_parentheses.append(token)
Example 11
    def tokens(self) -> List[SQLToken]:
        """
        Tokenizes the query
        """
        if self._tokens is not None:
            return self._tokens

        parsed = sqlparse.parse(self._query)
        tokens = []
        # handle empty queries (#12)
        if not parsed:
            return tokens
        self._get_sqlparse_tokens(parsed)
        last_keyword = None
        combine_flag = False
        for index, tok in enumerate(self.non_empty_tokens):
            # combine dot separated identifiers
            if self._is_token_part_of_complex_identifier(token=tok, index=index):
                combine_flag = True
                continue
            token = SQLToken(
                tok=tok,
                index=index,
                subquery_level=self._subquery_level,
                last_keyword=last_keyword,
            )
            if combine_flag:
                self._combine_qualified_names(index=index, token=token)
                combine_flag = False

            previous_token = tokens[-1] if index > 0 else EmptyToken
            token.previous_token = previous_token
            previous_token.next_token = token if index > 0 else None

            if token.is_left_parenthesis:
                token.token_type = TokenType.PARENTHESIS
                self._determine_opening_parenthesis_type(token=token)
            elif token.is_right_parenthesis:
                token.token_type = TokenType.PARENTHESIS
                self._determine_closing_parenthesis_type(token=token)

            last_keyword = self._determine_last_relevant_keyword(
                token=token, last_keyword=last_keyword
            )
            token.is_in_nested_function = self._is_in_nested_function
            token.parenthesis_level = self._parenthesis_level
            tokens.append(token)

        self._tokens = tokens
        # tokens are needed by every parsing method (but not by generalization),
        # so the query type is resolved here rather than in __init__: this keeps
        # generalization working while making any other use of an unsupported
        # query fail early instead of returning misleading results
        _ = self.query_type
        return tokens
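
A hedged sketch of the eager validation noted in the closing comment (assuming sql_metadata's Parser): reading query_type, as tokens does, makes unsupported statements fail fast.

    from sql_metadata import Parser

    print(Parser("SELECT a FROM t").query_type)  # expected: "SELECT"
    # an unsupported statement would raise here instead of half-parsing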
Example 12
 def _add_to_columns_aliases_subsection(self, token: SQLToken):
     """
     Add the alias to the query section in which it appears
     """
     keyword = token.last_keyword_normalized
     alias = token.left_expanded
     if (
         token.last_keyword_normalized in ["FROM", "WITH"]
         and token.find_nearest_token("(").is_with_columns_start
     ):
         keyword = "SELECT"
     section = COLUMNS_SECTIONS[keyword]
     self._columns_aliases_dict = self._columns_aliases_dict or dict()
     self._columns_aliases_dict.setdefault(section, UniqueList()).append(alias)
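
A hedged look at the resulting per-section dictionaries through the public API (assuming sql_metadata's Parser; expected values in comments):

    from sql_metadata import Parser

    parser = Parser("SELECT a AS x FROM t WHERE b = 1 ORDER BY c")
    print(parser.columns_dict)
    # expected: {'select': ['a'], 'where': ['b'], 'order_by': ['c']}
    print(parser.columns_aliases_dict)
    # expected: {'select': ['x']}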
Example 13
 def _resolve_function_alias(self, token: SQLToken) -> Union[str, List[str]]:
     # it can be one function or a chain of functions
     # like: sum(a) + sum(b) as alias
     # or operation on columns like: col1 + col2 as alias
     start_token = token.find_nearest_token(
         [",", "SELECT"], value_attribute="normalized"
     )
     while start_token.is_in_nested_function:
         start_token = start_token.find_nearest_token(
             [",", "SELECT"], value_attribute="normalized"
         )
     return self._find_all_columns_between_tokens(
         start_token=start_token, end_token=token
     )
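
The two shapes named in the comments above, with a hedged check of the resulting alias mapping (assuming sql_metadata's Parser):

    from sql_metadata import Parser

    chained = "SELECT sum(a) + sum(b) AS total FROM t"
    arithmetic = "SELECT col1 + col2 AS combined FROM t"
    print(Parser(chained).columns_aliases)     # expected: {'total': ['a', 'b']}
    print(Parser(arithmetic).columns_aliases)  # expected: {'combined': ['col1', 'col2']}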
Example 14
 def _determine_last_relevant_keyword(self, token: SQLToken, last_keyword: str):
     if token.is_keyword and "".join(token.normalized.split()) in RELEVANT_KEYWORDS:
         if (
             # FROM inside EXTRACT(<part> FROM <expr>) is a function argument,
             # not the beginning of the source-tables clause
             not (
                 token.normalized == "FROM"
                 and token.get_nth_previous(3).normalized == "EXTRACT"
             )
             # ORDER BY inside OVER (PARTITION BY ...) belongs to the window
             # specification, not to the query ordering
             and not (
                 token.normalized == "ORDERBY"
                 and len(self._open_parentheses) > 0
                 and self._open_parentheses[-1].is_partition_clause_start
             )
             # USING encountered while still in the select list is not a join clause
             and not (token.normalized == "USING" and last_keyword == "SELECT")
         ):
             last_keyword = token.normalized
     return last_keyword
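
Illustrative queries that exercise the three exceptions above; without them, the FROM, ORDER BY, or USING keyword would wrongly become the last keyword for the tokens that follow:

    # FROM here belongs to EXTRACT(), not to the source-tables clause
    extract_q = "SELECT EXTRACT(YEAR FROM created_at) AS y FROM events"
    # ORDER BY here is part of the window specification
    window_q = "SELECT rank() OVER (PARTITION BY a ORDER BY b) FROM t"
    # USING here is part of MySQL's CONVERT(), not a join condition
    convert_q = "SELECT CONVERT(name USING utf8) FROM t"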
Example 15
 def _find_column_for_with_column_alias(self, token: SQLToken) -> str:
     start_token = token.find_nearest_token(
         True, direction="right", value_attribute="is_with_query_start"
     )
     if start_token not in self._with_columns_candidates:
         # collect (and cache) the columns exposed by this with-query body
         end_token = start_token.find_nearest_token(
             True, direction="right", value_attribute="is_with_query_end"
         )
         columns = self._find_all_columns_between_tokens(
             start_token=start_token, end_token=end_token
         )
         self._with_columns_candidates[start_token] = columns
     if isinstance(self._with_columns_candidates[start_token], list):
         # aliases consume the candidate columns positionally, one per call
         alias_of = self._with_columns_candidates[start_token].pop(0)
     else:
         alias_of = self._with_columns_candidates[start_token]
     return alias_of
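
An illustrative query for this positional mapping: each name in the WITH column list aliases the column at the same position in the with-query body:

    # x resolves to a, y resolves to b, consumed in order from the candidates
    with_cols = "WITH cte (x, y) AS (SELECT a, b FROM t) SELECT x, y FROM cte"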
Example 16
    def _resolve_subquery_alias(self, token: SQLToken) -> Union[str, List[str]]:
        # nested subquery like select a, (select a as b from x) as column
        start_token = token.find_nearest_token(
            True, value_attribute="is_column_definition_start"
        )
        if start_token.next_token.normalized == "SELECT":
            # we have a subquery
            alias_token = start_token.next_token.find_nearest_token(
                self._aliases_to_check,
                direction="right",
                value_attribute="value",
            )
            return self._resolve_alias_to_column(alias_token)

        # chain of functions or redundant parenthesis
        return self._find_all_columns_between_tokens(
            start_token=start_token, end_token=token
        )
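
Spelled out as runnable strings, the subquery case from the comment above plus the fallback shape (illustrative only):

    # subquery in the select list: "column" resolves through the inner alias "b"
    subquery_q = "SELECT a, (SELECT a AS b FROM x) AS column FROM t"
    # a chain of functions or redundant parentheses falls back to collecting
    # all columns between the opening parenthesis and the alias
    fallback_q = "SELECT (col1 + col2) AS s FROM t"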