Ejemplo n.º 1
0
    def columns_aliases_names(self) -> List[str]:
        """
        Collect the names of the column aliases defined in the query.

        The list is built on first access and cached in
        ``_columns_aliases_names``. While scanning, the highest subquery
        level at which each alias was seen is stored in
        ``_column_aliases_max_subquery_level``.
        """
        cached = self._columns_aliases_names
        if cached is not None:
            return cached

        found = UniqueList()
        # names of WITH statements and sub-queries are never column aliases
        excluded_names = self.with_names + self.subqueries_names
        for token in self.tokens:
            is_candidate = token.is_name or (
                token.is_keyword and token.previous_token.normalized == "AS"
            )
            # a token followed by a dot is only a prefix of a qualified name
            if not is_candidate or token.next_token.is_dot:
                continue

            defines_alias = (
                token.last_keyword_normalized in KEYWORDS_BEFORE_COLUMNS
                and token.normalized != "DIV"
                and token.is_alias_definition
            ) or token.is_in_with_columns
            if not defines_alias or token.value in excluded_names:
                continue

            alias = token.left_expanded
            found.append(alias)
            # remember the deepest subquery level this alias appears at
            levels = self._column_aliases_max_subquery_level
            if token.subquery_level > levels.setdefault(alias, 0):
                levels[alias] = token.subquery_level

        self._columns_aliases_names = found
        return self._columns_aliases_names
Ejemplo n.º 2
0
    def tables(self) -> List[str]:
        """
        List the tables referenced by the query.

        The result is computed once and cached in ``_tables``. Every token
        accepted as a table name is re-typed to ``TokenType.TABLE``, its
        value is stripped of backticks, and names of WITH statements are
        removed from the final list.
        """
        if self._tables is not None:
            return self._tables

        found = UniqueList()
        with_names = self.with_names
        for token in self._not_parsed_tokens:
            if not token.is_potential_table_name:
                continue

            # any of these means the token only looks like a table name
            is_not_a_table = (
                token.is_alias_of_table_or_alias_of_subquery
                or token.is_with_statement_nested_in_subquery
                or token.is_constraint_definition_inside_create_table_clause(
                    query_type=self.query_type
                )
                or token.is_columns_alias_of_with_query_or_column_in_insert_query(
                    with_names=with_names
                )
            )
            if is_not_a_table:
                continue

            name = str(token.value.strip("`"))
            token.token_type = TokenType.TABLE
            found.append(name)

        self._tables = found - with_names
        return self._tables
Ejemplo n.º 3
0
    def columns_aliases_names(self) -> List[str]:
        """
        Collect the names of the column aliases defined in the query.

        The list is built on first access and cached in
        ``_columns_aliases_names``.
        """
        cached = self._columns_aliases_names
        if cached is not None:
            return cached

        found = UniqueList()
        # names of WITH statements and sub-queries are never column aliases
        excluded_names = self.with_names + self.subqueries_names
        for token in self.tokens:
            is_candidate = token.is_name or (
                token.is_keyword and token.previous_token.normalized == "AS"
            )
            # a token followed by a dot is only a prefix of a qualified name
            if not is_candidate or token.next_token.is_dot:
                continue

            defines_alias = (
                token.last_keyword_normalized in KEYWORDS_BEFORE_COLUMNS
                and (
                    token.previous_token.normalized in ("AS", ")")
                    or token.is_alias_without_as
                )
            ) or token.is_in_with_columns
            if defines_alias and token.value not in excluded_names:
                found.append(token.left_expanded)

        self._columns_aliases_names = found
        return self._columns_aliases_names
Ejemplo n.º 4
0
    def with_names(self) -> List[str]:
        """
        Return the names introduced by WITH statements in the query.

        E.g. WITH database1.tableFromWith AS (SELECT * FROM table3)
             SELECT "xxxxx" FROM database1.tableFromWith alias
             LEFT JOIN database2.table2 ON ("tt"."ttt"."fff" = "xx"."xxx")
        will return ["database1.tableFromWith"]

        The result is computed once and cached in ``_with_names``.
        """
        if self._with_names is not None:
            return self._with_names

        names = UniqueList()
        for token in self._not_parsed_tokens:
            if token.previous_token.normalized != "WITH":
                continue

            self._is_in_with_block = True
            cursor = token
            while self._is_in_with_block and cursor.next_token:
                if not cursor.next_token.is_as_keyword:
                    cursor = cursor.next_token
                    continue

                self._handle_with_name_save(token=cursor, with_names=names)
                # skip over the body of this WITH query
                while cursor.next_token and not cursor.is_with_query_end:
                    cursor = cursor.next_token
                # the whole WITH block ends when an ending keyword follows
                if cursor.next_token_not_comment.normalized in WITH_ENDING_KEYWORDS:
                    self._is_in_with_block = False

        self._with_names = names
        return self._with_names
Ejemplo n.º 5
0
 def _add_to_columns_subsection(self, keyword: str, column: str):
     """
     Record a column under the query section that ``keyword`` maps to.

     The section name is looked up in ``COLUMNS_SECTIONS``; the
     ``_columns_dict`` mapping and the per-section list are created
     on demand.
     """
     section_name = COLUMNS_SECTIONS[keyword]
     columns_by_section = self._columns_dict or {}
     self._columns_dict = columns_by_section
     if section_name not in columns_by_section:
         columns_by_section[section_name] = UniqueList()
     columns_by_section[section_name].append(column)
Ejemplo n.º 6
0
 def _find_all_columns_between_tokens(
     self, start_token: SQLToken, end_token: SQLToken
 ) -> Union[str, List[str]]:
     """
     Resolve the aliases found between two tokens to their columns.

     Every token after ``start_token`` and before ``end_token`` whose
     ``left_expanded`` value is listed in ``_aliases_to_check`` is resolved
     with ``_resolve_alias_to_column``. A ``*`` token is only taken into
     account when it is flagged as a wildcard rather than an operator.

     Returns the single resolved value when exactly one was found,
     otherwise the (possibly empty) list of resolved values.
     """
     resolved = UniqueList()
     candidate = start_token.next_token
     while candidate != end_token:
         if candidate.left_expanded in self._aliases_to_check and (
             candidate.normalized != "*" or candidate.is_wildcard_not_operator
         ):
             resolved.append(self._resolve_alias_to_column(candidate))
         candidate = candidate.next_token

     if len(resolved) == 1:
         return resolved[0]
     return resolved
Ejemplo n.º 7
0
 def _add_to_columns_subsection(self, keyword: str, column: Union[str, List[str]]):
     """
     Record one or more columns under the query section for ``keyword``.

     The section name is looked up in ``COLUMNS_SECTIONS``. A single name
     (``str``) is appended; any other value is treated as a collection of
     names and added with ``extend``.
     """
     section_name = COLUMNS_SECTIONS[keyword]
     columns_by_section = self._columns_dict or {}
     self._columns_dict = columns_by_section
     target = columns_by_section.setdefault(section_name, UniqueList())
     add = target.append if isinstance(column, str) else target.extend
     add(column)
Ejemplo n.º 8
0
    def subqueries_names(self) -> List[str]:
        """
        Return the aliases given to sub-queries in the query.

        e.g. SELECT COUNT(1) FROM
            (SELECT std.task_id FROM some_task_detail std WHERE std.STATUS = 1) a
             JOIN (SELECT st.task_id FROM some_task st WHERE task_type_id = 80) b
             ON a.task_id = b.task_id;
        will return ["a", "b"]

        The result is computed once and cached in ``_subqueries_names``.
        """
        if self._subqueries_names is not None:
            return self._subqueries_names

        names = UniqueList()
        for token in self.tokens:
            previous = token.previous_token
            # alias written right after the sub-query: (...) alias
            directly_after_subquery = (
                previous.is_subquery_end and token.normalized != "AS"
            )
            # alias introduced with AS: (...) AS alias
            if directly_after_subquery or (
                previous.normalized == "AS"
                and token.get_nth_previous(2).is_subquery_end
            ):
                names.append(str(token))

        self._subqueries_names = names
        return self._subqueries_names
Ejemplo n.º 9
0
    def columns_aliases_names(self) -> List[str]:
        """
        Collect the names of the column aliases defined in the query.

        The list is built on first access and cached in
        ``_columns_aliases_names``. For every occurrence of a known alias,
        and for each newly accepted one,
        ``_handle_column_alias_subquery_level_update`` is called with the
        token.
        """
        if self._columns_aliases_names is not None:
            return self._columns_aliases_names

        aliases = UniqueList()
        # names of WITH statements and sub-queries are never column aliases
        excluded_names = self.with_names + self.subqueries_names
        for token in self._not_parsed_tokens:
            if not token.is_potential_alias:
                continue
            if token.value not in aliases:
                # first occurrence: it has to qualify as an alias to be kept
                if not token.is_a_valid_alias or token.value in excluded_names:
                    continue
                aliases.append(token.value)
            self._handle_column_alias_subquery_level_update(token=token)

        self._columns_aliases_names = aliases
        return self._columns_aliases_names
Ejemplo n.º 10
0
 def _add_to_columns_aliases_subsection(self, token: SQLToken):
     """
     Record the token's alias under the query section it appears in.

     The section is derived from the last keyword preceding the token.
     When that keyword is FROM or WITH and the nearest ``(`` is flagged as
     the start of a WITH columns list, the alias is filed under the SELECT
     section instead.
     """
     last_keyword = token.last_keyword_normalized
     alias_name = token.left_expanded
     in_with_columns = (
         last_keyword in ("FROM", "WITH")
         and token.find_nearest_token("(").is_with_columns_start
     )
     section_name = COLUMNS_SECTIONS["SELECT" if in_with_columns else last_keyword]
     aliases_by_section = self._columns_aliases_dict or {}
     self._columns_aliases_dict = aliases_by_section
     if section_name not in aliases_by_section:
         aliases_by_section[section_name] = UniqueList()
     aliases_by_section[section_name].append(alias_name)
Ejemplo n.º 11
0
    def columns_dict(self) -> Dict[str, List[str]]:
        """
        Returns dictionary of column names divided into section of the query in which
        given column is present.

        Sections consist of: select, where, order_by, group_by, join, insert and update

        Aliases recorded in ``columns_aliases_dict`` are resolved with
        ``_resolve_column_alias`` and merged into the matching sections.
        """
        if not self._columns_dict:
            # reading ``columns`` fills ``_columns_dict`` as a side effect
            _ = self.columns
        aliases_by_section = self.columns_aliases_dict
        if aliases_by_section:
            # ``_columns_dict`` is only created when a column is filed under a
            # section, so it can still be unset here when the query uses
            # nothing but aliases; make sure there is a dict to merge into
            self._columns_dict = self._columns_dict or dict()
            for section, aliases in aliases_by_section.items():
                for alias in aliases:
                    resolved = self._resolve_column_alias(alias)
                    # an alias may resolve to one column or to a list of them
                    resolved_columns = (
                        resolved if isinstance(resolved, list) else [resolved]
                    )
                    for column in resolved_columns:
                        self._columns_dict.setdefault(section, UniqueList()).append(
                            column
                        )
        return self._columns_dict
Ejemplo n.º 12
0
    def tables(self) -> List[str]:
        """
        List the tables referenced by the query.

        The result is computed once and cached in ``_tables``; names of
        WITH statements are removed from the final list.
        """
        if self._tables is not None:
            return self._tables

        found = UniqueList()
        with_names = self.with_names

        for token in self.tokens:
            is_candidate = (
                (token.is_name or token.is_keyword)
                and token.last_keyword_normalized in TABLE_ADJUSTMENT_KEYWORDS
                and token.previous_token.normalized not in ["AS", "WITH"]
                and token.normalized not in ["AS", "SELECT"]
            )
            if not is_candidate:
                continue

            # CREATE TABLE queries (#35): skip keywords that are within the
            # parenthesis-wrapped list of columns
            if (
                self.query_type == QueryType.CREATE
                and token.is_in_parenthesis
                and token.is_create_table_columns_definition
            ):
                continue

            # prefix of a qualified name, the last part will be handled
            if token.next_token.is_dot:
                continue

            # <columns> of INSERT INTO <TABLE> (<columns>)
            # or columns of a with statement: with (<columns>) as ...
            if token.is_in_parenthesis and (
                token.find_nearest_token("(").previous_token.value in with_names
                or token.last_keyword_normalized == "INTO"
            ):
                continue

            previous = token.previous_token
            if previous.is_dot:
                # last part of a fully qualified name
                found.append(token.left_expanded)
                continue

            # not a list of tables like SELECT * FROM foo, bar - so it can be
            # an alias without AS, e.g. SELECT * FROM foo bar,
            # or an alias of a subquery: (SELECT * FROM foo) bar
            looks_like_alias = (
                previous.normalized != token.last_keyword_normalized
                and not previous.is_punctuation
            ) or previous.is_right_parenthesis
            if looks_like_alias:
                continue

            found.append(str(token.value.strip("`")))

        self._tables = found - with_names
        return self._tables
Ejemplo n.º 13
0
    def with_names(self) -> List[str]:
        """
        Returns with statements aliases list from a given query

        E.g. WITH database1.tableFromWith AS (SELECT * FROM table3)
             SELECT "xxxxx" FROM database1.tableFromWith alias
             LEFT JOIN database2.table2 ON ("tt"."ttt"."fff" = "xx"."xxx")
        will return ["database1.tableFromWith"]

        The result is computed once and cached in ``_with_names``. While
        scanning, the parentheses wrapping a WITH columns list are re-flagged
        on their tokens (``is_with_columns_start`` / ``is_with_columns_end``
        set, nested-function flags cleared).
        """
        if self._with_names is not None:
            return self._with_names
        with_names = UniqueList()
        for token in self.tokens:
            # a WITH block starts at the token right after the WITH keyword
            if token.previous_token.normalized == "WITH":
                self._is_in_with_block = True
                # ``token`` is used as a cursor and moved forward manually
                while self._is_in_with_block and token.next_token:
                    # the name comes first, directly before AS
                    if token.next_token.normalized == "AS":
                        if token.is_right_parenthesis:
                            # inside columns of with statement
                            # like: with (col1, col2) as (subquery)
                            token.is_with_columns_end = True
                            token.is_nested_function_end = False
                            start_token = token.find_nearest_token("(")
                            start_token.is_with_columns_start = True
                            start_token.is_nested_function_start = False
                            # the name is the token before the opening parenthesis
                            prev_token = start_token.previous_token
                            with_names.append(prev_token.left_expanded)
                        else:
                            with_names.append(token.left_expanded)
                        # move to the next with statement if one exists; the
                        # current one ends with ")" followed by "," when there
                        # are more, or by an ending keyword (e.g. select) when
                        # it is the last one. The cursor has to be moved past
                        # the body, because AS can also appear in sub-queries
                        # inside the with definition
                        while token.next_token and not (
                            token.is_right_parenthesis
                            and (
                                token.next_token.is_punctuation
                                or token.next_token.normalized in WITH_ENDING_KEYWORDS
                            )
                        ):
                            token = token.next_token
                        # an ending keyword closes the whole WITH block
                        if token.next_token.normalized in WITH_ENDING_KEYWORDS:
                            self._is_in_with_block = False
                    else:
                        token = token.next_token

        self._with_names = with_names
        return self._with_names
Ejemplo n.º 14
0
    def columns(self) -> List[str]:
        """
        List the columns referenced by the query.

        The result is computed once and cached in ``_columns``. Tokens
        recognised as CREATE TABLE column definitions or as INSERT columns
        are re-typed to ``TokenType.COLUMN``; other columns and SELECT
        wildcards are stored through ``_handle_column_save``.
        """
        if self._columns is not None:
            return self._columns

        found = UniqueList()
        for token in self._not_parsed_tokens:
            if not (token.is_name or token.is_keyword_column_name):
                # the only other thing of interest is a wildcard in SELECT
                if token.is_a_wildcard_in_select_statement:
                    self._handle_column_save(token=token, columns=found)
                continue

            if token.is_column_definition_inside_create_table(
                query_type=self.query_type
            ):
                token.token_type = TokenType.COLUMN
                found.append(token.value)
                continue

            is_regular_column = (
                token.is_potential_column_name
                and token.is_not_an_alias_or_is_self_alias_outside_of_subquery(
                    columns_aliases_names=self.columns_aliases_names,
                    max_subquery_level=self._column_aliases_max_subquery_level,
                )
                and not token.is_sub_query_name_or_with_name_or_function_name(
                    sub_queries_names=self.subqueries_names,
                    with_names=self.with_names,
                )
                and not token.is_table_definition_suffix_in_non_select_create_table(
                    query_type=self.query_type
                )
                and not token.is_conversion_specifier
            )
            if is_regular_column:
                self._handle_column_save(token=token, columns=found)
            elif token.is_column_name_inside_insert_clause:
                name = str(token.value).strip("`")
                self._add_to_columns_subsection(
                    keyword=token.last_keyword_normalized, column=name
                )
                token.token_type = TokenType.COLUMN
                found.append(name)

        self._columns = found
        return self._columns
Ejemplo n.º 15
0
    def columns(self) -> List[str]:
        """
        Returns the list columns this query refers to

        The result is computed once and cached in ``_columns``. While
        scanning, columns are also filed per query section through
        ``_add_to_columns_subsection``, and references to column aliases
        through ``_add_to_columns_aliases_subsection``.
        """
        if self._columns is not None:
            return self._columns
        columns = UniqueList()
        tables_aliases = self.tables_aliases
        subqueries_names = self.subqueries_names

        for token in self.tokens:
            # handle CREATE TABLE queries (#35)
            if token.is_name and self.query_type == QueryType.CREATE:
                # previous token is either ( or , -> indicates the column name
                if (
                    token.is_in_parenthesis
                    and token.previous_token.is_punctuation
                    and token.last_keyword_normalized == "TABLE"
                ):
                    columns.append(token.value)
                    continue

                # we're in CREATE TABLE query with the columns
                # ignore any annotations outside the parenthesis with the list of columns
                # e.g. ) CHARACTER SET utf8;
                # (only when no SELECT token is found for this token)
                if (
                    not token.is_in_parenthesis
                    and token.find_nearest_token("SELECT", value_attribute="normalized")
                    is EmptyToken
                ):
                    continue

            # names followed by a dot are skipped here; they are only a prefix
            # of a qualified name
            if (
                token.is_name and not token.next_token.is_dot
            ) or token.is_keyword_column_name:
                # analyze the name tokens, column names and where condition values
                # a column is a token that is not an alias definition and is
                # either not a known alias name or passes the self-alias check
                if (
                    token.last_keyword_normalized in KEYWORDS_BEFORE_COLUMNS
                    and token.previous_token.normalized not in ["AS", ")"]
                    and not token.is_alias_without_as
                    and (
                        token.left_expanded not in self.columns_aliases_names
                        or token.token_is_alias_of_self_not_from_subquery(
                            aliases_levels=self._column_aliases_max_subquery_level
                        )
                    )
                ):

                    if (
                        not (
                            # aliases of sub-queries i.e.: SELECT from (...) <alias>
                            token.previous_token.is_right_parenthesis
                            and token.value in subqueries_names
                        )
                        # custom functions - they are followed by the parenthesis
                        # e.g. custom_func(...
                        and not token.next_token.is_left_parenthesis
                    ):
                        column = token.table_prefixed_column(tables_aliases)
                        column = self._resolve_subquery_alias_to_column(column)
                        self._add_to_columns_with_tables(token, column)
                        self._add_to_columns_subsection(
                            keyword=token.last_keyword_normalized, column=column
                        )
                        # NOTE(review): extend() suggests the resolved value is
                        # expected to be a collection of names - confirm
                        columns.extend(column)
                elif (
                    token.last_keyword_normalized == "INTO"
                    and token.previous_token.is_punctuation
                ):
                    # INSERT INTO `foo` (col1, `col2`) VALUES (..)
                    column = str(token.value).strip("`")
                    self._add_to_columns_subsection(
                        keyword=token.last_keyword_normalized, column=column
                    )
                    columns.append(column)
                elif token.left_expanded in self.columns_aliases_names:
                    # a reference to a column alias, filed separately
                    self._add_to_columns_aliases_subsection(token=token)
            elif (
                token.is_wildcard
                and token.last_keyword_normalized == "SELECT"
                and not token.previous_token.is_left_parenthesis
            ):
                # handle * wildcard in select part, but ignore count(*)
                column = token.table_prefixed_column(tables_aliases)
                column = self._resolve_subquery_alias_to_column(column)
                self._add_to_columns_with_tables(token, column)
                self._add_to_columns_subsection(
                    keyword=token.last_keyword_normalized, column=column
                )
                columns.extend(column)

        self._columns = columns
        return self._columns