def columns_aliases_names(self) -> List[str]:
    """
    Extract names of the column aliases used in query.

    The list is built on first access, stored in
    ``self._columns_aliases_names`` and served from there afterwards.
    For every alias found, ``self._column_aliases_max_subquery_level``
    is updated so that it holds the highest ``subquery_level`` among the
    tokens that produced this alias (starting from 0).

    :return: unique alias names in order of first appearance
    """
    if self._columns_aliases_names is not None:
        return self._columns_aliases_names

    found = UniqueList()
    with_names = self.with_names
    subqueries_names = self.subqueries_names

    for token in self.tokens:
        # only names, or keywords placed right after AS, are considered
        named_like = token.is_name or (
            token.is_keyword and token.previous_token.normalized == "AS"
        )
        # a token followed by a dot is just a part of a qualified name
        if not named_like or token.next_token.is_dot:
            continue

        defines_alias = (
            token.last_keyword_normalized in KEYWORDS_BEFORE_COLUMNS
            and token.normalized not in ["DIV"]
            and token.is_alias_definition
        ) or token.is_in_with_columns
        if not defines_alias:
            continue

        # names of WITH statements and of sub-queries are not column aliases
        if token.value in with_names + subqueries_names:
            continue

        alias = token.left_expanded
        found.append(alias)

        # remember the highest subquery level this alias was seen at
        levels = self._column_aliases_max_subquery_level
        known_level = levels.setdefault(alias, 0)
        if token.subquery_level > known_level:
            levels[alias] = token.subquery_level

    self._columns_aliases_names = found
    return self._columns_aliases_names
def tables(self) -> List[str]:
    """
    Return the list of tables this query refers to.

    The result is computed once and cached in ``self._tables``. Every token
    accepted as a table name gets its ``token_type`` set to
    ``TokenType.TABLE``. Names of WITH statements are removed from the
    final result.
    """
    if self._tables is not None:
        return self._tables

    found = UniqueList()
    with_names = self.with_names

    for token in self._not_parsed_tokens:
        if not token.is_potential_table_name:
            continue

        # candidates that turn out to be aliases, nested WITH statements,
        # constraint definitions or column lists are not tables
        is_not_a_table = (
            token.is_alias_of_table_or_alias_of_subquery
            or token.is_with_statement_nested_in_subquery
            or token.is_constraint_definition_inside_create_table_clause(
                query_type=self.query_type
            )
            or token.is_columns_alias_of_with_query_or_column_in_insert_query(
                with_names=with_names
            )
        )
        if is_not_a_table:
            continue

        name = str(token.value.strip("`"))
        token.token_type = TokenType.TABLE
        found.append(name)

    self._tables = found - with_names
    return self._tables
def columns_aliases_names(self) -> List[str]:
    """
    Extract names of the column aliases used in query.

    The list is built on first access and cached in
    ``self._columns_aliases_names``.

    :return: unique alias names in order of first appearance
    """
    if self._columns_aliases_names is not None:
        return self._columns_aliases_names

    found = UniqueList()
    with_names = self.with_names
    subqueries_names = self.subqueries_names

    for token in self.tokens:
        # only names, or keywords placed right after AS, are considered
        named_like = token.is_name or (
            token.is_keyword and token.previous_token.normalized == "AS"
        )
        # a token followed by a dot is just a part of a qualified name
        if not named_like or token.next_token.is_dot:
            continue

        # alias comes after AS or a closing parenthesis, is an alias
        # without AS, or sits in the columns list of a WITH statement
        defines_alias = (
            token.last_keyword_normalized in KEYWORDS_BEFORE_COLUMNS
            and (
                token.previous_token.normalized in ["AS", ")"]
                or token.is_alias_without_as
            )
        ) or token.is_in_with_columns

        # names of WITH statements and of sub-queries are not column aliases
        if defines_alias and token.value not in with_names + subqueries_names:
            found.append(token.left_expanded)

    self._columns_aliases_names = found
    return self._columns_aliases_names
def with_names(self) -> List[str]:
    """
    Returns with statements aliases list from a given query.

    E.g. WITH database1.tableFromWith AS (SELECT * FROM table3)
         SELECT "xxxxx" FROM database1.tableFromWith alias
         LEFT JOIN database2.table2 ON ("tt"."ttt"."fff" = "xx"."xxx")
    will return ["database1.tableFromWith"]

    The result is computed once and cached in ``self._with_names``.
    ``self._is_in_with_block`` is switched on while a WITH block is being
    walked and switched off when its end is reached.
    """
    if self._with_names is not None:
        return self._with_names

    names = UniqueList()
    for token in self._not_parsed_tokens:
        if token.previous_token.normalized != "WITH":
            continue

        self._is_in_with_block = True
        while self._is_in_with_block and token.next_token:
            if not token.next_token.is_as_keyword:
                # not at a "<name> AS" position yet, keep walking
                token = token.next_token
                continue

            self._handle_with_name_save(token=token, with_names=names)

            # skip the whole definition of this WITH query
            while token.next_token and not token.is_with_query_end:
                token = token.next_token

            # the block is over when a WITH-ending keyword follows,
            # otherwise another WITH query is expected
            if token.next_token_not_comment.normalized in WITH_ENDING_KEYWORDS:
                self._is_in_with_block = False

    self._with_names = names
    return self._with_names
def _add_to_columns_subsection(self, keyword: str, column: str):
    """
    Add column to the section in which it appears in query.

    :param keyword: key of ``COLUMNS_SECTIONS`` used to pick the section
    :param column: column name to store in that section
    """
    section_name = COLUMNS_SECTIONS[keyword]
    # start a fresh dictionary when none (or an empty one) is stored yet
    if not self._columns_dict:
        self._columns_dict = {}
    section_columns = self._columns_dict.setdefault(section_name, UniqueList())
    section_columns.append(column)
def _find_all_columns_between_tokens(
    self, start_token: SQLToken, end_token: SQLToken
) -> Union[str, List[str]]:
    """
    Returns the columns found between two tokens.

    Tokens after ``start_token`` and before ``end_token`` are visited one
    by one; those whose ``left_expanded`` is in ``self._aliases_to_check``
    are resolved with ``self._resolve_alias_to_column``.

    :param start_token: token after which the scan begins (exclusive)
    :param end_token: token at which the scan stops (exclusive)
    :return: the single resolved value when exactly one was collected,
        otherwise the whole list of collected values (possibly empty)
    """
    resolved = UniqueList()
    current = start_token.next_token
    while current != end_token:
        # "*" is accepted only when flagged as a wildcard, not an operator
        if current.left_expanded in self._aliases_to_check and (
            current.normalized != "*" or current.is_wildcard_not_operator
        ):
            resolved.append(self._resolve_alias_to_column(current))
        current = current.next_token

    if len(resolved) == 1:
        return resolved[0]
    return resolved
def _add_to_columns_subsection(self, keyword: str, column: Union[str, List[str]]):
    """
    Add columns to the section in which they appear in query.

    :param keyword: key of ``COLUMNS_SECTIONS`` used to pick the section
    :param column: a single column name, or a collection of column names
    """
    section_name = COLUMNS_SECTIONS[keyword]
    # start a fresh dictionary when none (or an empty one) is stored yet
    if not self._columns_dict:
        self._columns_dict = {}
    section_columns = self._columns_dict.setdefault(section_name, UniqueList())
    # a plain string is one column, anything else is a collection of columns
    store = (
        section_columns.append
        if isinstance(column, str)
        else section_columns.extend
    )
    store(column)
def subqueries_names(self) -> List[str]:
    """
    Returns sub-queries aliases list from a given query.

    e.g. SELECT COUNT(1) FROM
        (SELECT std.task_id FROM some_task_detail std WHERE std.STATUS = 1) a
        JOIN (SELECT st.task_id FROM some_task st WHERE task_type_id = 80) b
        ON a.task_id = b.task_id;
    will return ["a", "b"]

    The result is computed once and cached in ``self._subqueries_names``.
    """
    if self._subqueries_names is not None:
        return self._subqueries_names

    names = UniqueList()
    for token in self.tokens:
        previous = token.previous_token
        if previous.is_subquery_end and token.normalized != "AS":
            # alias without AS: (<subquery>) alias
            names.append(str(token))
        elif previous.normalized == "AS" and token.get_nth_previous(2).is_subquery_end:
            # alias with AS: (<subquery>) AS alias
            names.append(str(token))

    self._subqueries_names = names
    return self._subqueries_names
def columns_aliases_names(self) -> List[str]:
    """
    Extract names of the column aliases used in query.

    The list is built on first access and cached in
    ``self._columns_aliases_names``. For every accepted alias token, and
    for every later token carrying an already known alias,
    ``self._handle_column_alias_subquery_level_update`` is called.

    :return: unique alias names in order of first appearance
    """
    if self._columns_aliases_names is not None:
        return self._columns_aliases_names

    found = UniqueList()
    with_names = self.with_names
    subqueries_names = self.subqueries_names

    for token in self._not_parsed_tokens:
        if not token.is_potential_alias:
            continue

        if token.value not in found:
            # a new alias has to be valid and must not be the name
            # of a WITH statement or of a sub-query
            if (
                not token.is_a_valid_alias
                or token.value in with_names + subqueries_names
            ):
                continue
            found.append(token.value)

        self._handle_column_alias_subquery_level_update(token=token)

    self._columns_aliases_names = found
    return self._columns_aliases_names
def _add_to_columns_aliases_subsection(self, token: SQLToken):
    """
    Add alias to the section in which it appears in query.

    The section is picked from ``COLUMNS_SECTIONS`` by the token's last
    keyword. When that keyword is FROM or WITH and the nearest "(" token
    is flagged ``is_with_columns_start``, the SELECT section is used
    instead.

    :param token: token carrying the alias (its ``left_expanded`` is stored)
    """
    keyword = token.last_keyword_normalized
    alias = token.left_expanded

    inside_with_columns = (
        keyword in ["FROM", "WITH"]
        and token.find_nearest_token("(").is_with_columns_start
    )
    section = COLUMNS_SECTIONS["SELECT" if inside_with_columns else keyword]

    # start a fresh dictionary when none (or an empty one) is stored yet
    if not self._columns_aliases_dict:
        self._columns_aliases_dict = {}
    self._columns_aliases_dict.setdefault(section, UniqueList()).append(alias)
def columns_dict(self) -> Dict[str, List[str]]:
    """
    Returns dictionary of column names divided into section of the query
    in which given column is present.

    Sections consist of: select, where, order_by, group_by, join, insert
    and update.

    When ``self._columns_dict`` is still empty, ``self.columns`` is read
    first. Aliases from ``self.columns_aliases_dict`` are then resolved
    with ``self._resolve_column_alias`` and added to the same sections.
    """
    if not self._columns_dict:
        # value is not needed, the property is read for its side effect
        _ = self.columns

    aliases_by_section = self.columns_aliases_dict
    if aliases_by_section:
        for section, aliases in aliases_by_section.items():
            for alias in aliases:
                resolved = self._resolve_column_alias(alias)
                # an alias may resolve to one column or to a list of them
                targets = resolved if isinstance(resolved, list) else [resolved]
                for column in targets:
                    self._columns_dict.setdefault(section, UniqueList()).append(
                        column
                    )
    return self._columns_dict
def tables(self) -> List[str]:
    """
    Return the list of tables this query refers to.

    The result is computed once and cached in ``self._tables``. Names of
    WITH statements are removed from the final result.
    """
    if self._tables is not None:
        return self._tables

    found = UniqueList()
    with_names = self.with_names

    for token in self.tokens:
        is_candidate = (
            (token.is_name or token.is_keyword)
            and token.last_keyword_normalized in TABLE_ADJUSTMENT_KEYWORDS
            and token.previous_token.normalized not in ["AS", "WITH"]
            and token.normalized not in ["AS", "SELECT"]
        )
        if not is_candidate:
            continue

        # handle CREATE TABLE queries (#35): skip keywords that are within
        # the parenthesis-wrapped list of columns
        if (
            self.query_type == QueryType.CREATE
            and token.is_in_parenthesis
            and token.is_create_table_columns_definition
        ):
            continue

        # part of the qualified name, the last part will be handled later
        if token.next_token.is_dot:
            continue

        # we are in <columns> of INSERT INTO <TABLE> (<columns>)
        # or in columns of a with statement: with (<columns>) as ...
        if token.is_in_parenthesis and (
            token.find_nearest_token("(").previous_token.value in with_names
            or token.last_keyword_normalized == "INTO"
        ):
            continue

        previous = token.previous_token
        if previous.is_dot:
            # fully qualified name
            found.append(token.left_expanded)
            continue

        # not a list of tables (e.g. SELECT * FROM foo, bar), hence it can
        # be an alias without AS (SELECT * FROM foo bar)
        # or an alias of a subquery ((SELECT * FROM foo) bar)
        if (
            previous.normalized != token.last_keyword_normalized
            and not previous.is_punctuation
        ) or previous.is_right_parenthesis:
            continue

        found.append(str(token.value.strip("`")))

    self._tables = found - with_names
    return self._tables
def with_names(self) -> List[str]:
    """
    Returns with statements aliases list from a given query.

    E.g. WITH database1.tableFromWith AS (SELECT * FROM table3)
         SELECT "xxxxx" FROM database1.tableFromWith alias
         LEFT JOIN database2.table2 ON ("tt"."ttt"."fff" = "xx"."xxx")
    will return ["database1.tableFromWith"]

    The result is computed once and cached in ``self._with_names``.
    Parentheses wrapping the columns list of a with statement get their
    ``is_with_columns_*`` flags set and ``is_nested_function_*`` flags
    cleared along the way.
    """
    if self._with_names is not None:
        return self._with_names

    names = UniqueList()
    for token in self.tokens:
        if token.previous_token.normalized != "WITH":
            continue

        self._is_in_with_block = True
        while self._is_in_with_block and token.next_token:
            if token.next_token.normalized != "AS":
                # not at a "<name> AS" position yet, keep walking
                token = token.next_token
                continue

            # name is first
            if token.is_right_parenthesis:
                # inside columns of with statement
                # like: with (col1, col2) as (subquery)
                token.is_with_columns_end = True
                token.is_nested_function_end = False
                opening = token.find_nearest_token("(")
                opening.is_with_columns_start = True
                opening.is_nested_function_start = False
                names.append(opening.previous_token.left_expanded)
            else:
                names.append(token.left_expanded)

            # move to next with if exists, this with ends with
            # ) + , if many withs or ) + select if one
            # need to move to next as AS can be in
            # sub-queries inside with definition
            while token.next_token:
                closes_with_query = token.is_right_parenthesis and (
                    token.next_token.is_punctuation
                    or token.next_token.normalized in WITH_ENDING_KEYWORDS
                )
                if closes_with_query:
                    break
                token = token.next_token

            if token.next_token.normalized in WITH_ENDING_KEYWORDS:
                self._is_in_with_block = False

    self._with_names = names
    return self._with_names
def columns(self) -> List[str]:
    """
    Returns the list columns this query refers to

    The result is computed once and cached in ``self._columns``. Tokens
    saved directly in this method get their ``token_type`` set to
    ``TokenType.COLUMN``; the remaining ones are passed to
    ``self._handle_column_save``.
    """
    # serve the cached result when the columns were already extracted
    if self._columns is not None:
        return self._columns
    columns = UniqueList()

    for token in self._not_parsed_tokens:
        if token.is_name or token.is_keyword_column_name:
            if token.is_column_definition_inside_create_table(
                query_type=self.query_type
            ):
                # column defined in CREATE TABLE, stored under its raw value
                token.token_type = TokenType.COLUMN
                columns.append(token.value)
            elif (
                token.is_potential_column_name
                and token.is_not_an_alias_or_is_self_alias_outside_of_subquery(
                    columns_aliases_names=self.columns_aliases_names,
                    max_subquery_level=self._column_aliases_max_subquery_level,
                )
                and not token.is_sub_query_name_or_with_name_or_function_name(
                    sub_queries_names=self.subqueries_names,
                    with_names=self.with_names,
                )
                and not token.is_table_definition_suffix_in_non_select_create_table(
                    query_type=self.query_type
                )
                and not token.is_conversion_specifier
            ):
                # candidate passed all the exclusion checks above
                self._handle_column_save(token=token, columns=columns)
            elif token.is_column_name_inside_insert_clause:
                # backticks are stripped before the column is stored
                column = str(token.value).strip("`")
                self._add_to_columns_subsection(
                    keyword=token.last_keyword_normalized, column=column
                )
                token.token_type = TokenType.COLUMN
                columns.append(column)
        elif token.is_a_wildcard_in_select_statement:
            # wildcards are saved the same way as regular columns
            self._handle_column_save(token=token, columns=columns)

    self._columns = columns
    return self._columns
def columns(self) -> List[str]:
    """
    Returns the list columns this query refers to

    The result is computed once and cached in ``self._columns``. While
    scanning, columns are also registered per query section through
    ``self._add_to_columns_subsection`` and per table through
    ``self._add_to_columns_with_tables``; tokens that are column aliases
    are registered through ``self._add_to_columns_aliases_subsection``.
    """
    # serve the cached result when the columns were already extracted
    if self._columns is not None:
        return self._columns
    columns = UniqueList()
    tables_aliases = self.tables_aliases
    subqueries_names = self.subqueries_names

    for token in self.tokens:
        # handle CREATE TABLE queries (#35)
        if token.is_name and self.query_type == QueryType.CREATE:
            # previous token is either ( or , -> indicates the column name
            if (
                token.is_in_parenthesis
                and token.previous_token.is_punctuation
                and token.last_keyword_normalized == "TABLE"
            ):
                columns.append(token.value)
                continue

            # we're in CREATE TABLE query with the columns
            # ignore any annotations outside the parenthesis with the list of columns
            # e.g. ) CHARACTER SET utf8;
            # (tokens with a SELECT found by find_nearest_token are not skipped)
            if (
                not token.is_in_parenthesis
                and token.find_nearest_token("SELECT", value_attribute="normalized")
                is EmptyToken
            ):
                continue

        if (
            token.is_name and not token.next_token.is_dot
        ) or token.is_keyword_column_name:
            # analyze the name tokens, column names and where condition values
            if (
                token.last_keyword_normalized in KEYWORDS_BEFORE_COLUMNS
                and token.previous_token.normalized not in ["AS", ")"]
                and not token.is_alias_without_as
                and (
                    token.left_expanded not in self.columns_aliases_names
                    or token.token_is_alias_of_self_not_from_subquery(
                        aliases_levels=self._column_aliases_max_subquery_level
                    )
                )
            ):
                if (
                    not (
                        # aliases of sub-queries i.e.: SELECT from (...) <alias>
                        token.previous_token.is_right_parenthesis
                        and token.value in subqueries_names
                    )
                    # custom functions - they are followed by the parenthesis
                    # e.g. custom_func(...
                    and not token.next_token.is_left_parenthesis
                ):
                    # prefix with the table name, then resolve sub-query aliases
                    column = token.table_prefixed_column(tables_aliases)
                    column = self._resolve_subquery_alias_to_column(column)
                    self._add_to_columns_with_tables(token, column)
                    self._add_to_columns_subsection(
                        keyword=token.last_keyword_normalized, column=column
                    )
                    # NOTE(review): extend() implies the resolved value is a
                    # collection of names here - confirm against the helper
                    columns.extend(column)
            elif (
                token.last_keyword_normalized == "INTO"
                and token.previous_token.is_punctuation
            ):
                # INSERT INTO `foo` (col1, `col2`) VALUES (..)
                column = str(token.value).strip("`")
                self._add_to_columns_subsection(
                    keyword=token.last_keyword_normalized, column=column
                )
                columns.append(column)
            elif token.left_expanded in self.columns_aliases_names:
                # a column alias is only registered in the aliases sections
                self._add_to_columns_aliases_subsection(token=token)
        elif (
            token.is_wildcard
            and token.last_keyword_normalized == "SELECT"
            and not token.previous_token.is_left_parenthesis
        ):
            # handle * wildcard in select part, but ignore count(*)
            column = token.table_prefixed_column(tables_aliases)
            column = self._resolve_subquery_alias_to_column(column)
            self._add_to_columns_with_tables(token, column)
            self._add_to_columns_subsection(
                keyword=token.last_keyword_normalized, column=column
            )
            columns.extend(column)

    self._columns = columns
    return self._columns