def _eval(self, segment, dialect, **kwargs): if segment.is_type("statement"): queries = SelectCrawler.gather(segment, dialect) # Begin analysis at the final, outer query (key=None). self._visit_sources(queries.pop(None), dialect, queries) if queries: return LintResult(anchor=segment) return None
def _visit_sources( cls, select_info_list: List[SelectCrawler], dialect: Dialect, queries: Dict[str, List[SelectCrawler]], ): for select_info in select_info_list: for source in SelectCrawler.crawl(select_info.select_statement, queries, dialect): if isinstance(source, list): cls._visit_sources(source, dialect, queries)
def _eval(self, segment, dialect, **kwargs): """Outermost query should produce known number of columns.""" if segment.is_type("statement"): queries = SelectCrawler.gather(segment, dialect) # Begin analysis at the final, outer query (key=None). select_info = queries[None] try: return self._analyze_result_columns(select_info, dialect, queries) except RuleFailure: return LintResult(anchor=queries[None][0].select_info.select_statement) return None
def _analyze_result_columns( self, select_info_list: List[SelectCrawler], dialect: Dialect, queries: Dict[str, List[SelectCrawler]], ): """Given info on a list of SELECTs, determine whether to warn.""" # Recursively walk from the given query (select_info_list) to any # wildcard columns in the select targets. If every wildcard evdentually # resolves to a query without wildcards, all is well. Otherwise, warn. for select_info in select_info_list: self.logger.debug( f"Analyzing query: {select_info.select_statement.raw}") for wildcard in select_info.get_wildcard_info(): if wildcard.tables: for wildcard_table in wildcard.tables: self.logger.debug( f"Wildcard: {wildcard.segment.raw} has target {wildcard_table}" ) # Is it an alias? alias_info = select_info.find_alias(wildcard_table) if alias_info: # Found the alias matching the wildcard. Recurse, # analyzing the query associated with that alias. self._handle_alias(alias_info, dialect, queries) else: # Not an alias. Is it a CTE? if wildcard_table in queries: # Wildcard refers to a CTE. Analyze it. self._analyze_result_columns( queries.pop(wildcard_table), dialect, queries) else: # Not CTE, not table alias. Presumably an # external table. Warn. self.logger.debug( f"Query target {wildcard_table} is external. Generating warning." ) raise RuleFailure() else: # No table was specified with the wildcard. Assume we're # querying from a nested select in FROM. select_info_target = SelectCrawler.get( select_info.select_statement, queries, dialect) assert isinstance(select_info_target, list) self._analyze_result_columns( select_info_target, dialect, queries, )
def _handle_alias(self, alias_info, dialect, queries): select_info_target = SelectCrawler.get( alias_info.from_expression_element, queries, dialect) if isinstance(select_info_target, str): # It's an alias to an external table whose # number of columns could vary without our # knowledge. Thus, warn. self.logger.debug( f"Query target {select_info_target} is external. Generating warning." ) raise RuleFailure() else: # Handle nested SELECT. self._analyze_result_columns(select_info_target, dialect, queries)
def _analyze_result_columns(self, query: Query): """Given info on a list of SELECTs, determine whether to warn.""" # Recursively walk from the given query (select_info_list) to any # wildcard columns in the select targets. If every wildcard evdentually # resolves to a query without wildcards, all is well. Otherwise, warn. if not query.selectables: return for selectable in query.selectables: self.logger.debug(f"Analyzing query: {selectable.selectable.raw}") for wildcard in selectable.get_wildcard_info(): if wildcard.tables: for wildcard_table in wildcard.tables: self.logger.debug( f"Wildcard: {wildcard.segment.raw} has target " "{wildcard_table}") # Is it an alias? alias_info = selectable.find_alias(wildcard_table) if alias_info: # Found the alias matching the wildcard. Recurse, # analyzing the query associated with that alias. self._handle_alias(selectable, alias_info, query) else: # Not an alias. Is it a CTE? cte = query.lookup_cte(wildcard_table) if cte: # Wildcard refers to a CTE. Analyze it. self._analyze_result_columns(cte) else: # Not CTE, not table alias. Presumably an # external table. Warn. self.logger.debug( f"Query target {wildcard_table} is external. " "Generating warning.") raise RuleFailure(selectable.selectable) else: # No table was specified with the wildcard. Assume we're # querying from a nested select in FROM. query_list = SelectCrawler.get( query, query.selectables[0].selectable) for o in query_list: if isinstance(o, Query): self._analyze_result_columns(o) return self.logger.debug( f'Query target "{query.selectables[0].selectable.raw}" has no ' "targets. Generating warning.") raise RuleFailure(query.selectables[0].selectable)
def _eval(self, context: RuleContext) -> Optional[LintResult]: """Outermost query should produce known number of columns.""" start_types = [ "select_statement", "set_expression", "with_compound_statement" ] if context.segment.is_type( *start_types) and not context.functional.parent_stack.any( sp.is_type(*start_types)): crawler = SelectCrawler(context.segment, context.dialect) # Begin analysis at the outer query. if crawler.query_tree: try: return self._analyze_result_columns(crawler.query_tree) except RuleFailure as e: return LintResult(anchor=e.anchor) return None
def _eval(self, context: RuleContext) -> EvalResultType: if context.segment.is_type("statement"): crawler = SelectCrawler(context.segment, context.dialect) if crawler.query_tree: # Begin analysis at the final, outer query (key=None). self._visit_sources(crawler.query_tree) if crawler.query_tree.ctes: return [ LintResult( anchor=query.cte_name_segment, description=f"Query defines CTE " f'"{query.cte_name_segment.raw}" ' f"but does not use it.", ) for query in crawler.query_tree.ctes.values() if query.cte_name_segment ] return None
def _eval(self, context: RuleContext) -> EvalResultType: violations: List[LintResult] = [] if context.segment.is_type("select_statement"): # Exit early if the SELECT does not define any aliases. select_info = get_select_statement_info(context.segment, context.dialect) if not select_info or not select_info.table_aliases: return None # Analyze the SELECT. crawler = SelectCrawler(context.segment, context.dialect, query_class=L025Query) query: L025Query = cast(L025Query, crawler.query_tree) self._analyze_table_aliases(query, context.dialect) alias: AliasInfo for alias in query.aliases: if alias.aliased and alias.ref_str not in query.tbl_refs: # Unused alias. Report and fix. violations.append(self._report_unused_alias(alias)) return violations or None
def _eval(self, context: RuleContext) -> EvalResultType: # Config type hints self.force_enable: bool if ( context.dialect.name in ["bigquery", "hive", "redshift", "spark3"] and not self.force_enable ): return LintResult() violations: List[LintResult] = [] start_types = ["select_statement", "delete_statement", "update_statement"] if context.segment.is_type( *start_types ) and not context.functional.parent_stack.any(sp.is_type(*start_types)): dml_target_table: Optional[Tuple[str, ...]] = None if not context.segment.is_type("select_statement"): # Extract first table reference. This will be the target # table in a DELETE or UPDATE statement. table_reference = next( context.segment.recursive_crawl("table_reference"), None ) if table_reference: dml_target_table = self._table_ref_as_tuple(table_reference) # Verify table references in any SELECT statements found in or # below context.segment in the parser tree. crawler = SelectCrawler( context.segment, context.dialect, query_class=L026Query ) query: L026Query = cast(L026Query, crawler.query_tree) if query: self._analyze_table_references( query, dml_target_table, context.dialect, violations ) return violations or None
def _visit_sources( cls, select_info_list: List[SelectCrawler], dialect: Dialect, queries: Dict[str, List[SelectCrawler]], ): for select_info in select_info_list: # Process nested SELECTs. for source in SelectCrawler.crawl(select_info.select_statement, queries, dialect): if isinstance(source, list): cls._visit_sources(source, dialect, queries) # Process the query's sources. for alias_info in select_info.select_info.table_aliases: # Does the query read from a CTE? If so, visit the CTE. for target_segment in alias_info.from_expression_element.get_children( "table_expression", "join_clause"): target = target_segment.raw if target in queries: select_info_target = queries.pop(target) if isinstance(select_info_target, list): cls._visit_sources(select_info_target, dialect, queries)