Esempio n. 1
0
    def _eval(self, segment, dialect, **kwargs):
        if segment.is_type("statement"):
            queries = SelectCrawler.gather(segment, dialect)

            # Begin analysis at the final, outer query (key=None).
            self._visit_sources(queries.pop(None), dialect, queries)
            if queries:
                return LintResult(anchor=segment)
        return None
Esempio n. 2
0
 def _visit_sources(
     cls,
     select_info_list: List[SelectCrawler],
     dialect: Dialect,
     queries: Dict[str, List[SelectCrawler]],
 ):
     for select_info in select_info_list:
         for source in SelectCrawler.crawl(select_info.select_statement,
                                           queries, dialect):
             if isinstance(source, list):
                 cls._visit_sources(source, dialect, queries)
Esempio n. 3
0
    def _eval(self, segment, dialect, **kwargs):
        """Outermost query should produce known number of columns."""
        if segment.is_type("statement"):
            queries = SelectCrawler.gather(segment, dialect)

            # Begin analysis at the final, outer query (key=None).
            select_info = queries[None]
            try:
                return self._analyze_result_columns(select_info, dialect, queries)
            except RuleFailure:
                return LintResult(anchor=queries[None][0].select_info.select_statement)
        return None
Esempio n. 4
0
 def _analyze_result_columns(
     self,
     select_info_list: List[SelectCrawler],
     dialect: Dialect,
     queries: Dict[str, List[SelectCrawler]],
 ):
     """Given info on a list of SELECTs, determine whether to warn."""
     # Recursively walk from the given query (select_info_list) to any
     # wildcard columns in the select targets. If every wildcard evdentually
     # resolves to a query without wildcards, all is well. Otherwise, warn.
     for select_info in select_info_list:
         self.logger.debug(
             f"Analyzing query: {select_info.select_statement.raw}")
         for wildcard in select_info.get_wildcard_info():
             if wildcard.tables:
                 for wildcard_table in wildcard.tables:
                     self.logger.debug(
                         f"Wildcard: {wildcard.segment.raw} has target {wildcard_table}"
                     )
                     # Is it an alias?
                     alias_info = select_info.find_alias(wildcard_table)
                     if alias_info:
                         # Found the alias matching the wildcard. Recurse,
                         # analyzing the query associated with that alias.
                         self._handle_alias(alias_info, dialect, queries)
                     else:
                         # Not an alias. Is it a CTE?
                         if wildcard_table in queries:
                             # Wildcard refers to a CTE. Analyze it.
                             self._analyze_result_columns(
                                 queries.pop(wildcard_table), dialect,
                                 queries)
                         else:
                             # Not CTE, not table alias. Presumably an
                             # external table. Warn.
                             self.logger.debug(
                                 f"Query target {wildcard_table} is external. Generating warning."
                             )
                             raise RuleFailure()
             else:
                 # No table was specified with the wildcard. Assume we're
                 # querying from a nested select in FROM.
                 select_info_target = SelectCrawler.get(
                     select_info.select_statement, queries, dialect)
                 assert isinstance(select_info_target, list)
                 self._analyze_result_columns(
                     select_info_target,
                     dialect,
                     queries,
                 )
Esempio n. 5
0
 def _handle_alias(self, alias_info, dialect, queries):
     select_info_target = SelectCrawler.get(
         alias_info.from_expression_element, queries, dialect)
     if isinstance(select_info_target, str):
         # It's an alias to an external table whose
         # number of columns could vary without our
         # knowledge. Thus, warn.
         self.logger.debug(
             f"Query target {select_info_target} is external. Generating warning."
         )
         raise RuleFailure()
     else:
         # Handle nested SELECT.
         self._analyze_result_columns(select_info_target, dialect, queries)
Esempio n. 6
0
 def _analyze_result_columns(self, query: Query):
     """Given info on a list of SELECTs, determine whether to warn."""
     # Recursively walk from the given query (select_info_list) to any
     # wildcard columns in the select targets. If every wildcard evdentually
     # resolves to a query without wildcards, all is well. Otherwise, warn.
     if not query.selectables:
         return
     for selectable in query.selectables:
         self.logger.debug(f"Analyzing query: {selectable.selectable.raw}")
         for wildcard in selectable.get_wildcard_info():
             if wildcard.tables:
                 for wildcard_table in wildcard.tables:
                     self.logger.debug(
                         f"Wildcard: {wildcard.segment.raw} has target "
                         "{wildcard_table}")
                     # Is it an alias?
                     alias_info = selectable.find_alias(wildcard_table)
                     if alias_info:
                         # Found the alias matching the wildcard. Recurse,
                         # analyzing the query associated with that alias.
                         self._handle_alias(selectable, alias_info, query)
                     else:
                         # Not an alias. Is it a CTE?
                         cte = query.lookup_cte(wildcard_table)
                         if cte:
                             # Wildcard refers to a CTE. Analyze it.
                             self._analyze_result_columns(cte)
                         else:
                             # Not CTE, not table alias. Presumably an
                             # external table. Warn.
                             self.logger.debug(
                                 f"Query target {wildcard_table} is external. "
                                 "Generating warning.")
                             raise RuleFailure(selectable.selectable)
             else:
                 # No table was specified with the wildcard. Assume we're
                 # querying from a nested select in FROM.
                 query_list = SelectCrawler.get(
                     query, query.selectables[0].selectable)
                 for o in query_list:
                     if isinstance(o, Query):
                         self._analyze_result_columns(o)
                         return
                 self.logger.debug(
                     f'Query target "{query.selectables[0].selectable.raw}" has no '
                     "targets. Generating warning.")
                 raise RuleFailure(query.selectables[0].selectable)
Esempio n. 7
0
    def _eval(self, context: RuleContext) -> Optional[LintResult]:
        """Outermost query should produce known number of columns."""
        start_types = [
            "select_statement", "set_expression", "with_compound_statement"
        ]
        if context.segment.is_type(
                *start_types) and not context.functional.parent_stack.any(
                    sp.is_type(*start_types)):
            crawler = SelectCrawler(context.segment, context.dialect)

            # Begin analysis at the outer query.
            if crawler.query_tree:
                try:
                    return self._analyze_result_columns(crawler.query_tree)
                except RuleFailure as e:
                    return LintResult(anchor=e.anchor)
        return None
Esempio n. 8
0
 def _eval(self, context: RuleContext) -> EvalResultType:
     if context.segment.is_type("statement"):
         crawler = SelectCrawler(context.segment, context.dialect)
         if crawler.query_tree:
             # Begin analysis at the final, outer query (key=None).
             self._visit_sources(crawler.query_tree)
             if crawler.query_tree.ctes:
                 return [
                     LintResult(
                         anchor=query.cte_name_segment,
                         description=f"Query defines CTE "
                         f'"{query.cte_name_segment.raw}" '
                         f"but does not use it.",
                     ) for query in crawler.query_tree.ctes.values()
                     if query.cte_name_segment
                 ]
     return None
Esempio n. 9
0
    def _eval(self, context: RuleContext) -> EvalResultType:
        violations: List[LintResult] = []
        if context.segment.is_type("select_statement"):
            # Exit early if the SELECT does not define any aliases.
            select_info = get_select_statement_info(context.segment,
                                                    context.dialect)
            if not select_info or not select_info.table_aliases:
                return None

            # Analyze the SELECT.
            crawler = SelectCrawler(context.segment,
                                    context.dialect,
                                    query_class=L025Query)
            query: L025Query = cast(L025Query, crawler.query_tree)
            self._analyze_table_aliases(query, context.dialect)

            alias: AliasInfo
            for alias in query.aliases:
                if alias.aliased and alias.ref_str not in query.tbl_refs:
                    # Unused alias. Report and fix.
                    violations.append(self._report_unused_alias(alias))
        return violations or None
Esempio n. 10
0
    def _eval(self, context: RuleContext) -> EvalResultType:
        # Config type hints
        self.force_enable: bool

        if (
            context.dialect.name in ["bigquery", "hive", "redshift", "spark3"]
            and not self.force_enable
        ):
            return LintResult()

        violations: List[LintResult] = []
        start_types = ["select_statement", "delete_statement", "update_statement"]
        if context.segment.is_type(
            *start_types
        ) and not context.functional.parent_stack.any(sp.is_type(*start_types)):
            dml_target_table: Optional[Tuple[str, ...]] = None
            if not context.segment.is_type("select_statement"):
                # Extract first table reference. This will be the target
                # table in a DELETE or UPDATE statement.
                table_reference = next(
                    context.segment.recursive_crawl("table_reference"), None
                )
                if table_reference:
                    dml_target_table = self._table_ref_as_tuple(table_reference)

            # Verify table references in any SELECT statements found in or
            # below context.segment in the parser tree.
            crawler = SelectCrawler(
                context.segment, context.dialect, query_class=L026Query
            )
            query: L026Query = cast(L026Query, crawler.query_tree)
            if query:
                self._analyze_table_references(
                    query, dml_target_table, context.dialect, violations
                )
        return violations or None
Esempio n. 11
0
    def _visit_sources(
        cls,
        select_info_list: List[SelectCrawler],
        dialect: Dialect,
        queries: Dict[str, List[SelectCrawler]],
    ):
        for select_info in select_info_list:
            # Process nested SELECTs.
            for source in SelectCrawler.crawl(select_info.select_statement,
                                              queries, dialect):
                if isinstance(source, list):
                    cls._visit_sources(source, dialect, queries)

            # Process the query's sources.
            for alias_info in select_info.select_info.table_aliases:
                # Does the query read from a CTE? If so, visit the CTE.
                for target_segment in alias_info.from_expression_element.get_children(
                        "table_expression", "join_clause"):
                    target = target_segment.raw
                    if target in queries:
                        select_info_target = queries.pop(target)
                        if isinstance(select_info_target, list):
                            cls._visit_sources(select_info_target, dialect,
                                               queries)