def _eval(self, context: RuleContext) -> Optional[LintResult]:
    """Identify aliases in from clause and join conditions.

    Find base table, table expressions in join, and other expressions in
    select clause and decide if it's needed to report them.

    Args:
        context: Rule context whose ``segment`` must be a
            ``select_statement`` (asserted below).

    Returns:
        An empty ``LintResult`` when the dialect is listed in
        ``self._dialects_disabled_by_default`` and ``self.force_enable`` is
        falsy; ``None`` when no base table reference is found or when
        ``self._lint_aliases_in_join`` returns a falsy value; otherwise
        whatever ``self._lint_aliases_in_join`` returns.
    """
    # Config type hints
    self.force_enable: bool

    # Issue 2810: BigQuery has some tricky expectations (apparently not
    # documented, but subject to change, e.g.:
    # https://www.reddit.com/r/bigquery/comments/fgk31y/new_in_bigquery_no_more_backticks_around_table/)
    # about whether backticks are required (and whether the query is valid
    # or not, even with them), depending on whether the GCP project name is
    # present, or just the dataset name. Since SQLFluff doesn't have access
    # to BigQuery when it is looking at the query, it would be complex for
    # this rule to do the right thing. For now, the rule simply disables
    # itself.
    if (
        context.dialect.name in self._dialects_disabled_by_default
        and not self.force_enable
    ):
        return LintResult()

    assert context.segment.is_type("select_statement")

    children = FunctionalContext(context).segment.children()
    from_clause_segment = children.select(sp.is_type("from_clause")).first()
    # Walk from_clause -> from_expression -> from_expression_element ->
    # table_expression -> object_reference, taking the first match at each
    # level. Any missing level yields an empty collection.
    base_table = (
        from_clause_segment.children(sp.is_type("from_expression"))
        .first()
        .children(sp.is_type("from_expression_element"))
        .first()
        .children(sp.is_type("table_expression"))
        .first()
        .children(sp.is_type("object_reference"))
        .first()
    )
    if not base_table:
        return None

    # Buffers for all table expressions and column references found in the
    # FROM clause and in the clauses selected after it.
    from_expression_elements = []
    column_reference_segments = []

    after_from_clause = children.select(start_seg=from_clause_segment[0])
    for clause in from_clause_segment + after_from_clause:
        from_expression_elements.extend(
            clause.recursive_crawl("from_expression_element")
        )
        column_reference_segments.extend(
            clause.recursive_crawl("column_reference")
        )

    # ``base_table`` is known to be non-empty here because of the guard
    # above, so it can be indexed directly.
    return (
        self._lint_aliases_in_join(
            base_table[0],
            from_expression_elements,
            column_reference_segments,
            context.segment,
        )
        or None
    )
def _get_subsequent_whitespace(
    self,
    context,
) -> Tuple[Optional[BaseSegment], Optional[BaseSegment]]:
    """Search forwards through the raw segments for subsequent whitespace.

    Args:
        context: Rule context; ``context.segment`` is the segment to start
            searching after.

    Returns:
        A tuple ``(whitespace, next_segment)`` where ``whitespace`` is the
        last whitespace segment in the run of meta/whitespace segments that
        follows ``context.segment`` (or ``None`` if that run contains no
        whitespace), and ``next_segment`` is the raw segment immediately
        after that run (or ``None`` if the run reaches the end of the raw
        segments).
    """
    # Get all raw segments. "raw_segments" is appropriate as the
    # only segments we can care about are comma, whitespace,
    # newline, and comment, which are all raw. Using the
    # raw_segments allows us to account for possible unexpected
    # parse tree structures resulting from other rule fixes.
    raw_segments = FunctionalContext(context).raw_segments

    # Start after the current segment. Get all the following
    # meta/whitespace segments.
    following_segments = raw_segments.select(
        loop_while=sp.or_(sp.is_meta(), sp.is_type("whitespace")),
        start_seg=context.segment,
    )
    subsequent_whitespace = following_segments.last(sp.is_type("whitespace"))

    # Unwrap once so that both return paths hand back a segment (or None)
    # rather than the collection wrapper.
    whitespace_segment = (
        subsequent_whitespace[0] if subsequent_whitespace else None
    )

    # Position of the first raw segment after the meta/whitespace run.
    next_idx = (
        raw_segments.index(context.segment) + len(following_segments) + 1
    )
    try:
        next_segment = raw_segments[next_idx]
    except IndexError:
        # If we find ourselves here it's all whitespace (or nothing) to the
        # end of the file. This can only happen in bigquery (see
        # test_pass_bigquery_trailing_comma).
        return whitespace_segment, None
    return whitespace_segment, next_segment
def _eval(self, context: RuleContext) -> LintResult:
    """Nested CASE statement in ELSE clause could be flattened.

    When the outer CASE has an ELSE clause whose single expression consists
    of exactly one nested CASE, build fixes that delete everything from the
    last outer WHEN up to the outer ELSE, re-create the nested WHEN/ELSE
    clauses after the last outer WHEN, and delete the outer ELSE clause.
    Otherwise an empty ``LintResult`` is returned.
    """
    outer_case = FunctionalContext(context).segment
    assert outer_case.select(sp.is_type("case_expression"))

    outer_children = outer_case.children()
    case1_last_when = outer_children.last(sp.is_type("when_clause")).get()
    outer_else = outer_children.select(sp.is_type("else_clause"))
    else_expressions = outer_else.children(sp.is_type("expression"))
    else_expression_parts = else_expressions.children()
    nested_case = else_expression_parts.select(sp.is_type("case_expression"))

    # The len() checks are for safety, to ensure the CASE inside the ELSE
    # is not part of a larger expression. In that case, it's not safe to
    # simplify in this way -- we'd be deleting other code.
    can_flatten = (
        case1_last_when
        and len(else_expressions) <= 1
        and len(else_expression_parts) <= 1
        and nested_case
    )
    if not can_flatten:
        return LintResult()

    # We can assert that this exists because of the previous check.
    assert case1_last_when
    # We can also assert that we'll have an else clause because otherwise
    # the nested-case check above would fail.
    outer_else_seg = outer_else.get()
    assert outer_else_seg

    # Delete fixes for the segments selected between the last outer "WHEN"
    # clause and the outer "ELSE" clause.
    between_when_and_else = outer_children.select(
        start_seg=case1_last_when, stop_seg=outer_else_seg
    )
    fixes = [LintFix.delete(seg) for seg in between_when_and_else]

    # Determine the indentation to use when we move the nested "WHEN" and
    # "ELSE" clauses, based on the whitespace found before the last outer
    # "WHEN". If no whitespace segments are found, use the default indent.
    leading_whitespace = (
        outer_children.select(stop_seg=case1_last_when)
        .reversed()
        .select(sp.is_type("whitespace"))
    )
    if leading_whitespace:
        indent_str = "".join(seg.raw for seg in leading_whitespace)
    else:
        indent_str = self.indent

    # Move the nested "when" and "else" clauses after the last outer
    # "when", each on its own indented line.
    relocated = []
    for clause in nested_case.children(
        sp.is_type("when_clause", "else_clause")
    ):
        relocated.extend(
            [NewlineSegment(), WhitespaceSegment(indent_str), clause]
        )
    fixes.append(
        LintFix.create_after(case1_last_when, relocated, source=relocated)
    )

    # Delete the outer "else" clause.
    fixes.append(LintFix.delete(outer_else_seg))
    return LintResult(nested_case[0], fixes=fixes)
def _eval(self, context: RuleContext) -> Optional[LintResult]:
    """Looking for DISTINCT before a bracket.

    Look for a select clause modifier whose first select element contains a
    bracketed expression. Returns a ``LintResult`` anchored on the modifier
    when at least one fix applies, otherwise ``None``.
    """
    # We trigger on `select_clause` and look for `select_clause_modifier`.
    assert context.segment.is_type("select_clause")

    clause_children = FunctionalContext(context).segment.children()
    modifier = clause_children.select(sp.is_type("select_clause_modifier"))
    first_element = clause_children.select(
        sp.is_type("select_clause_element")
    ).first()
    if not (modifier and first_element):
        return None

    # Is the first element only an expression with only brackets? Fall back
    # to the element itself when it has no `expression` child.
    expression = (
        first_element.children(sp.is_type("expression")).first()
        or first_element
    )
    bracketed = expression.children(sp.is_type("bracketed")).first()
    if not bracketed:
        return None

    fixes = []

    # If there's nothing else in the expression, remove the brackets.
    if len(expression[0].segments) == 1:
        # Strip any meta segments, then drop the first and last remaining
        # segments (the brackets themselves).
        inner_segments = self.filter_meta(bracketed[0].segments)[1:-1]
        fixes.append(LintFix.replace(bracketed[0], inner_segments))

    # If no whitespace between DISTINCT and expression, add it.
    separating_whitespace = clause_children.select(
        sp.is_whitespace(),
        start_seg=modifier[0],
        stop_seg=first_element[0],
    )
    if not separating_whitespace:
        fixes.append(
            LintFix.create_before(first_element[0], [WhitespaceSegment()])
        )

    # If no fixes, no problem.
    if not fixes:
        return None
    return LintResult(anchor=modifier[0], fixes=fixes)
def _get_indexes(context: RuleContext):
    """Collect positions of key segments within a select clause.

    Looks up, among the children of ``context.segment``, the index of the
    first select target, the SELECT keyword, the first newline, the first
    whitespace after that newline and any comment sitting between SELECT
    and the first newline. Indexes default to ``-1`` when the corresponding
    segment is absent. Also locates the following FROM clause among the
    post-siblings and the whitespace segments preceding it.

    Returns:
        A ``SelectTargetsInfo`` built from the values above.
    """
    functional = FunctionalContext(context)
    children = functional.segment.children()

    select_targets = children.select(sp.is_type("select_clause_element"))
    first_select_target_idx = children.find(select_targets.get())

    selects = children.select(sp.is_keyword("select"))
    if selects:
        select_idx = children.find(selects.get())
    else:
        select_idx = -1

    newlines = children.select(sp.is_type("newline"))
    if newlines:
        first_new_line_idx = children.find(newlines.get())
    else:
        first_new_line_idx = -1

    # A comment on the SELECT line: only comments, whitespace and meta
    # segments may sit between the keyword and the first newline.
    comment_after_select_idx = -1
    if newlines:
        comment_after_select = children.select(
            sp.is_type("comment"),
            start_seg=selects.get(),
            stop_seg=newlines.get(),
            loop_while=sp.or_(
                sp.is_type("comment"), sp.is_type("whitespace"), sp.is_meta()
            ),
        )
        if comment_after_select:
            comment_after_select_idx = children.find(
                comment_after_select.get()
            )

    first_whitespace_idx = -1
    if first_new_line_idx != -1:
        # TRICKY: Ignore whitespace prior to the first newline, e.g. if
        # the line with "SELECT" (before any select targets) has trailing
        # whitespace.
        whitespace_after_first_line = children.select(
            sp.is_type("whitespace"), start_seg=children[first_new_line_idx]
        )
        first_whitespace_idx = children.find(
            whitespace_after_first_line.get()
        )

    siblings_post = functional.siblings_post
    from_segment = (
        siblings_post.first(sp.is_type("from_clause")).first().get()
    )
    pre_from_whitespace = siblings_post.select(
        sp.is_type("whitespace"), stop_seg=from_segment
    )

    return SelectTargetsInfo(
        select_idx,
        first_new_line_idx,
        first_select_target_idx,
        first_whitespace_idx,
        comment_after_select_idx,
        select_targets,
        from_segment,
        list(pre_from_whitespace),
    )
def _eval(self, context: RuleContext) -> Optional[LintResult]:
    """Unnecessary CASE statement.

    Flags single-WHEN CASE expressions that can be reduced, using two
    methods:

    1. THEN and ELSE are the two opposite boolean literals, so the CASE
       reduces to a (possibly negated) COALESCE of the condition with
       ``false``.
    2. The condition is a lone ``<column> IS [NOT] NULL`` test and the
       column also appears as the THEN or ELSE result, so the CASE reduces
       to a COALESCE or to the bare column.

    Args:
        context: Rule context; ``context.segment`` is inspected and must
            start with a ``CASE`` segment for the rule to apply.

    Returns:
        A ``LintResult`` with fixes when a reduction applies, else ``None``.
    """
    # Look for CASE expression.
    if context.segment.segments[0].raw_upper != "CASE":
        return None

    # Find all 'WHEN' clauses and the optional 'ELSE' clause.
    children = FunctionalContext(context).segment.children()
    when_clauses = children.select(sp.is_type("when_clause"))
    else_clauses = children.select(sp.is_type("else_clause"))

    # Can't fix if multiple WHEN clauses.
    if len(when_clauses) > 1:
        return None

    # Find condition and THEN expressions (first and second expression
    # children of the WHEN clause), plus the ELSE expression if present.
    when_expressions = when_clauses.children(sp.is_type("expression"))
    condition_expression = when_expressions[0]
    then_expression = when_expressions[1]
    else_expression: Optional[BaseSegment] = None
    if else_clauses:
        else_expression = else_clauses.children(sp.is_type("expression"))[0]

    # Method 1: Check if THEN/ELSE expressions are both Boolean and can
    # therefore be reduced.
    upper_bools = ["TRUE", "FALSE"]
    if (
        else_expression is not None
        and then_expression.raw_upper in upper_bools
        and else_expression.raw_upper in upper_bools
        and then_expression.raw_upper != else_expression.raw_upper
    ):
        coalesce_arg_1: BaseSegment = condition_expression
        coalesce_arg_2: BaseSegment = KeywordSegment("false")
        # THEN FALSE / ELSE TRUE is the negation of the condition.
        preceding_not = then_expression.raw_upper == "FALSE"
        return LintResult(
            anchor=condition_expression,
            fixes=self._coalesce_fix_list(
                context,
                coalesce_arg_1,
                coalesce_arg_2,
                preceding_not,
            ),
            description="Unnecessary CASE statement. "
            "Use COALESCE function instead.",
        )

    # Method 2: Check if the condition expression is comparing a column
    # reference to NULL and whether that column reference is also in either
    # the THEN/ELSE expression. We can only apply this method when there is
    # only one condition in the condition expression.
    condition_expression_segments_raw = {
        segment.raw_upper for segment in condition_expression.segments
    }
    if not {"IS", "NULL"}.issubset(
        condition_expression_segments_raw
    ) or condition_expression_segments_raw.intersection({"AND", "OR"}):
        return None

    # Check if the comparison is to NULL or NOT NULL.
    is_not_prefix = "NOT" in condition_expression_segments_raw

    # Locate column reference in condition expression.
    column_reference_segment = (
        Segments(condition_expression)
        .children(sp.is_type("column_reference"))
        .get()
    )
    # Return None if none found (this condition does not apply to functions)
    if not column_reference_segment:
        return None

    def _column_only_result() -> LintResult:
        # Build the "just use the column" result; shared by both branches
        # below and only evaluated when actually returned.
        return LintResult(
            anchor=condition_expression,
            fixes=self._column_only_fix_list(
                context,
                column_reference_segment,
            ),
            description="Unnecessary CASE statement. "
            f"Just use column '{column_reference_segment.raw}'.",
        )

    column_raw_upper = column_reference_segment.raw_upper

    if else_expression is None:
        # No ELSE clause is equivalent to ELSE NULL, so only
        # `CASE WHEN col IS NOT NULL THEN col END` equals `col`.
        # `CASE WHEN col IS NULL THEN col END` always yields NULL, so
        # replacing it with the column would change the query's result.
        if is_not_prefix and column_raw_upper == then_expression.raw_upper:
            return _column_only_result()
        return None

    # Check if we can reduce the CASE expression to a single coalesce
    # function.
    if not is_not_prefix and column_raw_upper == else_expression.raw_upper:
        coalesce_arg_1 = else_expression
        coalesce_arg_2 = then_expression
    elif is_not_prefix and column_raw_upper == then_expression.raw_upper:
        coalesce_arg_1 = then_expression
        coalesce_arg_2 = else_expression
    else:
        return None

    if coalesce_arg_2.raw_upper == "NULL":
        # Can just specify the column on its own
        # rather than using a COALESCE function.
        return _column_only_result()

    return LintResult(
        anchor=condition_expression,
        fixes=self._coalesce_fix_list(
            context,
            coalesce_arg_1,
            coalesce_arg_2,
        ),
        description="Unnecessary CASE statement. "
        "Use COALESCE function instead.",
    )
def _eval(self, context: RuleContext) -> Optional[LintResult]:
    """Select clause modifiers must appear on same line as SELECT.

    When a newline separates the first child of the select clause (the
    select keyword) from the select clause modifier, build fixes that
    re-create the modifier directly after the keyword and delete the
    original modifier together with the surrounding newlines/whitespace.
    Returns ``None`` when there is no modifier or no such newline.
    """
    # We only care about select_clause.
    assert context.segment.is_type("select_clause")

    # Get children of select_clause and the corresponding select keyword.
    clause_children = FunctionalContext(context).segment.children()
    select_keyword = clause_children[0]

    # Rule doesn't apply if there's no select clause modifier.
    modifier_matches = clause_children.first(
        sp.is_type("select_clause_modifier")
    )
    if not modifier_matches:
        return None
    modifier = modifier_matches[0]

    # Newlines between the select keyword and the select clause modifier.
    # Rule doesn't apply if the modifier is already on the same line as
    # the select keyword.
    newlines_before = clause_children.select(
        select_if=sp.is_type("newline"),
        loop_while=sp.or_(sp.is_whitespace(), sp.is_meta()),
        start_seg=select_keyword,
    )
    if not newlines_before:
        return None

    # Whitespace before the select clause modifier, which may need to be
    # removed by the fix.
    whitespace_before = clause_children.select(
        select_if=sp.is_type("whitespace"),
        loop_while=sp.or_(sp.is_whitespace(), sp.is_meta()),
        start_seg=select_keyword,
    )

    # Is the following select clause element on the same line as the
    # select clause modifier?
    newlines_after = clause_children.select(
        select_if=sp.is_type("newline"),
        loop_while=sp.or_(sp.is_whitespace(), sp.is_meta()),
        start_seg=modifier,
    )

    # Whitespace (on the same line) after the select clause modifier.
    whitespace_after = clause_children.select(
        select_if=sp.is_whitespace(),
        loop_while=sp.or_(sp.is_type("whitespace"), sp.is_meta()),
        start_seg=modifier,
    )

    # Segments to insert directly after the select keyword.
    inserted = [WhitespaceSegment(), modifier]
    if newlines_after:
        # There is a newline after the modifier: delete both the newlines
        # and the whitespace between the select keyword and the modifier.
        stale_before = newlines_before + whitespace_before
    else:
        # The first select clause element is on the same line as the
        # modifier: also insert a newline, and delete only the newlines
        # between the select keyword and the modifier.
        inserted.append(NewlineSegment())
        stale_before = newlines_before

    fixes = [
        # Move select clause modifier after select keyword.
        LintFix.create_after(
            anchor_segment=select_keyword,
            edit_segments=inserted,
        ),
        # Delete original newlines (and possibly whitespace) before it.
        *(LintFix.delete(seg) for seg in stale_before),
        # Delete the original select clause modifier.
        LintFix.delete(modifier),
        # Delete whitespace trailing the original modifier, if any.
        *(LintFix.delete(seg) for seg in whitespace_after),
    ]

    return LintResult(
        anchor=context.segment,
        fixes=fixes,
    )