def _eval(self, context: RuleContext) -> Optional[LintResult]:
    """Flag DISTINCT in a select statement that also has a GROUP BY clause.

    Returns:
        A ``LintResult`` anchored on the first ``distinct`` keyword found in
        a select clause modifier, or ``None`` when the statement has no
        GROUP BY clause or no such keyword.
    """
    select_stmt = FunctionalContext(context).segment
    # The crawler is expected to hand us select statements only.
    assert select_stmt.all(sp.is_type("select_statement"))

    # Without a GROUP BY clause there is nothing to report.
    if not select_stmt.children(sp.is_type("groupby_clause")):
        return None

    # Walk select_clause -> select_clause_modifier -> keyword and keep only
    # the keywords named "distinct".
    select_clauses = select_stmt.children(sp.is_type("select_clause"))
    modifiers = select_clauses.children(sp.is_type("select_clause_modifier"))
    distinct_keywords = modifiers.children(sp.is_type("keyword")).select(
        sp.is_name("distinct")
    )
    if not distinct_keywords:
        return None
    return LintResult(anchor=distinct_keywords[0])
def _eval(self, context: RuleContext) -> Optional[LintResult]:
    """Enforce the configured trailing-comma policy on a select clause.

    The policy is read from ``self.select_clause_trailing_comma``:

    * ``"forbid"``: a trailing comma is reported and a delete fix is offered.
    * ``"require"``: a missing trailing comma is reported and a fix that
      appends one after the last code segment is offered.

    Any other policy value, or a clause that already complies, yields
    ``None``.
    """
    # Config type hints
    self.select_clause_trailing_comma: str

    # The last code child of the clause is the only segment that matters.
    final_code: BaseSegment = (
        FunctionalContext(context).segment.children().last(sp.is_code())[0]
    )
    policy = self.select_clause_trailing_comma

    if policy == "forbid" and final_code.is_type("comma"):
        return LintResult(
            anchor=final_code,
            fixes=[LintFix.delete(final_code)],
            description="Trailing comma in select statement forbidden",
        )

    if policy == "require" and not final_code.is_type("comma"):
        appended_comma = SymbolSegment(",", type="comma")
        return LintResult(
            anchor=final_code,
            # Replace the last segment with itself followed by the comma.
            fixes=[LintFix.replace(final_code, [final_code, appended_comma])],
            description="Trailing comma in select statement required",
        )

    return None
def _eval(self, context: RuleContext) -> LintResult:
    """Report anything sitting between a function name and its brackets.

    Returns:
        An empty ``LintResult`` when the name is immediately followed by the
        bracketed section. Otherwise a result anchored on the first
        intervening segment; delete fixes are attached only when every
        intervening segment is whitespace or a newline.
    """
    func = FunctionalContext(context).segment
    # This rule is evaluated on function segments.
    assert func.all(sp.is_type("function"))

    parts = func.children()
    name_seg = parts.first(sp.is_type("function_name"))[0]
    bracket_seg = parts.first(sp.is_type("bracketed"))[0]
    gap = parts.select(start_seg=name_seg, stop_seg=bracket_seg)

    if not gap:
        return LintResult()

    if not gap.all(sp.is_type("whitespace", "newline")):
        # Something other than whitespace is in the way: report it, but do
        # not attempt a fix.
        return LintResult(anchor=gap[0])

    # Only whitespace/newlines in the gap, so removing them is safe.
    return LintResult(
        anchor=gap[0],
        fixes=[LintFix.delete(seg) for seg in gap],
    )
def _eval(self, context: RuleContext) -> LintResult:
    """Flatten a CASE expression nested directly inside an ELSE clause.

    Returns:
        An empty ``LintResult`` when there is nothing to flatten (no outer
        WHEN clause, the ELSE holds more than one expression, that
        expression has more than one child, or no nested CASE was found).
        Otherwise a result anchored on the nested CASE with fixes that lift
        its WHEN/ELSE clauses to follow the outer CASE's last WHEN and
        remove the outer ELSE.
    """
    outer_case = FunctionalContext(context).segment
    assert outer_case.select(sp.is_type("case_expression"))

    outer_children = outer_case.children()
    outer_last_when = outer_children.last(sp.is_type("when_clause")).get()
    outer_else = outer_children.select(sp.is_type("else_clause"))
    outer_else_exprs = outer_else.children(sp.is_type("expression"))
    else_expr_children = outer_else_exprs.children()
    nested_case = else_expr_children.select(sp.is_type("case_expression"))

    # The length checks are for safety: they make sure the nested CASE is
    # not part of a larger expression, in which case flattening would
    # delete other code.
    else_is_lone_case = (
        len(outer_else_exprs) <= 1
        and len(else_expr_children) <= 1
        and nested_case
    )
    if not outer_last_when or not else_is_lone_case:
        return LintResult()

    # Guaranteed by the guard above.
    assert outer_last_when
    # A nested case can only have been found via an else clause.
    outer_else_seg = outer_else.get()
    assert outer_else_seg

    # Remove whatever sits between the last outer WHEN and the outer ELSE.
    between_when_and_else = outer_children.select(
        start_seg=outer_last_when, stop_seg=outer_else_seg
    )
    fixes = between_when_and_else.apply(LintFix.delete)

    # Reuse the whitespace found before the last outer WHEN as indentation
    # for the relocated clauses; fall back to the default indent if none.
    leading_ws = (
        outer_children.select(stop_seg=outer_last_when)
        .reversed()
        .select(sp.is_type("whitespace"))
    )
    if leading_ws:
        indent_str = "".join(ws.raw for ws in leading_ws)
    else:
        indent_str = self.indent

    # Each nested WHEN/ELSE clause is re-created after the last outer WHEN,
    # preceded by a newline and the chosen indentation.
    moved_clauses = nested_case.children(
        sp.is_type("when_clause", "else_clause")
    )
    segments = [
        piece
        for clause in moved_clauses
        for piece in (NewlineSegment(), WhitespaceSegment(indent_str), clause)
    ]
    fixes.append(
        LintFix.create_after(outer_last_when, segments, source=segments)
    )

    # Finally drop the outer ELSE clause (which contained the nested CASE).
    fixes.append(LintFix.delete(outer_else_seg))
    return LintResult(nested_case[0], fixes=fixes)
def _eval(self, context: RuleContext):
    """Dispatch a select clause to the single- or multi-target evaluator.

    A clause with exactly one select target goes to
    ``_eval_single_select_target_element`` unless that clause contains a
    wildcard expression and ``self.wildcard_policy`` is not ``"single"``.
    Every other clause with at least one target goes to
    ``_eval_multiple_select_target_elements``. A clause with no targets
    yields ``None``.
    """
    self.wildcard_policy: str
    assert context.segment.is_type("select_clause")

    select_targets_info = self._get_indexes(context)
    wildcards = (
        FunctionalContext(context)
        .segment.children(sp.is_type("select_clause_element"))
        .children(sp.is_type("wildcard_expression"))
    )

    target_count = len(select_targets_info.select_targets)
    if not target_count:
        return None

    if target_count == 1 and (
        not wildcards or self.wildcard_policy == "single"
    ):
        return self._eval_single_select_target_element(
            select_targets_info,
            context,
        )

    return self._eval_multiple_select_target_elements(
        select_targets_info, context.segment
    )
def _eval(self, context: RuleContext) -> List[LintResult]:
    """Set operators should be surrounded by newlines.

    For each set operator among the children of the context segment, look
    at the segments between it and the nearest code segment on either side.
    A side is fine if it contains at least one newline (more than one is
    allowed). For every side lacking a newline, the first whitespace
    segment on that side is handed to ``_generate_fixes``.

    Returns:
        One ``LintResult`` per set operator that is missing a newline
        before it, after it, or both.
    """
    parent = FunctionalContext(context).segment
    siblings = parent.children()
    set_ops = parent.children(sp.is_type(*self._target_elems))
    # We should always find some as children because of the
    # ParentOfSegmentCrawler.
    assert set_ops

    prefix = "Set operators should be surrounded by newlines. "
    results: List[LintResult] = []

    for set_op in set_ops:
        prev_code = (
            siblings.reversed().select(start_seg=set_op).first(sp.is_code())
        )
        next_code = siblings.select(start_seg=set_op).first(sp.is_code())

        # Segments strictly between the neighbouring code and the operator.
        gap_before = siblings.select(
            start_seg=prev_code.get(), stop_seg=set_op
        )
        gap_after = siblings.select(
            start_seg=set_op, stop_seg=next_code.get()
        )

        newline_before = gap_before.first(sp.is_type("newline"))
        newline_after = gap_after.first(sp.is_type("newline"))

        # Whitespace next to the operator is what gets passed on for fixing.
        ws_before = gap_before.first(sp.is_type("whitespace")).get()
        ws_after = gap_after.first(sp.is_type("whitespace")).get()

        if newline_before and newline_after:
            continue

        if newline_after:
            # Only the leading newline is missing.
            detail = f"Missing newline before set operator {set_op.raw}."
            fixes = _generate_fixes(whitespace_segment=ws_before)
        elif newline_before:
            # Only the trailing newline is missing.
            detail = f"Missing newline after set operator {set_op.raw}."
            fixes = _generate_fixes(whitespace_segment=ws_after)
        else:
            # Both sides are missing a newline.
            fixes_before = _generate_fixes(whitespace_segment=ws_before)
            fixes_after = _generate_fixes(whitespace_segment=ws_after)

            # make mypy happy
            assert isinstance(fixes_before, Iterable)
            assert isinstance(fixes_after, Iterable)

            fixes = [*fixes_before, *fixes_after]
            detail = (
                "Missing newline before and after set operator "
                f"{set_op.raw}."
            )

        results.append(
            LintResult(
                anchor=set_op,
                description=prefix + detail,
                fixes=fixes,
            )
        )

    return results
def _eval(self, context: RuleContext) -> Optional[List[LintResult]]:
    """Relational operators should not be used to check for NULL values.

    Scans the children of the context segment for ``=``, ``!=`` and ``<>``
    operators whose next code segment is a NULL literal, and proposes
    replacing each such operator with the sequence built by
    ``_create_base_is_null_sequence``.

    Returns:
        A list with one ``LintResult`` per offending operator, or ``None``
        when the segment is too short, is (or sits directly inside) a
        ``set_clause_list`` / ``execute_script_statement``, or contains no
        offending operator.
    """
    # Context/motivation for this rule:
    # https://news.ycombinator.com/item?id=28772289
    # https://stackoverflow.com/questions/9581745/sql-is-null-and-null
    if len(context.segment.segments) <= 2:
        return None  # pragma: no cover

    # Allow assignments in SET clauses
    if context.parent_stack and context.parent_stack[-1].is_type(
        "set_clause_list", "execute_script_statement"
    ):
        return None

    # Allow assignments in EXEC clauses
    if context.segment.is_type("set_clause_list", "execute_script_statement"):
        return None

    segment = FunctionalContext(context).segment
    # Iterate through children of this segment looking for equals or "not
    # equals". Once found, check if the next code segment is a NULL literal.
    children = segment.children()
    operators = segment.children(sp.raw_is("=", "!=", "<>"))
    if not operators:
        return None

    self.logger.debug("Operators found: %s", operators)

    results: List[LintResult] = []
    # We may have many operators
    for operator in operators:
        self.logger.debug("Children found: %s", children)
        after_op_list = children.select(start_seg=operator)
        # If nothing comes after operator then skip
        if not after_op_list:
            continue  # pragma: no cover
        null_literal = after_op_list.first(sp.is_code())
        # if the next bit of code isnt a NULL then we are good
        if not null_literal.all(sp.is_type("null_literal")):
            continue

        sub_seg = null_literal.get()
        assert sub_seg, "TypeGuard: Segment must exist"
        self.logger.debug(
            "Found NULL literal following equals/not equals @%s: %r",
            sub_seg.pos_marker,
            sub_seg.raw,
        )
        edit = _create_base_is_null_sequence(
            is_upper=sub_seg.raw[0] == "N",
            operator_raw=operator.raw,
        )

        # The segment immediately BEFORE the operator decides whether the
        # replacement needs leading whitespace, and the segment immediately
        # AFTER decides trailing whitespace. (Previously these two lookups
        # were swapped, so input like ``a =NULL`` got a second leading
        # space and no trailing one.)
        seg_before_op = children.select(stop_seg=operator).last().get()
        seg_after_op = after_op_list.first().get()
        if self._missing_whitespace(seg_before_op, before=True):
            whitespace_segment: CorrectionListType = [WhitespaceSegment()]
            edit = whitespace_segment + edit
        if self._missing_whitespace(seg_after_op, before=False):
            edit = edit + [WhitespaceSegment()]

        res = LintResult(
            anchor=operator,
            fixes=[
                LintFix.replace(
                    operator,
                    edit,
                )
            ],
        )
        results.append(res)

    return results or None
def _eval_single_select_target_element(
    self, select_targets_info, context: RuleContext
):
    """Build the lint result for a select clause with one select target.

    Only acts when the first newline lies between the SELECT keyword and
    the first select target, i.e. ``select_idx < first_new_line_idx <
    first_select_target_idx``. In that case the returned ``LintResult`` is
    anchored on the select clause and carries fixes that put the target
    (and a select clause modifier, if one is found after that newline) in
    place of that newline. When ``comment_after_select_idx`` is not ``-1``
    the result is returned with an empty fix list.

    Args:
        select_targets_info: Index information for the select clause; the
            attributes read here are ``select_idx``, ``first_new_line_idx``,
            ``first_select_target_idx`` and ``comment_after_select_idx``.
        context: The rule context whose segment is the select clause.

    Returns:
        A ``LintResult`` as described above, or ``None`` when the index
        condition does not hold.
    """
    select_clause = FunctionalContext(context).segment
    parent_stack = context.parent_stack
    if (
        select_targets_info.select_idx
        < select_targets_info.first_new_line_idx
        < select_targets_info.first_select_target_idx
    ):
        # Do we have a modifier?
        select_children = select_clause.children()
        modifier: Optional[Segments]
        modifier = select_children.first(sp.is_type("select_clause_modifier"))

        # Prepare the select clause which will be inserted
        insert_buff = [
            WhitespaceSegment(),
            select_children[select_targets_info.first_select_target_idx],
        ]

        # Check if the modifier is one we care about
        if modifier:
            # If it's already on the first line, ignore it.
            if (
                select_children.index(modifier.get())
                < select_targets_info.first_new_line_idx
            ):
                modifier = None
        fixes = [
            # Delete the first select target from its original location.
            # We'll add it to the right section at the end, once we know
            # what to add.
            LintFix.delete(
                select_children[select_targets_info.first_select_target_idx],
            ),
        ]

        # If we have a modifier to move:
        if modifier:
            # Add it to the insert
            insert_buff = [WhitespaceSegment(), modifier[0]] + insert_buff

            modifier_idx = select_children.index(modifier.get())
            # Delete the whitespace after it (which is two after, thanks to
            # indent). The length guard must cover index ``modifier_idx + 2``
            # itself; guarding only ``modifier_idx + 1`` would raise
            # IndexError when the modifier is the second-to-last child.
            if (
                len(select_children) > modifier_idx + 2
                and select_children[modifier_idx + 2].is_whitespace
            ):
                fixes += [
                    LintFix.delete(
                        select_children[modifier_idx + 2],
                    ),
                ]

            # Delete the modifier itself
            fixes += [
                LintFix.delete(
                    modifier[0],
                ),
            ]

            # Set the position marker for removing the preceding
            # whitespace and newline, which we'll use below.
            start_idx = modifier_idx
        else:
            # Set the position marker for removing the preceding
            # whitespace and newline, which we'll use below.
            start_idx = select_targets_info.first_select_target_idx

        if parent_stack and parent_stack[-1].is_type("select_statement"):
            select_stmt = parent_stack[-1]
            select_clause_idx = select_stmt.segments.index(select_clause.get())
            after_select_clause_idx = select_clause_idx + 1
            if len(select_stmt.segments) > after_select_clause_idx:

                def _fixes_for_move_after_select_clause(
                    stop_seg: BaseSegment,
                    delete_segments: Optional[Segments] = None,
                    add_newline: bool = True,
                ) -> List[LintFix]:
                    """Cleans up by moving leftover select_clause segments.

                    Context: Some of the other fixes we make in
                    _eval_single_select_target_element() leave leftover
                    child segments that need to be moved to become
                    *siblings* of the select_clause.
                    """
                    start_seg = (
                        modifier[0]
                        if modifier
                        else select_children[
                            select_targets_info.first_new_line_idx
                        ]
                    )
                    move_after_select_clause = select_children.select(
                        start_seg=start_seg,
                        stop_seg=stop_seg,
                    )
                    # :TRICKY: Below, we have a couple places where we
                    # filter to guard against deleting the same segment
                    # multiple times -- this is illegal.
                    # :TRICKY: Use IdentitySet rather than set() since
                    # different segments may compare as equal.
                    all_deletes = IdentitySet(
                        fix.anchor for fix in fixes if fix.edit_type == "delete"
                    )
                    fixes_ = []
                    for seg in delete_segments or []:
                        if seg not in all_deletes:
                            # NOTE: this appends to the enclosing ``fixes``
                            # list, not to ``fixes_``. Every caller extends
                            # ``fixes`` with the return value, so these
                            # deletes still end up ahead of ``fixes_``.
                            fixes.append(LintFix.delete(seg))
                            all_deletes.add(seg)
                    fixes_ += [
                        LintFix.delete(seg)
                        for seg in move_after_select_clause
                        if seg not in all_deletes
                    ]
                    fixes_.append(
                        LintFix.create_after(
                            select_clause[0],
                            ([NewlineSegment()] if add_newline else [])
                            + list(move_after_select_clause),
                        )
                    )
                    return fixes_

                if select_stmt.segments[after_select_clause_idx].is_type("newline"):
                    # Since we're deleting the newline, we should also delete all
                    # whitespace before it or it will add random whitespace to
                    # following statements. So walk back through the segment
                    # deleting whitespace until you get the previous newline, or
                    # something else.
                    to_delete = select_children.reversed().select(
                        loop_while=sp.is_type("whitespace"),
                        start_seg=select_children[start_idx],
                    )
                    if to_delete:
                        # The select_clause is immediately followed by a
                        # newline. Delete the newline in order to avoid leaving
                        # behind an empty line after fix, *unless* we stopped
                        # due to something other than a newline.
                        delete_last_newline = select_children[
                            start_idx - len(to_delete) - 1
                        ].is_type("newline")

                        # Delete the newline if we decided to.
                        if delete_last_newline:
                            fixes.append(
                                LintFix.delete(
                                    select_stmt.segments[after_select_clause_idx],
                                )
                            )

                        fixes += _fixes_for_move_after_select_clause(
                            to_delete[-1], to_delete
                        )
                elif select_stmt.segments[after_select_clause_idx].is_type(
                    "whitespace"
                ):
                    # The select_clause has stuff after (most likely a comment)
                    # Delete the whitespace immediately after the select clause
                    # so the other stuff aligns nicely based on where the select
                    # clause started.
                    fixes += [
                        LintFix.delete(
                            select_stmt.segments[after_select_clause_idx],
                        ),
                    ]

                    fixes += _fixes_for_move_after_select_clause(
                        select_children[
                            select_targets_info.first_select_target_idx
                        ],
                    )
                elif select_stmt.segments[after_select_clause_idx].is_type(
                    "dedent"
                ):
                    # Again let's strip back the whitespace, but simpler
                    # as don't need to worry about new line so just break
                    # if see non-whitespace
                    # NOTE(review): ``select_clause_idx`` is an index into
                    # ``select_stmt.segments`` but is used here to index
                    # ``select_children`` -- confirm this is intended.
                    to_delete = select_children.reversed().select(
                        loop_while=sp.is_type("whitespace"),
                        start_seg=select_children[select_clause_idx - 1],
                    )
                    if to_delete:
                        fixes += _fixes_for_move_after_select_clause(
                            to_delete[-1],
                            to_delete,
                            # If we deleted a newline, create a newline.
                            any(seg for seg in to_delete if seg.is_type("newline")),
                        )
                else:
                    fixes += _fixes_for_move_after_select_clause(
                        select_children[
                            select_targets_info.first_select_target_idx
                        ],
                    )

        if select_targets_info.comment_after_select_idx == -1:
            fixes += [
                # Insert the select_clause in place of the first newline in the
                # Select statement
                LintFix.replace(
                    select_children[select_targets_info.first_new_line_idx],
                    insert_buff,
                ),
            ]
        else:
            # The SELECT is followed by a comment on the same line. In order
            # to autofix this, we'd need to move the select target between
            # SELECT and the comment and potentially delete the entire line
            # where the select target was (if it is now empty). This is
            # *fairly tricky and complex*, in part because the newline on
            # the select target's line is several levels higher in the
            # parser tree. Hence, we currently don't autofix this. Could be
            # autofixed in the future if/when we have the time.
            fixes = []

        return LintResult(
            anchor=select_clause.get(),
            fixes=fixes,
        )

    return None
def _eval(self, context: RuleContext) -> Optional[LintResult]:
    """Report USING in a join clause, offering an ON rewrite when possible.

    Returns:
        ``None`` when the join clause has no USING keyword. Otherwise a
        ``LintResult`` anchored on the USING keyword. Fixes are attached
        only when this join clause is the second join/from element of its
        parent, an enclosing select statement is found, and that statement
        yields at least two table aliases with a reference string.
    """
    functional = FunctionalContext(context)
    join_clause = functional.segment
    ancestors = functional.parent_stack
    # We are not concerned with non join clauses
    assert context.segment.is_type("join_clause")

    using_keyword = join_clause.children(sp.is_keyword("using")).first()
    # No USING keyword means nothing to report.
    if not using_keyword:
        return None

    anchor = using_keyword.get()
    description = "Found USING statement. Expected only ON statements."
    # Every exit from here on is a lint error; this is the variant that
    # carries no fix.
    report_only = LintResult(
        anchor=anchor,
        description=description,
    )

    joined_elements = ancestors.last().children(
        sp.is_type("join_clause", "from_expression_element")
    )
    # We can only safely fix the first join clause
    if join_clause.get(0) != joined_elements.get(1):
        return report_only

    enclosing_select = ancestors.last(sp.is_type("select_statement")).get()
    if not enclosing_select:  # pragma: no cover
        return report_only

    select_info = get_select_statement_info(enclosing_select, context.dialect)
    candidate_aliases = select_info.table_aliases if select_info else []
    named_aliases = [alias for alias in candidate_aliases if alias.ref_str]
    if len(named_aliases) < 2:
        return report_only

    to_delete, insert_after_anchor = _extract_deletion_sequence_and_anchor(
        join_clause
    )
    left_table, right_table = named_aliases[:2]

    # "ON " followed by the generated join conditions.
    edit_segments = [
        KeywordSegment(raw="ON"),
        WhitespaceSegment(raw=" "),
    ] + _generate_join_conditions(
        left_table.ref_str,
        right_table.ref_str,
        _extract_cols_from_using(join_clause, using_keyword),
    )

    assert left_table.segment
    assert right_table.segment
    insertion = LintFix.create_before(
        anchor_segment=insert_after_anchor,
        source=[left_table.segment, right_table.segment],
        edit_segments=edit_segments,
    )
    deletions = [LintFix.delete(seg) for seg in to_delete]

    return LintResult(
        anchor=anchor,
        description=description,
        fixes=[insertion, *deletions],
    )
def _eval(self, context: RuleContext) -> List[LintResult]:
    """Operators should consistently sit before or after a newline.

    For every binary/comparison operator among the children of the context
    segment, the non-code run on each side is inspected. Which side is the
    "anchor" side depends on ``self.operator_new_lines``: when it equals
    ``"before"`` the preceding run is the anchor, otherwise the following
    run is. An operator is reported when its anchor side contains a
    non-templated newline, unless the operator stands on a line by itself.

    Returns:
        The results produced by ``_generate_fixes`` for each reported
        operator (possibly an empty list).
    """
    operator_kinds = ("binary_operator", "comparison_operator")
    parent = FunctionalContext(context).segment
    # bring var to this scope so as to only have one type ignore
    operator_new_lines: str = self.operator_new_lines  # type: ignore
    siblings = parent.children()
    operators = parent.children(sp.is_type(*operator_kinds))

    results: List[LintResult] = []
    for op in operators:
        prev_code = siblings.reversed().select(start_seg=op).first(sp.is_code())
        next_code = siblings.select(start_seg=op).first(sp.is_code())

        run_before = siblings.select(start_seg=prev_code.get(), stop_seg=op)
        run_after = siblings.select(start_seg=op, stop_seg=next_code.get())

        # In "before" mode the roles swap and both runs are walked backwards.
        if operator_new_lines == "before":
            change_list = run_after.reversed()
            anchor_list = run_before.reversed()
        else:
            change_list = run_before
            anchor_list = run_after

        # No real (non-templated) newline on the anchor side: already fine.
        real_newline = sp.and_(
            sp.is_type("newline"), sp.not_(sp.is_templated())
        )
        if not anchor_list.any(real_newline):
            continue

        # An operator on a line by itself is acceptable regardless of the
        # 'operator_new_lines' setting.
        code_or_newline_after = siblings.select(
            sp.or_(sp.is_code(), sp.is_type("newline")), start_seg=op
        )
        code_or_newline_before = siblings.reversed().select(
            sp.or_(sp.is_code(), sp.is_type("newline")), start_seg=op
        )
        newline_after = code_or_newline_after.first(sp.is_type("newline"))
        newline_before = code_or_newline_before.first(sp.is_type("newline"))
        if newline_after and newline_before:
            continue

        insert_anchor = anchor_list.last().get()
        assert insert_anchor, "Insert Anchor must be present"
        results.append(
            _generate_fixes(
                operator_new_lines,
                change_list,
                op,
                insert_anchor,
            )
        )

    return results