Example #1
0
 def _eval(self, context: RuleContext) -> Optional[LintResult]:
     """Flag DISTINCT in a select statement that also has a GROUP BY.

     Returns:
         A ``LintResult`` anchored on the first ``distinct`` keyword found
         in the select clause modifier when the statement also contains a
         GROUP BY clause, otherwise ``None``.
     """
     select_stmt = FunctionalContext(context).segment
     # The crawler is expected to hand us select statements only.
     assert select_stmt.all(sp.is_type("select_statement"))

     # No GROUP BY clause: nothing can be ambiguous.
     if not select_stmt.children(sp.is_type("groupby_clause")):
         return None

     # Walk down: select clause -> clause modifier -> keyword "distinct".
     select_clauses = select_stmt.children(sp.is_type("select_clause"))
     modifiers = select_clauses.children(
         sp.is_type("select_clause_modifier"))
     keywords = modifiers.children(sp.is_type("keyword"))
     distinct_keywords = keywords.select(sp.is_name("distinct"))
     if not distinct_keywords:
         return None
     return LintResult(anchor=distinct_keywords[0])
Example #2
0
    def _eval(self, context: RuleContext) -> Optional[LintResult]:
        """Enforce the configured trailing-comma policy for a select clause.

        With ``select_clause_trailing_comma == "forbid"`` a trailing comma is
        reported and deleted; with ``"require"`` a missing trailing comma is
        reported and added after the last code segment. Any other setting
        (or a clause that already complies) yields ``None``.
        """
        # Config type hints
        self.select_clause_trailing_comma: str

        clause_children = FunctionalContext(context).segment.children()
        # The last code child of the clause is what we inspect.
        final_code_seg: BaseSegment = clause_children.last(sp.is_code())[0]
        policy = self.select_clause_trailing_comma

        if policy == "forbid" and final_code_seg.is_type("comma"):
            # A trailing comma exists but is not allowed: remove it.
            return LintResult(
                anchor=final_code_seg,
                fixes=[LintFix.delete(final_code_seg)],
                description="Trailing comma in select statement forbidden",
            )

        if policy == "require" and not final_code_seg.is_type("comma"):
            # No trailing comma yet: re-emit the last segment followed by one.
            comma_seg = SymbolSegment(",", type="comma")
            append_comma = LintFix.replace(final_code_seg,
                                           [final_code_seg, comma_seg])
            return LintResult(
                anchor=final_code_seg,
                fixes=[append_comma],
                description="Trailing comma in select statement required",
            )

        return None
Example #3
0
    def _eval(self, context: RuleContext) -> LintResult:
        """Function name not immediately followed by bracket.

        Inspects a function segment and reports anything that sits between
        its ``function_name`` child and its ``bracketed`` child. A fix is
        only offered when the gap consists purely of whitespace/newlines.
        """
        func_seg = FunctionalContext(context).segment
        # This rule is only expected to be run against function segments.
        assert func_seg.all(sp.is_type("function"))
        parts = func_seg.children()

        name_seg = parts.first(sp.is_type("function_name"))[0]
        bracket_seg = parts.first(sp.is_type("bracketed"))[0]

        # Everything between the name and the opening bracket.
        gap = parts.select(start_seg=name_seg, stop_seg=bracket_seg)
        if not gap:
            # Name and bracket are adjacent: nothing to report.
            return LintResult()

        if not gap.all(sp.is_type("whitespace", "newline")):
            # Something other than whitespace is in the way; deleting it
            # would not be safe, so report without a fix.
            return LintResult(anchor=gap[0])

        # Only whitespace/newlines: safe to delete all of it.
        return LintResult(
            anchor=gap[0],
            fixes=[LintFix.delete(gap_seg) for gap_seg in gap],
        )
Example #4
0
    def _eval(self, context: RuleContext) -> LintResult:
        """Nested CASE statement in ELSE clause could be flattened.

        When the ELSE clause of a CASE expression contains nothing but
        another CASE expression, the inner WHEN/ELSE clauses are moved up
        after the last outer WHEN and the outer ELSE clause is deleted.
        Returns an empty ``LintResult`` when that pattern is not present.
        """
        outer_case = FunctionalContext(context).segment
        assert outer_case.select(sp.is_type("case_expression"))
        outer_children = outer_case.children()
        last_when = outer_children.last(sp.is_type("when_clause")).get()
        else_clauses = outer_children.select(sp.is_type("else_clause"))
        else_expressions = else_clauses.children(sp.is_type("expression"))
        else_expr_children = else_expressions.children()
        inner_case = else_expr_children.select(sp.is_type("case_expression"))

        # Bail out unless the outer CASE has a WHEN and its ELSE holds
        # exactly one expression made up of exactly one CASE. The length
        # checks guard against the nested CASE being part of a larger
        # expression, where flattening would delete other code.
        if not last_when:
            return LintResult()
        if len(else_expressions) > 1 or len(else_expr_children) > 1:
            return LintResult()
        if not inner_case:
            return LintResult()

        # Guaranteed by the checks above.
        assert last_when
        # A nested CASE was found under an ELSE, so the ELSE must exist.
        else_clause_seg = else_clauses.get()
        assert else_clause_seg

        # Delete whatever sits between the last outer WHEN and the ELSE.
        between_when_and_else = outer_children.select(
            start_seg=last_when, stop_seg=else_clause_seg)
        fixes = [LintFix.delete(seg) for seg in between_when_and_else]

        # Indentation for the moved clauses is taken from the whitespace
        # segments found before the last outer WHEN; if there are none,
        # fall back to the rule's default indent.
        leading_whitespace = (
            outer_children.select(stop_seg=last_when)
            .reversed()
            .select(sp.is_type("whitespace"))
        )
        if leading_whitespace:
            indent_str = "".join(ws.raw for ws in leading_whitespace)
        else:
            indent_str = self.indent

        # Re-create each nested WHEN/ELSE clause on its own indented line
        # directly after the last outer WHEN.
        nested_clauses = inner_case.children(
            sp.is_type("when_clause", "else_clause"))
        moved_segments = []
        for clause in nested_clauses:
            moved_segments += [
                NewlineSegment(),
                WhitespaceSegment(indent_str),
                clause,
            ]
        fixes.append(
            LintFix.create_after(last_when, moved_segments,
                                 source=moved_segments))

        # Finally remove the outer ELSE clause (and the nested CASE in it).
        fixes.append(LintFix.delete(else_clause_seg))
        return LintResult(inner_case[0], fixes=fixes)
Example #5
0
 def _eval(self, context: RuleContext):
     """Dispatch a select clause to the single- or multi-target check.

     A clause with exactly one select target goes to the single-target
     check, unless it contains a wildcard and ``wildcard_policy`` is not
     ``"single"``. Any other non-empty clause goes to the multi-target
     check. A clause with no select targets yields ``None``.
     """
     self.wildcard_policy: str
     assert context.segment.is_type("select_clause")
     targets_info = self._get_indexes(context)

     clause = FunctionalContext(context).segment
     elements = clause.children(sp.is_type("select_clause_element"))
     wildcards = elements.children(sp.is_type("wildcard_expression"))

     target_count = len(targets_info.select_targets)
     if target_count == 1 and (
         not wildcards or self.wildcard_policy == "single"
     ):
         return self._eval_single_select_target_element(
             targets_info, context
         )
     if target_count:
         return self._eval_multiple_select_target_elements(
             targets_info, context.segment
         )
     return None
Example #6
0
    def _eval(self, context: RuleContext) -> List[LintResult]:
        """Set operators should be surrounded by newlines.

        For each set operator, look at the non-code segments between it and
        the nearest code segment on either side and check whether they
        contain a newline segment. Multiple newlines are allowed.

        Returns:
            One ``LintResult`` per set operator that is missing a newline
            before it, after it, or both.
        """
        parent = FunctionalContext(context).segment
        siblings = parent.children()
        operators = parent.children(sp.is_type(*self._target_elems))
        # The crawler should only send us parents of set operators.
        assert operators
        results: List[LintResult] = []

        for set_operator in operators:
            # Nearest code segment on each side of the operator.
            code_before = (
                siblings.reversed().select(start_seg=set_operator).first(sp.is_code())
            )
            code_after = siblings.select(start_seg=set_operator).first(sp.is_code())

            # The segments between that code and the operator.
            gap_before = siblings.select(
                start_seg=code_before.get(), stop_seg=set_operator
            )
            gap_after = siblings.select(
                start_seg=set_operator, stop_seg=code_after.get()
            )

            has_newline_before = gap_before.first(sp.is_type("newline"))
            has_newline_after = gap_after.first(sp.is_type("newline"))

            # First whitespace segment in each gap (may be absent); it is
            # handed to _generate_fixes to build the fix for that side.
            whitespace_before = gap_before.first(sp.is_type("whitespace")).get()
            whitespace_after = gap_after.first(sp.is_type("whitespace")).get()

            if has_newline_before and has_newline_after:
                # Already surrounded by newlines.
                continue

            if has_newline_after:
                # Only the newline before the operator is missing.
                description = (
                    "Set operators should be surrounded by newlines. "
                    f"Missing newline before set operator {set_operator.raw}."
                )
                fixes = _generate_fixes(whitespace_segment=whitespace_before)
            elif has_newline_before:
                # Only the newline after the operator is missing.
                description = (
                    "Set operators should be surrounded by newlines. "
                    f"Missing newline after set operator {set_operator.raw}."
                )
                fixes = _generate_fixes(whitespace_segment=whitespace_after)
            else:
                # Both sides are missing a newline: combine both fix lists.
                fixes_before = _generate_fixes(whitespace_segment=whitespace_before)
                fixes_after = _generate_fixes(whitespace_segment=whitespace_after)

                # make mypy happy
                assert isinstance(fixes_before, Iterable)
                assert isinstance(fixes_after, Iterable)

                fixes = []
                fixes.extend(fixes_before)
                fixes.extend(fixes_after)
                description = (
                    "Set operators should be surrounded by newlines. "
                    "Missing newline before and after set operator "
                    f"{set_operator.raw}."
                )

            results.append(
                LintResult(
                    anchor=set_operator,
                    description=description,
                    fixes=fixes,
                )
            )

        return results
Example #7
0
    def _eval(self, context: RuleContext) -> Optional[List[LintResult]]:
        """Relational operators should not be used to check for NULL values.

        Finds every ``=``, ``!=`` or ``<>`` child whose next code segment is
        a NULL literal and proposes replacing the operator with the sequence
        built by ``_create_base_is_null_sequence``, padded with whitespace
        on whichever side the adjoining segment requires it.

        Returns:
            One ``LintResult`` per offending operator, or ``None`` when
            there is nothing to report.
        """
        # Context/motivation for this rule:
        # https://news.ycombinator.com/item?id=28772289
        # https://stackoverflow.com/questions/9581745/sql-is-null-and-null
        if len(context.segment.segments) <= 2:
            return None  # pragma: no cover

        # Skip when the immediate parent is a SET clause list or an
        # EXECUTE script statement (assignments, not comparisons).
        if context.parent_stack and context.parent_stack[-1].is_type(
                "set_clause_list", "execute_script_statement"):
            return None

        # Likewise skip when the segment itself is one of those types.
        if context.segment.is_type("set_clause_list",
                                   "execute_script_statement"):
            return None

        segment = FunctionalContext(context).segment
        # Iterate through children of this segment looking for equals or "not
        # equals". Once found, check if the next code segment is a NULL literal.

        children = segment.children()
        operators = segment.children(sp.raw_is("=", "!=", "<>"))
        if len(operators) == 0:
            return None
        self.logger.debug("Operators found: %s", operators)

        results: List[LintResult] = []
        # We may have many operators
        for operator in operators:
            self.logger.debug("Children found: %s", children)
            after_op_list = children.select(start_seg=operator)
            # If nothing comes after operator then skip
            if not after_op_list:
                continue  # pragma: no cover
            null_literal = after_op_list.first(sp.is_code())
            # If the next bit of code isn't a NULL then we are good
            if not null_literal.all(sp.is_type("null_literal")):
                continue

            sub_seg = null_literal.get()
            assert sub_seg, "TypeGuard: Segment must exist"
            self.logger.debug(
                "Found NULL literal following equals/not equals @%s: %r",
                sub_seg.pos_marker,
                sub_seg.raw,
            )
            # Match the case of the replacement to the case of the literal.
            edit = _create_base_is_null_sequence(
                is_upper=sub_seg.raw[0] == "N",
                operator_raw=operator.raw,
            )
            # The segments directly adjoining the operator: the last one
            # before it and the first one after it. Leading padding must be
            # decided from the segment *before* the operator and trailing
            # padding from the segment *after* it; the two lookups were
            # previously swapped, so e.g. ``a =NULL`` got a second leading
            # space and no trailing one.
            prev_seg = children.select(stop_seg=operator).last().get()
            next_seg = after_op_list.first().get()
            if self._missing_whitespace(prev_seg, before=True):
                whitespace_segment: CorrectionListType = [WhitespaceSegment()]
                edit = whitespace_segment + edit
            if self._missing_whitespace(next_seg, before=False):
                edit = edit + [WhitespaceSegment()]
            res = LintResult(
                anchor=operator,
                fixes=[LintFix.replace(
                    operator,
                    edit,
                )],
            )
            results.append(res)

        return results or None
Example #8
0
    def _eval_single_select_target_element(
        self, select_targets_info, context: RuleContext
    ):
        """Build the lint result for a select clause with a single target.

        Only acts when ``select_targets_info`` reports a newline index that
        lies strictly between ``select_idx`` and ``first_select_target_idx``.
        In that case the fixes move the first select target (and a select
        clause modifier, if one sits after that newline) to replace the
        newline, and relocate leftover child segments after the select
        clause.

        Args:
            select_targets_info: Index information for the select clause, as
                produced by ``self._get_indexes``.
            context: The rule context; ``context.segment`` is the select
                clause being checked.

        Returns:
            A ``LintResult`` anchored on the select clause (with no fixes if
            ``comment_after_select_idx`` is set), or ``None`` when the
            index condition above does not hold.
        """
        select_clause = FunctionalContext(context).segment
        parent_stack = context.parent_stack

        if (
            select_targets_info.select_idx
            < select_targets_info.first_new_line_idx
            < select_targets_info.first_select_target_idx
        ):
            # Do we have a modifier?
            select_children = select_clause.children()
            modifier: Optional[Segments]
            modifier = select_children.first(sp.is_type("select_clause_modifier"))

            # Prepare the select clause which will be inserted
            insert_buff = [
                WhitespaceSegment(),
                select_children[select_targets_info.first_select_target_idx],
            ]

            # Check if the modifier is one we care about
            if modifier:
                # If it's already on the first line, ignore it.
                if (
                    select_children.index(modifier.get())
                    < select_targets_info.first_new_line_idx
                ):
                    modifier = None
            fixes = [
                # Delete the first select target from its original location.
                # We'll add it to the right section at the end, once we know
                # what to add.
                LintFix.delete(
                    select_children[select_targets_info.first_select_target_idx],
                ),
            ]

            # If we have a modifier to move:
            if modifier:

                # Add it to the insert
                insert_buff = [WhitespaceSegment(), modifier[0]] + insert_buff

                modifier_idx = select_children.index(modifier.get())
                # Delete the whitespace after it (which is two after, thanks to indent)
                # The bounds check must cover the index actually read
                # (modifier_idx + 2), otherwise a modifier that is the
                # second-to-last child would raise IndexError.
                if (
                    len(select_children) > modifier_idx + 2
                    and select_children[modifier_idx + 2].is_whitespace
                ):
                    fixes += [
                        LintFix.delete(
                            select_children[modifier_idx + 2],
                        ),
                    ]

                # Delete the modifier itself
                fixes += [
                    LintFix.delete(
                        modifier[0],
                    ),
                ]

                # Set the position marker for removing the preceding
                # whitespace and newline, which we'll use below.
                start_idx = modifier_idx
            else:
                # Set the position marker for removing the preceding
                # whitespace and newline, which we'll use below.
                start_idx = select_targets_info.first_select_target_idx

            if parent_stack and parent_stack[-1].is_type("select_statement"):
                select_stmt = parent_stack[-1]
                select_clause_idx = select_stmt.segments.index(select_clause.get())
                after_select_clause_idx = select_clause_idx + 1
                if len(select_stmt.segments) > after_select_clause_idx:

                    def _fixes_for_move_after_select_clause(
                        stop_seg: BaseSegment,
                        delete_segments: Optional[Segments] = None,
                        add_newline: bool = True,
                    ) -> List[LintFix]:
                        """Cleans up by moving leftover select_clause segments.

                        Context: Some of the other fixes we make in
                        _eval_single_select_target_element() leave leftover
                        child segments that need to be moved to become
                        *siblings* of the select_clause.

                        Args:
                            stop_seg: End marker of the run of children to
                                move; the run starts at the modifier if
                                there is one, else at the first newline.
                            delete_segments: Extra segments to delete, if
                                not already scheduled for deletion.
                            add_newline: Whether to put a newline in front
                                of the moved segments.

                        Returns:
                            The delete fixes for the moved segments plus one
                            ``create_after`` fix re-adding them after the
                            select clause.
                        """
                        start_seg = (
                            modifier[0]
                            if modifier
                            else select_children[select_targets_info.first_new_line_idx]
                        )
                        move_after_select_clause = select_children.select(
                            start_seg=start_seg,
                            stop_seg=stop_seg,
                        )
                        # :TRICKY: Below, we have a couple places where we
                        # filter to guard against deleting the same segment
                        # multiple times -- this is illegal.
                        # :TRICKY: Use IdentitySet rather than set() since
                        # different segments may compare as equal.
                        all_deletes = IdentitySet(
                            fix.anchor for fix in fixes if fix.edit_type == "delete"
                        )
                        fixes_ = []
                        for seg in delete_segments or []:
                            if seg not in all_deletes:
                                # NOTE: this appends to the enclosing
                                # function's ``fixes`` list, not ``fixes_``.
                                # Callers extend that same list with the
                                # return value, so the deletes still end up
                                # ahead of the fixes returned below.
                                fixes.append(LintFix.delete(seg))
                                all_deletes.add(seg)
                        fixes_ += [
                            LintFix.delete(seg)
                            for seg in move_after_select_clause
                            if seg not in all_deletes
                        ]
                        fixes_.append(
                            LintFix.create_after(
                                select_clause[0],
                                ([NewlineSegment()] if add_newline else [])
                                + list(move_after_select_clause),
                            )
                        )
                        return fixes_

                    if select_stmt.segments[after_select_clause_idx].is_type("newline"):
                        # Since we're deleting the newline, we should also delete all
                        # whitespace before it or it will add random whitespace to
                        # following statements. So walk back through the segment
                        # deleting whitespace until you get the previous newline, or
                        # something else.
                        to_delete = select_children.reversed().select(
                            loop_while=sp.is_type("whitespace"),
                            start_seg=select_children[start_idx],
                        )
                        if to_delete:
                            # The select_clause is immediately followed by a
                            # newline. Delete the newline in order to avoid leaving
                            # behind an empty line after fix, *unless* we stopped
                            # due to something other than a newline.
                            delete_last_newline = select_children[
                                start_idx - len(to_delete) - 1
                            ].is_type("newline")

                            # Delete the newline if we decided to.
                            if delete_last_newline:
                                fixes.append(
                                    LintFix.delete(
                                        select_stmt.segments[after_select_clause_idx],
                                    )
                                )

                            fixes += _fixes_for_move_after_select_clause(
                                to_delete[-1], to_delete
                            )
                    elif select_stmt.segments[after_select_clause_idx].is_type(
                        "whitespace"
                    ):
                        # The select_clause has stuff after (most likely a comment)
                        # Delete the whitespace immediately after the select clause
                        # so the other stuff aligns nicely based on where the select
                        # clause started.
                        fixes += [
                            LintFix.delete(
                                select_stmt.segments[after_select_clause_idx],
                            ),
                        ]
                        fixes += _fixes_for_move_after_select_clause(
                            select_children[
                                select_targets_info.first_select_target_idx
                            ],
                        )
                    elif select_stmt.segments[after_select_clause_idx].is_type(
                        "dedent"
                    ):
                        # Again let's strip back the whitespace, but simpler
                        # as don't need to worry about new line so just break
                        # if see non-whitespace
                        # NOTE(review): select_clause_idx is an index into
                        # select_stmt.segments but is used here to index
                        # select_children -- confirm this is intended.
                        to_delete = select_children.reversed().select(
                            loop_while=sp.is_type("whitespace"),
                            start_seg=select_children[select_clause_idx - 1],
                        )
                        if to_delete:
                            fixes += _fixes_for_move_after_select_clause(
                                to_delete[-1],
                                to_delete,
                                # If we deleted a newline, create a newline.
                                any(seg for seg in to_delete if seg.is_type("newline")),
                            )
                    else:
                        fixes += _fixes_for_move_after_select_clause(
                            select_children[
                                select_targets_info.first_select_target_idx
                            ],
                        )

            if select_targets_info.comment_after_select_idx == -1:
                fixes += [
                    # Insert the select_clause in place of the first newline in the
                    # Select statement
                    LintFix.replace(
                        select_children[select_targets_info.first_new_line_idx],
                        insert_buff,
                    ),
                ]
            else:
                # The SELECT is followed by a comment on the same line. In order
                # to autofix this, we'd need to move the select target between
                # SELECT and the comment and potentially delete the entire line
                # where the select target was (if it is now empty). This is
                # *fairly tricky and complex*, in part because the newline on
                # the select target's line is several levels higher in the
                # parser tree. Hence, we currently don't autofix this. Could be
                # autofixed in the future if/when we have the time.
                fixes = []
            return LintResult(
                anchor=select_clause.get(),
                fixes=fixes,
            )
        return None
Example #9
0
    def _eval(self, context: RuleContext) -> Optional[LintResult]:
        """Look for USING in a join clause.

        Returns ``None`` when the join clause has no USING keyword.
        Otherwise returns a ``LintResult`` anchored on the keyword. It
        carries fixes (replace USING with an ON condition) only when this
        is the first join clause of its parent and at least two table
        aliases with a reference string are available.
        """
        join_clause = FunctionalContext(context).segment
        ancestors = FunctionalContext(context).parent_stack
        # This rule is only expected to be run against join clauses.
        assert context.segment.is_type("join_clause")

        using_keyword = join_clause.children(sp.is_keyword("using")).first()
        if not using_keyword:
            # No USING here: nothing to report.
            return None

        # From here on every return is a violation; prepare the variant
        # that reports without offering a fix.
        anchor = using_keyword.get()
        description = "Found USING statement. Expected only ON statements."
        report_only = LintResult(
            anchor=anchor,
            description=description,
        )

        joined_tables = ancestors.last().children(
            sp.is_type("join_clause", "from_expression_element")
        )
        # Only the first join clause can be fixed safely.
        if join_clause.get(0) != joined_tables.get(1):
            return report_only

        enclosing_select = ancestors.last(sp.is_type("select_statement")).get()
        if not enclosing_select:  # pragma: no cover
            return report_only

        select_info = get_select_statement_info(enclosing_select, context.dialect)
        available_aliases = select_info.table_aliases if select_info else []
        table_aliases = [alias for alias in available_aliases if alias.ref_str]
        if len(table_aliases) < 2:
            # Need two referable tables to build the ON condition.
            return report_only

        to_delete, insert_after_anchor = _extract_deletion_sequence_and_anchor(
            join_clause
        )

        table_a, table_b = table_aliases[:2]
        on_prefix = [
            KeywordSegment(raw="ON"),
            WhitespaceSegment(raw=" "),
        ]
        edit_segments = on_prefix + _generate_join_conditions(
            table_a.ref_str,
            table_b.ref_str,
            _extract_cols_from_using(join_clause, using_keyword),
        )

        assert table_a.segment
        assert table_b.segment
        # Insert the ON condition first, then remove the USING sequence.
        fixes = [
            LintFix.create_before(
                anchor_segment=insert_after_anchor,
                source=[table_a.segment, table_b.segment],
                edit_segments=edit_segments,
            )
        ]
        fixes.extend(LintFix.delete(seg) for seg in to_delete)
        return LintResult(
            anchor=anchor,
            description=description,
            fixes=fixes,
        )
Example #10
0
    def _eval(self, context: RuleContext) -> List[LintResult]:
        """Operators should follow a standard for being before/after newlines.

        For every binary or comparison operator child, look at the segments
        between it and the nearest code segment on each side. Depending on
        ``operator_new_lines``, one side is the "anchor" side and the other
        the "change" side. A result is produced when the anchor side holds
        a non-templated newline, unless the operator sits on a line by
        itself.

        Returns:
            One result from ``_generate_fixes`` per offending operator.
        """
        operator_types = ["binary_operator", "comparison_operator"]
        parent = FunctionalContext(context).segment
        # bring var to this scope so as to only have one type ignore
        operator_new_lines: str = self.operator_new_lines  # type: ignore
        expr = parent.children()
        operators = parent.children(sp.is_type(*operator_types))
        results: List[LintResult] = []

        for operator in operators:
            # Nearest code segment before and after the operator.
            code_before = expr.reversed().select(start_seg=operator).first(
                sp.is_code())
            code_after = expr.select(start_seg=operator).first(sp.is_code())
            gap_before = expr.select(start_seg=code_before.get(),
                                     stop_seg=operator)
            gap_after = expr.select(start_seg=operator,
                                    stop_seg=code_after.get())

            # In "before" mode the roles of the two sides swap and each
            # side is walked in reverse.
            if operator_new_lines == "before":
                change_list = gap_after.reversed()
                anchor_list = gap_before.reversed()
            else:
                change_list = gap_before
                anchor_list = gap_after

            # No real (non-templated) newline on the anchor side means the
            # layout is already fine.
            real_newline = sp.and_(sp.is_type("newline"),
                                   sp.not_(sp.is_templated()))
            if not anchor_list.any(real_newline):
                continue

            # An operator on a line by itself is acceptable whatever the
            # 'operator_new_lines' setting: check whether the first code or
            # newline segment in each direction is a newline.
            code_or_newline = sp.or_(sp.is_code(), sp.is_type("newline"))
            newline_after_operator = expr.select(
                code_or_newline,
                start_seg=operator).first(sp.is_type("newline"))
            newline_before_operator = expr.reversed().select(
                sp.or_(sp.is_code(), sp.is_type("newline")),
                start_seg=operator).first(sp.is_type("newline"))
            if newline_after_operator and newline_before_operator:
                continue

            insert_anchor = anchor_list.last().get()
            assert insert_anchor, "Insert Anchor must be present"
            results.append(
                _generate_fixes(
                    operator_new_lines,
                    change_list,
                    operator,
                    insert_anchor,
                ))

        return results