Esempio n. 1
0
    def _eval(self, context: RuleContext) -> Optional[LintResult]:
        """Gather table and column references of a SELECT and lint its aliases.

        Locates the base table of the FROM clause, collects every
        ``from_expression_element`` and ``column_reference`` crawled from the
        FROM clause and from the segments selected after it, then delegates
        the decision on what to report to ``_lint_aliases_in_join``.

        Args:
            context: Rule context; its segment must be a ``select_statement``.

        Returns:
            An empty ``LintResult`` when the dialect is disabled by default and
            ``force_enable`` is not set, ``None`` when no base table is found or
            ``_lint_aliases_in_join`` returns something falsy, otherwise the
            value returned by ``_lint_aliases_in_join``.
        """
        # Config type hints
        self.force_enable: bool

        # Issue 2810: BigQuery has some tricky (apparently undocumented and
        # changeable) expectations about whether backticks are required, and
        # whether the query is valid even with them, depending on whether the
        # GCP project name is present or just the dataset name. See e.g.
        # https://www.reddit.com/r/bigquery/comments/fgk31y/new_in_bigquery_no_more_backticks_around_table/
        # SQLFluff has no access to BigQuery while looking at the query, so
        # doing the right thing here would be complex. For now the rule
        # simply disables itself.
        disabled_for_dialect = (
            context.dialect.name in self._dialects_disabled_by_default
        )
        if disabled_for_dialect and not self.force_enable:
            return LintResult()

        assert context.segment.is_type("select_statement")

        statement_children = FunctionalContext(context).segment.children()
        from_clause = statement_children.select(
            sp.is_type("from_clause")).first()

        # Descend from the FROM clause to the base table reference, keeping
        # only the first match at each level of the tree.
        base_table = from_clause
        for segment_type in (
            "from_expression",
            "from_expression_element",
            "table_expression",
            "object_reference",
        ):
            base_table = base_table.children(sp.is_type(segment_type)).first()
        if not base_table:
            return None

        # Buffers for all table expressions and column references found in
        # the FROM clause and in whatever is selected after it.
        from_expression_elements = []
        column_reference_segments = []

        trailing_clauses = statement_children.select(start_seg=from_clause[0])
        for clause in from_clause + trailing_clauses:
            from_expression_elements.extend(
                clause.recursive_crawl("from_expression_element"))
            column_reference_segments.extend(
                clause.recursive_crawl("column_reference"))

        # base_table is known to be non-empty here thanks to the guard above.
        lint_outcome = self._lint_aliases_in_join(
            base_table[0],
            from_expression_elements,
            column_reference_segments,
            context.segment,
        )
        return lint_outcome or None
Esempio n. 2
0
    def _get_subsequent_whitespace(
        self,
        context,
    ) -> Tuple[Optional[BaseSegment], Optional[BaseSegment]]:
        """Search forwards through the raw segments for subsequent whitespace.

        Args:
            context: Rule context whose ``segment`` is the starting point of
                the forward search.

        Returns:
            A tuple ``(whitespace, next_segment)``. ``whitespace`` is the last
            whitespace segment in the run of meta/whitespace segments that
            follows ``context.segment``, or ``None`` if that run contains no
            whitespace. ``next_segment`` is the raw segment right after that
            run, or ``None`` when the run reaches the end of the raw segments.
        """
        # Get all raw segments. "raw_segments" is appropriate as the
        # only segments we can care about are comma, whitespace,
        # newline, and comment, which are all raw. Using the
        # raw_segments allows us to account for possible unexpected
        # parse tree structures resulting from other rule fixes.
        raw_segments = FunctionalContext(context).raw_segments
        # Start after the current comma within the list. Get all the
        # following whitespace.
        following_segments = raw_segments.select(
            loop_while=sp.or_(sp.is_meta(), sp.is_type("whitespace")),
            start_seg=context.segment,
        )
        subsequent_whitespace = following_segments.last(
            sp.is_type("whitespace"))
        # Unwrap the selection once so that both return paths hand back a
        # single segment (or None), as promised by the return annotation,
        # rather than the selection container itself.
        whitespace_segment = (
            subsequent_whitespace[0] if subsequent_whitespace else None
        )
        next_index = (
            raw_segments.index(context.segment) + len(following_segments) + 1
        )
        try:
            next_segment = raw_segments[next_index]
        except IndexError:
            # If we find ourselves here it's all whitespace (or nothing) to the
            # end of the file. This can only happen in bigquery (see
            # test_pass_bigquery_trailing_comma).
            return whitespace_segment, None
        return whitespace_segment, next_segment
Esempio n. 3
0
    def _eval(self, context: RuleContext) -> LintResult:
        """Flatten a CASE expression that is nested directly in an ELSE clause.

        When the ELSE clause of the outer CASE holds nothing but another CASE
        expression, the nested WHEN/ELSE clauses are re-created after the last
        outer WHEN clause and the outer ELSE clause is deleted.

        Args:
            context: Rule context positioned on the outer CASE expression.

        Returns:
            An empty ``LintResult`` when the pattern does not apply, otherwise
            a ``LintResult`` anchored on the nested CASE carrying the fixes.
        """
        segment = FunctionalContext(context).segment
        assert segment.select(sp.is_type("case_expression"))

        outer_children = segment.children()
        last_outer_when = outer_children.last(sp.is_type("when_clause")).get()
        outer_else = outer_children.select(sp.is_type("else_clause"))
        else_expressions = outer_else.children(sp.is_type("expression"))
        else_expression_parts = else_expressions.children()
        nested_case = else_expression_parts.select(
            sp.is_type("case_expression"))

        # Nothing to do without an outer WHEN clause.
        if not last_outer_when:
            return LintResult()
        # Safety checks: the nested CASE must be the whole ELSE expression,
        # not part of a larger one, or flattening would delete other code.
        if len(else_expressions) > 1 or len(else_expression_parts) > 1:
            return LintResult()
        if not nested_case:
            return LintResult()

        # Guaranteed by the guards above; kept to narrow the types.
        assert last_outer_when
        # A nested CASE was found below the ELSE clause, so the ELSE clause
        # itself must exist.
        outer_else_seg = outer_else.get()
        assert outer_else_seg

        # Delete everything between the last outer "WHEN" clause and the
        # outer "ELSE" clause.
        fixes = outer_children.select(
            start_seg=last_outer_when,
            stop_seg=outer_else_seg,
        ).apply(LintFix.delete)

        # Work out the indentation for the relocated clauses from the
        # whitespace found before the last outer "WHEN"; fall back to the
        # default indent when there is none.
        # NOTE(review): this appears to join every whitespace child located
        # before the last WHEN, not only the run directly preceding it --
        # confirm that this is intended.
        whitespace_before_when = (
            outer_children.select(stop_seg=last_outer_when)
            .reversed()
            .select(sp.is_type("whitespace"))
        )
        if whitespace_before_when:
            indent_str = "".join(ws.raw for ws in whitespace_before_when)
        else:
            indent_str = self.indent

        def _on_new_line(clause):
            """Prefix ``clause`` with a newline and the computed indent."""
            return [NewlineSegment(), WhitespaceSegment(indent_str), clause]

        # Re-create the nested "when" and "else" clauses after the last outer
        # "when".
        relocated = []
        nested_clauses = nested_case.children(
            sp.is_type("when_clause", "else_clause"))
        for chunk in nested_clauses.apply(_on_new_line):
            relocated.extend(chunk)
        fixes.append(
            LintFix.create_after(last_outer_when, relocated, source=relocated))

        # Finally drop the outer "else" clause (and the nested CASE with it).
        fixes.append(LintFix.delete(outer_else_seg))
        return LintResult(nested_case[0], fixes=fixes)
Esempio n. 4
0
    def _eval(self, context: RuleContext) -> Optional[LintResult]:
        """Report DISTINCT used with a bracketed first select element.

        Triggers on a ``select_clause`` that has a ``select_clause_modifier``
        and whose first select element (or its expression) contains a
        ``bracketed`` child.

        Args:
            context: Rule context; its segment must be a ``select_clause``.

        Returns:
            A ``LintResult`` anchored on the modifier when at least one fix is
            produced, otherwise ``None``.
        """
        assert context.segment.is_type("select_clause")
        clause_children = FunctionalContext(context).segment.children()
        modifier = clause_children.select(sp.is_type("select_clause_modifier"))
        first_element = clause_children.select(
            sp.is_type("select_clause_element")).first()
        if not (modifier and first_element):
            return None

        # Prefer the element's expression; fall back to the element itself.
        inner_expression = first_element.children(
            sp.is_type("expression")).first()
        target = inner_expression if inner_expression else first_element

        bracketed = target.children(sp.is_type("bracketed")).first()
        if not bracketed:
            return None

        fixes = []

        # The brackets are the only thing in the expression: unwrap them,
        # dropping the meta segments and the opening/closing bracket.
        if len(target[0].segments) == 1:
            bracket_seg = bracketed[0]
            unwrapped = self.filter_meta(bracket_seg.segments)[1:-1]
            fixes.append(LintFix.replace(bracket_seg, unwrapped))

        # Make sure DISTINCT and the expression are separated by whitespace.
        separating_whitespace = clause_children.select(
            sp.is_whitespace(),
            start_seg=modifier[0],
            stop_seg=first_element[0],
        )
        if not separating_whitespace:
            fixes.append(
                LintFix.create_before(first_element[0], [WhitespaceSegment()]))

        # No fixes means no problem.
        if not fixes:
            return None
        return LintResult(anchor=modifier[0], fixes=fixes)
Esempio n. 5
0
    def _get_indexes(context: RuleContext):
        """Collect positional information about the targets of a select clause.

        Looks up, among the children of ``context.segment``, the positions of
        the SELECT keyword, the first newline, the first select target, the
        first whitespace after the first newline and a comment sitting between
        SELECT and the first newline. The optional positions default to ``-1``
        when the corresponding segment is not found. Also locates the first
        ``from_clause`` among the following siblings and the whitespace
        siblings selected before it.

        Args:
            context: Rule context of the segment being inspected.

        Returns:
            A ``SelectTargetsInfo`` built from the values described above.
        """
        children = FunctionalContext(context).segment.children()

        select_targets = children.select(sp.is_type("select_clause_element"))
        first_select_target_idx = children.find(select_targets.get())

        select_keywords = children.select(sp.is_keyword("select"))
        select_idx = -1
        if select_keywords:
            select_idx = children.find(select_keywords.get())

        newlines = children.select(sp.is_type("newline"))
        first_new_line_idx = -1
        if newlines:
            first_new_line_idx = children.find(newlines.get())

        # A comment between SELECT and the first newline, with nothing but
        # comments, whitespace and meta segments in between.
        comment_after_select_idx = -1
        if newlines:
            trailing_comments = children.select(
                sp.is_type("comment"),
                start_seg=select_keywords.get(),
                stop_seg=newlines.get(),
                loop_while=sp.or_(
                    sp.is_type("comment"), sp.is_type("whitespace"), sp.is_meta()
                ),
            )
            if trailing_comments:
                comment_after_select_idx = children.find(
                    trailing_comments.get())

        first_whitespace_idx = -1
        if first_new_line_idx != -1:
            # TRICKY: Ignore whitespace prior to the first newline, e.g. if
            # the line with "SELECT" (before any select targets) has trailing
            # whitespace.
            first_newline = children[first_new_line_idx]
            whitespace_after_first_line = children.select(
                sp.is_type("whitespace"), start_seg=first_newline
            )
            first_whitespace_idx = children.find(
                whitespace_after_first_line.get())

        siblings_post = FunctionalContext(context).siblings_post
        from_clauses = siblings_post.first(sp.is_type("from_clause"))
        from_segment = from_clauses.first().get()
        pre_from_whitespace = siblings_post.select(
            sp.is_type("whitespace"), stop_seg=from_segment
        )

        return SelectTargetsInfo(
            select_idx,
            first_new_line_idx,
            first_select_target_idx,
            first_whitespace_idx,
            comment_after_select_idx,
            select_targets,
            from_segment,
            list(pre_from_whitespace),
        )
Esempio n. 6
0
    def _eval(self, context: RuleContext) -> Optional[LintResult]:
        """Detect CASE expressions that can be written more simply.

        Two reductions are attempted on a CASE with at most one WHEN clause:

        1. THEN and ELSE are the two distinct Boolean literals: the CASE is
           reported with a COALESCE-based fix on the condition.
        2. The condition is a single ``IS [NOT] NULL`` test of a column that
           also appears as the THEN or ELSE expression: the CASE is reported
           with either a COALESCE fix or a fix using the bare column.

        Args:
            context: Rule context of the segment being inspected.

        Returns:
            A ``LintResult`` anchored on the condition expression when one of
            the reductions applies, otherwise ``None``.
        """
        # Only CASE expressions are of interest.
        if context.segment.segments[0].raw_upper != "CASE":
            return None

        # Find all 'WHEN' clauses and the optional 'ELSE' clause.
        children = FunctionalContext(context).segment.children()
        when_clauses = children.select(sp.is_type("when_clause"))
        else_clauses = children.select(sp.is_type("else_clause"))

        # Can't fix if multiple WHEN clauses.
        if len(when_clauses) > 1:
            return None

        # The first expression of the WHEN clause is the condition, the
        # second one is the THEN value.
        when_expressions = when_clauses.children(sp.is_type("expression"))
        condition_expression = when_expressions[0]
        then_expression = when_expressions[1]

        else_expression = None
        if else_clauses:
            else_expression = else_clauses.children(
                sp.is_type("expression"))[0]

        # Method 1: THEN/ELSE are both Boolean literals and differ from each
        # other, so the CASE reduces to the (possibly negated) condition.
        if else_expression is not None:
            then_raw = then_expression.raw_upper
            if {then_raw, else_expression.raw_upper} == {"TRUE", "FALSE"}:
                coalesce_arg_1: BaseSegment = condition_expression
                coalesce_arg_2: BaseSegment = KeywordSegment("false")
                preceding_not = then_raw == "FALSE"
                return LintResult(
                    anchor=condition_expression,
                    fixes=self._coalesce_fix_list(
                        context,
                        coalesce_arg_1,
                        coalesce_arg_2,
                        preceding_not,
                    ),
                    description="Unnecessary CASE statement. "
                    "Use COALESCE function instead.",
                )

        # Method 2: the condition compares a column reference to NULL and
        # that column reference is also the THEN or ELSE expression. Only
        # applicable when the condition expression holds a single condition.
        condition_tokens = {
            part.raw_upper for part in condition_expression.segments
        }
        if not {"IS", "NULL"}.issubset(condition_tokens):
            return None
        if condition_tokens.intersection({"AND", "OR"}):
            return None

        # Is the comparison "IS NULL" or "IS NOT NULL"?
        is_not_prefix = "NOT" in condition_tokens

        # Locate the column reference in the condition expression.
        column_reference_segment = (
            Segments(condition_expression)
            .children(sp.is_type("column_reference"))
            .get()
        )
        # Nothing to do if none is found (this does not apply to functions).
        if not column_reference_segment:
            return None

        def _column_only_result() -> LintResult:
            """Build the result that replaces the CASE by the bare column."""
            return LintResult(
                anchor=condition_expression,
                fixes=self._column_only_fix_list(
                    context,
                    column_reference_segment,
                ),
                description="Unnecessary CASE statement. "
                f"Just use column '{column_reference_segment.raw}'.",
            )

        column_raw = column_reference_segment.raw_upper

        if else_expression is None:
            # A missing ELSE is equivalent to ELSE NULL, so when the THEN
            # value is the column itself the column alone is enough.
            if column_raw == then_expression.raw_upper:
                return _column_only_result()
            return None

        # Check whether the CASE reduces to a single COALESCE call and, if
        # so, in which order its arguments go.
        if not is_not_prefix and column_raw == else_expression.raw_upper:
            coalesce_arg_1, coalesce_arg_2 = else_expression, then_expression
        elif is_not_prefix and column_raw == then_expression.raw_upper:
            coalesce_arg_1, coalesce_arg_2 = then_expression, else_expression
        else:
            return None

        if coalesce_arg_2.raw_upper == "NULL":
            # The column can be specified on its own rather than wrapped in
            # a COALESCE function.
            return _column_only_result()

        return LintResult(
            anchor=condition_expression,
            fixes=self._coalesce_fix_list(
                context,
                coalesce_arg_1,
                coalesce_arg_2,
            ),
            description="Unnecessary CASE statement. "
            "Use COALESCE function instead.",
        )
Esempio n. 7
0
    def _eval(self, context: RuleContext) -> Optional[LintResult]:
        """Require select clause modifiers to sit on the same line as SELECT.

        When a newline separates the SELECT keyword from the select clause
        modifier, the returned fixes re-create the modifier right after the
        keyword and delete the original modifier together with the
        surrounding newlines/whitespace.

        Args:
            context: Rule context; its segment must be a ``select_clause``.

        Returns:
            ``None`` when there is no modifier or no newline precedes it,
            otherwise a ``LintResult`` anchored on the select clause.
        """
        # We only care about select_clause.
        assert context.segment.is_type("select_clause")

        # The first child of the select_clause is taken as the select keyword.
        clause_children = FunctionalContext(context).segment.children()
        select_keyword = clause_children[0]

        # Rule doesn't apply if there's no select clause modifier.
        modifier_match = clause_children.first(
            sp.is_type("select_clause_modifier"))
        if not modifier_match:
            return None
        select_clause_modifier = modifier_match[0]

        def _in_blank_run(segment_type, start):
            """Select ``segment_type`` segments of the blank run after ``start``.

            The run continues while segments are whitespace or meta.
            """
            return clause_children.select(
                select_if=sp.is_type(segment_type),
                loop_while=sp.or_(sp.is_whitespace(), sp.is_meta()),
                start_seg=start,
            )

        # Newlines between the select keyword and the modifier. Without any,
        # the modifier is already on the same line as the keyword.
        leading_newlines = _in_blank_run("newline", select_keyword)
        if not leading_newlines:
            return None

        # Whitespace before the modifier, possibly removed by the fix.
        leading_whitespace = _in_blank_run("whitespace", select_keyword)

        # Is the following select clause element on its own line already?
        trailing_newlines = _in_blank_run("newline", select_clause_modifier)

        # Segments inserted directly after the select keyword, and the
        # original segments between keyword and modifier to delete.
        relocated = [WhitespaceSegment(), select_clause_modifier]
        if not trailing_newlines:
            # The first select clause element shares the modifier's line, so
            # also insert a newline and only delete the leading newlines.
            relocated.append(NewlineSegment())
            obsolete = leading_newlines
        else:
            # A newline follows the modifier: delete both the newlines and
            # the whitespace between the keyword and the modifier.
            obsolete = leading_newlines + leading_whitespace

        # Move the select clause modifier after the select keyword.
        fixes = [
            LintFix.create_after(
                anchor_segment=select_keyword,
                edit_segments=relocated,
            )
        ]
        fixes.extend(LintFix.delete(seg) for seg in obsolete)
        # Delete the original select clause modifier.
        fixes.append(LintFix.delete(select_clause_modifier))

        # Whitespace (on the same line) after the modifier goes away as well.
        trailing_whitespace = clause_children.select(
            select_if=sp.is_whitespace(),
            loop_while=sp.or_(sp.is_type("whitespace"), sp.is_meta()),
            start_seg=select_clause_modifier,
        )
        fixes.extend(LintFix.delete(seg) for seg in trailing_whitespace)

        return LintResult(
            anchor=context.segment,
            fixes=fixes,
        )