Example 1
    def render_string(self, in_str: str, fname: str,
                      config: FluffConfig) -> RenderedFile:
        """Template the file."""
        linter_logger.info("TEMPLATING RAW [%s] (%s)", self.templater.name,
                           fname)

        # Start the templating timer
        t0 = time.monotonic()

        if not config.get("templater_obj") == self.templater:
            linter_logger.warning((
                f"Attempt to set templater to {config.get('templater_obj').name} "
                f"failed. Using {self.templater.name} templater. Templater cannot "
                "be set in a .sqlfluff file in a subdirectory of the current "
                "working directory. It can be set in a .sqlfluff in the current "
                "working directory. See Nesting section of the docs for more "
                "details."))
        try:
            templated_file, templater_violations = self.templater.process(
                in_str=in_str,
                fname=fname,
                config=config,
                formatter=self.formatter)
        except SQLTemplaterSkipFile as s:
            linter_logger.warning(str(s))
            templated_file = None
            templater_violations = []

        if not templated_file:
            linter_logger.info("TEMPLATING FAILED: %s", templater_violations)

        # Record time
        time_dict = {"templating": time.monotonic() - t0}

        return RenderedFile(templated_file, templater_violations, config,
                            time_dict, fname)
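
A minimal usage sketch for the method above, assuming the public sqlfluff.core
imports, an explicitly configured ansi dialect, and that RenderedFile exposes
its fields as attributes (the "<string>" file name is a placeholder):

from sqlfluff.core import FluffConfig, Linter

cfg = FluffConfig(overrides={"dialect": "ansi"})
linter = Linter(config=cfg)
# render_string returns a RenderedFile; time_dict carries the templating timer.
rendered = linter.render_string("SELECT 1\n", fname="<string>", config=cfg)
print(rendered.time_dict)  # e.g. {'templating': 0.0003}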
Example 2
def get_encoding(fname: str, config: FluffConfig) -> str:
    """Get the encoding of the file (autodetect)."""
    encoding_config = config.get("encoding", default="autodetect")

    if encoding_config == "autodetect":
        with open(fname, "rb") as f:
            data = f.read()
        return chardet.detect(data)["encoding"]

    return encoding_config
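
The autodetect branch delegates entirely to chardet. A standalone sketch of
the same call (the file name is hypothetical):

import chardet

with open("query.sql", "rb") as f:  # hypothetical file
    result = chardet.detect(f.read())
# chardet.detect returns a dict such as
# {'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}
print(result["encoding"])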
Example 3
    def render_string(self, in_str: str, fname: str, config: FluffConfig,
                      encoding: str) -> RenderedFile:
        """Template the file."""
        linter_logger.info("TEMPLATING RAW [%s] (%s)", self.templater.name,
                           fname)

        # Start the templating timer
        t0 = time.monotonic()

        # Newlines are normalised to unix-style line endings (\n).
        # The motivation is that Jinja normalises newlines during templating and
        # we want consistent mapping between the raw and templated slices.
        in_str = self._normalise_newlines(in_str)

        # Since Linter.__init__() does not require a dialect to be specified,
        # check for one now. (We're processing a string, not a file, so we're
        # not going to pick up a .sqlfluff or other config file to provide a
        # missing dialect at this point.)
        config.verify_dialect_specified()
        if not config.get("templater_obj") == self.templater:
            linter_logger.warning((
                f"Attempt to set templater to {config.get('templater_obj').name} "
                f"failed. Using {self.templater.name} templater. Templater cannot "
                "be set in a .sqlfluff file in a subdirectory of the current "
                "working directory. It can be set in a .sqlfluff in the current "
                "working directory. See Nesting section of the docs for more "
                "details."))
        try:
            templated_file, templater_violations = self.templater.process(
                in_str=in_str,
                fname=fname,
                config=config,
                formatter=self.formatter)
        except SQLFluffSkipFile as s:  # pragma: no cover
            linter_logger.warning(str(s))
            templated_file = None
            templater_violations = []

        if not templated_file:
            linter_logger.info("TEMPLATING FAILED: %s", templater_violations)

        # Record time
        time_dict = {"templating": time.monotonic() - t0}

        return RenderedFile(
            templated_file,
            templater_violations,
            config,
            time_dict,
            fname,
            encoding,
            in_str,
        )
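
The snippet calls a _normalise_newlines helper that is not shown. A plausible
implementation consistent with the comment about unix-style line endings (an
assumption, not necessarily the real helper):

import re

def _normalise_newlines(string: str) -> str:
    """Normalise CRLF and bare CR line endings to unix-style LF."""
    return re.sub(r"\r\n|\r", "\n", string)

assert _normalise_newlines("a\r\nb\rc\n") == "a\nb\nc\n"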
Example 4
def assert_rule_pass_in_sql(code, sql, configs=None):
    """Assert that a given rule doesn't fail on the given sql."""
    # Configs allows overrides if we want to use them.
    cfg = FluffConfig(configs=configs)
    r = get_rule_from_set(code, config=cfg)
    parsed = Linter(config=cfg).parse_string(sql)
    if parsed.violations:
        pytest.fail(parsed.violations[0].desc() + "\n" + parsed.tree.stringify())
    print(f"Parsed:\n {parsed.tree.stringify()}")
    lerrs, _, _, _ = r.crawl(parsed.tree, dialect=cfg.get("dialect_obj"))
    print(f"Errors Found: {lerrs}")
    if any(v.rule.code == code for v in lerrs):
        pytest.fail(f"Found {code} failures in query which should pass.", pytrace=False)
Example 5
def _wrapped(self,
             *,
             in_str: str,
             fname: str,
             config: Optional[FluffConfig] = None,
             **kwargs):
    if config:
        limit = config.get("large_file_skip_char_limit")
        if limit:
            templater_logger.warning(
                "The config value large_file_skip_char_limit was found set. "
                "This feature will be removed in a future release, please "
                "use the more efficient 'large_file_skip_byte_limit' instead."
            )
        if limit and len(in_str) > limit:
            raise SQLFluffSkipFile(
                f"Length of file {fname!r} is over {limit} characters. "
                "Skipping to avoid parser lock. Users can increase this limit "
                "in their config by setting the 'large_file_skip_char_limit' "
                "value, or disable by setting it to zero.")
    return func(self, in_str=in_str, fname=fname, config=config, **kwargs)
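
Here func is a closure variable: _wrapped is the inner function of a decorator
applied to templater process() methods. A sketch of the enclosing shape (the
decorator name is an assumption inferred from context):

import functools

def large_file_check(func):
    """Decorator sketch: skip templating when the input string is too large."""
    @functools.wraps(func)
    def _wrapped(self, *, in_str, fname, config=None, **kwargs):
        # ... size checks as above ...
        return func(self, in_str=in_str, fname=fname, config=config, **kwargs)
    return _wrapped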
Example 6
def assert_rule_pass_in_sql(code, sql, configs=None):
    """Assert that a given rule doesn't fail on the given sql."""
    # Configs allows overrides if we want to use them.
    cfg = FluffConfig(configs=configs)
    r = get_rule_from_set(code, config=cfg)
    linter = Linter(config=cfg)
    rendered = linter.render_string(sql,
                                    fname="<STR>",
                                    config=cfg,
                                    encoding="utf-8")
    parsed = linter.parse_rendered(rendered, recurse=True)
    if parsed.violations:
        pytest.fail(parsed.violations[0].desc() + "\n" +
                    parsed.tree.stringify())
    print(f"Parsed:\n {parsed.tree.stringify()}")
    lerrs, _, _, _ = r.crawl(parsed.tree, [],
                             dialect=cfg.get("dialect_obj"),
                             templated_file=rendered[0])
    print(f"Errors Found: {lerrs}")
    if any(v.rule.code == code for v in lerrs):
        pytest.fail(f"Found {code} failures in query which should pass.",
                    pytrace=False)
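
Note the rendered[0] indexing: RenderedFile behaves as a tuple, and position 0
holds the templated file. A more self-documenting equivalent, assuming the
first field is named templated_file as in Example 3's constructor order:

lerrs, _, _, _ = r.crawl(parsed.tree, [],
                         dialect=cfg.get("dialect_obj"),
                         templated_file=rendered.templated_file)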
Example 7
    def lint_fix_parsed(
        cls,
        tree: BaseSegment,
        config: FluffConfig,
        rule_set: List[BaseRule],
        fix: bool = False,
        fname: Optional[str] = None,
        templated_file: Optional[TemplatedFile] = None,
        formatter: Any = None,
    ) -> Tuple[BaseSegment, List[SQLBaseError], List[NoQaDirective]]:
        """Lint and optionally fix a tree object."""
        # Keep track of the linting errors on the very first linter pass. The
        # list of issues output by "lint" and "fix" only includes issues present
        # in the initial SQL code, EXCLUDING any issues that may be created by
        # the fixes themselves.
        initial_linting_errors = []
        # A placeholder for the fixes we had on the previous loop
        last_fixes = None
        # Keep a set of previous versions to catch infinite loops.
        previous_versions: Set[Tuple[str, Tuple[SourceFix,
                                                ...]]] = {(tree.raw, ())}

        # If we are fixing then we want to loop up to the runaway_limit, otherwise just
        # once for linting.
        loop_limit = config.get("runaway_limit") if fix else 1

        # Dispatch the output for the lint header
        if formatter:
            formatter.dispatch_lint_header(fname)

        # Look for comment segments which might indicate lines to ignore.
        if not config.get("disable_noqa"):
            rule_codes = [r.code for r in rule_set]
            ignore_buff, ivs = cls.extract_ignore_mask_tree(tree, rule_codes)
            initial_linting_errors += ivs
        else:
            ignore_buff = []

        save_tree = tree
        # There are two phases of rule running.
        # 1. The main loop is for most rules. These rules are assumed to
        # interact and cause a cascade of fixes requiring multiple passes.
        # These are run the `runaway_limit` number of times (default 10).
        # 2. The post loop is for post-processing rules, not expected to trigger
        # any downstream rules, e.g. capitalization fixes. They are run on the
        # first loop and then twice at the end (once to fix, and once again to
        # check result of fixes), but not in the intervening loops.
        phases = ["main"]
        if fix:
            phases.append("post")
        for phase in phases:
            if len(phases) > 1:
                rules_this_phase = [
                    rule for rule in rule_set if rule.lint_phase == phase
                ]
            else:
                rules_this_phase = rule_set
            for loop in range(loop_limit if phase == "main" else 2):

                def is_first_linter_pass():
                    return phase == phases[0] and loop == 0

                # Additional newlines are to assist in scanning linting loops
                # during debugging.
                linter_logger.info(
                    f"\n\nEntering linter phase {phase}, loop {loop+1}/{loop_limit}\n"
                )
                changed = False

                if is_first_linter_pass():
                    # In order to compute initial_linting_errors correctly, need
                    # to run all rules on the first loop of the main phase.
                    rules_this_phase = rule_set
                progress_bar_crawler = tqdm(
                    rules_this_phase,
                    desc="lint by rules",
                    leave=False,
                    disable=progress_bar_configuration.disable_progress_bar,
                )

                for crawler in progress_bar_crawler:
                    # Performance: After first loop pass, skip rules that don't
                    # do fixes. Any results returned won't be seen by the user
                    # anyway (linting errors ADDED by rules changing SQL are
                    # not reported back to the user - only initial linting errors),
                    # so there's absolutely no reason to run them.
                    if (fix and not is_first_linter_pass()
                            and not is_fix_compatible(crawler)):
                        continue

                    progress_bar_crawler.set_description(
                        f"rule {crawler.code}")

                    # fixes should be a dict {} with keys edit, delete, create
                    # delete is just a list of segments to delete
                    # edit and create are list of tuples. The first element is
                    # the "anchor", the segment to look for either to edit or to
                    # insert BEFORE. The second is the element to insert or create.
                    linting_errors, _, fixes, _ = crawler.crawl(
                        tree,
                        dialect=config.get("dialect_obj"),
                        fix=fix,
                        templated_file=templated_file,
                        ignore_mask=ignore_buff,
                        fname=fname,
                    )
                    if is_first_linter_pass():
                        initial_linting_errors += linting_errors

                    if fix and fixes:
                        linter_logger.info(
                            f"Applying Fixes [{crawler.code}]: {fixes}")
                        # Do some sanity checks on the fixes before applying.
                        anchor_info = BaseSegment.compute_anchor_edit_info(
                            fixes)
                        if any(not info.is_valid for info in
                               anchor_info.values()):  # pragma: no cover
                            message = (
                                f"Rule {crawler.code} returned conflicting "
                                "fixes with the same anchor. This is only "
                                "supported for create_before+create_after, so "
                                f"the fixes will not be applied. {fixes!r}")
                            cls._report_conflicting_fixes_same_anchor(message)
                            for lint_result in linting_errors:
                                lint_result.fixes = []
                        elif fixes == last_fixes:  # pragma: no cover
                            # If we generate the same fixes two times in a row,
                            # that means we're in a loop, and we want to stop.
                            # (Fixes should address issues, hence different
                            # and/or fewer fixes next time.)
                            cls._warn_unfixable(crawler.code)
                        else:
                            # This is the happy path. We have fixes, now we want to
                            # apply them.
                            last_fixes = fixes
                            new_tree, _, _ = tree.apply_fixes(
                                config.get("dialect_obj"), crawler.code,
                                anchor_info)
                            # Check for infinite loops. We use a combination of the
                            # fixed templated file and the list of source fixes to
                            # apply.
                            loop_check_tuple = (
                                new_tree.raw,
                                tuple(new_tree.source_fixes),
                            )
                            if loop_check_tuple not in previous_versions:
                                # We've not seen this version of the file so
                                # far. Continue.
                                tree = new_tree
                                previous_versions.add(loop_check_tuple)
                                changed = True
                                continue
                            else:
                                # Applying these fixes took us back to a state
                                # which we've seen before. We're in a loop, so
                                # we want to stop.
                                cls._warn_unfixable(crawler.code)

                if fix and not changed:
                    # We did not change the file. Either the file is clean (no
                    # fixes), or any fixes which are present will take us back
                    # to a previous state.
                    linter_logger.info(
                        f"Fix loop complete for {phase} phase. Stability "
                        f"achieved after {loop}/{loop_limit} loops.")
                    break
            else:
                if fix:
                    # The linter loop hit the limit before reaching a stable point
                    # (i.e. free of lint errors). If this happens, it's usually
                    # because one or more rules produced fixes which did not address
                    # the original issue **or** created new issues.
                    linter_logger.warning(
                        f"Loop limit on fixes reached [{loop_limit}].")

                    # Discard any fixes for the linting errors, since they caused a
                    # loop. IMPORTANT: By doing this, we are telling SQLFluff that
                    # these linting errors are "unfixable". This is important,
                    # because when "sqlfluff fix" encounters unfixable lint errors,
                    # it exits with a "failure" exit code, which is exactly what we
                    # want in this situation. (Reason: Although this is more of an
                    # internal SQLFluff issue, users deserve to know about it,
                    # because it means their file(s) weren't fixed.)
                    for violation in initial_linting_errors:
                        if isinstance(violation, SQLLintError):
                            violation.fixes = []

                    # Return the original parse tree, before any fixes were applied.
                    # Reason: When the linter hits the loop limit, the file is often
                    # messy, e.g. some of the fixes were applied repeatedly, possibly
                    # other weird things. We don't want the user to see this junk!
                    return save_tree, initial_linting_errors, ignore_buff

        if config.get("ignore_templated_areas", default=True):
            initial_linting_errors = cls.remove_templated_errors(
                initial_linting_errors)

        return tree, initial_linting_errors, ignore_buff
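
The loop guard above is a general pattern: record every state produced so far
and stop as soon as a step reproduces one. A minimal self-contained sketch of
that pattern (names invented for illustration):

def apply_until_stable(state, step, limit=10):
    """Apply step() until a fixpoint, a repeated state, or the loop limit."""
    seen = {state}
    for _ in range(limit):
        new_state = step(state)
        if new_state in seen:
            break  # stable or cycling: stop, as _warn_unfixable does above
        seen.add(new_state)
        state = new_state
    return state

# Collapsing runs of spaces reaches a fixpoint:
assert apply_until_stable("a   b", lambda s: s.replace("  ", " ")) == "a b"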
Example 8
    def _lex_templated_file(
        templated_file: TemplatedFile, config: FluffConfig
    ) -> Tuple[Optional[Sequence[BaseSegment]], List[SQLLexError],
               FluffConfig]:
        """Lex a templated file.

        NOTE: This potentially mutates the config, so make sure to
        use the returned one.
        """
        violations = []
        linter_logger.info("LEXING RAW (%s)", templated_file.fname)
        # Get the lexer
        lexer = Lexer(config=config)
        # Lex the file and log any problems
        try:
            tokens, lex_vs = lexer.lex(templated_file)
            # We might just get the violations as a list
            violations += lex_vs
            linter_logger.info("Lexed tokens: %s",
                               [seg.raw for seg in tokens] if tokens else None)
        except SQLLexError as err:
            linter_logger.info("LEXING FAILED! (%s): %s", templated_file.fname,
                               err)
            violations.append(err)
            return None, violations, config

        if not tokens:  # pragma: no cover TODO?
            return None, violations, config

        # Check that we've got sensible indentation from the lexer.
        # We might need to suppress if it's a complicated file.
        templating_blocks_indent = config.get("template_blocks_indent",
                                              "indentation")
        if isinstance(templating_blocks_indent, str):
            force_block_indent = (
                templating_blocks_indent.lower().strip() == "force"
            )
        else:
            force_block_indent = False
        templating_blocks_indent = bool(templating_blocks_indent)
        # If we're forcing it through we don't check.
        if templating_blocks_indent and not force_block_indent:
            indent_balance = sum(
                getattr(elem, "indent_val", 0)
                for elem in cast(Tuple[BaseSegment, ...], tokens))
            if indent_balance != 0:
                linter_logger.debug(
                    "Indent balance test failed for %r. Template indents will not be "
                    "linted for this file.",
                    templated_file.fname,
                )
                # Don't enable the templating blocks.
                templating_blocks_indent = False

        # The file will have been lexed without config, so check all indents
        # are enabled.
        new_tokens = []
        for token in cast(Tuple[BaseSegment, ...], tokens):
            if token.is_meta:
                token = cast(MetaSegment, token)
                if token.indent_val != 0:
                    # Don't allow it if we're not linting templating block indents.
                    if not templating_blocks_indent:
                        continue
            new_tokens.append(token)

        # Return new buffer
        return new_tokens, violations, config
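
A toy illustration of the indent-balance test above: paired indent/dedent meta
segments should sum to zero, and an unbalanced file suppresses template-indent
linting (the class and values here are invented for the sketch):

class FakeMeta:
    def __init__(self, indent_val: int) -> None:
        self.indent_val = indent_val

balanced = [FakeMeta(1), "raw", FakeMeta(-1)]  # non-metas contribute 0
unbalanced = balanced + [FakeMeta(1)]          # one unmatched indent

assert sum(getattr(e, "indent_val", 0) for e in balanced) == 0
assert sum(getattr(e, "indent_val", 0) for e in unbalanced) != 0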
Example 9
    def lint_fix_parsed(
        cls,
        tree: BaseSegment,
        config: FluffConfig,
        rule_set: List[BaseRule],
        fix: bool = False,
        fname: Optional[str] = None,
        templated_file: Optional[TemplatedFile] = None,
        formatter: Any = None,
    ) -> Tuple[BaseSegment, List[SQLBaseError], List[NoQaDirective]]:
        """Lint and optionally fix a tree object."""
        # Keep track of the linting errors
        all_linting_errors = []
        # A placeholder for the fixes we had on the previous loop
        last_fixes = None
        # Keep a set of previous versions to catch infinite loops.
        previous_versions = {tree.raw}

        # If we are fixing then we want to loop up to the runaway_limit,
        # otherwise just once for linting.
        loop_limit = config.get("runaway_limit") if fix else 1

        # Dispatch the output for the lint header
        if formatter:
            formatter.dispatch_lint_header(fname)

        # Look for comment segments which might indicate lines to ignore.
        ignore_buff, ivs = cls.extract_ignore_mask(tree)
        all_linting_errors += ivs

        for loop in range(loop_limit):
            changed = False
            for crawler in rule_set:
                # fixes should be a dict {} with keys edit, delete, create
                # delete is just a list of segments to delete
                # edit and create are list of tuples. The first element is the
                # "anchor", the segment to look for either to edit or to insert BEFORE.
                # The second is the element to insert or create.
                linting_errors, _, fixes, _ = crawler.crawl(
                    tree,
                    ignore_mask=ignore_buff,
                    dialect=config.get("dialect_obj"),
                    fname=fname,
                    templated_file=templated_file,
                )
                all_linting_errors += linting_errors

                if fix and fixes:
                    linter_logger.info(f"Applying Fixes [{crawler.code}]: {fixes}")
                    # Do some sanity checks on the fixes before applying.
                    if fixes == last_fixes:  # pragma: no cover
                        cls._warn_unfixable(crawler.code)
                    else:
                        last_fixes = fixes
                        new_tree, _ = tree.apply_fixes(fixes)
                        # Check for infinite loops
                        if new_tree.raw not in previous_versions:
                            # We've not seen this version of the file so far. Continue.
                            tree = new_tree
                            previous_versions.add(tree.raw)
                            changed = True
                            continue
                        else:
                            # Applying these fixes took us back to a state which we've
                            # seen before. Abort.
                            cls._warn_unfixable(crawler.code)

            if loop == 0:
                # Keep track of initial errors for reporting.
                initial_linting_errors = all_linting_errors.copy()

            if fix and not changed:
                # We did not change the file. Either the file is clean (no fixes), or
                # any fixes which are present will take us back to a previous state.
                linter_logger.info(
                    f"Fix loop complete. Stability achieved after {loop}/{loop_limit} loops."
                )
                break
        if fix and loop + 1 == loop_limit:
            linter_logger.warning(f"Loop limit on fixes reached [{loop_limit}].")

        if config.get("ignore_templated_areas", default=True):
            initial_linting_errors = cls.remove_templated_errors(initial_linting_errors)

        return tree, initial_linting_errors, ignore_buff
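
This version detects an exhausted loop with the manual loop + 1 == loop_limit
test; Examples 7 and 10 instead rely on Python's for/else, whose else branch
runs only when the loop finished without break. A tiny demonstration:

for i in range(3):
    if i == 99:
        break  # never fires
else:
    print("loop exhausted without break")  # this prints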
Example 10
    def lint_fix_parsed(
        cls,
        tree: BaseSegment,
        config: FluffConfig,
        rule_set: List[BaseRule],
        fix: bool = False,
        fname: Optional[str] = None,
        templated_file: Optional[TemplatedFile] = None,
        formatter: Any = None,
    ) -> Tuple[BaseSegment, List[SQLBaseError], List[NoQaDirective]]:
        """Lint and optionally fix a tree object."""
        # Keep track of the linting errors
        all_linting_errors = []
        # A placeholder for the fixes we had on the previous loop
        last_fixes = None
        # Keep a set of previous versions to catch infinite loops.
        previous_versions = {tree.raw}

        # If we are fixing then we want to loop up to the runaway_limit, otherwise just
        # once for linting.
        loop_limit = config.get("runaway_limit") if fix else 1

        # Dispatch the output for the lint header
        if formatter:
            formatter.dispatch_lint_header(fname)

        # Look for comment segments which might indicate lines to ignore.
        if not config.get("disable_noqa"):
            rule_codes = [r.code for r in rule_set]
            ignore_buff, ivs = cls.extract_ignore_mask_tree(tree, rule_codes)
            all_linting_errors += ivs
        else:
            ignore_buff = []

        save_tree = tree
        for loop in range(loop_limit):
            changed = False

            progress_bar_crawler = tqdm(
                rule_set,
                desc="lint by rules",
                leave=False,
                disable=progress_bar_configuration.disable_progress_bar,
            )

            for crawler in progress_bar_crawler:
                progress_bar_crawler.set_description(f"rule {crawler.code}")

                # fixes should be a dict {} with keys edit, delete, create
                # delete is just a list of segments to delete
                # edit and create are list of tuples. The first element is the
                # "anchor", the segment to look for either to edit or to insert BEFORE.
                # The second is the element to insert or create.
                linting_errors, _, fixes, _ = crawler.crawl(
                    tree,
                    ignore_mask=ignore_buff,
                    dialect=config.get("dialect_obj"),
                    fname=fname,
                    templated_file=templated_file,
                )
                all_linting_errors += linting_errors

                if fix and fixes:
                    linter_logger.info(
                        f"Applying Fixes [{crawler.code}]: {fixes}")
                    # Do some sanity checks on the fixes before applying.
                    if fixes == last_fixes:  # pragma: no cover
                        cls._warn_unfixable(crawler.code)
                    else:
                        last_fixes = fixes
                        new_tree, _ = tree.apply_fixes(
                            config.get("dialect_obj"), fixes)
                        # Check for infinite loops
                        if new_tree.raw not in previous_versions:
                            # We've not seen this version of the file so far. Continue.
                            tree = new_tree
                            previous_versions.add(tree.raw)
                            changed = True
                            continue
                        else:
                            # Applying these fixes took us back to a state which we've
                            # seen before. Abort.
                            cls._warn_unfixable(crawler.code)

            if loop == 0:
                # Keep track of initial errors for reporting.
                initial_linting_errors = all_linting_errors.copy()

            if fix and not changed:
                # We did not change the file. Either the file is clean (no fixes), or
                # any fixes which are present will take us back to a previous state.
                linter_logger.info(
                    f"Fix loop complete. Stability achieved after {loop}/{loop_limit} "
                    "loops.")
                break
        else:
            if fix:
                # The linter loop hit the limit before reaching a stable point
                # (i.e. free of lint errors). If this happens, it's usually
                # because one or more rules produced fixes which did not address
                # the original issue **or** created new issues.
                linter_logger.warning(
                    f"Loop limit on fixes reached [{loop_limit}].")

                # Discard any fixes for the linting errors, since they caused a
                # loop. IMPORTANT: By doing this, we are telling SQLFluff that
                # these linting errors are "unfixable". This is important,
                # because when "sqlfluff fix" encounters unfixable lint errors,
                # it exits with a "failure" exit code, which is exactly what we
                # want in this situation. (Reason: Although this is more of an
                # internal SQLFluff issue, users deserve to know about it,
                # because it means their file(s) weren't fixed.)
                for violation in initial_linting_errors:
                    if isinstance(violation, SQLLintError):
                        violation.fixes = []

                # Return the original parse tree, before any fixes were applied.
                # Reason: When the linter hits the loop limit, the file is often
                # messy, e.g. some of the fixes were applied repeatedly, possibly
                # other weird things. We don't want the user to see this junk!
                return save_tree, initial_linting_errors, ignore_buff

        if config.get("ignore_templated_areas", default=True):
            initial_linting_errors = cls.remove_templated_errors(
                initial_linting_errors)

        return tree, initial_linting_errors, ignore_buff
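
The per-rule progress bar follows the standard tqdm idiom: wrap the iterable,
then update the description inside the loop. A minimal standalone sketch (the
rule codes are invented):

from tqdm import tqdm

progress = tqdm(["L001", "L010", "L030"], desc="lint by rules", leave=False)
for code in progress:
    progress.set_description(f"rule {code}")
    # ... run the rule against the parse tree here ...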