def render_string(self, in_str: str, fname: str, config: FluffConfig) -> RenderedFile:
    """Template the file."""
    linter_logger.info("TEMPLATING RAW [%s] (%s)", self.templater.name, fname)

    # Start the templating timer
    t0 = time.monotonic()

    if not config.get("templater_obj") == self.templater:
        linter_logger.warning(
            (
                f"Attempt to set templater to {config.get('templater_obj').name} "
                f"failed. Using {self.templater.name} templater. Templater cannot "
                "be set in a .sqlfluff file in a subdirectory of the current "
                "working directory. It can be set in a .sqlfluff in the current "
                "working directory. See Nesting section of the docs for more "
                "details."
            )
        )
    try:
        templated_file, templater_violations = self.templater.process(
            in_str=in_str, fname=fname, config=config, formatter=self.formatter
        )
    except SQLTemplaterSkipFile as s:
        linter_logger.warning(str(s))
        templated_file = None
        templater_violations = []

    if not templated_file:
        linter_logger.info("TEMPLATING FAILED: %s", templater_violations)

    # Record time
    time_dict = {"templating": time.monotonic() - t0}

    return RenderedFile(templated_file, templater_violations, config, time_dict, fname)
def get_encoding(fname: str, config: FluffConfig) -> str:
    """Get the encoding of the file (autodetect)."""
    encoding_config = config.get("encoding", default="autodetect")

    if encoding_config == "autodetect":
        with open(fname, "rb") as f:
            data = f.read()
        return chardet.detect(data)["encoding"]

    return encoding_config
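# --- Usage sketch (illustrative only, not part of the code above) ---
# Assumes `FluffConfig` accepts an `overrides` dict and that "my_query.sql"
# exists on disk. With the encoding left as "autodetect" the helper falls back
# to chardet detection; any other configured value is returned unchanged.
from sqlfluff.core import FluffConfig

cfg = FluffConfig(overrides={"dialect": "ansi", "encoding": "autodetect"})
detected = get_encoding("my_query.sql", config=cfg)  # e.g. "utf-8"
with open("my_query.sql", encoding=detected) as f:
    sql = f.read()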
def render_string(
    self, in_str: str, fname: str, config: FluffConfig, encoding: str
) -> RenderedFile:
    """Template the file."""
    linter_logger.info("TEMPLATING RAW [%s] (%s)", self.templater.name, fname)

    # Start the templating timer
    t0 = time.monotonic()

    # Newlines are normalised to unix-style line endings (\n).
    # The motivation is that Jinja normalises newlines during templating and
    # we want consistent mapping between the raw and templated slices.
    in_str = self._normalise_newlines(in_str)

    # Since Linter.__init__() does not require a dialect to be specified,
    # check for one now. (We're processing a string, not a file, so we're
    # not going to pick up a .sqlfluff or other config file to provide a
    # missing dialect at this point.)
    config.verify_dialect_specified()

    if not config.get("templater_obj") == self.templater:
        linter_logger.warning(
            (
                f"Attempt to set templater to {config.get('templater_obj').name} "
                f"failed. Using {self.templater.name} templater. Templater cannot "
                "be set in a .sqlfluff file in a subdirectory of the current "
                "working directory. It can be set in a .sqlfluff in the current "
                "working directory. See Nesting section of the docs for more "
                "details."
            )
        )
    try:
        templated_file, templater_violations = self.templater.process(
            in_str=in_str, fname=fname, config=config, formatter=self.formatter
        )
    except SQLFluffSkipFile as s:  # pragma: no cover
        linter_logger.warning(str(s))
        templated_file = None
        templater_violations = []

    if not templated_file:
        linter_logger.info("TEMPLATING FAILED: %s", templater_violations)

    # Record time
    time_dict = {"templating": time.monotonic() - t0}

    return RenderedFile(
        templated_file,
        templater_violations,
        config,
        time_dict,
        fname,
        encoding,
        in_str,
    )
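# --- Usage sketch (illustrative only): calling `render_string` directly ---
# Assumes a `Linter` built from a dialect-bearing config; "<string>" is just a
# placeholder filename. The returned `RenderedFile` carries the fields passed
# to its constructor above (templated_file, templater_violations, time_dict, ...).
from sqlfluff.core import FluffConfig, Linter

cfg = FluffConfig(overrides={"dialect": "ansi"})
linter = Linter(config=cfg)
rendered = linter.render_string(
    "SELECT {{ 1 + 1 }} AS two FROM my_table\n",
    fname="<string>",
    config=cfg,
    encoding="utf-8",
)
print(rendered.templater_violations, rendered.time_dict)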
def assert_rule_pass_in_sql(code, sql, configs=None):
    """Assert that a given rule doesn't fail on the given sql."""
    # Configs allows overrides if we want to use them.
    cfg = FluffConfig(configs=configs)
    r = get_rule_from_set(code, config=cfg)
    parsed = Linter(config=cfg).parse_string(sql)
    if parsed.violations:
        pytest.fail(parsed.violations[0].desc() + "\n" + parsed.tree.stringify())
    print(f"Parsed:\n {parsed.tree.stringify()}")
    lerrs, _, _, _ = r.crawl(parsed.tree, dialect=cfg.get("dialect_obj"))
    print(f"Errors Found: {lerrs}")
    if any(v.rule.code == code for v in lerrs):
        pytest.fail(f"Found {code} failures in query which should pass.", pytrace=False)
def _wrapped(self, *, in_str: str, fname: str, config: FluffConfig = None, **kwargs):
    if config:
        limit = config.get("large_file_skip_char_limit")
        if limit:
            templater_logger.warning(
                "The config value large_file_skip_char_limit was found set. "
                "This feature will be removed in a future release, please "
                "use the more efficient 'large_file_skip_byte_limit' instead."
            )
        if limit and len(in_str) > limit:
            raise SQLFluffSkipFile(
                f"Length of file {fname!r} is over {limit} characters. "
                "Skipping to avoid parser lock. Users can increase this limit "
                "in their config by setting the 'large_file_skip_char_limit' "
                "value, or disable by setting it to zero."
            )
    return func(self, in_str=in_str, fname=fname, config=config, **kwargs)
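# --- Sketch of a decorator shell around `_wrapped` above (an assumption: the
# real decorator name and module may differ). The outer function closes over
# `func` and preserves its metadata; the character-limit guard shown above
# would sit where the ellipsis is, before delegating to `func`.
import functools

from sqlfluff.core import FluffConfig


def large_file_check(func):
    @functools.wraps(func)
    def _wrapped(self, *, in_str: str, fname: str, config: FluffConfig = None, **kwargs):
        ...  # elided: the large_file_skip_char_limit guard from the body above
        return func(self, in_str=in_str, fname=fname, config=config, **kwargs)

    return _wrapped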
def assert_rule_pass_in_sql(code, sql, configs=None):
    """Assert that a given rule doesn't fail on the given sql."""
    # Configs allows overrides if we want to use them.
    cfg = FluffConfig(configs=configs)
    r = get_rule_from_set(code, config=cfg)
    linter = Linter(config=cfg)
    rendered = linter.render_string(sql, fname="<STR>", config=cfg, encoding="utf-8")
    parsed = linter.parse_rendered(rendered, recurse=True)
    if parsed.violations:
        pytest.fail(parsed.violations[0].desc() + "\n" + parsed.tree.stringify())
    print(f"Parsed:\n {parsed.tree.stringify()}")
    lerrs, _, _, _ = r.crawl(
        parsed.tree, [], dialect=cfg.get("dialect_obj"), templated_file=rendered[0]
    )
    print(f"Errors Found: {lerrs}")
    if any(v.rule.code == code for v in lerrs):
        pytest.fail(f"Found {code} failures in query which should pass.", pytrace=False)
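# --- Illustrative call to the helper above (the rule code, SQL and config
# shape are examples, not taken from the surrounding code) ---
assert_rule_pass_in_sql(
    "L010",
    "select col_a from my_table\n",
    configs={"core": {"dialect": "ansi"}},
)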
def lint_fix_parsed(
    cls,
    tree: BaseSegment,
    config: FluffConfig,
    rule_set: List[BaseRule],
    fix: bool = False,
    fname: Optional[str] = None,
    templated_file: Optional[TemplatedFile] = None,
    formatter: Any = None,
) -> Tuple[BaseSegment, List[SQLBaseError], List[NoQaDirective]]:
    """Lint and optionally fix a tree object."""
    # Keep track of the linting errors on the very first linter pass. The
    # list of issues output by "lint" and "fix" only includes issues present
    # in the initial SQL code, EXCLUDING any issues that may be created by
    # the fixes themselves.
    initial_linting_errors = []
    # A placeholder for the fixes we had on the previous loop
    last_fixes = None
    # Keep a set of previous versions to catch infinite loops.
    previous_versions: Set[Tuple[str, Tuple[SourceFix, ...]]] = {(tree.raw, ())}

    # If we are fixing then we want to loop up to the runaway_limit, otherwise
    # just once for linting.
    loop_limit = config.get("runaway_limit") if fix else 1

    # Dispatch the output for the lint header
    if formatter:
        formatter.dispatch_lint_header(fname)

    # Look for comment segments which might indicate lines to ignore.
    if not config.get("disable_noqa"):
        rule_codes = [r.code for r in rule_set]
        ignore_buff, ivs = cls.extract_ignore_mask_tree(tree, rule_codes)
        initial_linting_errors += ivs
    else:
        ignore_buff = []

    save_tree = tree
    # There are two phases of rule running.
    # 1. The main loop is for most rules. These rules are assumed to
    #    interact and cause a cascade of fixes requiring multiple passes.
    #    These are run the `runaway_limit` number of times (default 10).
    # 2. The post loop is for post-processing rules, not expected to trigger
    #    any downstream rules, e.g. capitalization fixes. They are run on the
    #    first loop and then twice at the end (once to fix, and once again to
    #    check result of fixes), but not in the intervening loops.
    phases = ["main"]
    if fix:
        phases.append("post")
    for phase in phases:
        if len(phases) > 1:
            rules_this_phase = [rule for rule in rule_set if rule.lint_phase == phase]
        else:
            rules_this_phase = rule_set
        for loop in range(loop_limit if phase == "main" else 2):

            def is_first_linter_pass():
                return phase == phases[0] and loop == 0

            # Additional newlines are to assist in scanning linting loops
            # during debugging.
            linter_logger.info(
                f"\n\nEntering linter phase {phase}, loop {loop + 1}/{loop_limit}\n"
            )
            changed = False

            if is_first_linter_pass():
                # In order to compute initial_linting_errors correctly, need
                # to run all rules on the first loop of the main phase.
                rules_this_phase = rule_set

            progress_bar_crawler = tqdm(
                rules_this_phase,
                desc="lint by rules",
                leave=False,
                disable=progress_bar_configuration.disable_progress_bar,
            )
            for crawler in progress_bar_crawler:
                # Performance: After first loop pass, skip rules that don't
                # do fixes. Any results returned won't be seen by the user
                # anyway (linting errors ADDED by rules changing SQL, are
                # not reported back to the user - only initial linting errors),
                # so there's absolutely no reason to run them.
                if (
                    fix
                    and not is_first_linter_pass()
                    and not is_fix_compatible(crawler)
                ):
                    continue

                progress_bar_crawler.set_description(f"rule {crawler.code}")

                # fixes should be a dict {} with keys edit, delete, create
                # delete is just a list of segments to delete
                # edit and create are list of tuples. The first element is
                # the "anchor", the segment to look for either to edit or to
                # insert BEFORE. The second is the element to insert or create.
                linting_errors, _, fixes, _ = crawler.crawl(
                    tree,
                    dialect=config.get("dialect_obj"),
                    fix=fix,
                    templated_file=templated_file,
                    ignore_mask=ignore_buff,
                    fname=fname,
                )
                if is_first_linter_pass():
                    initial_linting_errors += linting_errors

                if fix and fixes:
                    linter_logger.info(f"Applying Fixes [{crawler.code}]: {fixes}")
                    # Do some sanity checks on the fixes before applying.
                    anchor_info = BaseSegment.compute_anchor_edit_info(fixes)
                    if any(
                        not info.is_valid for info in anchor_info.values()
                    ):  # pragma: no cover
                        message = (
                            f"Rule {crawler.code} returned conflicting "
                            "fixes with the same anchor. This is only "
                            "supported for create_before+create_after, so "
                            f"the fixes will not be applied. {fixes!r}"
                        )
                        cls._report_conflicting_fixes_same_anchor(message)
                        for lint_result in linting_errors:
                            lint_result.fixes = []
                    elif fixes == last_fixes:  # pragma: no cover
                        # If we generate the same fixes two times in a row,
                        # that means we're in a loop, and we want to stop.
                        # (Fixes should address issues, hence different
                        # and/or fewer fixes next time.)
                        cls._warn_unfixable(crawler.code)
                    else:
                        # This is the happy path. We have fixes, now we want to
                        # apply them.
                        last_fixes = fixes
                        new_tree, _, _ = tree.apply_fixes(
                            config.get("dialect_obj"), crawler.code, anchor_info
                        )
                        # Check for infinite loops. We use a combination of the
                        # fixed templated file and the list of source fixes to
                        # apply.
                        loop_check_tuple = (
                            new_tree.raw,
                            tuple(new_tree.source_fixes),
                        )
                        if loop_check_tuple not in previous_versions:
                            # We've not seen this version of the file so
                            # far. Continue.
                            tree = new_tree
                            previous_versions.add(loop_check_tuple)
                            changed = True
                            continue
                        else:
                            # Applying these fixes took us back to a state
                            # which we've seen before. We're in a loop, so
                            # we want to stop.
                            cls._warn_unfixable(crawler.code)

            if fix and not changed:
                # We did not change the file. Either the file is clean (no
                # fixes), or any fixes which are present will take us back
                # to a previous state.
                linter_logger.info(
                    f"Fix loop complete for {phase} phase. Stability "
                    f"achieved after {loop}/{loop_limit} loops."
                )
                break
        else:
            if fix:
                # The linter loop hit the limit before reaching a stable point
                # (i.e. free of lint errors). If this happens, it's usually
                # because one or more rules produced fixes which did not address
                # the original issue **or** created new issues.
                linter_logger.warning(f"Loop limit on fixes reached [{loop_limit}].")

                # Discard any fixes for the linting errors, since they caused a
                # loop. IMPORTANT: By doing this, we are telling SQLFluff that
                # these linting errors are "unfixable". This is important,
                # because when "sqlfluff fix" encounters unfixable lint errors,
                # it exits with a "failure" exit code, which is exactly what we
                # want in this situation. (Reason: Although this is more of an
                # internal SQLFluff issue, users deserve to know about it,
                # because it means their file(s) weren't fixed.)
                for violation in initial_linting_errors:
                    if isinstance(violation, SQLLintError):
                        violation.fixes = []

                # Return the original parse tree, before any fixes were applied.
                # Reason: When the linter hits the loop limit, the file is often
                # messy, e.g. some of the fixes were applied repeatedly, possibly
                # other weird things. We don't want the user to see this junk!
                return save_tree, initial_linting_errors, ignore_buff

    if config.get("ignore_templated_areas", default=True):
        initial_linting_errors = cls.remove_templated_errors(initial_linting_errors)

    return tree, initial_linting_errors, ignore_buff
def _lex_templated_file(
    templated_file: TemplatedFile, config: FluffConfig
) -> Tuple[Optional[Sequence[BaseSegment]], List[SQLLexError], FluffConfig]:
    """Lex a templated file.

    NOTE: This potentially mutates the config, so make sure to use the
    returned one.
    """
    violations = []
    linter_logger.info("LEXING RAW (%s)", templated_file.fname)
    # Get the lexer
    lexer = Lexer(config=config)
    # Lex the file and log any problems
    try:
        tokens, lex_vs = lexer.lex(templated_file)
        # We might just get the violations as a list
        violations += lex_vs
        linter_logger.info(
            "Lexed tokens: %s", [seg.raw for seg in tokens] if tokens else None
        )
    except SQLLexError as err:
        linter_logger.info("LEXING FAILED! (%s): %s", templated_file.fname, err)
        violations.append(err)
        return None, violations, config

    if not tokens:  # pragma: no cover TODO?
        return None, violations, config

    # Check that we've got sensible indentation from the lexer.
    # We might need to suppress if it's a complicated file.
    templating_blocks_indent = config.get("template_blocks_indent", "indentation")
    if isinstance(templating_blocks_indent, str):
        force_block_indent = templating_blocks_indent.lower().strip() == "force"
    else:
        force_block_indent = False
    templating_blocks_indent = bool(templating_blocks_indent)

    # If we're forcing it through we don't check.
    if templating_blocks_indent and not force_block_indent:
        indent_balance = sum(
            getattr(elem, "indent_val", 0)
            for elem in cast(Tuple[BaseSegment, ...], tokens)
        )
        if indent_balance != 0:
            linter_logger.debug(
                "Indent balance test failed for %r. Template indents will not be "
                "linted for this file.",
                templated_file.fname,
            )
            # Don't enable the templating blocks.
            templating_blocks_indent = False

    # The file will have been lexed without config, so check all indents
    # are enabled.
    new_tokens = []
    for token in cast(Tuple[BaseSegment, ...], tokens):
        if token.is_meta:
            token = cast(MetaSegment, token)
            if token.indent_val != 0:
                # Don't allow it if we're not linting templating block indents.
                if not templating_blocks_indent:
                    continue
        new_tokens.append(token)

    # Return new buffer
    return new_tokens, violations, config
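# --- Standalone lexing sketch (illustrative): the public `Lexer.lex()` also
# accepts a raw string, so the (tokens, violations) pair handled above can be
# inspected without building a `TemplatedFile` first. Assumes `Lexer` and
# `FluffConfig` are importable from `sqlfluff.core`.
from sqlfluff.core import FluffConfig, Lexer

cfg = FluffConfig(overrides={"dialect": "ansi"})
tokens, lex_violations = Lexer(config=cfg).lex("SELECT 1\n")
print([t.raw for t in tokens], lex_violations)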
def lint_fix_parsed(
    cls,
    tree: BaseSegment,
    config: FluffConfig,
    rule_set: List[BaseRule],
    fix: bool = False,
    fname: Optional[str] = None,
    templated_file: Optional[TemplatedFile] = None,
    formatter: Any = None,
) -> Tuple[BaseSegment, List[SQLBaseError], List[NoQaDirective]]:
    """Lint and optionally fix a tree object."""
    # Keep track of the linting errors
    all_linting_errors = []
    # A placeholder for the fixes we had on the previous loop
    last_fixes = None
    # Keep a set of previous versions to catch infinite loops.
    previous_versions = {tree.raw}

    # If we are fixing then we want to loop up to the runaway_limit, otherwise
    # just once for linting.
    loop_limit = config.get("runaway_limit") if fix else 1

    # Dispatch the output for the lint header
    if formatter:
        formatter.dispatch_lint_header(fname)

    # Look for comment segments which might indicate lines to ignore.
    ignore_buff, ivs = cls.extract_ignore_mask(tree)
    all_linting_errors += ivs

    for loop in range(loop_limit):
        changed = False
        for crawler in rule_set:
            # fixes should be a dict {} with keys edit, delete, create
            # delete is just a list of segments to delete
            # edit and create are list of tuples. The first element is the
            # "anchor", the segment to look for either to edit or to insert BEFORE.
            # The second is the element to insert or create.
            linting_errors, _, fixes, _ = crawler.crawl(
                tree,
                ignore_mask=ignore_buff,
                dialect=config.get("dialect_obj"),
                fname=fname,
                templated_file=templated_file,
            )
            all_linting_errors += linting_errors

            if fix and fixes:
                linter_logger.info(f"Applying Fixes [{crawler.code}]: {fixes}")
                # Do some sanity checks on the fixes before applying.
                if fixes == last_fixes:  # pragma: no cover
                    cls._warn_unfixable(crawler.code)
                else:
                    last_fixes = fixes
                    new_tree, _ = tree.apply_fixes(fixes)
                    # Check for infinite loops
                    if new_tree.raw not in previous_versions:
                        # We've not seen this version of the file so far. Continue.
                        tree = new_tree
                        previous_versions.add(tree.raw)
                        changed = True
                        continue
                    else:
                        # Applying these fixes took us back to a state which we've
                        # seen before. Abort.
                        cls._warn_unfixable(crawler.code)

        if loop == 0:
            # Keep track of initial errors for reporting.
            initial_linting_errors = all_linting_errors.copy()

        if fix and not changed:
            # We did not change the file. Either the file is clean (no fixes), or
            # any fixes which are present will take us back to a previous state.
            linter_logger.info(
                f"Fix loop complete. Stability achieved after {loop}/{loop_limit} "
                "loops."
            )
            break

    if fix and loop + 1 == loop_limit:
        linter_logger.warning(f"Loop limit on fixes reached [{loop_limit}].")

    if config.get("ignore_templated_areas", default=True):
        initial_linting_errors = cls.remove_templated_errors(initial_linting_errors)

    return tree, initial_linting_errors, ignore_buff
def lint_fix_parsed(
    cls,
    tree: BaseSegment,
    config: FluffConfig,
    rule_set: List[BaseRule],
    fix: bool = False,
    fname: Optional[str] = None,
    templated_file: Optional[TemplatedFile] = None,
    formatter: Any = None,
) -> Tuple[BaseSegment, List[SQLBaseError], List[NoQaDirective]]:
    """Lint and optionally fix a tree object."""
    # Keep track of the linting errors
    all_linting_errors = []
    # A placeholder for the fixes we had on the previous loop
    last_fixes = None
    # Keep a set of previous versions to catch infinite loops.
    previous_versions = {tree.raw}

    # If we are fixing then we want to loop up to the runaway_limit, otherwise
    # just once for linting.
    loop_limit = config.get("runaway_limit") if fix else 1

    # Dispatch the output for the lint header
    if formatter:
        formatter.dispatch_lint_header(fname)

    # Look for comment segments which might indicate lines to ignore.
    if not config.get("disable_noqa"):
        rule_codes = [r.code for r in rule_set]
        ignore_buff, ivs = cls.extract_ignore_mask_tree(tree, rule_codes)
        all_linting_errors += ivs
    else:
        ignore_buff = []

    save_tree = tree

    for loop in range(loop_limit):
        changed = False

        progress_bar_crawler = tqdm(
            rule_set,
            desc="lint by rules",
            leave=False,
            disable=progress_bar_configuration.disable_progress_bar,
        )
        for crawler in progress_bar_crawler:
            progress_bar_crawler.set_description(f"rule {crawler.code}")

            # fixes should be a dict {} with keys edit, delete, create
            # delete is just a list of segments to delete
            # edit and create are list of tuples. The first element is the
            # "anchor", the segment to look for either to edit or to insert BEFORE.
            # The second is the element to insert or create.
            linting_errors, _, fixes, _ = crawler.crawl(
                tree,
                ignore_mask=ignore_buff,
                dialect=config.get("dialect_obj"),
                fname=fname,
                templated_file=templated_file,
            )
            all_linting_errors += linting_errors

            if fix and fixes:
                linter_logger.info(f"Applying Fixes [{crawler.code}]: {fixes}")
                # Do some sanity checks on the fixes before applying.
                if fixes == last_fixes:  # pragma: no cover
                    cls._warn_unfixable(crawler.code)
                else:
                    last_fixes = fixes
                    new_tree, _ = tree.apply_fixes(config.get("dialect_obj"), fixes)
                    # Check for infinite loops
                    if new_tree.raw not in previous_versions:
                        # We've not seen this version of the file so far. Continue.
                        tree = new_tree
                        previous_versions.add(tree.raw)
                        changed = True
                        continue
                    else:
                        # Applying these fixes took us back to a state which we've
                        # seen before. Abort.
                        cls._warn_unfixable(crawler.code)

        if loop == 0:
            # Keep track of initial errors for reporting.
            initial_linting_errors = all_linting_errors.copy()

        if fix and not changed:
            # We did not change the file. Either the file is clean (no fixes), or
            # any fixes which are present will take us back to a previous state.
            linter_logger.info(
                f"Fix loop complete. Stability achieved after {loop}/{loop_limit} "
                "loops."
            )
            break
    else:
        if fix:
            # The linter loop hit the limit before reaching a stable point
            # (i.e. free of lint errors). If this happens, it's usually
            # because one or more rules produced fixes which did not address
            # the original issue **or** created new issues.
            linter_logger.warning(f"Loop limit on fixes reached [{loop_limit}].")

            # Discard any fixes for the linting errors, since they caused a
            # loop. IMPORTANT: By doing this, we are telling SQLFluff that
            # these linting errors are "unfixable". This is important,
            # because when "sqlfluff fix" encounters unfixable lint errors,
            # it exits with a "failure" exit code, which is exactly what we
            # want in this situation. (Reason: Although this is more of an
            # internal SQLFluff issue, users deserve to know about it,
            # because it means their file(s) weren't fixed.)
            for violation in initial_linting_errors:
                if isinstance(violation, SQLLintError):
                    violation.fixes = []

            # Return the original parse tree, before any fixes were applied.
            # Reason: When the linter hits the loop limit, the file is often
            # messy, e.g. some of the fixes were applied repeatedly, possibly
            # other weird things. We don't want the user to see this junk!
            return save_tree, initial_linting_errors, ignore_buff

    if config.get("ignore_templated_areas", default=True):
        initial_linting_errors = cls.remove_templated_errors(initial_linting_errors)

    return tree, initial_linting_errors, ignore_buff