def test_singleton(self):
    """Test to ensure multiple calls to benchit return the same object."""
    b = BenchIt()
    b2 = BenchIt()
    self.assertEqual(b, b2)
    self.assertEqual(b.timer_name, "BenchIt")
    b3 = BenchIt("New Bencher")
    self.assertNotEqual(b, b3)
    self.assertEqual(b3.timer_name, "New Bencher")
    # Constructing with the same name (via keyword) should return the same instance.
    b4 = BenchIt(timer_name="New Bencher")
    self.assertEqual(b3, b4)
def test_benchit(self):
    """Full benchmark test.

    Test starting, marking, stopping, and displaying the benchmark.
    """
    b = BenchIt()
    # Ensure start key is set on init
    self.assertEqual(len(b), 1)
    self.assertIn('_start', b.keys())
    # Set a marker
    b.mark('Unique Marker')
    self.assertIn('Unique Marker', b.keys())
    # Stop the benchmark
    b.stop()
    self.assertIn('_end', b.keys())
    # Ensure end time is after the start time
    self.assertGreater(b['_end'], b['_start'])
    # Check the output table
    with capture(b.display) as output:
        self.assertIn("Avg Time", output)
        self.assertIn("Unique Marker", output)
        self.assertIn("Total runtime", output)
        self.assertIn("test_benchit", output)
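The two tests above pin down the BenchIt API: construction is a named singleton, a `_start` key is recorded on init, and `mark()`/`stop()`/`display()` add markers, an `_end` key and a summary table. Below is a minimal sketch of that behaviour, assuming one shared instance per `timer_name` (a hypothetical reimplementation for illustration, not the library's actual code):

import time

class MiniBench(dict):
    """Hypothetical dict-like singleton timer mirroring the tested API."""

    _instances = {}

    def __new__(cls, timer_name="MiniBench"):
        # Reuse the existing instance for this name: calling the
        # constructor twice with the same name returns the same object.
        if timer_name not in cls._instances:
            inst = super().__new__(cls)
            inst.timer_name = timer_name
            inst["_start"] = time.monotonic()  # start key set on init
            cls._instances[timer_name] = inst
        return cls._instances[timer_name]

    def __init__(self, timer_name="MiniBench"):
        # Explicit no-op so dict.__init__ doesn't treat timer_name
        # as a dict item on repeated construction.
        super().__init__()

    def mark(self, name):
        self[name] = time.monotonic()

    def stop(self):
        self["_end"] = time.monotonic()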
def demo_benchmark():
    b = BenchIt()  # starts the timer
    # do stuff
    sleep(1); b()
    sleep(.2)
    sleep(.3)
    sleep(.4)
    b("More stuff Done")
    for i in range(1, 5):
        sleep(.1); b()
    b.display()
def test_benchit(self):
    """Full benchmark test.

    Test starting, marking, stopping, and displaying the benchmark.
    """
    b = BenchIt()
    # Ensure start key is set on init
    self.assertEqual(len(b.times), 1)
    self.assertIn('_start', b.times.keys())
    # Set markers. The bare string statements are deliberate: calling b()
    # with no arguments names the marker after the previous source line,
    # so the literal text of each bare string becomes the marker key.
    "code marker"
    b()
    "more code"
    b("__call__ marker")
    b.mark('Unique Marker')
    self.assertIn('"code marker"', b.times.keys())
    self.assertIn('__call__ marker', b.times.keys())
    self.assertIn('Unique Marker', b.times.keys())
    # Stop the benchmark
    b.stop()
    self.assertIn('_end', b.times.keys())
    # Ensure end time is after the start time
    self.assertGreater(b.times['_end'], b.times['_start'])
    # Check the output table
    with capture(b.display) as output:
        self.assertIn("Avg Time", output)
        self.assertIn("Unique Marker", output)
        self.assertIn("Total runtime", output)
        self.assertIn("test_benchit", output)
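The second version of the test exercises a subtler behaviour: calling `b()` with no arguments records a marker named after the previous source line of the caller, hence the assertion that the literal text `'"code marker"'` appears as a key. Here is a sketch of one way to get that effect with the stdlib, purely as an assumed mechanism rather than BenchIt's confirmed implementation:

import inspect
import time

class FrameMarkerTimer:
    """Sketch: name no-arg markers after the caller's previous source line."""

    def __init__(self):
        self.times = {"_start": time.monotonic()}

    def __call__(self, name=None):
        if name is None:
            # No explicit name: inspect the caller's frame and use the text
            # of the source line *before* the call as the marker name.
            caller = inspect.stack()[1]
            src_lines, _ = inspect.findsource(caller.frame)
            name = src_lines[caller.lineno - 2].strip()
        self.times[name] = time.monotonic()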
def parse(self, parse_context=None, parse_grammar=None):
    """Use the parse grammar to find subsegments within this segment.

    A large chunk of the logic around this can be found in the `expand` method.

    Use the parse setting in the context for testing, mostly to check how deep
    to go. True/False for yes or no, an integer allows a certain number of levels.

    Optionally, this method allows a custom parse grammar to be provided
    which will override any existing parse grammar on the segment.
    """
    # Clear the blacklist cache to avoid missteps
    if parse_context:
        parse_context.blacklist.clear()

    # the parse_depth and recurse kwargs control how deep we will recurse for testing.
    if not self.segments:
        # This means we're a root segment, just return an unmutated self
        return self

    # Check the Parse Grammar
    parse_grammar = parse_grammar or self.parse_grammar
    if parse_grammar is None:
        # No parse grammar, go straight to expansion
        parse_context.logger.debug(
            "{0}.parse: no grammar. Going straight to expansion".format(
                self.__class__.__name__))
    else:
        # For debugging purposes. Ensure that we don't have non-code elements
        # at the start or end of the segments. They should always be in the middle,
        # or in the parent expression.
        segments = self.segments
        if self.can_start_end_non_code:
            pre_nc, segments, post_nc = trim_non_code_segments(segments)
        else:
            pre_nc = ()
            post_nc = ()
            if (not segments[0].is_code) and (not segments[0].is_meta):
                raise ValueError(
                    "Segment {0} starts with non code segment: {1!r}.\n{2!r}"
                    .format(self, segments[0].raw, segments))
            if (not segments[-1].is_code) and (not segments[-1].is_meta):
                raise ValueError(
                    "Segment {0} ends with non code segment: {1!r}.\n{2!r}"
                    .format(self, segments[-1].raw, segments))

        # NOTE: No match_depth kwarg, because this is the start of the matching.
        with parse_context.matching_segment(self.__class__.__name__) as ctx:
            m = parse_grammar.match(segments=segments, parse_context=ctx)

        if not isinstance(m, MatchResult):
            raise TypeError(
                "[PD:{0}] {1}.match. Result is {2}, not a MatchResult!".format(
                    parse_context.parse_depth, self.__class__.__name__, type(m)))

        # Basic Validation, that we haven't dropped anything.
        check_still_complete(segments, m.matched_segments, m.unmatched_segments)

        if m.has_match():
            if m.is_complete():
                # Complete match, happy days!
                self.segments = pre_nc + m.matched_segments + post_nc
            else:
                # Incomplete match.
                # For now this means the parsing has failed. Let's add the unmatched bit
                # at the end as something unparsable.
                # TODO: Do something more intelligent here.
                self.segments = (
                    pre_nc
                    + m.matched_segments
                    + (UnparsableSegment(
                        segments=m.unmatched_segments + post_nc,
                        expected="Nothing...",
                    ),))
        elif self.allow_empty and not segments:
            # Very edge case, but some segments are allowed to be empty other than non-code
            self.segments = pre_nc + post_nc
        else:
            # If there's no match at this stage, then it's unparsable. That's
            # a problem at this stage so wrap it in an unparsable segment and carry on.
            self.segments = (
                pre_nc
                + (
                    UnparsableSegment(
                        segments=segments,
                        expected=self.name,
                    ),  # NB: tuple
                )
                + post_nc)

    bencher = BenchIt()  # starts the timer
    bencher("Parse complete of {0!r}".format(self.__class__.__name__))

    # Recurse if allowed (using the expand method to deal with the expansion)
    parse_context.logger.debug(
        "{0}.parse: Done Parse. Plotting Recursion. Recurse={1!r}".format(
            self.__class__.__name__, parse_context.recurse))
    parse_depth_msg = "###\n#\n# Beginning Parse Depth {0}: {1}\n#\n###\nInitial Structure:\n{2}".format(
        parse_context.parse_depth + 1, self.__class__.__name__, self.stringify())
    if parse_context.may_recurse():
        parse_context.logger.debug(parse_depth_msg)
        with parse_context.deeper_parse() as ctx:
            self.segments = self.expand(self.segments, parse_context=ctx)

    return self
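The method above delegates edge trimming to `trim_non_code_segments`, which isn't shown here. Judging by how `pre_nc`, `segments` and `post_nc` are used, it plausibly splits the segment tuple into leading non-code, a code/meta core, and trailing non-code. A sketch under that assumption (the treatment of meta segments mirrors the start/end validation in the method above):

def trim_non_code_segments_sketch(segments):
    # Walk in from each end, peeling off segments that are neither
    # code nor meta. (Assumed behaviour, not the library's code.)
    start = 0
    while start < len(segments) and not (segments[start].is_code or segments[start].is_meta):
        start += 1
    stop = len(segments)
    while stop > start and not (segments[stop - 1].is_code or segments[stop - 1].is_meta):
        stop -= 1
    return segments[:start], segments[start:stop], segments[stop:]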
def fix(force, paths, parallel, bench=False, fixed_suffix="", logger=None, **kwargs):
    """Fix SQL files.

    PATH is the path to a sql file or directory to lint. This can be either a
    file ('path/to/file.sql'), a path ('directory/of/sql/files'), a single ('-')
    character to indicate reading from *stdin* or a dot/blank ('.'/' ') which will
    be interpreted like passing the current working directory as a path argument.
    """
    # some quick checks
    fixing_stdin = ("-",) == paths

    c = get_config(**kwargs)
    lnt, formatter = get_linter_and_formatter(c, silent=fixing_stdin)
    verbose = c.get("verbose")

    bencher = BenchIt()

    formatter.dispatch_config(lnt)

    # Set up logging.
    set_logging_level(verbosity=verbose, logger=logger, stderr_output=fixing_stdin)

    # handle stdin case. should output formatted sql to stdout and nothing else.
    if fixing_stdin:
        stdin = sys.stdin.read()
        # TODO: Remove verbose
        result = lnt.lint_string_wrapped(stdin, fname="stdin", fix=True)
        stdout = result.paths[0].files[0].fix_string()[0]
        click.echo(stdout, nl=False)
        sys.exit()

    # Lint the paths (not with the fix argument at this stage), outputting as we go.
    click.echo("==== finding fixable violations ====")
    try:
        result = lnt.lint_paths(paths, fix=True, ignore_non_existent_files=False,
                                parallel=parallel)
    except IOError:
        click.echo(
            colorize(
                "The path(s) {0!r} could not be accessed. Check it/they exist(s)."
                .format(paths),
                "red",
            ))
        sys.exit(1)

    # NB: We filter to linting violations here, because they're
    # the only ones which can be potentially fixed.
    if result.num_violations(types=SQLLintError, fixable=True) > 0:
        click.echo("==== fixing violations ====")
        click.echo("{0} fixable linting violations found".format(
            result.num_violations(types=SQLLintError, fixable=True)))
        if force:
            click.echo(colorize("FORCE MODE", "red") + ": Attempting fixes...")
            # TODO: Remove verbose
            success = do_fixes(
                lnt,
                result,
                formatter,
                types=SQLLintError,
                fixed_file_suffix=fixed_suffix,
            )
            if not success:
                sys.exit(1)
        else:
            click.echo("Are you sure you wish to attempt to fix these? [Y/n] ", nl=False)
            c = click.getchar().lower()
            click.echo("...")
            if c in ("y", "\r", "\n"):
                click.echo("Attempting fixes...")
                # TODO: Remove verbose
                success = do_fixes(
                    lnt,
                    result,
                    formatter,
                    types=SQLLintError,
                    fixed_file_suffix=fixed_suffix,
                )
                if not success:
                    sys.exit(1)
                else:
                    click.echo("All Finished 📜 🎉!")
            elif c == "n":
                click.echo("Aborting...")
            else:
                click.echo("Invalid input, please enter 'Y' or 'N'")
                click.echo("Aborting...")
    else:
        click.echo("==== no fixable linting violations found ====")
        if result.num_violations(types=SQLLintError, fixable=False) > 0:
            click.echo("  [{0} unfixable linting violations found]".format(
                result.num_violations(types=SQLLintError, fixable=False)))
        click.echo("All Finished 📜 🎉!")

    if bench:
        click.echo("\n\n==== bencher stats ====")
        bencher.display()

    sys.exit(0)
def parse(path, code_only, format, profiler, bench, **kwargs):
    """Parse SQL files and just spit out the result.

    PATH is the path to a sql file or directory to lint. This can be either a
    file ('path/to/file.sql'), a path ('directory/of/sql/files'), a single ('-')
    character to indicate reading from *stdin* or a dot/blank ('.'/' ') which will
    be interpreted like passing the current working directory as a path argument.
    """
    # Initialise the benchmarker
    bencher = BenchIt()  # starts the timer

    c = get_config(**kwargs)
    # We don't want anything else to be logged if we want a yaml output
    lnt = get_linter(c, silent=format in ('json', 'yaml'))
    verbose = c.get('verbose')
    recurse = c.get('recurse')

    config_string = format_config(lnt, verbose=verbose)
    if len(config_string) > 0:
        lnt.log(config_string)

    # TODO: do this better
    nv = 0
    if profiler:
        # Set up the profiler if required
        try:
            import cProfile
        except ImportError:
            lnt.log('The cProfiler is not available on your platform.')
            sys.exit(1)
        pr = cProfile.Profile()
        pr.enable()

    bencher("Parse setup")

    try:
        # handle stdin if specified via lone '-'
        if '-' == path:
            # put the parser result in a list to iterate later
            config = lnt.config.make_child_from_path('stdin')
            result = [lnt.parse_string(
                sys.stdin.read(), 'stdin', verbosity=verbose,
                recurse=recurse, config=config
            )]
        else:
            # A single path must be specified for this command
            result = lnt.parse_path(path, verbosity=verbose, recurse=recurse)

        # iterative print for human readout
        if format == 'human':
            for parsed, violations, time_dict in result:
                if parsed:
                    lnt.log(parsed.stringify(code_only=code_only))
                else:
                    # TODO: Make this prettier
                    lnt.log('...Failed to Parse...')
                nv += len(violations)
                for v in violations:
                    lnt.log(format_violation(v, verbose=verbose))
                if verbose >= 2:
                    lnt.log("==== timings ====")
                    lnt.log(cli_table(time_dict.items()))
                bencher("Output details for file")
        else:
            # collect result and print as single payload
            # will need to zip in the file paths
            filepaths = ['stdin'] if '-' == path else lnt.paths_from_path(path)
            result = [
                dict(
                    filepath=filepath,
                    segments=parsed.as_record(code_only=code_only, show_raw=True)
                )
                for filepath, (parsed, _, _) in zip(filepaths, result)
            ]
            if format == 'yaml':
                # For yaml dumping always dump double quoted strings if they contain tabs or newlines.
                def quoted_presenter(dumper, data):
                    """Representer which always double quotes string values needing escapes."""
                    if '\n' in data or '\t' in data:
                        return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='"')
                    else:
                        return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='')

                yaml.add_representer(str, quoted_presenter)
                click.echo(yaml.dump(result))
            elif format == 'json':
                click.echo(json.dumps(result))
    except IOError:
        click.echo(colorize(
            'The path {0!r} could not be accessed. Check it exists.'.format(path), 'red'))
        sys.exit(1)

    if profiler:
        pr.disable()
        profiler_buffer = StringIO()
        ps = pstats.Stats(pr, stream=profiler_buffer).sort_stats('cumulative')
        ps.print_stats()
        lnt.log("==== profiler stats ====")
        # Only print the first 50 lines of it
        lnt.log('\n'.join(profiler_buffer.getvalue().split('\n')[:50]))

    if bench:
        lnt.log("\n\n==== bencher stats ====")
        bencher.display()

    if nv > 0:
        sys.exit(66)
    else:
        sys.exit(0)
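The `quoted_presenter` registered above is self-contained and easy to try on its own. This standalone demo (PyYAML only; the sample dict is illustrative) shows the intended effect: strings containing tabs or newlines are forced into double-quoted style, everything else stays plain:

import yaml

def quoted_presenter(dumper, data):
    """Force double quotes for strings that need escapes."""
    if '\n' in data or '\t' in data:
        return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='"')
    return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='')

yaml.add_representer(str, quoted_presenter)
# The multi-line value comes out as a double-quoted scalar with an escaped \n.
print(yaml.dump({'query': 'select 1\nfrom tbl', 'name': 'simple'}))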
def parse(self, parse_context=None):
    """Use the parse grammar to find subsegments within this segment.

    A large chunk of the logic around this can be found in the `expand` method.

    Use the parse setting in the context for testing, mostly to check how deep
    to go. True/False for yes or no, an integer allows a certain number of levels.
    """
    if not parse_context.dialect:
        raise RuntimeError("No dialect provided to {0!r}!".format(self))

    # Clear the blacklist cache to avoid missteps
    if parse_context:
        parse_context.blacklist.clear()

    # the parse_depth and recurse kwargs control how deep we will recurse for testing.
    if not self.segments:
        # This means we're a root segment, just return an unmutated self
        return self

    # Get the Parse Grammar
    g = self._parse_grammar()
    if g is None:
        # No parse grammar, go straight to expansion
        logging.debug("{0}.parse: no grammar. Going straight to expansion".format(
            self.__class__.__name__))
    else:
        # Use the Parse Grammar (and the private method)
        # For debugging purposes. Ensure that we don't have non-code elements
        # at the start or end of the segments. They should always be in the middle,
        # or in the parent expression.
        if not self._can_start_end_non_code:
            if (not self.segments[0].is_code) and (not self.segments[0].is_meta):
                raise ValueError("Segment {0} starts with non code segment: {1!r}.\n{2!r}".format(
                    self, self.segments[0].raw, self.segments))
            if (not self.segments[-1].is_code) and (not self.segments[-1].is_meta):
                raise ValueError("Segment {0} ends with non code segment: {1!r}.\n{2!r}".format(
                    self, self.segments[-1].raw, self.segments))

        # NOTE: No match_depth kwarg, because this is the start of the matching.
        m = g._match(
            segments=self.segments,
            parse_context=parse_context.copy(
                match_segment=self.__class__.__name__
            )
        )

        if not isinstance(m, MatchResult):
            raise TypeError(
                "[PD:{0}] {1}.match. Result is {2}, not a MatchResult!".format(
                    parse_context.parse_depth, self.__class__.__name__, type(m)))

        # Basic Validation, that we haven't dropped anything.
        check_still_complete(self.segments, m.matched_segments, m.unmatched_segments)

        if m.has_match():
            if m.is_complete():
                # Complete match, happy days!
                self.segments = m.matched_segments
            else:
                # Incomplete match.
                # For now this means the parsing has failed. Let's add the unmatched bit
                # at the end as something unparsable.
                # TODO: Do something more intelligent here.
                self.segments = m.matched_segments + (UnparsableSegment(
                    segments=m.unmatched_segments, expected="Nothing..."),)
        else:
            # If there's no match at this stage, then it's unparsable. That's
            # a problem at this stage so wrap it in an unparsable segment and carry on.
            self.segments = (UnparsableSegment(
                segments=self.segments,
                expected=g.expected_string(dialect=parse_context.dialect)),)  # NB: tuple

        # Validate new segments
        self.validate_segments(text="parsing")

    bencher = BenchIt()  # starts the timer
    bencher("Parse complete of {0!r}".format(self.__class__.__name__))

    # Recurse if allowed (using the expand method to deal with the expansion)
    logging.debug(
        "{0}.parse: Done Parse. Plotting Recursion. Recurse={1!r}".format(
            self.__class__.__name__, parse_context.recurse))
    parse_depth_msg = "###\n#\n# Beginning Parse Depth {0}: {1}\n#\n###\nInitial Structure:\n{2}".format(
        parse_context.parse_depth + 1, self.__class__.__name__, self.stringify())
    if parse_context.recurse is True:
        logging.debug(parse_depth_msg)
        self.segments = self.expand(
            self.segments,
            parse_context=parse_context.copy(
                incr='parse_depth', match_depth=0, recurse=True
            )
        )
    elif isinstance(parse_context.recurse, int):
        if parse_context.recurse > 1:
            logging.debug(parse_depth_msg)
            self.segments = self.expand(
                self.segments,
                parse_context=parse_context.copy(decr='recurse', incr='parse_depth')
            )

    # Validate new segments
    self.validate_segments(text="expanding")

    return self
def parse_string(self, s, fname=None, verbosity=0, recurse=True, config=None):
    """Parse a string.

    Returns:
        `tuple` of (`parsed`, `violations`, `time_dict`, `config_diff`).
            `parsed` is a segment structure representing the parsed file. If
                parsing fails due to an unrecoverable violation then we will
                return None.
            `violations` is a :obj:`list` of violations so far, which will either be
                templating, lexing or parsing violations at this stage.
            `time_dict` is a :obj:`dict` containing timings for how long each step
                took in the process.
    """
    violations = []
    t0 = time.monotonic()
    bencher = BenchIt()  # starts the timer

    if fname:
        short_fname = fname.replace('\\', '/').split('/')[-1]
    else:
        # this handles the potential case of a null fname
        short_fname = fname
    bencher("Starting parse_string for {0!r}".format(short_fname))

    # Log the start of this process if we're in a more verbose mode.
    if verbosity > 1:
        self.log(format_filename(filename=fname, success='PARSING', verbose=verbosity))
        # This is where we output config diffs if they exist.
        if config:
            # Only output config diffs if there is a config to diff to.
            config_diff = config.diff_to(self.config)
            if config_diff:
                self.log("   Config Diff:")
                self.log(format_config_vals(self.config.iter_vals(cfg=config_diff)))

    verbosity_logger("TEMPLATING RAW [{0}] ({1})".format(self.templater.name, fname),
                     verbosity=verbosity)
    try:
        s = self.templater.process(s, fname=fname, config=config or self.config)
    except SQLTemplaterError as err:
        violations.append(err)
        file_segment = None
        # NB: We'll carry on if we fail to template, it might still lex

    t1 = time.monotonic()
    bencher("Templating {0!r}".format(short_fname))

    if s:
        verbosity_logger("LEXING RAW ({0})".format(fname), verbosity=verbosity)
        # Lex the file and log any problems
        try:
            file_segment, lex_vs = FileSegment.from_raw(s, config=config or self.config)
            # We might just get the violations as a list
            violations += lex_vs
        except SQLLexError as err:
            violations.append(err)
            file_segment = None
    else:
        file_segment = None

    if file_segment:
        verbosity_logger(file_segment.stringify(), verbosity=verbosity)

    t2 = time.monotonic()
    bencher("Lexing {0!r}".format(short_fname))
    verbosity_logger("PARSING ({0})".format(fname), verbosity=verbosity)
    # Parse the file and log any problems
    if file_segment:
        try:
            # Make a parse context and parse
            context = self.get_parse_context()
            context.verbosity = verbosity or context.verbosity
            context.recurse = recurse or context.recurse
            parsed = file_segment.parse(parse_context=context)
        except SQLParseError as err:
            violations.append(err)
            parsed = None
        if parsed:
            verbosity_logger(frame_msg("Parsed Tree:"), verbosity=verbosity)
            verbosity_logger(parsed.stringify(), verbosity=verbosity)
    else:
        parsed = None

    t3 = time.monotonic()
    time_dict = {'templating': t1 - t0, 'lexing': t2 - t1, 'parsing': t3 - t2}
    bencher("Finish parsing {0!r}".format(short_fname))
    return parsed, violations, time_dict
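The `t0`..`t3` checkpoints above are a simple phase-timing pattern: snapshot `time.monotonic()` between stages and diff the snapshots at the end. Reduced to its essentials (the stage bodies here are stand-ins for illustration):

import time

t0 = time.monotonic()
templated = "select 1 from tbl"   # stand-in for the templating stage
t1 = time.monotonic()
tokens = templated.split()        # stand-in for the lexing stage
t2 = time.monotonic()
parsed = tuple(tokens)            # stand-in for the parsing stage
t3 = time.monotonic()

time_dict = {'templating': t1 - t0, 'lexing': t2 - t1, 'parsing': t3 - t2}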
def parse_string(self, s, fname=None, verbosity=0, recurse=True, config=None):
    """Parse a string.

    Returns:
        `tuple` of (`parsed`, `violations`, `time_dict`, `config_diff`).
            `parsed` is a segment structure representing the parsed file. If
                parsing fails due to an unrecoverable violation then we will
                return None.
            `violations` is a :obj:`list` of violations so far, which will either be
                templating, lexing or parsing violations at this stage.
            `time_dict` is a :obj:`dict` containing timings for how long each step
                took in the process.
    """
    violations = []
    t0 = time.monotonic()
    bencher = BenchIt()  # starts the timer

    if fname:
        short_fname = fname.replace('\\', '/').split('/')[-1]
    else:
        # this handles the potential case of a null fname
        short_fname = fname
    bencher("Starting parse_string for {0!r}".format(short_fname))

    # Log the start of this process if we're in a more verbose mode.
    if verbosity > 1:
        self.log(format_filename(filename=fname, success='PARSING', verbose=verbosity))
        # This is where we output config diffs if they exist.
        if config:
            # Only output config diffs if there is a config to diff to.
            config_diff = config.diff_to(self.config)
            if config_diff:
                self.log("   Config Diff:")
                self.log(format_config_vals(self.config.iter_vals(cfg=config_diff)))

    verbosity_logger("TEMPLATING RAW [{0}] ({1})".format(self.templater.name, fname),
                     verbosity=verbosity)
    s, templater_violations = self.templater.process(s, fname=fname,
                                                     config=config or self.config)
    violations += templater_violations
    # Detect the case of a catastrophic templater fail. In this case
    # we don't continue. We'll just bow out now.
    if not s:
        file_segment = None

    t1 = time.monotonic()
    bencher("Templating {0!r}".format(short_fname))

    if s:
        verbosity_logger("LEXING RAW ({0})".format(fname), verbosity=verbosity)
        # Lex the file and log any problems
        try:
            file_segment, lex_vs = FileSegment.from_raw(s, config=config or self.config)
            # We might just get the violations as a list
            violations += lex_vs
        except SQLLexError as err:
            violations.append(err)
            file_segment = None
    else:
        file_segment = None

    if file_segment:
        verbosity_logger(file_segment.stringify(), verbosity=verbosity)

    t2 = time.monotonic()
    bencher("Lexing {0!r}".format(short_fname))
    verbosity_logger("PARSING ({0})".format(fname), verbosity=verbosity)
    # Parse the file and log any problems
    if file_segment:
        try:
            # Make a parse context and parse
            context = self.get_parse_context(config=config or self.config)
            context.verbosity = verbosity or context.verbosity
            context.recurse = recurse or context.recurse
            parsed = file_segment.parse(parse_context=context)
        except SQLParseError as err:
            violations.append(err)
            parsed = None
        if parsed:
            verbosity_logger(frame_msg("Parsed Tree:"), verbosity=verbosity)
            verbosity_logger(parsed.stringify(), verbosity=verbosity)
            # We may succeed parsing, but still have unparsable segments. Extract them here.
            for unparsable in parsed.iter_unparsables():
                # No exception has been raised explicitly, but we still create one here
                # so that we can use the common interface
                violations.append(
                    SQLParseError("Found unparsable section: {0!r}".format(
                        unparsable.raw if len(unparsable.raw) < 40
                        else unparsable.raw[:40] + "..."),
                        segment=unparsable))
                if verbosity >= 2:
                    verbosity_logger("Found unparsable segment...", verbosity=verbosity)
                    verbosity_logger(unparsable.stringify(), verbosity=verbosity)
    else:
        parsed = None

    t3 = time.monotonic()
    time_dict = {
        'templating': t1 - t0,
        'lexing': t2 - t1,
        'parsing': t3 - t2
    }
    bencher("Finish parsing {0!r}".format(short_fname))
    return parsed, violations, time_dict
def parse_string(self, in_str, fname=None, recurse=True, config=None):
    """Parse a string.

    Returns:
        `ParsedString` of (`parsed`, `violations`, `time_dict`, `templated_file`).
            `parsed` is a segment structure representing the parsed file. If
                parsing fails due to an unrecoverable violation then we will
                return None.
            `violations` is a :obj:`list` of violations so far, which will either be
                templating, lexing or parsing violations at this stage.
            `time_dict` is a :obj:`dict` containing timings for how long each step
                took in the process.
            `templated_file` is a :obj:`TemplatedFile` containing the details
                of the templated file.
    """
    violations = []
    t0 = time.monotonic()
    bencher = BenchIt()  # starts the timer

    if fname:
        short_fname = fname.replace("\\", "/").split("/")[-1]
    else:
        # this handles the potential case of a null fname
        short_fname = fname
    bencher("Starting parse_string for {0!r}".format(short_fname))

    # Dispatch the output for the parse header (including the config diff)
    if self.formatter:
        self.formatter.dispatch_parse_header(fname, self.config, config)

    # Just use the local config from here:
    config = config or self.config

    # Scan the raw file for config commands.
    for raw_line in in_str.splitlines():
        if raw_line.startswith("-- sqlfluff"):
            # Found an in-file config command
            config.process_inline_config(raw_line)

    linter_logger.info("TEMPLATING RAW [%s] (%s)", self.templater.name, fname)
    templated_file, templater_violations = self.templater.process(
        in_str=in_str, fname=fname, config=config)
    violations += templater_violations

    # Detect the case of a catastrophic templater fail. In this case
    # we don't continue. We'll just bow out now.
    if not templated_file:
        linter_logger.info("TEMPLATING FAILED: %s", templater_violations)
        tokens = None

    t1 = time.monotonic()
    bencher("Templating {0!r}".format(short_fname))

    if templated_file:
        linter_logger.info("LEXING RAW (%s)", fname)
        # Get the lexer
        lexer = Lexer(config=config)
        # Lex the file and log any problems
        try:
            tokens, lex_vs = lexer.lex(templated_file)
            # We might just get the violations as a list
            violations += lex_vs
        except SQLLexError as err:
            linter_logger.info("LEXING FAILED! (%s): %s", fname, err)
            violations.append(err)
            tokens = None
    else:
        tokens = None

    if tokens:
        linter_logger.info("Lexed tokens: %s", [seg.raw for seg in tokens])
    else:
        linter_logger.info("NO LEXED TOKENS!")

    if tokens:
        # Check that we've got sensible indentation from the lexer.
        # We might need to suppress if it's a complicated file.
        templating_blocks_indent = config.get("template_blocks_indent", "indentation")
        if isinstance(templating_blocks_indent, str):
            force_block_indent = templating_blocks_indent.lower().strip() == "force"
        else:
            force_block_indent = False
        templating_blocks_indent = bool(templating_blocks_indent)
        # If we're forcing it through we don't check.
        if templating_blocks_indent and not force_block_indent:
            indent_balance = sum(getattr(elem, "indent_val", 0) for elem in tokens)
            if indent_balance != 0:
                linter_logger.warning(
                    "Indent balance test failed for %r. Template indents will not be linted for this file.",
                    fname,
                )
                # Don't enable the templating blocks.
                templating_blocks_indent = False
                # Disable the linting of L003 on templated tokens.
                config.set_value(["rules", "L003", "lint_templated_tokens"], False)

        # The file will have been lexed without config, so check all indents
        # are enabled.
        new_tokens = []
        for token in tokens:
            if token.is_meta:
                if token.indent_val != 0:
                    # Don't allow it if we're not linting templating block indents.
                    if not templating_blocks_indent:
                        continue
                    # Don't allow if it's not configured to function.
                    elif not token.is_enabled(
                            indent_config=config.get_section("indentation")):
                        continue
            new_tokens.append(token)
        # Swap the buffers
        tokens = new_tokens

    t2 = time.monotonic()
    bencher("Lexing {0!r}".format(short_fname))
    linter_logger.info("PARSING (%s)", fname)
    parser = Parser(config=config)
    # Parse the file and log any problems
    if tokens:
        try:
            parsed = parser.parse(tokens, recurse=recurse)
        except SQLParseError as err:
            linter_logger.info("PARSING FAILED! (%s): %s", fname, err)
            violations.append(err)
            parsed = None
        if parsed:
            linter_logger.info("\n###\n#\n# {0}\n#\n###".format("Parsed Tree:"))
            linter_logger.info("\n" + parsed.stringify())
            # We may succeed parsing, but still have unparsable segments. Extract them here.
            for unparsable in parsed.iter_unparsables():
                # No exception has been raised explicitly, but we still create one here
                # so that we can use the common interface
                violations.append(
                    SQLParseError(
                        "Found unparsable section: {0!r}".format(
                            unparsable.raw if len(unparsable.raw) < 40
                            else unparsable.raw[:40] + "..."),
                        segment=unparsable,
                    ))
                linter_logger.info("Found unparsable segment...")
                linter_logger.info(unparsable.stringify())
    else:
        parsed = None

    t3 = time.monotonic()
    time_dict = {
        "templating": t1 - t0,
        "lexing": t2 - t1,
        "parsing": t3 - t2
    }
    bencher("Finish parsing {0!r}".format(short_fname))
    return ParsedString(parsed, violations, time_dict, templated_file, config)
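The indent-balance check in the version above is worth isolating: indent and dedent meta tokens carry an `indent_val` (positive for indents, negative for dedents), and if they don't sum to zero across the file the template indents can't be trusted, so that linting is switched off. A self-contained illustration with stand-in tokens:

class DummyToken:
    """Stand-in for a lexed meta token carrying an indent value."""
    def __init__(self, indent_val=0):
        self.indent_val = indent_val

tokens = [DummyToken(1), DummyToken(0), DummyToken(-1)]
indent_balance = sum(getattr(elem, "indent_val", 0) for elem in tokens)
# Zero means every indent has a matching dedent, so template indents are safe to lint.
assert indent_balance == 0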
def parse_string(
    self,
    in_str: str,
    fname: Optional[str] = None,
    recurse: bool = True,
    config: Optional[FluffConfig] = None,
) -> ParsedString:
    """Parse a string."""
    violations: List[SQLBaseError] = []
    t0 = time.monotonic()
    bencher = BenchIt()  # starts the timer

    short_fname = self._generate_short_fname(fname)
    bencher("Starting parse_string for {0!r}".format(short_fname))

    # Dispatch the output for the template header (including the config diff)
    if self.formatter:
        self.formatter.dispatch_template_header(fname, self.config, config)

    # Just use the local config from here:
    config = config or self.config

    # Scan the raw file for config commands.
    config.process_raw_file_for_config(in_str)

    templated_file, templater_violations = self.render_string(in_str, fname, config)
    violations += templater_violations

    t1 = time.monotonic()
    bencher("Templating {0!r}".format(short_fname))

    # Dispatch the output for the parse header
    if self.formatter:
        self.formatter.dispatch_parse_header(fname)

    tokens: Optional[Sequence[BaseSegment]]
    if templated_file:
        tokens, lvs, config = self._lex_templated_file(templated_file, config)
        violations += lvs
    else:
        tokens = None

    t2 = time.monotonic()
    bencher("Lexing {0!r}".format(short_fname))
    linter_logger.info("PARSING (%s)", fname)

    if tokens:
        parsed, pvs = self._parse_tokens(tokens, config, recurse=recurse)
        violations += pvs
    else:
        parsed = None

    t3 = time.monotonic()
    time_dict = {
        "templating": t1 - t0,
        "lexing": t2 - t1,
        "parsing": t3 - t2,
    }
    bencher("Finish parsing {0!r}".format(short_fname))
    return ParsedString(parsed, violations, time_dict, templated_file, config)
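A hedged usage sketch for the refactored version above. The construction assumes the usual `Linter(config=FluffConfig())` entry point, and the unpacking assumes `ParsedString` is tuple-like, as its positional construction in the return statement suggests; both are assumptions for illustration:

lnt = Linter(config=FluffConfig())
parsed, violations, time_dict, templated_file, cfg = lnt.parse_string(
    "SELECT 1 FROM my_table\n"
)
print(time_dict)  # {'templating': ..., 'lexing': ..., 'parsing': ...}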
def parse_string(self, s, fname=None, recurse=True, config=None):
    """Parse a string.

    Returns:
        `tuple` of (`parsed`, `violations`, `time_dict`, `config_diff`).
            `parsed` is a segment structure representing the parsed file. If
                parsing fails due to an unrecoverable violation then we will
                return None.
            `violations` is a :obj:`list` of violations so far, which will either be
                templating, lexing or parsing violations at this stage.
            `time_dict` is a :obj:`dict` containing timings for how long each step
                took in the process.
    """
    violations = []
    t0 = time.monotonic()
    bencher = BenchIt()  # starts the timer

    if fname:
        short_fname = fname.replace('\\', '/').split('/')[-1]
    else:
        # this handles the potential case of a null fname
        short_fname = fname
    bencher("Starting parse_string for {0!r}".format(short_fname))

    # Dispatch the output for the parse header (including the config diff)
    if self.formatter:
        self.formatter.dispatch_parse_header(fname, self.config, config)

    linter_logger.info("TEMPLATING RAW [%s] (%s)", self.templater.name, fname)
    s, templater_violations = self.templater.process(s, fname=fname,
                                                     config=config or self.config)
    violations += templater_violations
    # Detect the case of a catastrophic templater fail. In this case
    # we don't continue. We'll just bow out now.
    if not s:
        file_segment = None

    t1 = time.monotonic()
    bencher("Templating {0!r}".format(short_fname))

    if s:
        linter_logger.info("LEXING RAW (%s)", fname)
        # Lex the file and log any problems
        try:
            file_segment, lex_vs = FileSegment.from_raw(s, config=config or self.config)
            # We might just get the violations as a list
            violations += lex_vs
        except SQLLexError as err:
            violations.append(err)
            file_segment = None
    else:
        file_segment = None

    if file_segment:
        linter_logger.info(file_segment.stringify())

    t2 = time.monotonic()
    bencher("Lexing {0!r}".format(short_fname))
    linter_logger.info("PARSING (%s)", fname)
    # Parse the file and log any problems
    if file_segment:
        try:
            # Make a parse context and parse
            with RootParseContext.from_config(config=config or self.config,
                                              recurse=recurse) as ctx:
                parsed = file_segment.parse(parse_context=ctx)
        except SQLParseError as err:
            violations.append(err)
            parsed = None
        if parsed:
            linter_logger.info("\n###\n#\n# {0}\n#\n###".format("Parsed Tree:"))
            linter_logger.info("\n" + parsed.stringify())
            # We may succeed parsing, but still have unparsable segments. Extract them here.
            for unparsable in parsed.iter_unparsables():
                # No exception has been raised explicitly, but we still create one here
                # so that we can use the common interface
                violations.append(
                    SQLParseError("Found unparsable section: {0!r}".format(
                        unparsable.raw if len(unparsable.raw) < 40
                        else unparsable.raw[:40] + "..."),
                        segment=unparsable))
                linter_logger.info("Found unparsable segment...")
                linter_logger.info(unparsable.stringify())
    else:
        parsed = None

    t3 = time.monotonic()
    time_dict = {
        'templating': t1 - t0,
        'lexing': t2 - t1,
        'parsing': t3 - t2
    }
    bencher("Finish parsing {0!r}".format(short_fname))
    return parsed, violations, time_dict
def fix(force, paths, bench=False, fixed_suffix="", no_safety=False, logger=None, **kwargs):
    """Fix SQL files.

    PATH is the path to a sql file or directory to lint. This can be either a
    file ('path/to/file.sql'), a path ('directory/of/sql/files'), a single ('-')
    character to indicate reading from *stdin* or a dot/blank ('.'/' ') which will
    be interpreted like passing the current working directory as a path argument.
    """
    c = get_config(**kwargs)
    lnt, formatter = get_linter_and_formatter(c, silent=("-",) == paths)
    verbose = c.get("verbose")

    bencher = BenchIt()

    formatter.dispatch_config(lnt)

    # Set up logging.
    set_logging_level(verbosity=verbose, logger=logger)

    # Check that if fix is specified, that we have picked only a subset of rules
    if no_safety:
        click.echo(colorize("NO SAFETY", "red") + ": Attempting fixes for all enabled rules.")
    elif lnt.config.get("rule_whitelist") is None:
        click.echo(("The fix option is only available in combination"
                    " with --rules. This is for your own safety! To"
                    " disable this safety feature use --no-safety or --s."))
        sys.exit(1)

    # handle stdin case. should output formatted sql to stdout and nothing else.
    if ("-",) == paths:
        stdin = sys.stdin.read()
        # TODO: Remove verbose
        result = lnt.lint_string_wrapped(stdin, fname="stdin", fix=True)
        stdout = result.paths[0].files[0].fix_string()[0]
        click.echo(stdout, nl=False)
        sys.exit()

    # Lint the paths (not with the fix argument at this stage), outputting as we go.
    click.echo("==== finding fixable violations ====")
    try:
        result = lnt.lint_paths(paths, fix=True, ignore_non_existent_files=False)
    except IOError:
        click.echo(
            colorize(
                "The path(s) {0!r} could not be accessed. Check it/they exist(s)."
                .format(paths),
                "red",
            ))
        sys.exit(1)

    # NB: We filter to linting violations here, because they're
    # the only ones which can be potentially fixed.
    if result.num_violations(types=SQLLintError, fixable=True) > 0:
        click.echo("==== fixing violations ====")
        click.echo("{0} fixable linting violations found".format(
            result.num_violations(types=SQLLintError, fixable=True)))
        if force:
            click.echo(colorize("FORCE MODE", "red") + ": Attempting fixes...")
            # TODO: Remove verbose
            success = do_fixes(
                lnt,
                result,
                formatter,
                types=SQLLintError,
                fixed_file_suffix=fixed_suffix,
            )
            if not success:
                sys.exit(1)
        else:
            click.echo("Are you sure you wish to attempt to fix these? [Y/n] ", nl=False)
            c = click.getchar().lower()
            click.echo("...")
            if c == "y":
                click.echo("Attempting fixes...")
                # TODO: Remove verbose
                success = do_fixes(
                    lnt,
                    result,
                    formatter,
                    types=SQLLintError,
                    fixed_file_suffix=fixed_suffix,
                )
                if not success:
                    sys.exit(1)
            elif c == "n":
                click.echo("Aborting...")
            else:
                click.echo("Invalid input :(")
                click.echo("Aborting...")
    else:
        click.echo("==== no fixable linting violations found ====")
        if result.num_violations(types=SQLLintError, fixable=False) > 0:
            click.echo("  [{0} unfixable linting violations found]".format(
                result.num_violations(types=SQLLintError, fixable=False)))

    if bench:
        click.echo("\n\n==== bencher stats ====")
        bencher.display()

    sys.exit(0)
def fix_string(self, verbosity=0):
    """Obtain the changes to a path as a string.

    We use the file_mask to do a safe merge, avoiding any templated
    sections. First we need to detect where there have been changes
    between the fixed and templated versions.

    The file mask is of the format: (raw_file, templated_file, fixed_file).

    We use difflib.SequenceMatcher.get_opcodes
    See: https://docs.python.org/3.7/library/difflib.html#difflib.SequenceMatcher.get_opcodes
    It returns a list of tuples ('equal|replace|delete|insert', ia1, ia2, ib1, ib2).
    """
    bencher = BenchIt()
    bencher("fix_string: start")

    # Do we have enough information to actually fix the file?
    if any(elem is None for elem in self.file_mask):
        verbosity_logger(
            "Insufficient information to fix file: {0}".format(self.file_mask),
            verbosity=verbosity)
        return None, False

    verbosity_logger("Persisting file masks: {0}".format(self.file_mask),
                     verbosity=verbosity)
    # Compare Templated with Raw
    diff_templ = SequenceMatcher(autojunk=None, a=self.file_mask[0], b=self.file_mask[1])
    bencher("fix_string: Match 0&1")
    diff_templ_codes = diff_templ.get_opcodes()
    verbosity_logger("Templater diff codes: {0}".format(diff_templ_codes),
                     verbosity=verbosity)
    bencher("fix_string: Got Opcodes 0&1")
    # Compare Fixed with Templated
    diff_fix = SequenceMatcher(autojunk=None, a=self.file_mask[1], b=self.file_mask[2])
    bencher("fix_string: Matched 1&2")
    # diff_fix = SequenceMatcher(autojunk=None, a=self.file_mask[1][0], b=self.file_mask[2][0])
    diff_fix_codes = diff_fix.get_opcodes()
    verbosity_logger("Fixing diff codes: {0}".format(diff_fix_codes),
                     verbosity=verbosity)
    bencher("fix_string: Got Opcodes 1&2")

    # If diff_templ isn't the same then we should just keep the template. If there *was*
    # a fix in that space, then we should raise an issue
    # If it is the same, then we can apply fixes as expected.
    write_buff = ''
    fixed_block = None
    templ_block = None
    # index in raw, templ and fix
    idx = (0, 0, 0)
    loop_idx = 0
    bencher("fix_string: Loop Setup")
    while True:
        loop_idx += 1
        verbosity_logger("{0:04d}: Write Loop: idx:{1}, buff:{2!r}".format(
            loop_idx, idx, write_buff),
            verbosity=verbosity)

        if templ_block is None:
            if diff_templ_codes:
                templ_block = diff_templ_codes.pop(0)
            # We've exhausted the template. Have we exhausted the fixes?
            elif fixed_block is None and not diff_fix_codes:
                # Yes - excellent. DONE
                break
            # Deal with the case that we only have inserts left.
            elif all(elem[0] == 'insert' for elem in diff_fix_codes):
                for fixed_block in diff_fix_codes:
                    write_buff += self.file_mask[2][fixed_block[3]:fixed_block[4]]
                break
            else:
                raise NotImplementedError(
                    "Fix Block(s) left over! Don't know how to handle this! aeflf8wh")

        if fixed_block is None:
            if diff_fix_codes:
                fixed_block = diff_fix_codes.pop(0)
            # One case is that we just consumed the last block of both, so check indexes
            # to see if we're at the end of the raw file.
            elif idx[0] >= len(self.file_mask[0]):
                # Yep we're at the end
                break
            else:
                raise NotImplementedError("Unexpectedly depleted the fixes. Panic!")

        verbosity_logger("{0:04d}: Blocks: template:{1}, fix:{2}".format(
            loop_idx, templ_block, fixed_block),
            verbosity=verbosity)

        if templ_block[0] == 'equal':
            if fixed_block[0] == 'equal':
                # No templating, no fixes, go with middle and advance indexes
                # Find out how far we can advance (we use the middle version because it's common)
                if templ_block[4] == fixed_block[2]:
                    buff = self.file_mask[1][idx[1]:fixed_block[2]]
                    # consume both blocks
                    fixed_block = None
                    templ_block = None
                elif templ_block[4] > fixed_block[2]:
                    buff = self.file_mask[1][idx[1]:fixed_block[2]]
                    # consume fixed block
                    fixed_block = None
                elif templ_block[4] < fixed_block[2]:
                    buff = self.file_mask[1][idx[1]:templ_block[4]]
                    # consume templ block
                    templ_block = None
                idx = (idx[0] + len(buff), idx[1] + len(buff), idx[2] + len(buff))
                write_buff += buff
                continue
            elif fixed_block[0] == 'replace':
                # Consider how to apply fixes.
                # Can we implement the fix while staying in the equal segment?
                if fixed_block[2] <= templ_block[4]:
                    # Yes! Write from the fixed version.
                    write_buff += self.file_mask[2][idx[2]:fixed_block[4]]
                    idx = (idx[0] + (fixed_block[2] - fixed_block[1]),
                           fixed_block[2], fixed_block[4])
                    # Consume the fixed block because we've written the whole thing.
                    fixed_block = None
                    continue
                else:
                    raise NotImplementedError("DEF")
            elif fixed_block[0] == 'delete':
                # We're deleting items, nothing to write but we can consume some
                # blocks and advance some indexes.
                idx = (idx[0] + (fixed_block[2] - fixed_block[1]),
                       fixed_block[2], fixed_block[4])
                fixed_block = None
            elif fixed_block[0] == 'insert':
                # We're inserting items. Write from the fix block, but only that index moves.
                write_buff += self.file_mask[2][idx[2]:fixed_block[4]]
                idx = (idx[0], idx[1], fixed_block[4])
                fixed_block = None
            else:
                raise NotImplementedError((
                    "Unexpected opcode {0} for fix block! Please report this "
                    "issue on github with the query and rules you're trying to "
                    "fix.").format(fixed_block[0]))
        elif templ_block[0] == 'replace':
            # We're in a templated section - we should write the templated version.
            # we should consume the whole replace block and then deal with where
            # we end up.
            buff = self.file_mask[0][idx[0]:templ_block[2]]
            new_templ_idx = templ_block[4]

            # Fast forward through fix blocks until we catch up. We're not implementing
            # any changes in a templated section.
            while True:
                if fixed_block[2] > new_templ_idx >= fixed_block[1]:
                    # this block contains the end point
                    break
                else:
                    # We're not at the end point yet, continue to fast forward through.
                    if fixed_block[0] != 'equal':
                        print("WARNING: Skipping edit block: {0}".format(fixed_block))
                    if diff_fix_codes:
                        fixed_block = diff_fix_codes.pop(0)
                    else:
                        raise NotImplementedError("Unexpectedly depleted the fixes. Panic!")
            # Are we exactly on a join?
            if new_templ_idx == fixed_block[1]:
                # GREAT - this makes things easy because we have an equality point already
                idx = (templ_block[2], new_templ_idx, fixed_block[3])
            else:
                if fixed_block[0] == 'equal':
                    # If it's in an equal block, we can use the same offset from the end.
                    idx = (templ_block[2], new_templ_idx,
                           fixed_block[3] + (new_templ_idx - fixed_block[1]))
                else:
                    # TODO: We're trying to move through a templated section, but end up
                    # in a fixed section. We've lost track of indexes.
                    # We might need to panic if this happens...
                    print("UMMMMMM!")
                    print(new_templ_idx)
                    print(fixed_block)
                    raise NotImplementedError("ABC")
            write_buff += buff
            # consume template block
            templ_block = None
        elif templ_block[0] == 'delete':
            # The comparison shows that the templater has deleted
            # some characters. This is just a quirk of the differ.
            # In reality this means we just write these characters
            # and don't worry about advancing the other indexes.
            buff = self.file_mask[0][idx[0]:templ_block[2]]
            # consume templ block
            templ_block = None
            idx = (idx[0] + len(buff), idx[1], idx[2])
            write_buff += buff
        elif templ_block[0] == 'insert':
            # The templater has inserted something here. We don't need
            # to write anything here (because whatever we're looking at
            # was inserted by the templater), but we do need to keep
            # track of what happened to the rest of the section we're in.
            # If nothing was fixed then it's easy because the indices
            # will be the same. Otherwise... great question...

            # For now let's just deal with the happy case where the fixed
            # block is equal
            if fixed_block[0] == 'equal':
                # Let's make sure we can consume enough to get through the
                # templ block and not get to the end of the fix block.
                if templ_block[4] <= fixed_block[2]:
                    insert_len = templ_block[4] - templ_block[3]
                    idx = (idx[0], idx[1] + insert_len, idx[2] + insert_len)
                    # if things matched up perfectly, consume the fixed block
                    if templ_block[4] == fixed_block[2]:
                        fixed_block = None
                    # always consume templ block in this case
                    templ_block = None
                else:
                    raise NotImplementedError((
                        "Unexpected scenario during insert opcode! Please report "
                        "this issue on github with the query and rules you're trying "
                        "to fix."))
            else:
                raise NotImplementedError((
                    "Unexpected opcode {0} for fix block! Please report this "
                    "issue on github with the query and rules you're trying to "
                    "fix.").format(fixed_block[0]))
        else:
            raise NotImplementedError((
                "Unexpected opcode {0} for template block! Please report this "
                "issue on github with the query and rules you're trying to "
                "fix.").format(templ_block[0]))

    bencher("fix_string: Fixing loop done")
    # The success metric here is whether anything ACTUALLY changed.
    return write_buff, write_buff != self.file_mask[0]
def parse(path, code_only, format, profiler, bench, nofail, logger=None, **kwargs):
    """Parse SQL files and just spit out the result.

    PATH is the path to a sql file or directory to lint. This can be either a
    file ('path/to/file.sql'), a path ('directory/of/sql/files'), a single ('-')
    character to indicate reading from *stdin* or a dot/blank ('.'/' ') which will
    be interpreted like passing the current working directory as a path argument.
    """
    # Initialise the benchmarker
    bencher = BenchIt()  # starts the timer

    c = get_config(**kwargs)
    # We don't want anything else to be logged if we want a yaml output
    lnt, formatter = get_linter_and_formatter(c, silent=format in ("json", "yaml"))
    verbose = c.get("verbose")
    recurse = c.get("recurse")

    formatter.dispatch_config(lnt)

    # Set up logging.
    set_logging_level(verbosity=verbose, logger=logger)

    # TODO: do this better
    nv = 0
    if profiler:
        # Set up the profiler if required
        try:
            import cProfile
        except ImportError:
            click.echo("The cProfiler is not available on your platform.")
            sys.exit(1)
        pr = cProfile.Profile()
        pr.enable()

    bencher("Parse setup")

    try:
        # handle stdin if specified via lone '-'
        if "-" == path:
            # put the parser result in a list to iterate later
            config = lnt.config.make_child_from_path("stdin")
            result = [(
                # TODO: Remove verbose
                *lnt.parse_string(sys.stdin.read(), "stdin", recurse=recurse, config=config),
                config,
            )]
        else:
            # A single path must be specified for this command
            # TODO: Remove verbose
            result = lnt.parse_path(path, recurse=recurse)

        # iterative print for human readout
        if format == "human":
            for parsed, violations, time_dict, f_cfg in result:
                if parsed:
                    click.echo(parsed.stringify(code_only=code_only))
                else:
                    # TODO: Make this prettier
                    click.echo("...Failed to Parse...")
                nv += len(violations)
                if violations:
                    click.echo("==== parsing violations ====")
                for v in violations:
                    click.echo(format_violation(v))
                if violations and f_cfg.get("dialect") == "ansi":
                    click.echo(format_dialect_warning())
                if verbose >= 2:
                    click.echo("==== timings ====")
                    click.echo(cli_table(time_dict.items()))
                bencher("Output details for file")
        else:
            # collect result and print as single payload
            # will need to zip in the file paths
            filepaths = ["stdin"] if "-" == path else lnt.paths_from_path(path)
            result = [
                dict(
                    filepath=filepath,
                    segments=parsed.as_record(code_only=code_only, show_raw=True),
                )
                for filepath, (parsed, _, _, _) in zip(filepaths, result)
            ]
            if format == "yaml":
                # For yaml dumping always dump double quoted strings if they contain tabs or newlines.
                def quoted_presenter(dumper, data):
                    """Representer which always double quotes string values needing escapes."""
                    if "\n" in data or "\t" in data or "'" in data:
                        return dumper.represent_scalar("tag:yaml.org,2002:str", data, style='"')
                    else:
                        return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="")

                yaml.add_representer(str, quoted_presenter)
                click.echo(yaml.dump(result))
            elif format == "json":
                click.echo(json.dumps(result))
    except IOError:
        click.echo(
            colorize(
                "The path {0!r} could not be accessed. Check it exists.".format(path),
                "red",
            ))
        sys.exit(1)

    if profiler:
        pr.disable()
        profiler_buffer = StringIO()
        ps = pstats.Stats(pr, stream=profiler_buffer).sort_stats("cumulative")
        ps.print_stats()
        click.echo("==== profiler stats ====")
        # Only print the first 50 lines of it
        click.echo("\n".join(profiler_buffer.getvalue().split("\n")[:50]))

    if bench:
        click.echo("\n\n==== bencher stats ====")
        bencher.display()

    if nv > 0 and not nofail:
        sys.exit(66)
    else:
        sys.exit(0)
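The profiler wiring in the command above is plain stdlib and works standalone; the toy workload here is just for illustration:

import cProfile
import pstats
from io import StringIO

pr = cProfile.Profile()
pr.enable()
sum(i * i for i in range(100000))  # the code being measured
pr.disable()

profiler_buffer = StringIO()
pstats.Stats(pr, stream=profiler_buffer).sort_stats("cumulative").print_stats()
# Only print the first 50 lines, as the command above does.
print("\n".join(profiler_buffer.getvalue().split("\n")[:50]))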
def fix_string(self):
    """Obtain the changes to a path as a string.

    We use the source mapping features of TemplatedFile to generate a list
    of "patches" which cover the non templated parts of the file and refer
    back to the locations in the original file.

    NB: This is MUCH FASTER than the original approach using difflib in
    pre 0.4.0.

    There is an important distinction here between Slices and Segments. A
    Slice is a portion of a file which is determined by the templater based
    on which portions of the source file are templated or not, and therefore
    before Lexing and so is completely dialect agnostic. A Segment is
    determined by the Lexer from portions of strings after templating.
    """
    bencher = BenchIt()
    bencher("fix_string: start")

    linter_logger.debug("Original Tree: %r", self.templated_file.templated_str)
    linter_logger.debug("Fixed Tree: %r", self.tree.raw)

    # The sliced file is contiguous in the TEMPLATED space.
    # NB: It has gaps and repeats in the source space.
    # It's also not the FIXED file either.
    linter_logger.debug("### Templated File.")
    for idx, file_slice in enumerate(self.templated_file.sliced_file):
        t_str = self.templated_file.templated_str[file_slice.templated_slice]
        s_str = self.templated_file.source_str[file_slice.source_slice]
        if t_str == s_str:
            linter_logger.debug("    File slice: %s %r [invariant]", idx, file_slice)
        else:
            linter_logger.debug("    File slice: %s %r", idx, file_slice)
            linter_logger.debug("    \t\t\ttemplated: %r\tsource: %r", t_str, s_str)

    original_source = self.templated_file.source_str

    # Make sure no patches overlap and divide up the source file into slices.
    # Any Template tags in the source file are off limits.
    source_only_slices = self.templated_file.source_only_slices()

    linter_logger.debug("Source-only slices: %s", source_only_slices)

    # Iterate patches, filtering and translating as we go:
    linter_logger.debug("### Beginning Patch Iteration.")
    filtered_source_patches = []
    dedupe_buffer = []
    # We use enumerate so that we get an index for each patch. This is entirely
    # so when debugging logs we can find a given patch again!
    for idx, patch in enumerate(
            self.tree.iter_patches(templated_str=self.templated_file.templated_str)):
        linter_logger.debug("  %s Yielded patch: %s", idx, patch)

        # This next bit is ALL FOR LOGGING AND DEBUGGING
        if patch.templated_slice.start >= 10:
            pre_hint = self.templated_file.templated_str[
                patch.templated_slice.start - 10:patch.templated_slice.start]
        else:
            pre_hint = self.templated_file.templated_str[:patch.templated_slice.start]
        if patch.templated_slice.stop + 10 < len(self.templated_file.templated_str):
            post_hint = self.templated_file.templated_str[
                patch.templated_slice.stop:patch.templated_slice.stop + 10]
        else:
            post_hint = self.templated_file.templated_str[patch.templated_slice.stop:]
        linter_logger.debug("        Templated Hint: ...%r <> %r...", pre_hint, post_hint)

        # Attempt to convert to source space.
        try:
            source_slice = self.templated_file.templated_slice_to_source_slice(
                patch.templated_slice,
            )
        except ValueError:
            linter_logger.info(
                "      - Skipping. Source space Value Error. i.e. attempted "
                "insertion within templated section.")
            # If we try and slice within a templated section, then we may fail
            # in which case, we should skip this patch.
            continue

        # Check for duplicates
        dedupe_tuple = (source_slice, patch.fixed_raw)
        if dedupe_tuple in dedupe_buffer:
            linter_logger.info("      - Skipping. Source space Duplicate: %s", dedupe_tuple)
            continue

        # We now evaluate patches in the source-space for whether they overlap
        # or disrupt any templated sections.
        # The intent here is that unless explicitly stated, a fix should never
        # disrupt a templated section.
        # NOTE: We rely here on the patches being sorted.
        # TODO: Implement a mechanism for doing templated section fixes. For
        # now it's just not allowed.

        # Get the affected raw slices.
        local_raw_slices = self.templated_file.raw_slices_spanning_source_slice(source_slice)
        local_type_list = [slc.slice_type for slc in local_raw_slices]

        enriched_patch = EnrichedFixPatch(
            source_slice=source_slice,
            templated_slice=patch.templated_slice,
            patch_type=patch.patch_type,
            fixed_raw=patch.fixed_raw,
            templated_str=self.templated_file.templated_str[patch.templated_slice],
            source_str=self.templated_file.source_str[source_slice],
        )

        # Deal with the easy case of only literals
        if set(local_type_list) == {"literal"}:
            linter_logger.info(
                "      * Keeping patch on literal-only section: %s", enriched_patch)
            filtered_source_patches.append(enriched_patch)
            dedupe_buffer.append(enriched_patch.dedupe_tuple())
        # Is it a zero length patch.
        elif (enriched_patch.source_slice.start == enriched_patch.source_slice.stop
              and enriched_patch.source_slice.start == local_raw_slices[0].source_idx):
            linter_logger.info(
                "      * Keeping insertion patch on slice boundary: %s",
                enriched_patch,
            )
            filtered_source_patches.append(enriched_patch)
            dedupe_buffer.append(enriched_patch.dedupe_tuple())
        # If it's ONLY templated then we should skip it.
        elif "literal" not in local_type_list:
            linter_logger.info(
                "      - Skipping patch over templated section: %s", enriched_patch)
        # If we span more than two slices then we should just skip it. Too Hard.
        elif len(local_raw_slices) > 2:
            linter_logger.info(
                "      - Skipping patch over more than two raw slices: %s",
                enriched_patch,
            )
        # If it's an insertion (i.e. the string in the pre-fix template is '') then we
        # won't be able to place it, so skip.
        elif not enriched_patch.templated_str:
            linter_logger.info(
                "      - Skipping insertion patch in templated section: %s",
                enriched_patch,
            )
        # If the string from the templated version isn't in the source, then we can't fix it.
        elif enriched_patch.templated_str not in enriched_patch.source_str:
            linter_logger.info(
                "      - Skipping edit patch on templated content: %s",
                enriched_patch,
            )
        else:
            # Identify all the places the string appears in the source content.
            positions = list(findall(enriched_patch.templated_str,
                                     enriched_patch.source_str))
            if len(positions) != 1:
                linter_logger.debug(
                    "        - Skipping edit patch on non-unique templated content: %s",
                    enriched_patch,
                )
                continue
            # We have a single occurrence of the thing we want to patch. This
            # means we can use its position to place our patch.
            new_source_slice = slice(
                enriched_patch.source_slice.start + positions[0],
                enriched_patch.source_slice.start + positions[0]
                + len(enriched_patch.templated_str),
            )
            enriched_patch = EnrichedFixPatch(
                source_slice=new_source_slice,
                templated_slice=enriched_patch.templated_slice,
                patch_type=enriched_patch.patch_type,
                fixed_raw=enriched_patch.fixed_raw,
                templated_str=enriched_patch.templated_str,
                source_str=enriched_patch.source_str,
            )
            linter_logger.debug(
                "        * Keeping Tricky Case. Positions: %s, New Slice: %s, Patch: %s",
                positions,
                new_source_slice,
                enriched_patch,
            )
            filtered_source_patches.append(enriched_patch)
            dedupe_buffer.append(enriched_patch.dedupe_tuple())
            continue

    # Sort the patches before building up the file.
    filtered_source_patches = sorted(filtered_source_patches,
                                     key=lambda x: x.source_slice.start)

    # We now slice up the file using the patches and any source only slices.
    # This gives us regions to apply changes to.
    slice_buff = []
    source_idx = 0
    for patch in filtered_source_patches:
        # Are there templated slices at or before the start of this patch?
        while (source_only_slices
               and source_only_slices[0].source_idx < patch.source_slice.start):
            next_so_slice = source_only_slices.pop(0).source_slice()
            # Add a pre-slice before the next templated slices if needed.
            if next_so_slice.start > source_idx:
                slice_buff.append(slice(source_idx, next_so_slice.start))
            # Add the templated slice.
            slice_buff.append(next_so_slice)
            source_idx = next_so_slice.stop

        # Is there a gap between current position and this patch?
        if patch.source_slice.start > source_idx:
            # Add a slice up to this patch.
            slice_buff.append(slice(source_idx, patch.source_slice.start))

        # Is this patch covering an area we've already covered?
        if patch.source_slice.start < source_idx:
            linter_logger.info(
                "Skipping overlapping patch at Index %s, Patch: %s",
                source_idx,
                patch,
            )
            # Ignore the patch for now...
            continue

        # Add this patch.
        slice_buff.append(patch.source_slice)
        source_idx = patch.source_slice.stop
    # Add a tail slice.
    if source_idx < len(self.templated_file.source_str):
        slice_buff.append(slice(source_idx, len(self.templated_file.source_str)))

    linter_logger.debug("Final slice buffer: %s", slice_buff)

    # Iterate through the patches, building up the new string.
    str_buff = ""
    for source_slice in slice_buff:
        # Is it one in the patch buffer:
        for patch in filtered_source_patches:
            if patch.source_slice == source_slice:
                # Use the patched version
                linter_logger.debug(
                    "%-30s %s %r > %r",
                    "Appending {} Patch:".format(patch.patch_type),
                    patch.source_slice,
                    patch.source_str,
                    patch.fixed_raw,
                )
                str_buff += patch.fixed_raw
                break
        else:
            # Use the raw string
            linter_logger.debug(
                "Appending Raw: %s %r",
                source_slice,
                self.templated_file.source_str[source_slice],
            )
            str_buff += self.templated_file.source_str[source_slice]

    bencher("fix_string: Fixing loop done")
    # The success metric here is whether anything ACTUALLY changed.
    return str_buff, str_buff != original_source
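The final assembly loop above reduces to a simple pattern: walk an ordered buffer of source slices and emit either the patched text (when a patch covers the slice) or the raw source. A toy standalone version, with the patch keyed by `(start, stop)` for simplicity:

source = "select  1 from tbl"
patches = {(6, 8): " "}  # hypothetical patch: collapse the double space
slice_buff = [(0, 6), (6, 8), (8, len(source))]

str_buff = ""
for start, stop in slice_buff:
    # Take the fixed text if a patch covers this slice, else the raw source.
    str_buff += patches.get((start, stop), source[start:stop])

assert str_buff == "select 1 from tbl"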