Example 1
    def test_singleton(self):
        """Test to ensure mutiple calls to benchit return the same object
        """
        b = BenchIt()
        b2 = BenchIt()
        self.assertEqual(b, b2)
        self.assertEqual(b.timer_name, "BenchIt")

        b3 = BenchIt("New Bencher")
        self.assertNotEqual(b, b3)
        self.assertEqual(b3.timer_name, "New Bencher")

        b4 = BenchIt(timer_name="New Bencher")
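The test above exercises a named-singleton pattern: instances appear to be cached by timer name, with the class name as the default. A minimal sketch of that pattern (hypothetical, not the actual bench-it implementation):

class NamedSingleton:
    _instances = {}  # one shared instance per timer name

    def __new__(cls, timer_name="NamedSingleton"):
        # Reuse the cached instance for this name, creating it on first use.
        if timer_name not in cls._instances:
            inst = super().__new__(cls)
            inst.timer_name = timer_name
            cls._instances[timer_name] = inst
        return cls._instances[timer_name]

a = NamedSingleton()
b = NamedSingleton()
assert a is b and a.timer_name == "NamedSingleton"
c = NamedSingleton("New Bencher")
assert a is not c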
Example 2
    def test_benchit(self):
        """
        Full benchmark test.
        Test starting, marking, stopping, and displaying the benchmark.
        """
        b = BenchIt()

        # Ensure start key is set on init
        self.assertEqual(len(b), 1)
        self.assertIn('_start', b.keys())

        # Set a marker
        b.mark('Unique Marker')
        self.assertIn('Unique Marker', b.keys())

        # Stop the benchmark
        b.stop()
        self.assertIn('_end', b.keys())

        # Ensure end time is after the start time
        self.assertGreater(b['_end'], b['_start'])

        # Check the output table
        with capture(b.display) as output:
            self.assertIn("Avg Time", output)
            self.assertIn("Unique Marker", output)
            self.assertIn("Total runtime", output)
            self.assertIn("test_benchit", output)
Example 3
from time import sleep

from bench_it import BenchIt  # assumed import; the bench-it package provides BenchIt


def demo_benchmark():
    b = BenchIt()  # starts the timer

    # do stuff
    sleep(1); b()

    sleep(.2)
    sleep(.3)
    sleep(.4)
    b("More stuff Done")

    for i in range(1, 5):
        sleep(.1); b()

    b.display()
Example 4
    def test_benchit(self):
        """
        Full benchmark test.
        Test starting, marking, stopping, and displaying the benchmark.
        """
        b = BenchIt()

        # Ensure start key is set on init
        self.assertEqual(len(b.times), 1)
        self.assertIn('_start', b.times.keys())

        # Set a marker
        "code marker"
        b()
        "more code"
        b("__call__ marker")
        b.mark('Unique Marker')
        self.assertIn('"code marker"', b.times.keys())
        self.assertIn('__call__ marker', b.times.keys())
        self.assertIn('Unique Marker', b.times.keys())

        # Stop the benchmark
        b.stop()
        self.assertIn('_end', b.times.keys())

        # Ensure end time is after the start time
        self.assertGreater(b.times['_end'], b.times['_start'])

        # Check the output table
        with capture(b.display) as output:
            self.assertIn("Avg Time", output)
            self.assertIn("Unique Marker", output)
            self.assertIn("Total runtime", output)
            self.assertIn("test_benchit", output)
Example 5
    def parse(self, parse_context=None, parse_grammar=None):
        """Use the parse grammar to find subsegments within this segment.

        A large chunk of the logic around this can be found in the `expand` method.

        Use the parse setting in the context for testing, mostly to check how deep to go.
        True/False for yes or no, an integer allows a certain number of levels.

        Optionally, this method allows a custom parse grammar to be
        provided which will override any existing parse grammar
        on the segment.
        """
        # Clear the blacklist cache to avoid missteps
        if parse_context:
            parse_context.blacklist.clear()

        # the parse_depth and recurse kwargs control how deep we will recurse for testing.
        if not self.segments:
            # This means we're a root segment, just return an unmutated self
            return self

        # Check the Parse Grammar
        parse_grammar = parse_grammar or self.parse_grammar
        if parse_grammar is None:
            # No parse grammar, go straight to expansion
            parse_context.logger.debug(
                "{0}.parse: no grammar. Going straight to expansion".format(
                    self.__class__.__name__))
        else:
            # For debugging purposes. Ensure that we don't have non-code elements
            # at the start or end of the segments. They should always be in the middle,
            # or in the parent expression.
            segments = self.segments
            if self.can_start_end_non_code:
                pre_nc, segments, post_nc = trim_non_code_segments(segments)
            else:
                pre_nc = ()
                post_nc = ()
                if (not segments[0].is_code) and (not segments[0].is_meta):
                    raise ValueError(
                        "Segment {0} starts with non code segment: {1!r}.\n{2!r}"
                        .format(self, segments[0].raw, segments))
                if (not segments[-1].is_code) and (not segments[-1].is_meta):
                    raise ValueError(
                        "Segment {0} ends with non code segment: {1!r}.\n{2!r}"
                        .format(self, segments[-1].raw, segments))

            # NOTE: No match_depth kwarg, because this is the start of the matching.
            with parse_context.matching_segment(
                    self.__class__.__name__) as ctx:
                m = parse_grammar.match(segments=segments, parse_context=ctx)

            if not isinstance(m, MatchResult):
                raise TypeError(
                    "[PD:{0}] {1}.match. Result is {2}, not a MatchResult!".
                    format(parse_context.parse_depth, self.__class__.__name__,
                           type(m)))

            # Basic Validation, that we haven't dropped anything.
            check_still_complete(segments, m.matched_segments,
                                 m.unmatched_segments)

            if m.has_match():
                if m.is_complete():
                    # Complete match, happy days!
                    self.segments = pre_nc + m.matched_segments + post_nc
                else:
                    # Incomplete match.
                    # For now this means the parsing has failed. Let's add the unmatched bit at the
                    # end as something unparsable.
                    # TODO: Do something more intelligent here.
                    self.segments = (
                        pre_nc + m.matched_segments + (UnparsableSegment(
                            segments=m.unmatched_segments + post_nc,
                            expected="Nothing...",
                        ), ))
            elif self.allow_empty and not segments:
                # Very edge case, but some segments are allowed to be empty other than non-code
                self.segments = pre_nc + post_nc
            else:
                # If there's no match at this stage, then it's unparsable. That's
                # a problem at this stage so wrap it in an unparsable segment and carry on.
                self.segments = (
                    pre_nc + (
                        UnparsableSegment(
                            segments=segments,
                            expected=self.name,
                        ),  # NB: tuple
                    ) + post_nc)

        bencher = BenchIt()  # starts the timer
        bencher("Parse complete of {0!r}".format(self.__class__.__name__))

        # Recurse if allowed (using the expand method to deal with the expansion)
        parse_context.logger.debug(
            "{0}.parse: Done Parse. Plotting Recursion. Recurse={1!r}".format(
                self.__class__.__name__, parse_context.recurse))
        parse_depth_msg = "###\n#\n# Beginning Parse Depth {0}: {1}\n#\n###\nInitial Structure:\n{2}".format(
            parse_context.parse_depth + 1, self.__class__.__name__,
            self.stringify())
        if parse_context.may_recurse():
            parse_context.logger.debug(parse_depth_msg)
            with parse_context.deeper_parse() as ctx:
                self.segments = self.expand(self.segments, parse_context=ctx)

        return self
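`check_still_complete` is called above as a safety net after matching. A sketch of the invariant it guards, assumed from its usage here rather than copied from sqlfluff: the matched and unmatched partitions must reassemble to exactly the input.

def check_still_complete(segments, matched_segments, unmatched_segments):
    # The raw text before matching must equal the raw text after:
    # nothing may be dropped or invented by the grammar.
    initial = "".join(seg.raw for seg in segments)
    result = "".join(
        seg.raw for seg in matched_segments + unmatched_segments)
    if initial != result:
        raise RuntimeError(
            "Dropped elements in sequence matching! {0!r} != {1!r}".format(
                initial, result))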
Example 6
def fix(force,
        paths,
        parallel,
        bench=False,
        fixed_suffix="",
        logger=None,
        **kwargs):
    """Fix SQL files.

    PATH is the path to a sql file or directory to lint. This can be either a
    file ('path/to/file.sql'), a path ('directory/of/sql/files'), a single ('-')
    character to indicate reading from *stdin* or a dot/blank ('.'/' ') which will
    be interpreted like passing the current working directory as a path argument.
    """
    # some quick checks
    fixing_stdin = ("-", ) == paths

    c = get_config(**kwargs)
    lnt, formatter = get_linter_and_formatter(c, silent=fixing_stdin)
    verbose = c.get("verbose")

    bencher = BenchIt()

    formatter.dispatch_config(lnt)

    # Set up logging.
    set_logging_level(verbosity=verbose,
                      logger=logger,
                      stderr_output=fixing_stdin)

    # handle stdin case. should output formatted sql to stdout and nothing else.
    if fixing_stdin:
        stdin = sys.stdin.read()
        # TODO: Remove verbose
        result = lnt.lint_string_wrapped(stdin, fname="stdin", fix=True)
        stdout = result.paths[0].files[0].fix_string()[0]
        click.echo(stdout, nl=False)
        sys.exit()

    # Lint the paths (not with the fix argument at this stage), outputting as we go.
    click.echo("==== finding fixable violations ====")
    try:
        result = lnt.lint_paths(paths,
                                fix=True,
                                ignore_non_existent_files=False,
                                parallel=parallel)
    except IOError:
        click.echo(
            colorize(
                "The path(s) {0!r} could not be accessed. Check it/they exist(s)."
                .format(paths),
                "red",
            ))
        sys.exit(1)

    # NB: We filter to linting violations here, because they're
    # the only ones which can be potentially fixed.
    if result.num_violations(types=SQLLintError, fixable=True) > 0:
        click.echo("==== fixing violations ====")
        click.echo("{0} fixable linting violations found".format(
            result.num_violations(types=SQLLintError, fixable=True)))
        if force:
            click.echo(colorize("FORCE MODE", "red") + ": Attempting fixes...")
            # TODO: Remove verbose
            success = do_fixes(
                lnt,
                result,
                formatter,
                types=SQLLintError,
                fixed_file_suffix=fixed_suffix,
            )
            if not success:
                sys.exit(1)
        else:
            click.echo("Are you sure you wish to attempt to fix these? [Y/n] ",
                       nl=False)
            c = click.getchar().lower()
            click.echo("...")
            if c in ("y", "\r", "\n"):
                click.echo("Attempting fixes...")
                # TODO: Remove verbose
                success = do_fixes(
                    lnt,
                    result,
                    formatter,
                    types=SQLLintError,
                    fixed_file_suffix=fixed_suffix,
                )
                if not success:
                    sys.exit(1)
                else:
                    click.echo("All Finished 📜 🎉!")
            elif c == "n":
                click.echo("Aborting...")
            else:
                click.echo("Invalid input, please enter 'Y' or 'N'")
                click.echo("Aborting...")
    else:
        click.echo("==== no fixable linting violations found ====")
        if result.num_violations(types=SQLLintError, fixable=False) > 0:
            click.echo("  [{0} unfixable linting violations found]".format(
                result.num_violations(types=SQLLintError, fixable=False)))
        click.echo("All Finished 📜 🎉!")

    if bench:
        click.echo("\n\n==== bencher stats ====")
        bencher.display()

    sys.exit(0)
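One detail worth noting in the confirmation prompt above: a bare Enter ("\r" or "\n") is accepted as yes, matching the [Y/n] convention where the uppercase letter marks the default. The pattern in isolation:

import click

click.echo("Are you sure you wish to attempt to fix these? [Y/n] ", nl=False)
answer = click.getchar().lower()
click.echo("...")
if answer in ("y", "\r", "\n"):  # 'y' or a bare Enter both mean yes
    click.echo("Attempting fixes...")
elif answer == "n":
    click.echo("Aborting...")
else:
    click.echo("Invalid input, please enter 'Y' or 'N'")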
Example 7
def parse(path, code_only, format, profiler, bench, **kwargs):
    """Parse SQL files and just spit out the result.

    PATH is the path to a sql file or directory to lint. This can be either a
    file ('path/to/file.sql'), a path ('directory/of/sql/files'), a single ('-')
    character to indicate reading from *stdin* or a dot/blank ('.'/' ') which will
    be interpreted like passing the current working directory as a path argument.
    """
    # Initialise the benchmarker
    bencher = BenchIt()  # starts the timer
    c = get_config(**kwargs)
    # We don't want anything else to be logged if we want a yaml output
    lnt = get_linter(c, silent=format in ('json', 'yaml'))
    verbose = c.get('verbose')
    recurse = c.get('recurse')

    config_string = format_config(lnt, verbose=verbose)
    if len(config_string) > 0:
        lnt.log(config_string)

    # TODO: do this better
    nv = 0
    if profiler:
        # Set up the profiler if required
        try:
            import cProfile
        except ImportError:
            lnt.log('The cProfiler is not available on your platform.')
            sys.exit(1)
        pr = cProfile.Profile()
        pr.enable()

    bencher("Parse setup")
    try:
        # handle stdin if specified via lone '-'
        if '-' == path:
            # put the parser result in a list to iterate later
            config = lnt.config.make_child_from_path('stdin')
            result = [lnt.parse_string(
                sys.stdin.read(),
                'stdin',
                verbosity=verbose,
                recurse=recurse,
                config=config
            )]
        else:
            # A single path must be specified for this command
            result = lnt.parse_path(path, verbosity=verbose, recurse=recurse)

        # iterative print for human readout
        if format == 'human':
            for parsed, violations, time_dict in result:
                if parsed:
                    lnt.log(parsed.stringify(code_only=code_only))
                else:
                    # TODO: Make this prettier
                    lnt.log('...Failed to Parse...')
                nv += len(violations)
                for v in violations:
                    lnt.log(format_violation(v, verbose=verbose))
                if verbose >= 2:
                    lnt.log("==== timings ====")
                    lnt.log(cli_table(time_dict.items()))
                bencher("Output details for file")
        else:
            # collect result and print as single payload
            # will need to zip in the file paths
            filepaths = ['stdin'] if '-' == path else lnt.paths_from_path(path)
            result = [
                dict(
                    filepath=filepath,
                    segments=parsed.as_record(code_only=code_only, show_raw=True)
                )
                for filepath, (parsed, _, _) in zip(filepaths, result)
            ]

            if format == 'yaml':
                # When dumping yaml, always double-quote strings that contain tabs or newlines.
                def quoted_presenter(dumper, data):
                    """Representer which always double quotes string values needing escapes."""
                    if '\n' in data or '\t' in data:
                        return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='"')
                    else:
                        return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='')

                yaml.add_representer(str, quoted_presenter)

                click.echo(yaml.dump(result))
            elif format == 'json':
                click.echo(json.dumps(result))
    except IOError:
        click.echo(colorize('The path {0!r} could not be accessed. Check it exists.'.format(path), 'red'))
        sys.exit(1)

    if profiler:
        pr.disable()
        profiler_buffer = StringIO()
        ps = pstats.Stats(
            pr, stream=profiler_buffer
        ).sort_stats('cumulative')
        ps.print_stats()
        lnt.log("==== profiler stats ====")
        # Only print the first 50 lines of it
        lnt.log('\n'.join(profiler_buffer.getvalue().split('\n')[:50]))

    if bench:
        lnt.log("\n\n==== bencher stats ====")
        bencher.display()

    if nv > 0:
        sys.exit(66)
    else:
        sys.exit(0)
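The custom representer above exists so that strings containing tabs or newlines survive the YAML dump unambiguously instead of becoming block scalars. A quick standalone check of its effect (illustrative; `yaml` here is PyYAML):

import yaml

def quoted_presenter(dumper, data):
    # Force double-quoted style only for strings that need escapes.
    style = '"' if '\n' in data or '\t' in data else ''
    return dumper.represent_scalar('tag:yaml.org,2002:str', data, style=style)

yaml.add_representer(str, quoted_presenter)
print(yaml.dump({'sql': 'select\n1'}))
# The value is emitted as "select\n1" rather than a block scalar.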
Example 8
    def parse(self, parse_context=None):
        """Use the parse grammar to find subsegments within this segment.

        A large chunk of the logic around this can be found in the `expand` method.

        Use the parse setting in the context for testing, mostly to check how deep to go.
        True/False for yes or no, an integer allows a certain number of levels.
        """
        if not parse_context.dialect:
            raise RuntimeError("No dialect provided to {0!r}!".format(self))

        # Clear the blacklist cache to avoid missteps
        if parse_context:
            parse_context.blacklist.clear()

        # the parse_depth and recurse kwargs control how deep we will recurse for testing.
        if not self.segments:
            # This means we're a root segment, just return an unmutated self
            return self

        # Get the Parse Grammar
        g = self._parse_grammar()
        if g is None:
            # No parse grammar, go straight to expansion
            logging.debug("{0}.parse: no grammar. Going straight to expansion".format(self.__class__.__name__))
        else:
            # Use the Parse Grammar (and the private method)

            # For debugging purposes. Ensure that we don't have non-code elements
            # at the start or end of the segments. They should always be in the middle,
            # or in the parent expression.
            if not self._can_start_end_non_code:
                if (not self.segments[0].is_code) and (not self.segments[0].is_meta):
                    raise ValueError("Segment {0} starts with non code segment: {1!r}.\n{2!r}".format(
                        self, self.segments[0].raw, self.segments))
                if (not self.segments[-1].is_code) and (not self.segments[-1].is_meta):
                    raise ValueError("Segment {0} ends with non code segment: {1!r}.\n{2!r}".format(
                        self, self.segments[-1].raw, self.segments))

            # NOTE: No match_depth kwarg, because this is the start of the matching.
            m = g._match(
                segments=self.segments,
                parse_context=parse_context.copy(
                    match_segment=self.__class__.__name__
                )
            )

            if not isinstance(m, MatchResult):
                raise TypeError(
                    "[PD:{0}] {1}.match. Result is {2}, not a MatchResult!".format(
                        parse_context.parse_depth, self.__class__.__name__, type(m)))

            # Basic Validation, that we haven't dropped anything.
            check_still_complete(self.segments, m.matched_segments, m.unmatched_segments)

            if m.has_match():
                if m.is_complete():
                    # Complete match, happy days!
                    self.segments = m.matched_segments
                else:
                    # Incomplete match.
                    # For now this means the parsing has failed. Let's add the unmatched bit at the
                    # end as something unparsable.
                    # TODO: Do something more intelligent here.
                    self.segments = m.matched_segments + (UnparsableSegment(
                        segments=m.unmatched_segments, expected="Nothing..."),)
            else:
                # If there's no match at this stage, then it's unparsable. That's
                # a problem at this stage so wrap it in an unparsable segment and carry on.
                self.segments = (UnparsableSegment(
                    segments=self.segments,
                    expected=g.expected_string(dialect=parse_context.dialect)),)  # NB: tuple

            # Validate new segments
            self.validate_segments(text="parsing")

        bencher = BenchIt()  # starts the timer
        bencher("Parse complete of {0!r}".format(self.__class__.__name__))

        # Recurse if allowed (using the expand method to deal with the expansion)
        logging.debug(
            "{0}.parse: Done Parse. Plotting Recursion. Recurse={1!r}".format(
                self.__class__.__name__, parse_context.recurse))
        parse_depth_msg = "###\n#\n# Beginning Parse Depth {0}: {1}\n#\n###\nInitial Structure:\n{2}".format(
            parse_context.parse_depth + 1, self.__class__.__name__, self.stringify())
        if parse_context.recurse is True:
            logging.debug(parse_depth_msg)
            self.segments = self.expand(
                self.segments,
                parse_context=parse_context.copy(
                    incr='parse_depth', match_depth=0, recurse=True
                )
            )
        elif isinstance(parse_context.recurse, int):
            if parse_context.recurse > 1:
                logging.debug(parse_depth_msg)
                self.segments = self.expand(
                    self.segments,
                    parse_context=parse_context.copy(decr='recurse', incr='parse_depth')
                )
        # Validate new segments
        self.validate_segments(text="expanding")

        return self
Example 9
    def parse_string(self, s, fname=None, verbosity=0, recurse=True, config=None):
        """Parse a string.

        Returns:
            `tuple` of (`parsed`, `violations`, `time_dict`, `config_diff`).
                `parsed` is a segment structure representing the parsed file. If
                    parsing fails due to an unrecoverable violation then we will
                    return None.
                `violations` is a :obj:`list` of violations so far, which will either be
                    templating, lexing or parsing violations at this stage.
                `time_dict` is a :obj:`dict` containing timings for how long each step
                    took in the process.

        """
        violations = []
        t0 = time.monotonic()
        bencher = BenchIt()  # starts the timer
        if fname:
            short_fname = fname.replace('\\', '/').split('/')[-1]
        else:
            # this handles the potential case of a null fname
            short_fname = fname
        bencher("Staring parse_string for {0!r}".format(short_fname))

        # Log the start of this process if we're in a more verbose mode.
        if verbosity > 1:
            self.log(format_filename(filename=fname, success='PARSING', verbose=verbosity))
            # This is where we output config diffs if they exist.
            if config:
                # Only output config diffs if there is a config to diff to.
                config_diff = config.diff_to(self.config)
                if config_diff:
                    self.log("   Config Diff:")
                    self.log(format_config_vals(self.config.iter_vals(cfg=config_diff)))

        verbosity_logger("TEMPLATING RAW [{0}] ({1})".format(self.templater.name, fname), verbosity=verbosity)
        try:
            s = self.templater.process(s, fname=fname, config=config or self.config)
        except SQLTemplaterError as err:
            violations.append(err)
            file_segment = None
            # NB: We'll carry on if we fail to template, it might still lex

        t1 = time.monotonic()
        bencher("Templating {0!r}".format(short_fname))

        if s:
            verbosity_logger("LEXING RAW ({0})".format(fname), verbosity=verbosity)
            # Lex the file and log any problems
            try:
                file_segment, lex_vs = FileSegment.from_raw(s, config=config or self.config)
                # We might just get the violations as a list
                violations += lex_vs
            except SQLLexError as err:
                violations.append(err)
                file_segment = None
        else:
            file_segment = None

        if file_segment:
            verbosity_logger(file_segment.stringify(), verbosity=verbosity)

        t2 = time.monotonic()
        bencher("Lexing {0!r}".format(short_fname))
        verbosity_logger("PARSING ({0})".format(fname), verbosity=verbosity)
        # Parse the file and log any problems
        if file_segment:
            try:
                # Make a parse context and parse
                context = self.get_parse_context()
                context.verbosity = verbosity or context.verbosity
                context.recurse = recurse or context.recurse
                parsed = file_segment.parse(parse_context=context)
            except SQLParseError as err:
                violations.append(err)
                parsed = None
            if parsed:
                verbosity_logger(frame_msg("Parsed Tree:"), verbosity=verbosity)
                verbosity_logger(parsed.stringify(), verbosity=verbosity)
        else:
            parsed = None

        t3 = time.monotonic()
        time_dict = {'templating': t1 - t0, 'lexing': t2 - t1, 'parsing': t3 - t2}
        bencher("Finish parsing {0!r}".format(short_fname))
        return parsed, violations, time_dict
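The timing pattern above is worth isolating: `time.monotonic()` checkpoints bracket each phase, and the deltas form the `time_dict` returned to the caller. In miniature (the phase functions here are hypothetical stand-ins so the snippet runs end to end):

import time

template = lex = parse = lambda x: x  # hypothetical phase functions

t0 = time.monotonic()
templated = template("select 1")
t1 = time.monotonic()
tokens = lex(templated)
t2 = time.monotonic()
tree = parse(tokens)
t3 = time.monotonic()

time_dict = {'templating': t1 - t0, 'lexing': t2 - t1, 'parsing': t3 - t2}
print(time_dict)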
Example 10
    def parse_string(self,
                     s,
                     fname=None,
                     verbosity=0,
                     recurse=True,
                     config=None):
        """Parse a string.

        Returns:
            `tuple` of (`parsed`, `violations`, `time_dict`, `config_diff`).
                `parsed` is a segment structure representing the parsed file. If
                    parsing fails due to an unrecoverable violation then we will
                    return None.
                `violations` is a :obj:`list` of violations so far, which will either be
                    templating, lexing or parsing violations at this stage.
                `time_dict` is a :obj:`dict` containing timings for how long each step
                    took in the process.

        """
        violations = []
        t0 = time.monotonic()
        bencher = BenchIt()  # starts the timer
        if fname:
            short_fname = fname.replace('\\', '/').split('/')[-1]
        else:
            # this handles the potential case of a null fname
            short_fname = fname
        bencher("Staring parse_string for {0!r}".format(short_fname))

        # Log the start of this process if we're in a more verbose mode.
        if verbosity > 1:
            self.log(
                format_filename(filename=fname,
                                success='PARSING',
                                verbose=verbosity))
            # This is where we output config diffs if they exist.
            if config:
                # Only output config diffs if there is a config to diff to.
                config_diff = config.diff_to(self.config)
                if config_diff:
                    self.log("   Config Diff:")
                    self.log(
                        format_config_vals(
                            self.config.iter_vals(cfg=config_diff)))

        verbosity_logger("TEMPLATING RAW [{0}] ({1})".format(
            self.templater.name, fname),
                         verbosity=verbosity)
        s, templater_violations = self.templater.process(s,
                                                         fname=fname,
                                                         config=config
                                                         or self.config)
        violations += templater_violations
        # Detect the case of a catastrophic templater fail. In this case
        # we don't continue. We'll just bow out now.
        if not s:
            file_segment = None

        t1 = time.monotonic()
        bencher("Templating {0!r}".format(short_fname))

        if s:
            verbosity_logger("LEXING RAW ({0})".format(fname),
                             verbosity=verbosity)
            # Lex the file and log any problems
            try:
                file_segment, lex_vs = FileSegment.from_raw(s,
                                                            config=config
                                                            or self.config)
                # We might just get the violations as a list
                violations += lex_vs
            except SQLLexError as err:
                violations.append(err)
                file_segment = None
        else:
            file_segment = None

        if file_segment:
            verbosity_logger(file_segment.stringify(), verbosity=verbosity)

        t2 = time.monotonic()
        bencher("Lexing {0!r}".format(short_fname))
        verbosity_logger("PARSING ({0})".format(fname), verbosity=verbosity)
        # Parse the file and log any problems
        if file_segment:
            try:
                # Make a parse context and parse
                context = self.get_parse_context(config=config or self.config)
                context.verbosity = verbosity or context.verbosity
                context.recurse = recurse or context.recurse
                parsed = file_segment.parse(parse_context=context)
            except SQLParseError as err:
                violations.append(err)
                parsed = None
            if parsed:
                verbosity_logger(frame_msg("Parsed Tree:"),
                                 verbosity=verbosity)
                verbosity_logger(parsed.stringify(), verbosity=verbosity)
                # We may succeed parsing, but still have unparsable segments. Extract them here.
                for unparsable in parsed.iter_unparsables():
                    # No exception has been raised explicitly, but we still create one here
                    # so that we can use the common interface
                    violations.append(
                        SQLParseError("Found unparsable section: {0!r}".format(
                            unparsable.raw if len(unparsable.raw) < 40 else
                            unparsable.raw[:40] + "..."),
                                      segment=unparsable))
                    if verbosity >= 2:
                        verbosity_logger("Found unparsable segment...",
                                         verbosity=verbosity)
                        verbosity_logger(unparsable.stringify(),
                                         verbosity=verbosity)
        else:
            parsed = None

        t3 = time.monotonic()
        time_dict = {
            'templating': t1 - t0,
            'lexing': t2 - t1,
            'parsing': t3 - t2
        }
        bencher("Finish parsing {0!r}".format(short_fname))
        return parsed, violations, time_dict
Example 11
    def parse_string(self, in_str, fname=None, recurse=True, config=None):
        """Parse a string.

        Returns:
            `ParsedString` of (`parsed`, `violations`, `time_dict`, `templated_file`).
                `parsed` is a segment structure representing the parsed file. If
                    parsing fails due to an unrecoverable violation then we will
                    return None.
                `violations` is a :obj:`list` of violations so far, which will either be
                    templating, lexing or parsing violations at this stage.
                `time_dict` is a :obj:`dict` containing timings for how long each step
                    took in the process.
                `templated_file` is a :obj:`TemplatedFile` containing the details
                    of the templated file.

        """
        violations = []
        t0 = time.monotonic()
        bencher = BenchIt()  # starts the timer
        if fname:
            short_fname = fname.replace("\\", "/").split("/")[-1]
        else:
            # this handles the potential case of a null fname
            short_fname = fname
        bencher("Staring parse_string for {0!r}".format(short_fname))

        # Dispatch the output for the parse header (including the config diff)
        if self.formatter:
            self.formatter.dispatch_parse_header(fname, self.config, config)

        # Just use the local config from here:
        config = config or self.config

        # Scan the raw file for config commands.
        for raw_line in in_str.splitlines():
            if raw_line.startswith("-- sqlfluff"):
                # Found an in-file config command
                config.process_inline_config(raw_line)

        linter_logger.info("TEMPLATING RAW [%s] (%s)", self.templater.name,
                           fname)
        templated_file, templater_violations = self.templater.process(
            in_str=in_str, fname=fname, config=config)
        violations += templater_violations
        # Detect the case of a catastrophic templater fail. In this case
        # we don't continue. We'll just bow out now.
        if not templated_file:
            linter_logger.info("TEMPLATING FAILED: %s", templater_violations)
            tokens = None

        t1 = time.monotonic()
        bencher("Templating {0!r}".format(short_fname))

        if templated_file:
            linter_logger.info("LEXING RAW (%s)", fname)
            # Get the lexer
            lexer = Lexer(config=config)
            # Lex the file and log any problems
            try:
                tokens, lex_vs = lexer.lex(templated_file)
                # We might just get the violations as a list
                violations += lex_vs
            except SQLLexError as err:
                linter_logger.info("LEXING FAILED! (%s): %s", fname, err)
                violations.append(err)
                tokens = None
        else:
            tokens = None

        if tokens:
            linter_logger.info("Lexed tokens: %s", [seg.raw for seg in tokens])
        else:
            linter_logger.info("NO LEXED TOKENS!")

        if tokens:
            # Check that we've got sensible indentation from the lexer.
            # We might need to suppress if it's a complicated file.
            templating_blocks_indent = config.get("template_blocks_indent",
                                                  "indentation")
            if isinstance(templating_blocks_indent, str):
                force_block_indent = templating_blocks_indent.lower().strip(
                ) == "force"
            else:
                force_block_indent = False
            templating_blocks_indent = bool(templating_blocks_indent)
            # If we're forcing it through we don't check.
            if templating_blocks_indent and not force_block_indent:
                indent_balance = sum(
                    getattr(elem, "indent_val", 0) for elem in tokens)
                if indent_balance != 0:
                    linter_logger.warning(
                        "Indent balance test failed for %r. Template indents will not be linted for this file.",
                        fname,
                    )
                    # Don't enable the templating blocks.
                    templating_blocks_indent = False
                    # Disable the linting of L003 on templated tokens.
                    config.set_value(
                        ["rules", "L003", "lint_templated_tokens"], False)

            # The file will have been lexed without config, so check all indents
            # are enabled.
            new_tokens = []
            for token in tokens:
                if token.is_meta:
                    if token.indent_val != 0:
                        # Don't allow it if we're not linting templating block indents.
                        if not templating_blocks_indent:
                            continue
                        # Don't allow it if it's not configured to function.
                        elif not token.is_enabled(indent_config=config.
                                                  get_section("indentation")):
                            continue
                new_tokens.append(token)
            # Swap the buffers
            tokens = new_tokens

        t2 = time.monotonic()
        bencher("Lexing {0!r}".format(short_fname))
        linter_logger.info("PARSING (%s)", fname)
        parser = Parser(config=config)
        # Parse the file and log any problems
        if tokens:
            try:
                parsed = parser.parse(tokens, recurse=recurse)
            except SQLParseError as err:
                linter_logger.info("PARSING FAILED! (%s): %s", fname, err)
                violations.append(err)
                parsed = None
            if parsed:
                linter_logger.info(
                    "\n###\n#\n# {0}\n#\n###".format("Parsed Tree:"))
                linter_logger.info("\n" + parsed.stringify())
                # We may succeed parsing, but still have unparsable segments. Extract them here.
                for unparsable in parsed.iter_unparsables():
                    # No exception has been raised explicitly, but we still create one here
                    # so that we can use the common interface
                    violations.append(
                        SQLParseError(
                            "Found unparsable section: {0!r}".format(
                                unparsable.raw if len(unparsable.raw) < 40 else
                                unparsable.raw[:40] + "..."),
                            segment=unparsable,
                        ))
                    linter_logger.info("Found unparsable segment...")
                    linter_logger.info(unparsable.stringify())
        else:
            parsed = None

        t3 = time.monotonic()
        time_dict = {
            "templating": t1 - t0,
            "lexing": t2 - t1,
            "parsing": t3 - t2
        }
        bencher("Finish parsing {0!r}".format(short_fname))
        return ParsedString(parsed, violations, time_dict, templated_file,
                            config)
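The indent-balance test above sums `indent_val` across all lexed tokens; assuming the usual convention that indent tokens carry +1 and dedent tokens -1, any non-zero total means the templated blocks opened and closed unevenly, so template indents can't be trusted for linting. The arithmetic in miniature, with plain integers standing in for `getattr(token, "indent_val", 0)`:

indent_vals = [1, 0, 0, -1, 1]  # stand-ins for each token's indent_val
indent_balance = sum(indent_vals)
if indent_balance != 0:
    print("Indent balance test failed; template indents will not be linted.")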
Example 12
    def parse_string(
        self,
        in_str: str,
        fname: Optional[str] = None,
        recurse: bool = True,
        config: Optional[FluffConfig] = None,
    ) -> ParsedString:
        """Parse a string."""
        violations: List[SQLBaseError] = []
        t0 = time.monotonic()
        bencher = BenchIt()  # starts the timer
        short_fname = self._generate_short_fname(fname)
        bencher("Staring parse_string for {0!r}".format(short_fname))

        # Dispatch the output for the template header (including the config diff)
        if self.formatter:
            self.formatter.dispatch_template_header(fname, self.config, config)

        # Just use the local config from here:
        config = config or self.config

        # Scan the raw file for config commands.
        config.process_raw_file_for_config(in_str)

        templated_file, templater_violations = self.render_string(
            in_str, fname, config)
        violations += templater_violations

        t1 = time.monotonic()
        bencher("Templating {0!r}".format(short_fname))

        # Dispatch the output for the parse header
        if self.formatter:
            self.formatter.dispatch_parse_header(fname)

        tokens: Optional[Sequence[BaseSegment]]
        if templated_file:
            tokens, lvs, config = self._lex_templated_file(
                templated_file, config)
            violations += lvs
        else:
            tokens = None

        t2 = time.monotonic()
        bencher("Lexing {0!r}".format(short_fname))
        linter_logger.info("PARSING (%s)", fname)

        if tokens:
            parsed, pvs = self._parse_tokens(tokens, config, recurse=recurse)
            violations += pvs
        else:
            parsed = None

        t3 = time.monotonic()
        time_dict = {
            "templating": t1 - t0,
            "lexing": t2 - t1,
            "parsing": t3 - t2
        }
        bencher("Finish parsing {0!r}".format(short_fname))
        return ParsedString(parsed, violations, time_dict, templated_file,
                            config)
Example 13
    def parse_string(self, s, fname=None, recurse=True, config=None):
        """Parse a string.

        Returns:
            `tuple` of (`parsed`, `violations`, `time_dict`, `config_diff`).
                `parsed` is a segment structure representing the parsed file. If
                    parsing fails due to an unrecoverable violation then we will
                    return None.
                `violations` is a :obj:`list` of violations so far, which will either be
                    templating, lexing or parsing violations at this stage.
                `time_dict` is a :obj:`dict` containing timings for how long each step
                    took in the process.

        """
        violations = []
        t0 = time.monotonic()
        bencher = BenchIt()  # starts the timer
        if fname:
            short_fname = fname.replace('\\', '/').split('/')[-1]
        else:
            # this handles the potential case of a null fname
            short_fname = fname
        bencher("Staring parse_string for {0!r}".format(short_fname))

        # Dispatch the output for the parse header (including the config diff)
        if self.formatter:
            self.formatter.dispatch_parse_header(fname, self.config, config)

        linter_logger.info("TEMPLATING RAW [%s] (%s)", self.templater.name,
                           fname)
        s, templater_violations = self.templater.process(s,
                                                         fname=fname,
                                                         config=config
                                                         or self.config)
        violations += templater_violations
        # Detect the case of a catastrophic templater fail. In this case
        # we don't continue. We'll just bow out now.
        if not s:
            file_segment = None

        t1 = time.monotonic()
        bencher("Templating {0!r}".format(short_fname))

        if s:
            linter_logger.info("LEXING RAW (%s)", fname)
            # Lex the file and log any problems
            try:
                file_segment, lex_vs = FileSegment.from_raw(s,
                                                            config=config
                                                            or self.config)
                # We might just get the violations as a list
                violations += lex_vs
            except SQLLexError as err:
                violations.append(err)
                file_segment = None
        else:
            file_segment = None

        if file_segment:
            linter_logger.info(file_segment.stringify())

        t2 = time.monotonic()
        bencher("Lexing {0!r}".format(short_fname))
        linter_logger.info("PARSING (%s)", fname)
        # Parse the file and log any problems
        if file_segment:
            try:
                # Make a parse context and parse
                with RootParseContext.from_config(config=config or self.config,
                                                  recurse=recurse) as ctx:
                    parsed = file_segment.parse(parse_context=ctx)
            except SQLParseError as err:
                violations.append(err)
                parsed = None
            if parsed:
                linter_logger.info(
                    "\n###\n#\n# {0}\n#\n###".format("Parsed Tree:"))
                linter_logger.info("\n" + parsed.stringify())
                # We may succeed parsing, but still have unparsable segments. Extract them here.
                for unparsable in parsed.iter_unparsables():
                    # No exception has been raised explicitly, but we still create one here
                    # so that we can use the common interface
                    violations.append(
                        SQLParseError("Found unparsable section: {0!r}".format(
                            unparsable.raw if len(unparsable.raw) < 40 else
                            unparsable.raw[:40] + "..."),
                                      segment=unparsable))
                    linter_logger.info("Found unparsable segment...")
                    linter_logger.info(unparsable.stringify())
        else:
            parsed = None

        t3 = time.monotonic()
        time_dict = {
            'templating': t1 - t0,
            'lexing': t2 - t1,
            'parsing': t3 - t2
        }
        bencher("Finish parsing {0!r}".format(short_fname))
        return parsed, violations, time_dict
Example 14
def fix(force,
        paths,
        bench=False,
        fixed_suffix="",
        no_safety=False,
        logger=None,
        **kwargs):
    """Fix SQL files.

    PATH is the path to a sql file or directory to lint. This can be either a
    file ('path/to/file.sql'), a path ('directory/of/sql/files'), a single ('-')
    character to indicate reading from *stdin* or a dot/blank ('.'/' ') which will
    be interpreted like passing the current working directory as a path argument.
    """
    c = get_config(**kwargs)
    lnt, formatter = get_linter_and_formatter(c, silent=("-", ) == paths)
    verbose = c.get("verbose")

    bencher = BenchIt()

    formatter.dispatch_config(lnt)

    # Set up logging.
    set_logging_level(verbosity=verbose, logger=logger)

    # Check that if fix is specified, that we have picked only a subset of rules
    if no_safety:
        click.echo(
            colorize("NO SAFETY", "red") +
            ": Attempting fixes for all enabled rules.")
    elif lnt.config.get("rule_whitelist") is None:
        click.echo(("The fix option is only available in combination"
                    " with --rules. This is for your own safety! To"
                    " disable this safety feature use --no-safety or --s."))
        sys.exit(1)

    # handle stdin case. should output formatted sql to stdout and nothing else.
    if ("-", ) == paths:
        stdin = sys.stdin.read()
        # TODO: Remove verbose
        result = lnt.lint_string_wrapped(stdin, fname="stdin", fix=True)
        stdout = result.paths[0].files[0].fix_string()[0]
        click.echo(stdout, nl=False)
        sys.exit()

    # Lint the paths (not with the fix argument at this stage), outputting as we go.
    click.echo("==== finding fixable violations ====")
    try:
        result = lnt.lint_paths(paths,
                                fix=True,
                                ignore_non_existent_files=False)
    except IOError:
        click.echo(
            colorize(
                "The path(s) {0!r} could not be accessed. Check it/they exist(s)."
                .format(paths),
                "red",
            ))
        sys.exit(1)

    # NB: We filter to linting violations here, because they're
    # the only ones which can be potentially fixed.
    if result.num_violations(types=SQLLintError, fixable=True) > 0:
        click.echo("==== fixing violations ====")
        click.echo("{0} fixable linting violations found".format(
            result.num_violations(types=SQLLintError, fixable=True)))
        if force:
            click.echo(colorize("FORCE MODE", "red") + ": Attempting fixes...")
            # TODO: Remove verbose
            success = do_fixes(
                lnt,
                result,
                formatter,
                types=SQLLintError,
                fixed_file_suffix=fixed_suffix,
            )
            if not success:
                sys.exit(1)
        else:
            click.echo("Are you sure you wish to attempt to fix these? [Y/n] ",
                       nl=False)
            c = click.getchar().lower()
            click.echo("...")
            if c == "y":
                click.echo("Attempting fixes...")
                # TODO: Remove verbose
                success = do_fixes(
                    lnt,
                    result,
                    formatter,
                    types=SQLLintError,
                    fixed_file_suffix=fixed_suffix,
                )
                if not success:
                    sys.exit(1)
            elif c == "n":
                click.echo("Aborting...")
            else:
                click.echo("Invalid input :(")
                click.echo("Aborting...")
    else:
        click.echo("==== no fixable linting violations found ====")
        if result.num_violations(types=SQLLintError, fixable=False) > 0:
            click.echo("  [{0} unfixable linting violations found]".format(
                result.num_violations(types=SQLLintError, fixable=False)))

    if bench:
        click.echo("\n\n==== bencher stats ====")
        bencher.display()

    sys.exit(0)
Example 15
    def fix_string(self, verbosity=0):
        """Obtain the changes to a path as a string.

        We use the file_mask to do a safe merge, avoiding any templated
        sections. First we need to detect where there have been changes
        between the fixed and templated versions. The file mask is of
        the format: (raw_file, templated_file, fixed_file).

        We use difflib.SequenceMatcher.get_opcodes
        See: https://docs.python.org/3.7/library/difflib.html#difflib.SequenceMatcher.get_opcodes
        It returns a list of tuples ('equal|replace|delete|insert', ia1, ia2, ib1, ib2).

        """
        bencher = BenchIt()
        bencher("fix_string: start")

        # Do we have enough information to actually fix the file?
        if any(elem is None for elem in self.file_mask):
            verbosity_logger(
                "Insufficient information to fix file: {0}".format(
                    self.file_mask),
                verbosity=verbosity)
            return None, False

        verbosity_logger("Persisting file masks: {0}".format(self.file_mask),
                         verbosity=verbosity)
        # Compare Templated with Raw
        diff_templ = SequenceMatcher(autojunk=None,
                                     a=self.file_mask[0],
                                     b=self.file_mask[1])
        bencher("fix_string: Match 0&1")
        diff_templ_codes = diff_templ.get_opcodes()
        verbosity_logger("Templater diff codes: {0}".format(diff_templ_codes),
                         verbosity=verbosity)

        bencher("fix_string: Got Opcodes 0&1")
        # Compare Fixed with Templated
        diff_fix = SequenceMatcher(autojunk=None,
                                   a=self.file_mask[1],
                                   b=self.file_mask[2])
        bencher("fix_string: Matched 1&2")
        # diff_fix = SequenceMatcher(autojunk=None, a=self.file_mask[1][0], b=self.file_mask[2][0])
        diff_fix_codes = diff_fix.get_opcodes()
        verbosity_logger("Fixing diff codes: {0}".format(diff_fix_codes),
                         verbosity=verbosity)
        bencher("fix_string: Got Opcodes 1&2")

        # If diff_templ isn't the same then we should just keep the template. If there *was*
        # a fix in that space, then we should raise an issue.
        # If it is the same, then we can apply fixes as expected.
        write_buff = ''
        fixed_block = None
        templ_block = None
        # index in raw, templ and fix
        idx = (0, 0, 0)
        loop_idx = 0
        bencher("fix_string: Loop Setup")
        while True:
            loop_idx += 1
            verbosity_logger("{0:04d}: Write Loop: idx:{1}, buff:{2!r}".format(
                loop_idx, idx, write_buff),
                             verbosity=verbosity)

            if templ_block is None:
                if diff_templ_codes:
                    templ_block = diff_templ_codes.pop(0)
                # We've exhausted the template. Have we exhausted the fixes?
                elif fixed_block is None and not diff_fix_codes:
                    # Yes - excellent. DONE
                    break
                # Deal with the case that we only have inserts left.
                elif all(elem[0] == 'insert' for elem in diff_fix_codes):
                    for fixed_block in diff_fix_codes:
                        write_buff += self.file_mask[2][
                            fixed_block[3]:fixed_block[4]]
                    break
                else:
                    raise NotImplementedError(
                        "Fix Block(s) left over! Don't know how to handle this! aeflf8wh"
                    )
            if fixed_block is None:
                if diff_fix_codes:
                    fixed_block = diff_fix_codes.pop(0)
                # One case is that we just consumed the last block of both, so check indexes
                # to see if we're at the end of the raw file.
                elif idx[0] >= len(self.file_mask[0]):
                    # Yep we're at the end
                    break
                else:
                    raise NotImplementedError(
                        "Unexpectedly depleted the fixes. Panic!")
            verbosity_logger("{0:04d}: Blocks: template:{1}, fix:{2}".format(
                loop_idx, templ_block, fixed_block),
                             verbosity=verbosity)

            if templ_block[0] == 'equal':
                if fixed_block[0] == 'equal':
                    # No templating, no fixes, go with middle and advance indexes
                    # Find out how far we can advance (we use the middle version because it's common)
                    if templ_block[4] == fixed_block[2]:
                        buff = self.file_mask[1][idx[1]:fixed_block[2]]
                        # consume both blocks
                        fixed_block = None
                        templ_block = None
                    elif templ_block[4] > fixed_block[2]:
                        buff = self.file_mask[1][idx[1]:fixed_block[2]]
                        # consume fixed block
                        fixed_block = None
                    elif templ_block[4] < fixed_block[2]:
                        buff = self.file_mask[1][idx[1]:templ_block[4]]
                        # consume templ block
                        templ_block = None
                    idx = (idx[0] + len(buff), idx[1] + len(buff),
                           idx[2] + len(buff))
                    write_buff += buff
                    continue
                elif fixed_block[0] == 'replace':
                    # Consider how to apply fixes.
                    # Can we implement the fix while staying in the equal segment?
                    if fixed_block[2] <= templ_block[4]:
                        # Yes! Write from the fixed version.
                        write_buff += self.file_mask[2][idx[2]:fixed_block[4]]
                        idx = (idx[0] + (fixed_block[2] - fixed_block[1]),
                               fixed_block[2], fixed_block[4])
                        # Consume the fixed block because we've written the whole thing.
                        fixed_block = None
                        continue
                    else:
                        raise NotImplementedError("DEF")
                elif fixed_block[0] == 'delete':
                    # We're deleting items, nothing to write but we can consume some
                    # blocks and advance some indexes.
                    idx = (idx[0] + (fixed_block[2] - fixed_block[1]),
                           fixed_block[2], fixed_block[4])
                    fixed_block = None
                elif fixed_block[0] == 'insert':
                    # We're inserting items, Write from the fix block, but only that index moves.
                    write_buff += self.file_mask[2][idx[2]:fixed_block[4]]
                    idx = (idx[0], idx[1], fixed_block[4])
                    fixed_block = None
                else:
                    raise NotImplementedError((
                        "Unexpected opcode {0} for fix block! Please report this "
                        "issue on github with the query and rules you're trying to "
                        "fix.").format(fixed_block[0]))
            elif templ_block[0] == 'replace':
                # We're in a templated section - we should write the templated version.
                # we should consume the whole replace block and then deal with where
                # we end up.
                buff = self.file_mask[0][idx[0]:templ_block[2]]
                new_templ_idx = templ_block[4]

                # Fast forward through fix blocks until we catch up. We're not implementing
                # any changes in a templated section.
                while True:
                    if fixed_block[2] > new_templ_idx >= fixed_block[1]:
                        # this block contains the end point
                        break
                    else:
                        # We're not at the end point yet, continue to fast forward through.
                        if fixed_block[0] != 'equal':
                            print("WARNING: Skipping edit block: {0}".format(
                                fixed_block))
                        if diff_fix_codes:
                            fixed_block = diff_fix_codes.pop(0)
                        else:
                            raise NotImplementedError(
                                "Unexpectedly depleted the fixes. Panic!")
                # Are we exactly on a join?
                if new_templ_idx == fixed_block[1]:
                    # GREAT - this makes things easy because we have an equality point already
                    idx = (templ_block[2], new_templ_idx, fixed_block[3])
                else:
                    if fixed_block[0] == 'equal':
                        # If it's in an equal block, we can use the same offset from the end.
                        idx = (templ_block[2], new_templ_idx, fixed_block[3] +
                               (new_templ_idx - fixed_block[1]))
                    else:
                        # TODO: We're trying to move through a templated section, but
                        # we've ended up in a fixed section. We've lost track of the
                        # indexes. We might need to panic if this happens...
                        print("WARNING: Lost track of indexes. Templated index: "
                              "{0}, fix block: {1}".format(new_templ_idx, fixed_block))
                        raise NotImplementedError(
                            "Unhandled case: fast-forwarded through a templated "
                            "section into a non-equal fix block.")
                write_buff += buff
                # consume template block
                templ_block = None
            elif templ_block[0] == 'delete':
                # A 'delete' opcode here means the templater has deleted some
                # characters, which is just a quirk of the differ. In reality
                # it means we should write these characters out and not worry
                # about advancing the other indexes.
                buff = self.file_mask[0][idx[0]:templ_block[2]]
                # consume templ block
                templ_block = None
                idx = (idx[0] + len(buff), idx[1], idx[2])
                write_buff += buff
            elif templ_block[0] == 'insert':
                # The templater has inserted something here. We don't need
                # to write anything here (because whatever we're looking at
                # was inserted by the templater), but we do need to keep
                # track of what happened to the rest of the section we're in.
                # If nothing was fixed then it's easy because the indices
                # will be the same. Otherwise... great question...

                # For now let's just deal with the happy case where the fixed
                # block is equal
                if fixed_block[0] == 'equal':
                    # Let's make sure we can consume enough to get through the
                    # templ block and not get to the end of the fix block.
                    if templ_block[4] <= fixed_block[2]:
                        insert_len = templ_block[4] - templ_block[3]
                        idx = (idx[0], idx[1] + insert_len,
                               idx[2] + insert_len)
                        # if things matched up perfectly, consume the fixed block
                        if templ_block[4] == fixed_block[2]:
                            fixed_block = None
                        # always consume templ block in this case
                        templ_block = None
                    else:
                        raise NotImplementedError((
                            "Unexpected scenario during insert opcode! Please report "
                            "this issue on github with the query and rules you're trying "
                            "to fix."))
                else:
                    raise NotImplementedError((
                        "Unexpected opcode {0} for fix block! Please report this "
                        "issue on github with the query and rules you're trying to "
                        "fix.").format(fixed_block[0]))
            else:
                raise NotImplementedError((
                    "Unexpected opcode {0} for template block! Please report this "
                    "issue on github with the query and rules you're trying to "
                    "fix.").format(templ_block[0]))

        bencher("fix_string: Fixing loop done")
        # The success metric here is whether anything ACTUALLY changed.
        return write_buff, write_buff != self.file_mask[0]
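
The 5-tuples consumed above (fixed_block and templ_block) are standard difflib
opcodes of the form (tag, i1, i2, j1, j2), where tag is one of 'equal',
'replace', 'delete' or 'insert' and the index pairs describe slices of the two
compared strings. A minimal sketch of how such opcode lists are produced, using
only the stdlib (the sample strings are illustrative):

import difflib

# Each opcode is a 5-tuple (tag, i1, i2, j1, j2) referring to the
# slices a[i1:i2] and b[j1:j2] of the compared strings.
a = "select col from tbl"
b = "SELECT col FROM tbl"

for tag, i1, i2, j1, j2 in difflib.SequenceMatcher(None, a, b).get_opcodes():
    print(tag, repr(a[i1:i2]), "->", repr(b[j1:j2]))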
Esempio n. 16
def parse(path,
          code_only,
          format,
          profiler,
          bench,
          nofail,
          logger=None,
          **kwargs):
    """Parse SQL files and just spit out the result.

    PATH is the path to a sql file or directory to lint. This can be either a
    file ('path/to/file.sql'), a directory ('directory/of/sql/files'), a single
    dash ('-') to indicate reading from *stdin*, or a dot/blank ('.'/' ') which
    will be interpreted as the current working directory.
    """
    # Initialise the benchmarker
    bencher = BenchIt()  # starts the timer
    c = get_config(**kwargs)
    # We don't want anything else logged if we're producing json or yaml output
    lnt, formatter = get_linter_and_formatter(
        c, silent=format in ("json", "yaml"))
    verbose = c.get("verbose")
    recurse = c.get("recurse")

    formatter.dispatch_config(lnt)

    # Set up logging.
    set_logging_level(verbosity=verbose, logger=logger)

    # TODO: do this better
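    # nv counts the total number of violations across all parsed files.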
    nv = 0
    if profiler:
        # Set up the profiler if required
        try:
            import cProfile
        except ImportError:
            click.echo("The cProfiler is not available on your platform.")
            sys.exit(1)
        pr = cProfile.Profile()
        pr.enable()

    bencher("Parse setup")
    try:
        # handle stdin if specified via lone '-'
        if "-" == path:
            # put the parser result in a list to iterate later
            config = lnt.config.make_child_from_path("stdin")
            result = [(
                # TODO: Remove verbose
                *lnt.parse_string(
                    sys.stdin.read(), "stdin", recurse=recurse, config=config),
                config,
            )]
        else:
            # A single path must be specified for this command
            # TODO: Remove verbose
            result = lnt.parse_path(path, recurse=recurse)

        # iterative print for human readout
        if format == "human":
            for parsed, violations, time_dict, f_cfg in result:
                if parsed:
                    click.echo(parsed.stringify(code_only=code_only))
                else:
                    # TODO: Make this prettier
                    click.echo("...Failed to Parse...")
                nv += len(violations)
                if violations:
                    click.echo("==== parsing violations ====")
                for v in violations:
                    click.echo(format_violation(v))
                if violations and f_cfg.get("dialect") == "ansi":
                    click.echo(format_dialect_warning())
                if verbose >= 2:
                    click.echo("==== timings ====")
                    click.echo(cli_table(time_dict.items()))
                bencher("Output details for file")
        else:
            # collect result and print as single payload
            # will need to zip in the file paths
            filepaths = ["stdin"] if "-" == path else lnt.paths_from_path(path)
            result = [
                dict(
                    filepath=filepath,
                    segments=parsed.as_record(code_only=code_only,
                                              show_raw=True),
                ) for filepath, (parsed, _, _, _) in zip(filepaths, result)
            ]

            if format == "yaml":
                # For yaml dumping, always double-quote strings which contain tabs, newlines or quotes.
                def quoted_presenter(dumper, data):
                    """Representer which always double quotes string values needing escapes."""
                    if "\n" in data or "\t" in data or "'" in data:
                        return dumper.represent_scalar("tag:yaml.org,2002:str",
                                                       data,
                                                       style='"')
                    else:
                        return dumper.represent_scalar("tag:yaml.org,2002:str",
                                                       data,
                                                       style="")

                yaml.add_representer(str, quoted_presenter)

                click.echo(yaml.dump(result))
            elif format == "json":
                click.echo(json.dumps(result))
    except IOError:
        click.echo(
            colorize(
                "The path {0!r} could not be accessed. Check it exists.".
                format(path),
                "red",
            ))
        sys.exit(1)

    if profiler:
        pr.disable()
        profiler_buffer = StringIO()
        ps = pstats.Stats(pr, stream=profiler_buffer).sort_stats("cumulative")
        ps.print_stats()
        click.echo("==== profiler stats ====")
        # Only print the first 50 lines of it
        click.echo("\n".join(profiler_buffer.getvalue().split("\n")[:50]))

    if bench:
        click.echo("\n\n==== bencher stats ====")
        bencher.display()

    if nv > 0 and not nofail:
        sys.exit(66)
    else:
        sys.exit(0)
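
The quoted_presenter above uses the standard PyYAML extension point:
yaml.add_representer registers a custom representer for a given type. A
standalone sketch of the same technique (the sample data is illustrative):

import yaml

def quoted_presenter(dumper, data):
    """Double-quote strings which would otherwise need escaping."""
    if "\n" in data or "\t" in data or "'" in data:
        return dumper.represent_scalar("tag:yaml.org,2002:str", data, style='"')
    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="")

yaml.add_representer(str, quoted_presenter)

# Strings containing tabs, newlines or single quotes are emitted with
# double quotes; plain strings stay unquoted.
print(yaml.dump({"plain": "hello", "tricky": "line one\nline 'two'"}))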
Esempio n. 17
    def fix_string(self):
        """Obtain the changes to a path as a string.

        We use the source mapping features of TemplatedFile
        to generate a list of "patches" which cover the
        non-templated parts of the file and refer back to the
        locations in the original file.

        NB: This is MUCH FASTER than the original approach
        using difflib in pre 0.4.0.

        There is an important distinction here between Slices and
        Segments. A Slice is a portion of a file determined by the
        templater, based on which portions of the source file are
        templated or not; it is derived before lexing and so is
        completely dialect agnostic. A Segment is determined by the
        Lexer from portions of strings after templating.
        """
        bencher = BenchIt()
        bencher("fix_string: start")

        linter_logger.debug("Original Tree: %r",
                            self.templated_file.templated_str)
        linter_logger.debug("Fixed Tree: %r", self.tree.raw)

        # The sliced file is contiguous in the TEMPLATED space.
        # NB: It has gaps and repeats in the source space.
        # It's also not the FIXED file either.
        linter_logger.debug("### Templated File.")
        for idx, file_slice in enumerate(self.templated_file.sliced_file):
            t_str = self.templated_file.templated_str[
                file_slice.templated_slice]
            s_str = self.templated_file.source_str[file_slice.source_slice]
            if t_str == s_str:
                linter_logger.debug("    File slice: %s %r [invariant]", idx,
                                    file_slice)
            else:
                linter_logger.debug("    File slice: %s %r", idx, file_slice)
                linter_logger.debug("    \t\t\ttemplated: %r\tsource: %r",
                                    t_str, s_str)

        original_source = self.templated_file.source_str

        # Make sure no patches overlap and divide up the source file into slices.
        # Any Template tags in the source file are off limits.
        source_only_slices = self.templated_file.source_only_slices()

        linter_logger.debug("Source-only slices: %s", source_only_slices)

        # Iterate patches, filtering and translating as we go:
        linter_logger.debug("### Beginning Patch Iteration.")
        filtered_source_patches = []
        dedupe_buffer = []
        # We use enumerate so that we get an index for each patch. This is
        # entirely so that, when reading debug logs, we can find a given patch again!
        for idx, patch in enumerate(
                self.tree.iter_patches(
                    templated_str=self.templated_file.templated_str)):
            linter_logger.debug("  %s Yielded patch: %s", idx, patch)

            # This next bit is ALL FOR LOGGING AND DEBUGGING
            if patch.templated_slice.start >= 10:
                pre_hint = self.templated_file.templated_str[
                    patch.templated_slice.start -
                    10:patch.templated_slice.start]
            else:
                pre_hint = self.templated_file.templated_str[
                    :patch.templated_slice.start]
            if patch.templated_slice.stop + 10 < len(
                    self.templated_file.templated_str):
                post_hint = self.templated_file.templated_str[
                    patch.templated_slice.stop:patch.templated_slice.stop + 10]
            else:
                post_hint = self.templated_file.templated_str[
                    patch.templated_slice.stop:]
            linter_logger.debug("        Templated Hint: ...%r <> %r...",
                                pre_hint, post_hint)

            # Attempt to convert to source space.
            try:
                source_slice = self.templated_file.templated_slice_to_source_slice(
                    patch.templated_slice, )
            except ValueError:
                linter_logger.info(
                    "      - Skipping. Source-space ValueError, i.e. attempted insertion within a templated section."
                )
                # If we try to slice within a templated section we may fail,
                # in which case we should skip this patch.
                continue

            # Check for duplicates
            dedupe_tuple = (source_slice, patch.fixed_raw)
            if dedupe_tuple in dedupe_buffer:
                linter_logger.info(
                    "      - Skipping. Source space Duplicate: %s",
                    dedupe_tuple)
                continue

            # We now evaluate patches in the source-space for whether they overlap
            # or disrupt any templated sections.
            # The intent here is that unless explicitly stated, a fix should never
            # disrupt a templated section.
            # NOTE: We rely here on the patches being sorted.
            # TODO: Implement a mechanism for doing templated section fixes. For
            # now it's just not allowed.

            # Get the affected raw slices.
            local_raw_slices = self.templated_file.raw_slices_spanning_source_slice(
                source_slice)
            local_type_list = [slc.slice_type for slc in local_raw_slices]

            enriched_patch = EnrichedFixPatch(
                source_slice=source_slice,
                templated_slice=patch.templated_slice,
                patch_type=patch.patch_type,
                fixed_raw=patch.fixed_raw,
                templated_str=self.templated_file.templated_str[
                    patch.templated_slice],
                source_str=self.templated_file.source_str[source_slice],
            )

            # Deal with the easy case of only literals
            if set(local_type_list) == {"literal"}:
                linter_logger.info(
                    "      * Keeping patch on literal-only section: %s",
                    enriched_patch)
                filtered_source_patches.append(enriched_patch)
                dedupe_buffer.append(enriched_patch.dedupe_tuple())
            # Is it a zero-length patch?
            elif (enriched_patch.source_slice.start
                  == enriched_patch.source_slice.stop
                  and enriched_patch.source_slice.start
                  == local_raw_slices[0].source_idx):
                linter_logger.info(
                    "      * Keeping insertion patch on slice boundary: %s",
                    enriched_patch,
                )
                filtered_source_patches.append(enriched_patch)
                dedupe_buffer.append(enriched_patch.dedupe_tuple())
            # If it's ONLY templated then we should skip it.
            elif "literal" not in local_type_list:
                linter_logger.info(
                    "      - Skipping patch over templated section: %s",
                    enriched_patch)
            # If we span more than two slices then we should just skip it. Too Hard.
            elif len(local_raw_slices) > 2:
                linter_logger.info(
                    "      - Skipping patch over more than two raw slices: %s",
                    enriched_patch,
                )
            # If it's an insertion (i.e. the string in the pre-fix template is '') then we
            # won't be able to place it, so skip.
            elif not enriched_patch.templated_str:
                linter_logger.info(
                    "      - Skipping insertion patch in templated section: %s",
                    enriched_patch,
                )
            # If the string from the templated version isn't in the source, then we can't fix it.
            elif enriched_patch.templated_str not in enriched_patch.source_str:
                linter_logger.info(
                    "      - Skipping edit patch on templated content: %s",
                    enriched_patch,
                )
            else:
                # Identify all the places the string appears in the source content.
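                # NB: findall is assumed here to yield the start index of each
                # occurrence of its first argument within its second argument.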
                positions = list(
                    findall(enriched_patch.templated_str,
                            enriched_patch.source_str))
                if len(positions) != 1:
                    linter_logger.debug(
                        "        - Skipping edit patch on non-unique templated content: %s",
                        enriched_patch,
                    )
                    continue
                # We have a single occurrence of the thing we want to patch.
                # This means we can use its position to place our patch.
                new_source_slice = slice(
                    enriched_patch.source_slice.start + positions[0],
                    enriched_patch.source_slice.start + positions[0] +
                    len(enriched_patch.templated_str),
                )
                enriched_patch = EnrichedFixPatch(
                    source_slice=new_source_slice,
                    templated_slice=enriched_patch.templated_slice,
                    patch_type=enriched_patch.patch_type,
                    fixed_raw=enriched_patch.fixed_raw,
                    templated_str=enriched_patch.templated_str,
                    source_str=enriched_patch.source_str,
                )
                linter_logger.debug(
                    "      * Keeping Tricky Case. Positions: %s, New Slice: %s, Patch: %s",
                    positions,
                    new_source_slice,
                    enriched_patch,
                )
                filtered_source_patches.append(enriched_patch)
                dedupe_buffer.append(enriched_patch.dedupe_tuple())
                continue

        # Sort the patches before building up the file.
        filtered_source_patches = sorted(filtered_source_patches,
                                         key=lambda x: x.source_slice.start)
        # We now slice up the file using the patches and any source only slices.
        # This gives us regions to apply changes to.
        slice_buff = []
        source_idx = 0
        for patch in filtered_source_patches:
            # Are there templated slices at or before the start of this patch?
            while (source_only_slices and source_only_slices[0].source_idx <
                   patch.source_slice.start):
                next_so_slice = source_only_slices.pop(0).source_slice()
                # Add a pre-slice before the next templated slice if needed.
                if next_so_slice.start > source_idx:
                    slice_buff.append(slice(source_idx, next_so_slice.start))
                # Add the templated slice.
                slice_buff.append(next_so_slice)
                source_idx = next_so_slice.stop

            # Is there a gap between current position and this patch?
            if patch.source_slice.start > source_idx:
                # Add a slice up to this patch.
                slice_buff.append(slice(source_idx, patch.source_slice.start))

            # Is this patch covering an area we've already covered?
            if patch.source_slice.start < source_idx:
                linter_logger.info(
                    "Skipping overlapping patch at Index %s, Patch: %s",
                    source_idx,
                    patch,
                )
                # Ignore the patch for now...
                continue

            # Add this patch.
            slice_buff.append(patch.source_slice)
            source_idx = patch.source_slice.stop
        # Add a tail slice.
        if source_idx < len(self.templated_file.source_str):
            slice_buff.append(
                slice(source_idx, len(self.templated_file.source_str)))

        linter_logger.debug("Final slice buffer: %s", slice_buff)

        # Iterate through the patches, building up the new string.
        str_buff = ""
        for source_slice in slice_buff:
            # Is it one in the patch buffer?
            for patch in filtered_source_patches:
                if patch.source_slice == source_slice:
                    # Use the patched version
                    linter_logger.debug(
                        "%-30s    %s    %r > %r",
                        "Appending {} Patch:".format(patch.patch_type),
                        patch.source_slice,
                        patch.source_str,
                        patch.fixed_raw,
                    )
                    str_buff += patch.fixed_raw
                    break
            else:
                # Use the raw string
                linter_logger.debug(
                    "Appending Raw:                    %s     %r",
                    source_slice,
                    self.templated_file.source_str[source_slice],
                )
                str_buff += self.templated_file.source_str[source_slice]

        bencher("fix_string: Fixing loop done")
        # The success metric here is whether anything ACTUALLY changed.
        return str_buff, str_buff != original_source
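
The assembly above boils down to: sort the patches by source position, carve
the source string into a buffer of slices (unpatched gaps plus patched
regions), then concatenate, taking the fixed text for patched slices and the
raw source otherwise. A self-contained sketch of that idea, with a
hypothetical Patch tuple standing in for EnrichedFixPatch:

from collections import namedtuple

# Hypothetical stand-in for EnrichedFixPatch: a source slice and its
# replacement text.
Patch = namedtuple("Patch", ["source_slice", "fixed_raw"])

def apply_patches(source_str, patches):
    """Rebuild a string from sorted, non-overlapping patches."""
    patches = sorted(patches, key=lambda p: p.source_slice.start)
    # Carve the file into slices: unpatched gaps plus patched regions.
    slice_buff, source_idx = [], 0
    for patch in patches:
        if patch.source_slice.start > source_idx:
            slice_buff.append(slice(source_idx, patch.source_slice.start))
        slice_buff.append(patch.source_slice)
        source_idx = patch.source_slice.stop
    if source_idx < len(source_str):
        slice_buff.append(slice(source_idx, len(source_str)))
    # Reassemble: patched slices take the fixed text, the rest stays raw.
    str_buff = ""
    for sl in slice_buff:
        patch = next((p for p in patches if p.source_slice == sl), None)
        str_buff += patch.fixed_raw if patch else source_str[sl]
    return str_buff, str_buff != source_str

fixed, changed = apply_patches(
    "select col from tbl",
    [Patch(slice(0, 6), "SELECT"), Patch(slice(11, 15), "FROM")])
print(fixed, changed)  # SELECT col FROM tbl True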