Esempio n. 1
0
    def load_contents(cls, src_path):
        """
        Read the file at `src_path` and return its contents as text.

        The file is first opened with `openpy`; if that raises
        `SyntaxError` or `UnicodeDecodeError`, the raw bytes are read
        instead and decoded with the encoding guessed by `chardet`.
        When no usable encoding can be determined, a warning is printed
        and the bytes are decoded as UTF-8 with undecodable bytes replaced.

        `src_path` is passed through `GitPathTool.relative_path` before
        the file is opened.

        Returns the decoded contents.  Errors raised while opening the
        file (other than the two handled above) propagate to the caller.
        """
        try:
            with openpy(GitPathTool.relative_path(src_path)) as src_file:
                contents = src_file.read()
        except (SyntaxError, UnicodeDecodeError):
            # this tool was originally written with python in mind.
            # for processing non python files encoded in anything other than
            # ascii or utf-8 that code won't work, so fall back to raw bytes.
            with open(GitPathTool.relative_path(src_path), "rb") as src_file:
                contents = src_file.read()

        if isinstance(contents, bytes):
            # chardet reports {"encoding": None} when it cannot guess, so a
            # `.get` default is never used; fall back explicitly instead.
            encoding = chardet.detect(contents).get("encoding") or "utf-8"
            try:
                contents = contents.decode(encoding)
            except (UnicodeDecodeError, LookupError):
                # LookupError: chardet named a codec this Python does not know.
                # We failed to decode the file.
                # if this is happening a lot I should just bite the bullet
                # and write a parameter to let people list their file encodings
                print(
                    "Warning: I was not able to decode your src file. "
                    "I can continue but code snippets in the final report may look wrong"
                )
                contents = contents.decode("utf-8", "replace")
        return contents
Esempio n. 2
0
    def load_snippets(cls, src_path, violation_lines):
        """
        Build the snippets that display the violations found in one file.

        `src_path` names the source file; it should be a text file
        (not binary).  `violation_lines` is the list of violating
        line numbers (starting at index 0).

        Returns a list of `Snippet` instances, ordered by the sorted
        snippet ranges.

        Raises an `IOError` if the file could not be loaded.
        """
        # Read the whole file in one go
        relative_path = GitPathTool.relative_path(src_path)
        with openpy(relative_path) as handle:
            text = handle.read()

        # Under Python < 3 the read may hand back bytes; make it unicode
        if isinstance(text, bytes):
            text = text.decode("utf-8", "replace")

        # Work out which line ranges become snippets
        line_count = len(text.split("\n"))
        ranges = cls._snippet_ranges(line_count, violation_lines)

        # Tokenize the source, then bucket the tokens per snippet range
        grouped = cls._group_tokens(cls._parse_src(text, src_path), ranges)

        snippets = []
        for (begin, _), tokens in sorted(grouped.items()):
            snippets.append(Snippet(tokens, src_path, begin, violation_lines))
        return snippets
Esempio n. 3
0
    def load_snippets(cls, src_path, violation_lines):
        """
        Produce the `Snippet` objects covering the violations in a file.

        `src_path` is the path of the source file, which should be a
        text file (not binary).  `violation_lines` is the list of
        violating line numbers (starting at index 0).

        Returns a list of `Snippet` instances, one per snippet range,
        in sorted range order.

        Raises an `IOError` if the file could not be loaded.
        """
        # Pull in the full text of the file
        with openpy(GitPathTool.relative_path(src_path)) as source:
            raw = source.read()

        # Python < 3 may give us a byte string; normalise to unicode
        if isinstance(raw, six.binary_type):
            text = raw.decode('utf-8', 'replace')
        else:
            text = raw

        # Decide which line ranges are turned into snippets
        total_lines = len(text.split('\n'))
        ranges = cls._snippet_ranges(total_lines, violation_lines)

        # Tokenize the source and sort the tokens into their ranges
        tokens_by_range = cls._group_tokens(
            cls._parse_src(text, src_path), ranges
        )

        result = []
        for (range_start, _), group in sorted(tokens_by_range.items()):
            result.append(
                Snippet(group, src_path, range_start, violation_lines)
            )
        return result