def load_contents(cls, src_path):
    """
    Load the file at `src_path` and return its contents as text.

    The file is first read through `openpy`. If that raises a
    `SyntaxError` or `UnicodeDecodeError`, the file is re-read as raw
    bytes and decoded with the encoding guessed by `chardet`. If that
    decode fails as well, a warning is printed and the bytes are decoded
    as UTF-8 with undecodable bytes replaced, so this method never
    returns `bytes`.

    Errors raised while opening the file (other than the two handled
    above) are not caught here and propagate to the caller.
    """
    try:
        with openpy(GitPathTool.relative_path(src_path)) as src_file:
            contents = src_file.read()
    except (SyntaxError, UnicodeDecodeError):
        # This tool was originally written with Python sources in mind.
        # For non-Python files encoded in anything other than ASCII or
        # UTF-8 the reader above will not work, so read the raw bytes
        # and work out the encoding ourselves below.
        with open(GitPathTool.relative_path(src_path), "rb") as src_file:
            contents = src_file.read()

    if isinstance(contents, bytes):
        # chardet always returns the "encoding" key and sets it to None
        # when it cannot guess, so a `.get()` default would never be
        # used and `decode(None)` would raise TypeError. Fall back
        # explicitly instead.
        encoding = chardet.detect(contents).get("encoding") or "utf-8"
        try:
            contents = contents.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # UnicodeDecodeError: the guess was wrong for these bytes.
            # LookupError: the guessed codec name is unknown to Python.
            # Either way, leave the bytes for the lossy fallback below.
            pass

    if isinstance(contents, bytes):
        # We failed to decode the file.
        # If this happens a lot, a parameter letting people list their
        # file encodings would be the proper fix.
        print(
            "Warning: I was not able to decode your src file. "
            "I can continue but code snippets in the final report may look wrong"
        )
        contents = contents.decode("utf-8", "replace")
    return contents
def load_snippets(cls, src_path, violation_lines):
    """
    Build the `Snippet` objects that show the violations in one file.

    `src_path` names a text (not binary) source file, and
    `violation_lines` is the list of violating line numbers
    (starting at index 0).

    Returns a list of `Snippet` instances ordered by snippet range.

    Raises an `IOError` if the file could not be loaded.
    """
    # Read the whole source file in one go.
    relative_path = GitPathTool.relative_path(src_path)
    with openpy(relative_path) as handle:
        text = handle.read()

    # Raw bytes (Python < 3) are turned into unicode, replacing anything
    # that is not valid UTF-8.
    if isinstance(text, bytes):
        text = text.decode("utf-8", "replace")

    # Work out which line ranges become snippets.
    line_count = len(text.split("\n"))
    ranges = cls._snippet_ranges(line_count, violation_lines)

    # Tokenize the source, then bucket the tokens by snippet range.
    grouped = cls._group_tokens(cls._parse_src(text, src_path), ranges)

    # One snippet per range, in sorted range order.
    snippets = []
    for (first_line, _), tokens in sorted(grouped.items()):
        snippets.append(Snippet(tokens, src_path, first_line, violation_lines))
    return snippets
def load_snippets(cls, src_path, violation_lines):
    """
    Produce the snippets that display violations found in `src_path`.

    `violation_lines` lists the violating line numbers (starting at
    index 0). `src_path` must point at a text file, not a binary one.

    Returns a list of `Snippet` instances ordered by snippet range.

    Raises an `IOError` if the file could not be loaded.
    """
    # Pull in the full text of the source file.
    with openpy(GitPathTool.relative_path(src_path)) as source:
        raw = source.read()

    # On Python < 3 the read yields a byte string; convert it to unicode,
    # replacing anything that is not valid UTF-8.
    is_binary = isinstance(raw, six.binary_type)
    source_text = raw.decode('utf-8', 'replace') if is_binary else raw

    # Decide which line ranges become snippets.
    total_lines = len(source_text.split('\n'))
    wanted_ranges = cls._snippet_ranges(total_lines, violation_lines)

    # Tokenize the source and distribute the tokens over those ranges.
    tokens_by_range = cls._group_tokens(
        cls._parse_src(source_text, src_path),
        wanted_ranges,
    )

    # Emit one snippet per range, in sorted range order.
    ordered_groups = sorted(tokens_by_range.items())
    return [
        Snippet(group, src_path, range_start, violation_lines)
        for (range_start, _), group in ordered_groups
    ]