def load_contents(cls, src_path):
    """
    Read the file at `src_path` and return its contents as text.

    The path is first passed through `GitPathTool.relative_path`. The file
    is read with `openpy`; if that raises `SyntaxError` or
    `UnicodeDecodeError` the file is re-read as raw bytes. Bytes are
    decoded with the encoding guessed by `chardet`, and as a last resort
    as UTF-8 with undecodable bytes replaced (a warning is printed first).

    Args:
        src_path: path of the source file to load.

    Returns:
        The contents of the file as `str`.
    """
    rel_path = GitPathTool.relative_path(src_path)
    try:
        with openpy(rel_path) as src_file:
            contents = src_file.read()
    except (SyntaxError, UnicodeDecodeError):
        # this tool was originally written with python in mind.
        # for processing non python files encoded in anything other than
        # ascii or utf-8 that code won't work
        with open(rel_path, "rb") as src_file:
            contents = src_file.read()

    if isinstance(contents, bytes):
        # When chardet cannot guess, the "encoding" key is present but set to
        # None, so a `.get()` default never applies; fall back explicitly.
        encoding = chardet.detect(contents).get("encoding") or "utf-8"
        try:
            contents = contents.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # Wrong guess, or a codec name this interpreter does not know.
            # Leave `contents` as bytes so the lossy fallback below runs.
            pass

    if isinstance(contents, bytes):
        # We failed to decode the file.
        # if this is happening a lot I should just bite the bullet
        # and write a parameter to let people list their file encodings
        print(
            "Warning: I was not able to decode your src file. "
            "I can continue but code snippets in the final report may look wrong"
        )
        contents = contents.decode("utf-8", "replace")
    return contents
def _get_src_path_line_nodes(self, xml_document, src_path):
    """
    Collect the `<line>` nodes recorded for `src_path` in `xml_document`.

    `<class>` elements are matched on their `filename` attribute: first
    against the absolute form of `src_path`, and only if nothing matches,
    against its relative form.

    Returns a list of line nodes, or None when no `<class>` element
    matches either form of the path.
    """
    # Relative form, e.g. with cwd `/home/user/work/diff-cover/diff_cover`
    # and src_path `diff_cover/violations_reporter.py`
    # this is `violations_reporter.py`
    rel_path = GitPathTool.relative_path(src_path)

    # Absolute form, e.g. with the same cwd and src_path
    # `other_package/some_file.py` this is
    # `/home/user/work/diff-cover/other_package/some_file.py`
    abs_path = GitPathTool.absolute_path(src_path)

    all_classes = list(xml_document.findall(".//class") or [])

    matched = [node for node in all_classes if node.get('filename') == abs_path]
    if not matched:
        matched = [node for node in all_classes if node.get('filename') == rel_path]
    if not matched:
        return None

    line_nodes = []
    for node in matched:
        line_nodes.extend(node.findall('./lines/line'))
    return line_nodes
def parse_reports(self, reports):
    """
    Parse XML reports made of `<BugInstance>` elements into violations.

    Args:
        reports: iterable of reports. Each report is passed through
            `"".join()` before being parsed as XML, so it may be a string
            or a list of string chunks.

    Returns:
        A `defaultdict(list)` mapping a file name (the bug's `sourcepath`
        attribute passed through `GitPathTool.relative_path`) to a list of
        `Violation(line_number, message)`, one for every line in the
        inclusive `start`..`end` range of the bug's `<SourceLine>`.
        Bugs without a `<SourceLine>` child, or without both `start` and
        `end` on it, are skipped.
    """
    violations_dict = defaultdict(list)
    for report in reports:
        xml_document = etree.fromstring("".join(report))
        for bug in xml_document.findall(".//BugInstance"):
            line = bug.find('SourceLine')
            # A bug with no usable line range cannot be attributed to lines
            if (
                line is None
                or line.get('start') is None
                or line.get('end') is None
            ):
                continue

            # <ShortMessage> may be absent (e.g. a report written without
            # human-readable messages); do not crash on it.
            message_node = bug.find('ShortMessage')
            short_message = message_node.text if message_node is not None else ""

            # Everything below is identical for each line of the range,
            # so compute it once instead of once per line number.
            error_str = "{}: {}".format(bug.get('category'), short_message)
            filename = GitPathTool.relative_path(line.get('sourcepath'))
            start = int(line.get('start'))
            end = int(line.get('end'))

            violations_dict[filename].extend(
                Violation(line_number, error_str)
                for line_number in range(start, end + 1)
            )

    return violations_dict
def _get_src_path_line_nodes(self, xml_document, src_path):
    """
    Look up the `<line>` nodes that `xml_document` holds for `src_path`.

    A `<class>` element belongs to `src_path` when its `filename`
    attribute equals the absolute path; the relative path is only tried
    when the absolute path matches nothing.

    Returns the matching line nodes as a list, or None if the file does
    not appear in `xml_document`.
    """
    # If cwd is `/home/user/work/diff-cover/diff_cover`
    # and src_path is `diff_cover/violations_reporter.py`
    # the relative candidate is `violations_reporter.py`
    relative_candidate = GitPathTool.relative_path(src_path)

    # If cwd is `/home/user/work/diff-cover/diff_cover`
    # and src_path is `other_package/some_file.py`
    # the absolute candidate is
    # `/home/user/work/diff-cover/other_package/some_file.py`
    absolute_candidate = GitPathTool.absolute_path(src_path)

    class_nodes = list(xml_document.findall(".//class") or [])

    def with_filename(wanted):
        return [node for node in class_nodes if node.get('filename') == wanted]

    # `or` keeps the relative lookup lazy: it runs only on an empty result
    selected = with_filename(absolute_candidate) or with_filename(relative_candidate)
    if not selected:
        return None

    return [
        line_node
        for class_node in selected
        for line_node in class_node.findall('./lines/line')
    ]
def _get_src_path_line_nodes(self, xml_document, src_path):
    """
    Returns a list of nodes containing line information for `src_path`
    in `xml_document`.

    `<class>` elements are matched on their `filename` attribute. A match
    on the absolute path takes precedence over a match on the relative
    path.

    If file is not present in `xml_document`, return None
    """
    # Remove git_root from src_path for searching the correct filename
    # If cwd is `/home/user/work/diff-cover/diff_cover`
    # and src_path is `diff_cover/violations_reporter.py`
    # search for `violations_reporter.py`
    src_rel_path = GitPathTool.relative_path(src_path)

    # If cwd is `/home/user/work/diff-cover/diff_cover`
    # and src_path is `other_package/some_file.py`
    # search for `/home/user/work/diff-cover/other_package/some_file.py`
    src_abs_path = GitPathTool.absolute_path(src_path)

    # Compare the attribute in Python instead of formatting the path into
    # an XPath predicate: a path containing a quote character would
    # otherwise produce an invalid expression. This also walks the tree
    # once rather than once per lookup.
    classes = xml_document.findall(".//class")
    for candidate in (src_abs_path, src_rel_path):
        matching = [
            clazz for clazz in classes if clazz.get('filename') == candidate
        ]
        if matching:
            return [
                line
                for clazz in matching
                for line in clazz.findall('./lines/line')
            ]

    return None
def load_snippets(cls, src_path, violation_lines):
    """
    Build the `Snippet` objects that show the violations in `src_path`.

    `violation_lines` is the list of violating line numbers; it is used to
    compute the snippet ranges and is handed to every `Snippet`.

    The file is expected to be text. Errors raised while opening or
    reading it are not caught here.

    Returns a list of `Snippet` instances, ordered by their sorted ranges.
    """
    # Read the whole file
    with openpy(GitPathTool.relative_path(src_path)) as src_file:
        text = src_file.read()

    # Make sure we work on text; undecodable bytes are replaced
    if isinstance(text, bytes):
        text = text.decode("utf-8", "replace")

    # Work out which line ranges each snippet covers
    line_count = len(text.split("\n"))
    ranges = cls._snippet_ranges(line_count, violation_lines)

    # Tokenize the source, then bucket the tokens per snippet range
    tokens_by_range = cls._group_tokens(cls._parse_src(text, src_path), ranges)

    snippets = []
    for (first_line, _), range_tokens in sorted(tokens_by_range.items()):
        snippets.append(
            Snippet(range_tokens, src_path, first_line, violation_lines)
        )
    return snippets
def load_snippets(cls, src_path, violation_lines):
    """
    Create the list of `Snippet` instances for the file at `src_path`.

    `violation_lines` lists the line numbers carrying violations. It
    drives the choice of snippet ranges and is forwarded to each `Snippet`.

    The file should be a text file. Failures while opening or reading it
    propagate to the caller.
    """
    # Load the file contents
    source_file_path = GitPathTool.relative_path(src_path)
    with openpy(source_file_path) as handle:
        source_text = handle.read()

    # Binary content is decoded, replacing anything that is not valid UTF-8
    if isinstance(source_text, six.binary_type):
        source_text = source_text.decode('utf-8', 'replace')

    # Snippet ranges depend on the number of lines in the file
    total_lines = len(source_text.split('\n'))
    snippet_ranges = cls._snippet_ranges(total_lines, violation_lines)

    # Tokenize, then assign tokens to the snippet ranges
    token_stream = cls._parse_src(source_text, src_path)
    grouped = cls._group_tokens(token_stream, snippet_ranges)

    result = []
    for snippet_range, snippet_tokens in sorted(grouped.items()):
        start_line, _ = snippet_range
        result.append(
            Snippet(snippet_tokens, src_path, start_line, violation_lines)
        )
    return result
def parse_reports(self, reports):
    """
    Parse XML reports made of `<BugInstance>` elements into violations.

    Args:
        reports: iterable of reports. Each report is passed through
            `"".join()` before being parsed as XML, so it may be a string
            or a list of string chunks.

    Returns:
        A `defaultdict(list)` mapping a file name (the bug's `sourcepath`
        attribute passed through `GitPathTool.relative_path`) to a list of
        `Violation(line_number, message)`, one for every line in the
        inclusive `start`..`end` range of the bug's `<SourceLine>`.
        Bugs without a `<SourceLine>` child, or without both `start` and
        `end` on it, are skipped.
    """
    violations_dict = defaultdict(list)
    for report in reports:
        xml_document = cElementTree.fromstring("".join(report))
        for bug in xml_document.findall(".//BugInstance"):
            line = bug.find('SourceLine')
            # A bug with no usable line range cannot be attributed to lines
            if (
                line is None
                or line.get('start') is None
                or line.get('end') is None
            ):
                continue

            # <ShortMessage> may be absent (e.g. a report written without
            # human-readable messages); do not crash on it.
            message_node = bug.find('ShortMessage')
            short_message = message_node.text if message_node is not None else u""

            # These values do not change across the line range, so they
            # are computed once per bug rather than once per line number.
            error_str = u"{0}: {1}".format(bug.get('category'), short_message)
            filename = GitPathTool.relative_path(line.get('sourcepath'))
            start = int(line.get('start'))
            end = int(line.get('end'))

            violations_dict[filename].extend(
                Violation(line_number, error_str)
                for line_number in range(start, end + 1)
            )

    return violations_dict
def _measured_source_path_matches(self, package_name, file_name, src_path):
    """
    Tell whether `src_path` is `package_name/file_name` under a source root.

    For every root in `self._src_roots` the candidate path
    `root/package_name/file_name` is passed through
    `GitPathTool.relative_path` and compared to `src_path`; both sides go
    through `os.path.normcase` first.

    Returns:
        True if any source root yields a match, False otherwise.
    """
    # Loop-invariant: normalize the wanted path once
    wanted = os.path.normcase(src_path)

    # find src_path in any of the source roots
    for root in self._src_roots:
        candidate = GitPathTool.relative_path(
            os.path.join(root, package_name, file_name)
        )
        if os.path.normcase(candidate) == wanted:
            return True

    # Explicit falsy result instead of falling off the end with None
    return False
def test_relative_path(self):
    """A path under the cwd is returned relative to that cwd."""
    self._set_git_root(b'/home/user/work/diff-cover')
    GitPathTool.set_cwd('/home/user/work/diff-cover/diff_cover')

    actual = GitPathTool.relative_path('diff_cover/violations_reporter.py')

    # Expect relative path from diff_cover
    self.assertEqual(actual, 'violations_reporter.py')
def test_relative_path(self):
    """`relative_path` strips the part of the path leading to the cwd."""
    git_root = b"/home/user/work/diff-cover"
    working_dir = "/home/user/work/diff-cover/diff_cover"
    self._set_git_root(git_root)
    GitPathTool.set_cwd(working_dir)

    # Expect relative path from diff_cover
    self.assertEqual(
        GitPathTool.relative_path("diff_cover/violations_reporter.py"),
        "violations_reporter.py",
    )
def test_relative_path(self):
    """Check the path `relative_path` returns when cwd is `diff_cover`."""
    self._set_git_root(b'/home/user/work/diff-cover')

    # The working directory is one level below the git root
    GitPathTool.set_cwd('/home/user/work/diff-cover/diff_cover')
    result = GitPathTool.relative_path('diff_cover/violations_reporter.py')

    # Expect relative path from diff_cover
    wanted = 'violations_reporter.py'
    self.assertEqual(result, wanted)
def _get_classes(self, xml_document, src_path):
    """
    Return the `<class>` nodes of `xml_document` that describe `src_path`.

    Three lookups are tried in order, and the first non-empty result wins:

    1. a non-empty `sources/source` entry joined with the node's
       `filename` equals the absolute path;
    2. the node's `filename` equals the absolute path;
    3. the node's `filename` equals the relative path.

    All compared paths go through `self._to_unix_path`. The result is an
    empty list when none of the lookups matches.
    """
    # If cwd is `/home/user/work/diff-cover/diff_cover`
    # and src_path is `diff_cover/violations_reporter.py`
    # the relative path is `violations_reporter.py`
    rel_path = self._to_unix_path(GitPathTool.relative_path(src_path))

    # If cwd is `/home/user/work/diff-cover/diff_cover`
    # and src_path is `other_package/some_file.py`
    # the absolute path is
    # `/home/user/work/diff-cover/other_package/some_file.py`
    abs_path = self._to_unix_path(GitPathTool.absolute_path(src_path))

    # The report may list the source roots its measurements refer to;
    # entries without text are ignored
    source_roots = [
        node.text for node in xml_document.findall("sources/source") if node.text
    ]

    class_nodes = list(xml_document.findall(".//class") or [])

    def under_a_source_root(class_node):
        return any(
            self._to_unix_path(
                os.path.join(root.strip(), class_node.get("filename"))
            )
            == abs_path
            for root in source_roots
        )

    def filename_is(class_node, wanted):
        return self._to_unix_path(class_node.get("filename")) == wanted

    matches = [node for node in class_nodes if under_a_source_root(node)]
    if not matches:
        matches = [node for node in class_nodes if filename_is(node, abs_path)]
    if not matches:
        matches = [node for node in class_nodes if filename_is(node, rel_path)]
    return matches
def test_relative_path(process):
    """
    `relative_path` returns the path relative to the configured cwd.

    NOTE(review): `process` is presumably a fixture mocking the subprocess
    that reports the git root -- confirm against the fixture definition.
    """
    process.communicate.return_value = (b"/home/user/work/diff-cover", b"")
    GitPathTool.set_cwd("/home/user/work/diff-cover/diff_cover")

    result = GitPathTool.relative_path("diff_cover/violations_reporter.py")

    # Expect relative path from diff_cover
    assert result == "violations_reporter.py"
def _get_classes(self, xml_document, src_path):
    """
    Given a path and parsed xml_document provides class nodes
    with the relevant lines

    First, we look to see if xml_document contains a source
    node providing paths to search for

    If we don't have that we check each nodes filename attribute
    matches an absolute path

    Finally, if we found no nodes, we check the filename attribute
    for the relative path

    Returns a (possibly empty) list of `<class>` nodes.
    """
    # Remove git_root from src_path for searching the correct filename
    # If cwd is `/home/user/work/diff-cover/diff_cover`
    # and src_path is `diff_cover/violations_reporter.py`
    # search for `violations_reporter.py`
    src_rel_path = self._to_unix_path(GitPathTool.relative_path(src_path))

    # If cwd is `/home/user/work/diff-cover/diff_cover`
    # and src_path is `other_package/some_file.py`
    # search for `/home/user/work/diff-cover/other_package/some_file.py`
    src_abs_path = self._to_unix_path(GitPathTool.absolute_path(src_path))

    # cobertura sometimes provides the sources for the measurements
    # within it. If we have that we ought to use it.
    # An empty <source/> element has `text is None`, which would make
    # os.path.join raise TypeError, so such entries are dropped. Surrounding
    # whitespace is stripped so pretty-printed reports still match.
    sources = [
        source.text.strip()
        for source in xml_document.findall('sources/source')
        if source.text
    ]

    classes = list(xml_document.findall(".//class") or [])

    classes = (
        [
            clazz for clazz in classes
            if src_abs_path in [
                self._to_unix_path(os.path.join(source, clazz.get('filename')))
                for source in sources
            ]
        ]
        or [
            clazz for clazz in classes
            if self._to_unix_path(clazz.get('filename')) == src_abs_path
        ]
        or [
            clazz for clazz in classes
            if self._to_unix_path(clazz.get('filename')) == src_rel_path
        ]
    )
    return classes
def _get_src_path_line_nodes(xml_document, src_path):
    """
    Gather the statement `<line>` nodes recorded for `src_path`.

    A `<file>` element is selected when its `path` attribute, passed
    through `GitPathTool.relative_path`, equals `src_path`.

    Returns a list of `<line type="stmt">` nodes, or None when no
    `<file>` element matches.
    """
    matching_files = []
    for file_node in xml_document.findall(".//file"):
        if GitPathTool.relative_path(file_node.get('path')) == src_path:
            matching_files.append(file_node)

    if not matching_files:
        return None

    statement_lines = []
    for file_node in matching_files:
        statement_lines.extend(file_node.findall('./line[@type="stmt"]'))
    return statement_lines
def _get_src_path_line_nodes(xml_document, src_path):
    """
    Find the statement line nodes that `xml_document` holds for `src_path`.

    Every `<file>` element whose `path` attribute maps to `src_path` via
    `GitPathTool.relative_path` contributes its `<line type="stmt">`
    children.

    Returns None if no such `<file>` element exists, otherwise the list
    of line nodes.
    """
    def is_wanted(file_node):
        return GitPathTool.relative_path(file_node.get('path')) == src_path

    wanted_files = [
        node for node in xml_document.findall(".//file") if is_wanted(node)
    ]
    if not wanted_files:
        return None

    return [
        line_node
        for file_node in wanted_files
        for line_node in file_node.findall('./line[@type="stmt"]')
    ]
def get_src_path_line_nodes_clover(xml_document, src_path):
    """
    Collect the `stmt` and `cond` line nodes recorded for `src_path`.

    `<file>` elements are selected when `GitPathTool.relative_path` of
    their `path` attribute equals `src_path`. For each selected file the
    `stmt` lines come first, followed by its `cond` lines.

    Returns None when no `<file>` element matches, otherwise a list of
    line nodes.
    """
    selected = []
    for file_node in xml_document.findall(".//file"):
        if GitPathTool.relative_path(file_node.get("path")) == src_path:
            selected.append(file_node)

    if not selected:
        return None

    line_nodes = []
    for file_node in selected:
        line_nodes.extend(file_node.findall('./line[@type="stmt"]'))
        line_nodes.extend(file_node.findall('./line[@type="cond"]'))
    return line_nodes
def parse_reports(self, reports):
    """
    Turn XML reports of `<file>`/`<error>` elements into violations.

    Args:
        reports: iterable of reports; each one is joined with `"".join()`
            and parsed as XML.

    Returns:
        A `defaultdict(list)` keyed by file name (the `name` attribute of
        the `<file>` element passed through `GitPathTool.relative_path`).
        Each value is a list of `Violation(line, "severity: message")`,
        one per `<error>` child.
    """
    violations_dict = defaultdict(list)

    for report in reports:
        root = etree.fromstring("".join(report))

        for file_node in root.findall(".//file"):
            for error_node in file_node.findall('error'):
                line_attr = error_node.get('line')
                message = "{}: {}".format(
                    error_node.get('severity'), error_node.get('message')
                )
                found = Violation(int(line_attr), message)

                key = GitPathTool.relative_path(file_node.get('name'))
                violations_dict[key].append(found)

    return violations_dict
def parse_reports(self, reports):
    """
    Build per-file violation lists from `<file>`/`<error>` XML reports.

    Args:
        reports: iterable of reports; each one is joined with `"".join()`
            and parsed as XML.

    Returns:
        A `defaultdict(list)` whose keys are file names (`name` attribute
        of each `<file>`, passed through `GitPathTool.relative_path`) and
        whose values are lists of `Violation` objects carrying the error's
        line number and a "severity: message" string.
    """
    violations_dict = defaultdict(list)

    parsed_reports = (
        cElementTree.fromstring("".join(report)) for report in reports
    )
    for document in parsed_reports:
        for file_node in document.findall(".//file"):
            for error_node in file_node.findall('error'):
                line_number = error_node.get('line')
                severity = error_node.get('severity')
                text = error_node.get('message')
                entry = Violation(
                    int(line_number), u"{0}: {1}".format(severity, text)
                )

                relative_name = GitPathTool.relative_path(file_node.get('name'))
                violations_dict[relative_name].append(entry)

    return violations_dict
def parse_reports(self, reports):
    """
    Turn XML reports of `<file>`/`<violation>` elements into violations.

    Args:
        reports: iterable of reports; each one is joined with `"".join()`
            and parsed as XML.

    Returns:
        A `defaultdict(list)` keyed by file name: the `name` attribute of
        the `<file>` element, passed through `GitPathTool.relative_path`,
        with `os.sep` replaced by "/". Each value is a list of
        `Violation(beginline, "rule: text")`, one per `<violation>` child.
    """
    violations_dict = defaultdict(list)

    for report in reports:
        root = etree.fromstring("".join(report))

        for file_node in root.findall(".//file"):
            for violation_node in file_node.findall("violation"):
                begin_line = violation_node.get("beginline")
                description = "{}: {}".format(
                    violation_node.get("rule"), violation_node.text.strip()
                )
                found = Violation(int(begin_line), description)

                # Report keys always use forward slashes
                relative_name = GitPathTool.relative_path(file_node.get("name"))
                key = relative_name.replace(os.sep, "/")
                violations_dict[key].append(found)

    return violations_dict