def test_prefix_in_filename(self):
    test_path = '/a/b/common.txt'
    self.assertEqual(test_path,
                     trim_path_prefixes(test_path, ['/a/b/c']))
    self.assertEqual(test_path,
                     trim_path_prefixes(test_path, ['/a/b/common']))
    self.assertEqual('common.txt',
                     trim_path_prefixes(test_path, ['/a/b/']))
def trim_path_prefixes(self, path_prefixes=None):
    """ Removes the longest matching leading path from the file paths. """
    self.__files = {
        i: util.trim_path_prefixes(file_path, path_prefixes)
        for i, file_path in self.__files.items()
    }

    self.__main['location']['file'] = \
        util.trim_path_prefixes(self.__main['location']['file'],
                                path_prefixes)
def test_trim_path_prefix_store(self):
    """Trim the path prefix from the stored reports.

    The source file paths are converted to absolute paths with the
    temporary test directory; the test trims that temporary test
    directory from the source file paths during storage.
    """
    report_file = os.path.join(self._temp_workspace, "divide_zero.plist")

    report_content = {}
    with open(report_file, mode="rb") as rf:
        report_content = plistlib.load(rf)

    trimmed_paths = [
        util.trim_path_prefixes(path, [self._temp_workspace])
        for path in report_content["files"]
    ]

    run_name = "store_test"
    store_cmd = [
        env.codechecker_cmd(), "store", self._temp_workspace,
        "--name", run_name,
        "--url", env.parts_to_url(self._codechecker_cfg),
        "--trim-path-prefix", self._temp_workspace,
        "--verbose", "debug",
    ]

    ret, _, _ = _call_cmd(store_cmd)
    self.assertEqual(ret, 0, "Plist file could not be stored.")

    query_cmd = [
        env.codechecker_cmd(), "cmd", "results", run_name,
        # Use the 'Default' product.
        "--url", env.parts_to_url(self._codechecker_cfg),
        "-o", "json",
    ]

    ret, out, _ = _call_cmd(query_cmd)
    self.assertEqual(ret, 0, "Could not read from server.")

    reports = json.loads(out)
    print(json.dumps(reports, indent=2))

    self.assertEqual(len(reports), 4)
    for report in reports:
        self.assertIn(report["checkedFile"], trimmed_paths)
def __store_source_files(
    self,
    source_root: str,
    filename_to_hash: Dict[str, str]
) -> Dict[str, int]:
    """ Storing file contents from plist. """
    file_path_to_id = {}

    for file_name, file_hash in filename_to_hash.items():
        source_file_name = os.path.join(source_root,
                                        file_name.strip("/"))
        source_file_name = os.path.realpath(source_file_name)
        LOG.debug("Storing source file: %s", source_file_name)
        trimmed_file_path = util.trim_path_prefixes(
            file_name, self.__trim_path_prefixes)

        if not os.path.isfile(source_file_name):
            # The file was not in the ZIP file, because we already
            # have the content. Let's check if we already have a file
            # record in the database or we need to add one.
            LOG.debug('%s not found or already stored.',
                      trimmed_file_path)
            with DBSession(self.__Session) as session:
                fid = add_file_record(session, trimmed_file_path,
                                      file_hash)

            if not fid:
                LOG.error("File ID for %s is not found in the DB with "
                          "content hash %s. Missing from ZIP?",
                          source_file_name, file_hash)

            file_path_to_id[trimmed_file_path] = fid
            LOG.debug("%d fileid found", fid)
            continue

        with DBSession(self.__Session) as session:
            self.__add_file_content(session, source_file_name,
                                    file_hash)

            file_path_to_id[trimmed_file_path] = add_file_record(
                session, trimmed_file_path, file_hash)

    return file_path_to_id
def write(self, file_report_map, output=sys.stdout):
    """ Format an already parsed plist report file to a more
    human-readable form and write it to the output.

    Report statistics are collected while the bugs are written out.
    """
    severity_stats = defaultdict(int)
    file_stats = defaultdict(int)
    report_count = defaultdict(int)

    for file_path in sorted(file_report_map,
                            key=lambda key: len(file_report_map[key])):

        non_suppressed = 0
        sorted_reports = sorted(file_report_map[file_path],
                                key=lambda r: r.main['location']['line'])

        for report in sorted_reports:
            path_hash = get_report_path_hash(report, report.files)
            if path_hash in self._processed_path_hashes:
                LOG.debug("Not showing report because it is a "
                          "deduplication of an already processed "
                          "report!")
                LOG.debug("Path hash: %s", path_hash)
                LOG.debug(report)
                continue

            self._processed_path_hashes.add(path_hash)

            events = [i for i in report.bug_path
                      if i.get('kind') == 'event']
            f_path = report.files[events[-1]['location']['file']]
            if self.skiplist_handler and \
                    self.skiplist_handler.should_skip(f_path):
                LOG.debug("Skipped report in '%s'", f_path)
                LOG.debug(report)
                continue

            last_report_event = report.bug_path[-1]
            source_file = \
                report.files[last_report_event['location']['file']]
            trimmed_source_file = \
                util.trim_path_prefixes(source_file,
                                        self._trim_path_prefixes)

            report_line = last_report_event['location']['line']
            report_hash = \
                report.main['issue_hash_content_of_line_in_context']
            checker_name = report.main['check_name']

            if skip_report(report_hash, source_file, report_line,
                           checker_name, self.src_comment_handler):
                continue

            file_stats[f_path] += 1
            severity = self.__severity_map.get(checker_name)
            severity_stats[severity] += 1
            report_count["report_count"] += 1

            output.write(self.__format_bug_event(checker_name,
                                                 severity,
                                                 last_report_event,
                                                 trimmed_source_file))
            output.write('\n')
            output.write(self.__format_location(last_report_event,
                                                source_file))
            output.write('\n')

            if self.print_steps:
                output.write(' Report hash: ' + report_hash + '\n')

                # Print out macros.
                macros = report.macro_expansions
                if macros:
                    output.write(' Macro expansions:\n')

                    index_format = ' %%%dd, ' % \
                        int(math.floor(math.log10(len(macros))) + 1)

                    for index, macro in enumerate(macros):
                        output.write(index_format % (index + 1))
                        source = report.files[
                            macro['location']['file']]
                        output.write(self.__format_macro_expansion(
                            macro, source))
                        output.write('\n')

                # Print out notes.
                notes = report.notes
                if notes:
                    output.write(' Notes:\n')

                    index_format = ' %%%dd, ' % \
                        int(math.floor(math.log10(len(notes))) + 1)

                    for index, note in enumerate(notes):
                        output.write(index_format % (index + 1))
                        source_file = report.files[
                            note['location']['file']]
                        output.write(self.__format_bug_note(
                            note, source_file))
                        output.write('\n')

                output.write(' Steps:\n')

                index_format = ' %%%dd, ' % \
                    int(math.floor(math.log10(len(events))) + 1)

                for index, event in enumerate(events):
                    output.write(index_format % (index + 1))
                    source_file = \
                        report.files[event['location']['file']]
                    output.write(
                        self.__format_bug_event(None, None, event,
                                                trimmed_source_file))
                    output.write('\n')
            output.write('\n')

            non_suppressed += 1

        base_file = os.path.basename(file_path)
        if non_suppressed == 0:
            output.write('Found no defects in %s\n' % base_file)
        else:
            output.write('Found %d defect(s) in %s\n\n' %
                         (non_suppressed, base_file))

    return {"severity": severity_stats,
            "files": file_stats,
            "reports": report_count}
def test_prefix_in_dir_name(self):
    test_path = '/a/b/common/foo.txt'
    self.assertEqual(test_path,
                     trim_path_prefixes(test_path, ['/a/b/c']))
    self.assertEqual('foo.txt',
                     trim_path_prefixes(test_path, ['/a/b/common']))
def trim_path_prefixes(self, path_prefixes=None):
    """ Removes the longest matching leading path from the file paths. """
    self.__files = [util.trim_path_prefixes(file_path, path_prefixes)
                    for file_path in self.__files]
def __process_report_file(
    self,
    report_file_path: str,
    session: DBSession,
    source_root: str,
    run_id: int,
    file_path_to_id: Dict[str, int],
    run_history_time: datetime,
    skip_handler: skiplist_handler.SkipListHandler,
    hash_map_reports: Dict[str, List[Any]]
) -> bool:
    """
    Process and save reports from the given report file to the database.
    """
    try:
        files, reports = plist_parser.parse_plist_file(report_file_path)
    except Exception as ex:
        LOG.warning('Parsing the plist failed: %s', str(ex))
        return False

    if not reports:
        return True

    trimmed_files = {}
    file_ids = {}
    missing_ids_for_files = []

    for k, v in files.items():
        trimmed_files[k] = \
            util.trim_path_prefixes(v, self.__trim_path_prefixes)

    for file_name in trimmed_files.values():
        file_id = file_path_to_id.get(file_name, -1)
        if file_id == -1:
            missing_ids_for_files.append(file_name)
            continue

        file_ids[file_name] = file_id

    if missing_ids_for_files:
        LOG.warning("Failed to get file path id for '%s'!",
                    ' '.join(missing_ids_for_files))
        return False

    def set_review_status(report: ReportType):
        """
        Set review status for the given report if there is any
        source code comment.
        """
        checker_name = report.main['check_name']
        last_report_event = report.bug_path[-1]

        # The original file path is needed here, not the trimmed one,
        # because the source files are extracted under the original
        # file path.
        file_name = files[last_report_event['location']['file']]

        source_file_name = os.path.realpath(
            os.path.join(source_root, file_name.strip("/")))

        # Check and store source code comments.
        if not os.path.isfile(source_file_name):
            return

        report_line = last_report_event['location']['line']
        source_file = os.path.basename(file_name)

        src_comment_data = parse_codechecker_review_comment(
            source_file_name, report_line, checker_name)

        if len(src_comment_data) == 1:
            status = src_comment_data[0]['status']
            rw_status = ttypes.ReviewStatus.FALSE_POSITIVE
            if status == 'confirmed':
                rw_status = ttypes.ReviewStatus.CONFIRMED
            elif status == 'intentional':
                rw_status = ttypes.ReviewStatus.INTENTIONAL

            self.__report_server._setReviewStatus(
                session, report.report_hash, rw_status,
                src_comment_data[0]['message'], run_history_time)
        elif len(src_comment_data) > 1:
            LOG.warning(
                "Multiple source code comments can be found "
                "for '%s' checker in '%s' at line %s. "
                "This bug will not be suppressed!",
                checker_name, source_file, report_line)

            self.__wrong_src_code_comments.append(
                f"{source_file}|{report_line}|{checker_name}")

    root_dir_path = os.path.dirname(report_file_path)
    mip = self.__mips[root_dir_path]
    analysis_info = self.__analysis_info.get(root_dir_path)

    for report in reports:
        self.__all_report_checkers.add(report.check_name)

        if skip_handler.should_skip(report.file_path):
            continue

        report.trim_path_prefixes(self.__trim_path_prefixes)

        report_path_hash = get_report_path_hash(report)
        if report_path_hash in self.__already_added_report_hashes:
            LOG.debug('Not storing report. Already added: %s', report)
            continue

        LOG.debug("Storing report to the database...")

        bug_id = report.report_hash

        detection_status = 'new'
        detected_at = run_history_time

        if bug_id in hash_map_reports:
            old_report = hash_map_reports[bug_id][0]
            old_status = old_report.detection_status
            detection_status = 'reopened' \
                if old_status == 'resolved' else 'unresolved'
            detected_at = old_report.detected_at

        analyzer_name = get_analyzer_name(
            report.check_name, mip.checker_to_analyzer, report.metadata)

        path_events = collect_paths_events(report, file_ids,
                                           trimmed_files)

        report_id = self.__add_report(
            session, run_id, file_ids[report.file_path], report.main,
            path_events, detection_status, detected_at, analysis_info,
            analyzer_name)

        self.__new_report_hashes.add(bug_id)
        self.__already_added_report_hashes.add(report_path_hash)

        set_review_status(report)

        LOG.debug("Storing report done. ID=%d", report_id)

    return True
def test_file_path(self):
    test_path = '/a/b/c/foo.txt'
    self.assertEqual('foo.txt',
                     trim_path_prefixes(test_path, ['/a/b/c']))
def test_longest_matches(self):
    test_path = '/a/b/c'
    self.assertEqual('b/c', trim_path_prefixes(test_path, ['/a']))
    self.assertEqual('c', trim_path_prefixes(test_path, ['/a', '/a/b']))
def test_only_root_matches(self):
    test_path = '/a/b/c'
    self.assertEqual(test_path, trim_path_prefixes(test_path, ['/']))
    self.assertEqual(test_path, trim_path_prefixes(test_path, ['/x']))
def test_no_prefix(self):
    test_path = '/a/b/c'
    self.assertEqual(test_path, trim_path_prefixes(test_path, None))
    self.assertEqual(test_path, trim_path_prefixes(test_path, []))
    self.assertEqual(test_path, trim_path_prefixes(test_path, ['x']))
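# The unit tests above pin down the expected behavior of trim_path_prefixes:
# prefixes match only on whole path components, the longest matching prefix
# wins, and a bare '/' or a non-matching prefix leaves the path untouched.
# Below is a minimal sketch of an implementation that satisfies these tests;
# the name and the details are illustrative only and may differ from the
# real util.trim_path_prefixes.
def trim_path_prefixes_sketch(path, prefixes):
    """Return `path` with the longest matching leading prefix removed."""
    if not prefixes:
        return path

    longest = None
    for prefix in prefixes:
        # Force a trailing separator so '/a/b/c' cannot match
        # '/a/b/common.txt' on a partial path component.
        if not prefix.endswith('/'):
            prefix += '/'

        if path.startswith(prefix) and \
                (longest is None or len(prefix) > len(longest)):
            longest = prefix

    # No match, or only the root matched: keep the original path.
    if longest is None or longest == '/':
        return path

    return path[len(longest):]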
def test_trim_path_prefix_store(self):
    """Trim the path prefix from the stored reports.

    The source file paths are converted to absolute paths with the
    temporary test directory; the test trims that temporary test
    directory from the source file paths during storage.
    """
    report_file = os.path.join(self.test_proj_dir, "divide_zero.plist")

    report_content = {}
    with open(report_file, mode="rb") as rf:
        report_content = plistlib.load(rf)

    trimmed_paths = [
        util.trim_path_prefixes(path, [self.test_proj_dir])
        for path in report_content["files"]
    ]

    run_name = "store_test"
    store_cmd = [
        env.codechecker_cmd(), "store", self.test_proj_dir,
        "--name", run_name,
        "--url", env.parts_to_url(self.codechecker_cfg),
        "--trim-path-prefix", self.test_proj_dir,
        "--verbose", "debug",
    ]

    try:
        out = subprocess.check_output(
            store_cmd, encoding="utf-8", errors="ignore")
        print(out)
    except subprocess.CalledProcessError as cerr:
        print(cerr.stdout)
        print(cerr.stderr)
        raise

    query_cmd = [
        env.codechecker_cmd(), "cmd", "results", run_name,
        # Use the 'Default' product.
        "--url", env.parts_to_url(self.codechecker_cfg),
        "-o", "json",
    ]

    out = subprocess.check_output(
        query_cmd, encoding="utf-8", errors="ignore")

    reports = json.loads(out)
    print(json.dumps(reports, indent=2))

    self.assertEqual(len(reports), 4)
    for report in reports:
        self.assertIn(report["checkedFile"], trimmed_paths)
def trim_path_prefixes_handler(source_file):
    """
    Callback that wraps util.trim_path_prefixes so that plist_to_html
    does not need to depend on this module.
    """
    return util.trim_path_prefixes(source_file, trim_path_prefixes)
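# A minimal sketch of the decoupling the handler above enables: the consumer
# (plist_to_html-style code) only needs a plain str -> str callable, so it
# never has to import this module or util directly. 'render_report' and its
# signature are hypothetical stand-ins, not the real plist_to_html API.
from typing import Callable, Optional

def render_report(source_file: str,
                  trim_handler: Optional[Callable[[str], str]] = None) -> str:
    # The renderer only applies the callable it was given; it knows
    # nothing about prefix lists or how trimming is implemented.
    display_path = trim_handler(source_file) if trim_handler else source_file
    return f"<h1>Report for {display_path}</h1>"

# Example call site; the real one would pass trim_path_prefixes_handler.
print(render_report("/tmp/ws/src/a.cpp", lambda p: p.replace("/tmp/ws/", "")))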
def write(self, file_report_map, output=sys.stdout):
    """ Format an already parsed plist report file to a more
    human-readable form and write it to the output.

    Report statistics are collected while the bugs are written out.
    """
    severity_stats = defaultdict(int)
    file_stats = defaultdict(int)
    report_count = defaultdict(int)

    for file_path in sorted(file_report_map,
                            key=lambda key: len(file_report_map[key])):

        non_suppressed = 0
        sorted_reports = sorted(file_report_map[file_path],
                                key=lambda r: r.main['location']['line'])

        for report in sorted_reports:
            path_hash = get_report_path_hash(report, report.files)
            if path_hash in self._processed_path_hashes:
                LOG.debug("Not showing report because it is a "
                          "deduplication of an already processed "
                          "report!")
                LOG.debug("Path hash: %s", path_hash)
                LOG.debug(report)
                continue

            self._processed_path_hashes.add(path_hash)

            events = [i for i in report.bug_path
                      if i.get('kind') == 'event']
            f_path = report.files[events[-1]['location']['file']]
            if self.skiplist_handler and \
                    self.skiplist_handler.should_skip(f_path):
                LOG.debug("Skipped report in '%s'", f_path)
                LOG.debug(report)
                continue

            last_report_event = report.bug_path[-1]
            source_file = \
                report.files[last_report_event['location']['file']]
            trimmed_source_file = \
                util.trim_path_prefixes(source_file,
                                        self._trim_path_prefixes)

            report_line = last_report_event['location']['line']
            report_hash = \
                report.main['issue_hash_content_of_line_in_context']
            checker_name = report.main['check_name']

            if skip_report(report_hash, source_file, report_line,
                           checker_name, self.src_comment_handler):
                continue

            file_stats[f_path] += 1
            severity = self.__severity_map.get(checker_name)
            severity_stats[severity] += 1
            report_count["report_count"] += 1

            output.write(self.__format_bug_event(checker_name,
                                                 severity,
                                                 last_report_event,
                                                 trimmed_source_file))
            output.write('\n')
            output.write(self.__format_location(last_report_event,
                                                source_file))
            output.write('\n')

            if self.print_steps:
                output.write(' Report hash: ' + report_hash + '\n')

                # Print out macros.
                macros = report.macro_expansions
                if macros:
                    output.write(' Macro expansions:\n')

                    index_format = ' %%%dd, ' % \
                        int(math.floor(math.log10(len(macros))) + 1)

                    for index, macro in enumerate(macros):
                        output.write(index_format % (index + 1))
                        source = report.files[
                            macro['location']['file']]
                        output.write(self.__format_macro_expansion(
                            macro, source))
                        output.write('\n')

                # Print out notes.
                notes = report.notes
                if notes:
                    output.write(' Notes:\n')

                    index_format = ' %%%dd, ' % \
                        int(math.floor(math.log10(len(notes))) + 1)

                    for index, note in enumerate(notes):
                        output.write(index_format % (index + 1))
                        source_file = report.files[
                            note['location']['file']]
                        output.write(self.__format_bug_note(
                            note, source_file))
                        output.write('\n')

                output.write(' Steps:\n')

                index_format = ' %%%dd, ' % \
                    int(math.floor(math.log10(len(events))) + 1)

                for index, event in enumerate(events):
                    output.write(index_format % (index + 1))
                    source_file = \
                        report.files[event['location']['file']]
                    trimmed_source_file = \
                        util.trim_path_prefixes(source_file,
                                                self._trim_path_prefixes)
                    output.write(
                        self.__format_bug_event(None, None, event,
                                                trimmed_source_file))
                    output.write('\n')
            output.write('\n')

            non_suppressed += 1

        base_file = os.path.basename(file_path)
        if non_suppressed == 0:
            output.write('Found no defects in %s\n' % base_file)
        else:
            output.write('Found %d defect(s) in %s\n\n' %
                         (non_suppressed, base_file))

    return {"severity": severity_stats,
            "files": file_stats,
            "reports": report_count}
def test_trim_path_prefix_store(self):
    """Trim the path prefix from the stored reports.

    The source file paths are converted to absolute paths with the
    temporary test directory; the test trims that temporary test
    directory from the source file paths during storage.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    report_dir = os.path.join(test_dir, "test_proj")

    codechecker_cfg = self._test_cfg["codechecker_cfg"]

    # Copy report files to a temporary directory not to modify the
    # files in the repository.
    # Report files will be overwritten during the tests.
    temp_workspace = os.path.join(codechecker_cfg["workspace"],
                                  "test_proj")
    shutil.copytree(report_dir, temp_workspace)

    report_file = os.path.join(temp_workspace, "divide_zero.plist")
    # Convert file paths to absolute in the report.
    plist_test.prefix_file_path(report_file, temp_workspace)

    report_content = {}
    with open(report_file, mode="rb") as rf:
        report_content = plistlib.load(rf)

    trimmed_paths = [
        util.trim_path_prefixes(path, [temp_workspace])
        for path in report_content["files"]
    ]

    run_name = "store_test"
    store_cmd = [
        env.codechecker_cmd(), "store", temp_workspace,
        "--name", run_name,
        "--url", env.parts_to_url(codechecker_cfg),
        "--trim-path-prefix", temp_workspace,
        "--verbose", "debug",
    ]

    try:
        out = subprocess.check_output(
            store_cmd, encoding="utf-8", errors="ignore")
        print(out)
    except subprocess.CalledProcessError as cerr:
        print(cerr.stdout)
        print(cerr.stderr)
        raise

    query_cmd = [
        env.codechecker_cmd(), "cmd", "results", run_name,
        # Use the 'Default' product.
        "--url", env.parts_to_url(codechecker_cfg),
        "-o", "json",
    ]

    out = subprocess.check_output(
        query_cmd, encoding="utf-8", errors="ignore")

    reports = json.loads(out)
    print(json.dumps(reports, indent=2))

    self.assertEqual(len(reports), 4)
    for report in reports:
        self.assertIn(report["checkedFile"], trimmed_paths)