def parse(plist_file):
    """ Parse a plist report file.

    Returns a (files, reports) pair:
        - list of source files
        - list of reports (type Report)
    Both are empty if the plist cannot be parsed.
    """
    try:
        return plist_parser.parse_plist_file(plist_file)
    except Exception as ex:
        # NOTE: the previous version returned from a 'finally' block, which
        # silently swallows any exception raised inside this handler (and is
        # flagged by linters as B012). Return the empty results explicitly.
        traceback.print_stack()
        LOG.error('The generated plist is not valid!')
        LOG.error(ex)
        return [], []
def parse_convert_reports(input_dirs: List[str],
                          out_format: str,
                          severity_map: Dict,
                          trim_path_prefixes: List[str]) \
        -> Tuple[Union[Dict, List], int]:
    """Parse and convert the reports from the input dirs to the out_format.

    Returns a (converted_data, number_of_reports) tuple, where
    converted_data can be serialized to the out_format type of json to be
    printed out or saved on the disk.  Returns ({}, 0) if out_format is not
    one of the handled formats.
    """
    assert (out_format in [fmt for fmt in EXPORT_TYPES if fmt != 'html'])

    # Collect candidate files: direct file arguments plus the top-level
    # entries of directory arguments (non-recursive).
    input_files = set()
    for input_path in input_dirs:
        input_path = os.path.abspath(input_path)

        if os.path.isfile(input_path):
            input_files.add(input_path)
        elif os.path.isdir(input_path):
            _, _, file_names = next(os.walk(input_path), ([], [], []))
            input_paths = [os.path.join(input_path, file_name) for file_name
                           in file_names]
            input_files.update(input_paths)

    all_reports = []
    for input_file in input_files:
        if not input_file.endswith('.plist'):
            continue

        _, reports = plist_parser.parse_plist_file(input_file)
        all_reports.extend(reports)

    if trim_path_prefixes:
        for report in all_reports:
            report.trim_path_prefixes(trim_path_prefixes)

    number_of_reports = len(all_reports)
    if out_format == "codeclimate":
        return (codeclimate.convert(all_reports, severity_map),
                number_of_reports)

    if out_format == "gerrit":
        return gerrit.convert(all_reports, severity_map), number_of_reports

    if out_format == "json":
        return [out_json.convert_to_parse(r) for r in all_reports], \
            number_of_reports

    # Previously the function fell off the end and returned None here,
    # violating the declared Tuple return type; guard against EXPORT_TYPES
    # growing without this dispatch being updated.
    LOG.error("Unknown export format: %s", out_format)
    return {}, 0
def collect_file_hashes_from_plist(plist_file):
    """ Collects file content hashes and last modification times for the
    source files which can be found in the given plist file.

    :returns List of file paths which are in the processed plist file but
    missing from the user's disk and the source file modification times for
    the still available source files.  On a parse failure the error is
    logged and the (possibly empty) pair is still returned, so callers that
    unpack two values never crash on an implicit None.
    """
    source_file_mod_times = {}
    missing_files = []
    sc_handler = SourceCodeCommentHandler()

    try:
        files, reports = plist_parser.parse_plist_file(plist_file)

        for f in files:
            if not os.path.isfile(f):
                missing_files.append(f)
                # NOTE(review): missing_source_files appears to be a
                # module-level accumulator shared across calls.
                missing_source_files.add(f)
                continue

            content_hash = get_file_content_hash(f)
            hash_to_file[content_hash] = f
            file_to_hash[f] = content_hash
            source_file_mod_times[f] = util.get_last_mod_time(f)

        # Get file hashes which contain source code comments.
        for report in reports:
            last_report_event = report.bug_path[-1]
            file_path = files[last_report_event['location']['file']]
            if not os.path.isfile(file_path):
                continue

            file_hash = file_to_hash[file_path]
            if file_hash in file_hash_with_review_status:
                continue

            report_line = last_report_event['location']['line']
            if sc_handler.has_source_line_comments(file_path, report_line):
                file_hash_with_review_status.add(file_hash)
    except Exception as ex:
        LOG.error('Parsing the plist failed: %s', str(ex))

    return missing_files, source_file_mod_times
def parse_convert_reports(input_dirs: List[str],
                          out_format: str,
                          severity_map: Dict,
                          trim_path_prefixes: List[str]) -> Dict:
    """Parse and convert the reports from the input dirs to the out_format.

    Retuns a dictionary which can be converted to the out_format type of
    json to be printed out or saved on the disk.
    """
    # Gather plist candidates: explicit files plus the direct (non-recursive)
    # children of every input directory.
    plist_candidates = set()
    for raw_path in input_dirs:
        abs_path = os.path.abspath(raw_path)

        if os.path.isfile(abs_path):
            plist_candidates.add(abs_path)
        elif os.path.isdir(abs_path):
            _, _, entries = next(os.walk(abs_path), ([], [], []))
            plist_candidates.update(
                os.path.join(abs_path, entry) for entry in entries)

    all_reports = []
    for candidate in plist_candidates:
        if candidate.endswith('.plist'):
            _, parsed_reports = plist_parser.parse_plist_file(candidate)
            all_reports.extend(parsed_reports)

    if trim_path_prefixes:
        for report in all_reports:
            report.trim_path_prefixes(trim_path_prefixes)

    if out_format == "codeclimate":
        return codeclimate.convert(all_reports)

    if out_format == "gerrit":
        return gerrit.convert(all_reports, severity_map)

    if out_format == "json":
        return [out_json.convert_to_parse(report) for report in all_reports]

    LOG.error("Unknown export format: %s", out_format)
    return {}
def parse_convert_reports(input_dirs: List[str],
                          out_format: str,
                          trim_path_prefixes: List[str]) -> List:
    """Parse and convert the reports from the input dirs to the out_format.

    Returns a list of the converted reports which can be serialized to the
    out_format type of json to be printed out or saved on the disk.
    (The previous annotation/docstring claimed a Dict, but a list is
    returned on every path.)
    """
    res = []

    # Collect candidate files: direct file arguments plus the top-level
    # entries of directory arguments (non-recursive).
    input_files = set()
    for input_path in input_dirs:
        input_path = os.path.abspath(input_path)

        if os.path.isfile(input_path):
            input_files.add(input_path)
        elif os.path.isdir(input_path):
            _, _, file_names = next(os.walk(input_path), ([], [], []))
            input_paths = [os.path.join(input_path, file_name) for file_name
                           in file_names]
            input_files.update(input_paths)

    for input_file in input_files:
        if not input_file.endswith('.plist'):
            continue

        _, reports = plist_parser.parse_plist_file(input_file)

        if out_format == "codeclimate":
            # codeclimate.convert does its own path trimming.
            cc_reports = codeclimate.convert(reports, trim_path_prefixes)
            res.extend(cc_reports)

        for report in reports:
            if trim_path_prefixes:
                report.trim_path_prefixes(trim_path_prefixes)

            if out_format == "json":
                out = out_json.convert_to_parse(report)
                res.append(out)

    return res
def convert_reports(input_dirs, out_format, trim_path_prefixes):
    """ Converts reports found in the input directories to the given format.
    """
    converted = []

    # Explicit file arguments plus the direct children of each directory.
    plist_candidates = set()
    for raw_path in input_dirs:
        abs_path = os.path.abspath(raw_path)

        if os.path.isfile(abs_path):
            plist_candidates.add(abs_path)
        elif os.path.isdir(abs_path):
            _, _, entries = next(os.walk(abs_path), ([], [], []))
            plist_candidates.update(
                os.path.join(abs_path, entry) for entry in entries)

    for candidate in plist_candidates:
        if not candidate.endswith('.plist'):
            continue

        _, parsed_reports = plist_parser.parse_plist_file(candidate)

        for parsed_report in parsed_reports:
            if trim_path_prefixes:
                parsed_report.trim_path_prefixes(trim_path_prefixes)

            if out_format == "json":
                converted.append(parsed_report.to_json())
            elif out_format == "codeclimate":
                converted.append(parsed_report.to_codeclimate())
            else:
                LOG.error("Unsupported output format: %s", out_format)
                sys.exit(1)

    return json.dumps(converted)
def convert_reports_to_json(input_dirs):
    """ Converts reports found in the input directories to json. """
    plist_candidates = set()
    for raw_path in input_dirs:
        abs_path = os.path.abspath(raw_path)

        if os.path.isfile(abs_path):
            plist_candidates.add(abs_path)
        elif os.path.isdir(abs_path):
            _, _, entries = next(os.walk(abs_path), ([], [], []))
            plist_candidates.update(
                os.path.join(abs_path, entry) for entry in entries)

    json_reports = []
    for candidate in plist_candidates:
        if candidate.endswith('.plist'):
            _, parsed_reports = plist_parser.parse_plist_file(candidate)
            json_reports.extend(r.to_json() for r in parsed_reports)

    return json.dumps(json_reports)
def test_empty_file(self):
    """An empty plist must parse to an empty file map and no reports."""
    plist_path = os.path.join(self.__plist_test_files, 'empty_file')
    parsed_files, parsed_reports = plist_parser.parse_plist_file(
        plist_path, False)
    self.assertEqual(parsed_files, {})
    self.assertEqual(parsed_reports, [])
def test_collect_path_events(self):
    """ Test path event collect before store.

    Parses the clang-5.0-trunk.plist fixture and verifies that
    store_handler.collect_paths_events() produces the expected
    BugPathPos/BugPathEvent sequences for each of its three reports.
    """
    clang50_trunk_plist = os.path.join(
        self.__plist_test_files, 'clang-5.0-trunk.plist')
    files, reports = plist_parser.parse_plist_file(clang50_trunk_plist,
                                                   None, False)
    self.assertEqual(len(reports), 3)

    # Generate dummy file_ids which should come from the database.
    file_ids = {}
    for i, file_name in enumerate(files, 1):
        file_ids[file_name] = i

    msg = "This test is prepared to handle 3 reports."
    self.assertEqual(len(reports), 3, msg)

    # Expected control-flow path of the first report.
    report1_path = [
        ttypes.BugPathPos(startLine=19, filePath=None, endCol=7,
                          startCol=5, endLine=19, fileId=1),
        ttypes.BugPathPos(startLine=20, filePath=None, endCol=7,
                          startCol=5, endLine=20, fileId=1),
        ttypes.BugPathPos(startLine=21, filePath=None, endCol=13,
                          startCol=5, endLine=21, fileId=1),
        ttypes.BugPathPos(startLine=7, filePath=None, endCol=7,
                          startCol=5, endLine=7, fileId=1),
        ttypes.BugPathPos(startLine=8, filePath=None, endCol=6,
                          startCol=5, endLine=8, fileId=1),
        ttypes.BugPathPos(startLine=8, filePath=None, endCol=25,
                          startCol=22, endLine=8, fileId=1),
        ttypes.BugPathPos(startLine=8, filePath=None, endCol=20,
                          startCol=10, endLine=8, fileId=1),
        ttypes.BugPathPos(startLine=7, filePath=None, endCol=14,
                          startCol=14, endLine=7, fileId=2)
    ]

    # Expected diagnostic events along the first report's path
    # (a division-by-zero reached through nested calls).
    report1_events = [
        ttypes.BugPathEvent(startLine=20, filePath=None, endCol=12,
                            startCol=5,
                            msg="'base' initialized to 0",
                            endLine=20, fileId=1),
        ttypes.BugPathEvent(startLine=21, filePath=None, endCol=18,
                            startCol=15,
                            msg="Passing the value 0 via "
                                "1st parameter 'base'",
                            endLine=21, fileId=1),
        ttypes.BugPathEvent(startLine=21, filePath=None, endCol=19,
                            startCol=5,
                            msg="Calling 'test_func'",
                            endLine=21, fileId=1),
        ttypes.BugPathEvent(startLine=6, filePath=None, endCol=1,
                            startCol=1,
                            msg="Entered call from 'main'",
                            endLine=6, fileId=1),
        ttypes.BugPathEvent(startLine=8, filePath=None, endCol=25,
                            startCol=22,
                            msg="Passing the value 0 via "
                                "1st parameter 'num'",
                            endLine=8, fileId=1),
        ttypes.BugPathEvent(startLine=8, filePath=None, endCol=26,
                            startCol=10,
                            msg="Calling 'generate_id'",
                            endLine=8, fileId=1),
        ttypes.BugPathEvent(startLine=6, filePath=None, endCol=1,
                            startCol=1,
                            msg="Entered call from 'test_func'",
                            endLine=6, fileId=2),
        ttypes.BugPathEvent(startLine=7, filePath=None, endCol=17,
                            startCol=12,
                            msg='Division by zero',
                            endLine=7, fileId=2)
    ]

    path1, events1, _ = store_handler.collect_paths_events(reports[0],
                                                           file_ids,
                                                           files)

    self.assertEqual(path1, report1_path)
    self.assertEqual(events1, report1_events)

    # The second report carries no control-flow path, only a single event.
    report2_path = []
    report2_events = [
        ttypes.BugPathEvent(startLine=8, filePath=None, endCol=26,
                            startCol=10,
                            msg="Value stored to 'id' is "
                                "never read",
                            endLine=8, fileId=1)
    ]

    path2, events2, _ = store_handler.collect_paths_events(reports[1],
                                                           file_ids,
                                                           files)

    self.assertEqual(path2, report2_path)
    self.assertEqual(events2, report2_events)

    # Third report: dangling reference to stack memory.
    report3_path = [
        ttypes.BugPathPos(startLine=14, filePath=None, endCol=6,
                          startCol=3, endLine=14, fileId=1),
        ttypes.BugPathPos(startLine=15, filePath=None, endCol=3,
                          startCol=3, endLine=15, fileId=1),
        ttypes.BugPathPos(startLine=16, filePath=None, endCol=1,
                          startCol=1, endLine=16, fileId=1)
    ]
    report3_events = [
        ttypes.BugPathEvent(startLine=14, filePath=None, endCol=29,
                            startCol=3,
                            msg="Address of stack memory associated"
                                " with local variable 'str'"
                                " is still referred to by the global "
                                "variable 'p' upon returning to the "
                                "caller. This will be a dangling "
                                "reference",
                            endLine=14, fileId=1)
    ]

    path, events, _ = store_handler.collect_paths_events(reports[2],
                                                         file_ids,
                                                         files)

    self.assertEqual(path, report3_path)
    self.assertEqual(events, report3_events)
def __process_report_file(self,
                          report_file_path: str,
                          session: DBSession,
                          source_root: str,
                          run_id: int,
                          file_path_to_id: Dict[str, int],
                          run_history_time: datetime,
                          skip_handler: skiplist_handler.SkipListHandler,
                          hash_map_reports: Dict[str, List[Any]]) -> bool:
    """ Process and save reports from the given report file to the
    database.

    Returns False when the plist cannot be parsed or a file path id is
    missing; True otherwise (including the no-reports case).
    """
    try:
        files, reports = plist_parser.parse_plist_file(report_file_path)
    except Exception as ex:
        LOG.warning('Parsing the plist failed: %s', str(ex))
        return False

    # Nothing to store, but this is not an error.
    if not reports:
        return True

    trimmed_files = {}
    file_ids = {}
    missing_ids_for_files = []

    # Map each plist file index to its prefix-trimmed path.
    for k, v in files.items():
        trimmed_files[k] = \
            util.trim_path_prefixes(v, self.__trim_path_prefixes)

    # Resolve every trimmed path to its database file id; collect the
    # ones that have no id so the failure can be reported in one batch.
    for file_name in trimmed_files.values():
        file_id = file_path_to_id.get(file_name, -1)
        if file_id == -1:
            missing_ids_for_files.append(file_name)
            continue

        file_ids[file_name] = file_id

    if missing_ids_for_files:
        LOG.warning("Failed to get file path id for '%s'!",
                    ' '.join(missing_ids_for_files))
        return False

    def set_review_status(report: ReportType):
        """ Set review status for the given report if there is any source
        code comment.
        """
        checker_name = report.main['check_name']
        last_report_event = report.bug_path[-1]

        # The original file path is needed here not the trimmed
        # because the source files are extracted as the original
        # file path.
        file_name = files[last_report_event['location']['file']]
        source_file_name = os.path.realpath(
            os.path.join(source_root, file_name.strip("/")))

        # Check and store source code comments.
        if not os.path.isfile(source_file_name):
            return

        report_line = last_report_event['location']['line']
        source_file = os.path.basename(file_name)

        src_comment_data = parse_codechecker_review_comment(
            source_file_name, report_line, checker_name)

        if len(src_comment_data) == 1:
            # Exactly one comment: map its status to a review status,
            # defaulting to false positive.
            status = src_comment_data[0]['status']
            rw_status = ttypes.ReviewStatus.FALSE_POSITIVE
            if status == 'confirmed':
                rw_status = ttypes.ReviewStatus.CONFIRMED
            elif status == 'intentional':
                rw_status = ttypes.ReviewStatus.INTENTIONAL

            self.__report_server._setReviewStatus(
                session, report.report_hash, rw_status,
                src_comment_data[0]['message'], run_history_time)
        elif len(src_comment_data) > 1:
            # Ambiguous comments: do not suppress, record for reporting.
            LOG.warning(
                "Multiple source code comment can be found "
                "for '%s' checker in '%s' at line %s. "
                "This bug will not be suppressed!",
                checker_name, source_file, report_line)

            self.__wrong_src_code_comments.append(
                f"{source_file}|{report_line}|{checker_name}")

    root_dir_path = os.path.dirname(report_file_path)
    # Per-directory metadata: mip holds checker/analyzer info for this
    # report directory; analysis_info may be absent.
    mip = self.__mips[root_dir_path]
    analysis_info = self.__analysis_info.get(root_dir_path)

    for report in reports:
        self.__all_report_checkers.add(report.check_name)

        if skip_handler.should_skip(report.file_path):
            continue

        report.trim_path_prefixes(self.__trim_path_prefixes)

        # Deduplicate by path hash across all processed report files.
        report_path_hash = get_report_path_hash(report)
        if report_path_hash in self.__already_added_report_hashes:
            LOG.debug('Not storing report. Already added: %s', report)
            continue

        LOG.debug("Storing report to the database...")

        bug_id = report.report_hash

        detection_status = 'new'
        detected_at = run_history_time

        # A previously seen hash keeps its original detection time and is
        # marked reopened/unresolved instead of new.
        if bug_id in hash_map_reports:
            old_report = hash_map_reports[bug_id][0]
            old_status = old_report.detection_status
            detection_status = 'reopened' \
                if old_status == 'resolved' else 'unresolved'
            detected_at = old_report.detected_at

        analyzer_name = get_analyzer_name(report.check_name,
                                          mip.checker_to_analyzer,
                                          report.metadata)

        path_events = collect_paths_events(report, file_ids,
                                           trimmed_files)

        report_id = self.__add_report(session, run_id,
                                      file_ids[report.file_path],
                                      report.main, path_events,
                                      detection_status, detected_at,
                                      analysis_info, analyzer_name)

        self.__new_report_hashes.add(bug_id)
        self.__already_added_report_hashes.add(report_path_hash)

        set_review_status(report)

        LOG.debug("Storing report done. ID=%d", report_id)

    return True