def parse_with_plt_formatter(plist_file: str,
                             metadata: Dict,
                             plist_pltf: PlistToPlaintextFormatter,
                             file_report_map: Dict[str, List[Report]]) -> Set:
    """Parse a plist with plaintext formatter and collect changed source files.

    Returns the report statistics collected by the result handler.
    """
    if not plist_file.endswith(".plist"):
        LOG.debug("Skipping input file '%s' as it is not a plist.", plist_file)
        return set()

    LOG.debug("Parsing input file '%s'", plist_file)

    # Resolve which analyzed source file produced this plist: either a
    # top-level 'result_source_files' map or per-tool maps in the metadata.
    if 'result_source_files' in metadata:
        result_source_files = metadata['result_source_files']
    else:
        result_source_files = {}
        for tool in metadata.get('tools', {}):
            result_source_files.update(
                tool.get('result_source_files', {}).items())

    if plist_file in result_source_files:
        # Make sure the analyzed source file shows up in the report map
        # even when it produced no reports.
        file_report_map.setdefault(result_source_files[plist_file], [])

    files, reports = plist_pltf.parse(plist_file)
    plist_mtime = util.get_last_mod_time(plist_file)

    changed_files = set()
    for src in files.values():
        if plist_mtime is None:
            # Failed to get the modification time for
            # a file mark it as changed.
            changed_files.add(src)
            LOG.warning('%s is missing since the last analysis.', src)
            continue

        src_mtime = util.get_last_mod_time(src)
        if not src_mtime:
            changed_files.add(src)
            LOG.warning('%s does not exist.', src)
        elif src_mtime > plist_mtime:
            changed_files.add(src)
            LOG.warning('%s did change since the last analysis.', src)

    # Only collect the reports when every referenced source is unchanged:
    # stale sources would make report line numbers unreliable.
    if not changed_files:
        for report in reports:
            file_report_map.setdefault(report.file_path, []).append(report)

    return changed_files
def parse(plist_file, metadata_dict, rh, file_report_map):
    """
    Prints the results in the given file to the standard output in a human-
    readable format.

    Returns the report statistics collected by the result handler.
    """
    if not plist_file.endswith(".plist"):
        LOG.debug("Skipping input file '%s' as it is not a plist.", plist_file)
        return set()

    LOG.debug("Parsing input file '%s'", plist_file)

    # Resolve which analyzed source file produced this plist: either a
    # top-level 'result_source_files' map or per-tool maps in the metadata.
    result_source_files = {}
    if 'result_source_files' in metadata_dict:
        result_source_files = metadata_dict['result_source_files']
    else:
        for tool in metadata_dict.get('tools', {}):
            result_src_files = tool.get('result_source_files', {})
            result_source_files.update(result_src_files.items())

    if plist_file in result_source_files:
        analyzed_source_file = \
            result_source_files[plist_file]

        if analyzed_source_file not in file_report_map:
            file_report_map[analyzed_source_file] = []

    files, reports = rh.parse(plist_file)
    plist_mtime = util.get_last_mod_time(plist_file)

    changed_files = set()
    for source_file in files:
        if plist_mtime is None:
            # Failed to get the modification time for
            # a file mark it as changed.
            changed_files.add(source_file)
            LOG.warning('%s is missing since the last analysis.', source_file)
            continue

        file_mtime = util.get_last_mod_time(source_file)
        # FIX: get_last_mod_time() yields None for a missing source file
        # (see the sibling parse_with_plt_formatter) and 'None > plist_mtime'
        # would raise a TypeError. Treat a missing source file as changed.
        if not file_mtime:
            changed_files.add(source_file)
            LOG.warning('%s does not exist.', source_file)
            continue

        if file_mtime > plist_mtime:
            changed_files.add(source_file)
            LOG.warning('%s did change since the last analysis.', source_file)

    # Only collect the reports when no referenced source has changed;
    # stale sources would make report positions unreliable.
    if not changed_files:
        for report in reports:
            file_path = report.file_path
            if file_path not in file_report_map:
                file_report_map[file_path] = []

            file_report_map[file_path].append(report)

    return changed_files
def parse(plist_file, metadata_dict, rh, file_report_map):
    """
    Prints the results in the given file to the standard output in a human-
    readable format.

    Returns the report statistics collected by the result handler.
    """
    if not plist_file.endswith(".plist"):
        LOG.debug("Skipping input file '%s' as it is not a plist.", plist_file)
        return set()

    LOG.debug("Parsing input file '%s'", plist_file)

    # Pre-register the analyzed source file in the report map (if the
    # metadata tells us which source produced this plist) so it shows up
    # even when it has no reports.
    if 'result_source_files' in metadata_dict and \
            plist_file in metadata_dict['result_source_files']:
        analyzed_source_file = \
            metadata_dict['result_source_files'][plist_file]

        if analyzed_source_file not in file_report_map:
            file_report_map[analyzed_source_file] = []

    files, reports = rh.parse(plist_file)
    plist_mtime = util.get_last_mod_time(plist_file)

    changed_files = set()
    for source_file in files:
        if plist_mtime is None:
            # Failed to get the modification time for
            # a file mark it as changed.
            changed_files.add(source_file)
            LOG.warning('%s is missing since the last analysis.', source_file)
            continue

        file_mtime = util.get_last_mod_time(source_file)
        # FIX: get_last_mod_time() yields None for a missing source file
        # and 'None > plist_mtime' would raise a TypeError. Treat a
        # missing source file as changed instead.
        if not file_mtime:
            changed_files.add(source_file)
            LOG.warning('%s does not exist.', source_file)
            continue

        if file_mtime > plist_mtime:
            changed_files.add(source_file)
            LOG.warning('%s did change since the last analysis.', source_file)

    # Only collect the reports when no referenced source has changed;
    # stale sources would make report positions unreliable.
    if not changed_files:
        for report in reports:
            file_path = report.file_path
            if file_path not in file_report_map:
                file_report_map[file_path] = []

            file_report_map[file_path].append(report)

    return changed_files
def collect_file_hashes_from_plist(plist_file):
    """
    Collects file content hashes and last modification times of files
    which can be found in the given plist file.

    :returns List of file paths which are in the processed plist file
    but missing from the user's disk.
    """
    # NOTE(review): mutates the enclosing-scope maps hash_to_file,
    # file_to_hash, file_to_mtime and the set missing_source_files.
    missing_files = []
    try:
        files, _ = plist_parser.parse_plist(plist_file)

        for f in files:
            if not os.path.isfile(f):
                missing_files.append(f)
                missing_source_files.add(f)
                continue

            content_hash = util.get_file_content_hash(f)
            hash_to_file[content_hash] = f
            file_to_hash[f] = content_hash
            file_to_mtime[f] = util.get_last_mod_time(f)
    except Exception as ex:
        LOG.error('Parsing the plist failed: %s', str(ex))

    # FIX: always return the documented list. Previously a parse failure
    # fell off the end of the function and returned None, making the
    # return type Optional[list] for no benefit.
    return missing_files
def collect_file_hashes_from_plist(plist_file):
    """
    Collects file content hashes and last modification times for the
    source files which can be found in the given plist file.

    :returns List of file paths which are in the processed plist file but
    missing from the user's disk and the source file modification times
    for the still available source files.
    """
    # NOTE(review): mutates the enclosing-scope maps hash_to_file,
    # file_to_hash and the sets missing_source_files,
    # file_hash_with_review_status.
    source_file_mod_times = {}
    missing_files = []
    sc_handler = SourceCodeCommentHandler()

    try:
        files, reports = plist_parser.parse_plist_file(plist_file)

        if not reports:
            return missing_files, source_file_mod_times

        # CppCheck generates a '0' value for the bug hash.
        # In case all of the reports in a plist file contain only
        # a hash with '0' value overwrite the hash values in the
        # plist report files with a context free hash value.
        if all(rep.report_hash == '0' for rep in reports):
            replace_report_hash(plist_file, HashType.CONTEXT_FREE)

        for f in files:
            if not os.path.isfile(f):
                missing_files.append(f)
                missing_source_files.add(f)
                continue

            content_hash = get_file_content_hash(f)
            hash_to_file[content_hash] = f
            file_to_hash[f] = content_hash
            source_file_mod_times[f] = util.get_last_mod_time(f)

        # Get file hashes which contain source code comments.
        for report in reports:
            last_report_event = report.bug_path[-1]
            file_path = files[last_report_event['location']['file']]
            if not os.path.isfile(file_path):
                continue

            file_hash = file_to_hash[file_path]
            if file_hash in file_hash_with_review_status:
                continue

            report_line = last_report_event['location']['line']
            if sc_handler.has_source_line_comments(file_path, report_line):
                file_hash_with_review_status.add(file_hash)
    except Exception as ex:
        # FIX: removed leftover debug residue
        # ('import traceback; traceback.print_stack()').
        LOG.error('Parsing the plist failed: %s', str(ex))

    # FIX: always return the documented tuple. Previously a parse failure
    # fell off the end and returned None, so a caller unpacking
    # 'missing, mtimes = collect_file_hashes_from_plist(...)' crashed
    # with a TypeError.
    return missing_files, source_file_mod_times
def collect_file_info(files):
    """Collect file information about given list of files like:
    - last modification time
    - content hash
    If the file is missing the corresponding data will
    be empty.
    """
    info = {}
    for path in files:
        if os.path.isfile(path):
            info[path] = {"hash": get_file_content_hash(path),
                          "mtime": util.get_last_mod_time(path)}
        else:
            # Missing file: keep the key but with no data.
            info[path] = {}
    return info
def collect_file_hashes_from_plist(plist_file):
    """
    Collects file content hashes and last modification times for the
    source files which can be found in the given plist file.

    :returns List of file paths which are in the processed plist file but
    missing from the user's disk and the source file modification times
    for the still available source files.
    """
    # NOTE(review): mutates the enclosing-scope maps hash_to_file,
    # file_to_hash and the sets missing_source_files,
    # file_hash_with_review_status.
    source_file_mod_times = {}
    missing_files = []
    sc_handler = SourceCodeCommentHandler()

    try:
        files, reports = plist_parser.parse_plist_file(plist_file)

        for f in files:
            if not os.path.isfile(f):
                missing_files.append(f)
                missing_source_files.add(f)
                continue

            content_hash = get_file_content_hash(f)
            hash_to_file[content_hash] = f
            file_to_hash[f] = content_hash
            source_file_mod_times[f] = util.get_last_mod_time(f)

        # Get file hashes which contain source code comments.
        for report in reports:
            last_report_event = report.bug_path[-1]
            file_path = files[last_report_event['location']['file']]
            if not os.path.isfile(file_path):
                continue

            file_hash = file_to_hash[file_path]
            if file_hash in file_hash_with_review_status:
                continue

            report_line = last_report_event['location']['line']
            if sc_handler.has_source_line_comments(file_path, report_line):
                file_hash_with_review_status.add(file_hash)
    except Exception as ex:
        LOG.error('Parsing the plist failed: %s', str(ex))

    # FIX: always return the documented tuple. Previously a parse failure
    # fell off the end and returned None, so a caller unpacking
    # 'missing, mtimes = collect_file_hashes_from_plist(...)' crashed
    # with a TypeError.
    return missing_files, source_file_mod_times
def collect_file_hashes_from_plist(plist_file):
    """
    Collects file content hashes and last modification times of files
    which can be found in the given plist file.

    :returns List of file paths which are in the processed plist file
    but missing from the user's disk.
    """
    # NOTE(review): mutates the enclosing-scope maps hash_to_file,
    # file_to_hash, file_to_mtime and the sets missing_source_files,
    # file_hash_with_review_status.
    missing_files = []
    try:
        files, reports = plist_parser.parse_plist(plist_file)

        for f in files:
            if not os.path.isfile(f):
                missing_files.append(f)
                missing_source_files.add(f)
                continue

            content_hash = util.get_file_content_hash(f)
            hash_to_file[content_hash] = f
            file_to_hash[f] = content_hash
            file_to_mtime[f] = util.get_last_mod_time(f)

        # Get file hashes which contain source code comments.
        for report in reports:
            last_report_event = report.bug_path[-1]
            file_path = files[last_report_event['location']['file']]
            if not os.path.isfile(file_path):
                continue

            file_hash = file_to_hash[file_path]
            if file_hash in file_hash_with_review_status:
                continue

            report_line = last_report_event['location']['line']
            sc_handler = SourceCodeCommentHandler(file_path)
            if sc_handler.has_source_line_comments(report_line):
                file_hash_with_review_status.add(file_hash)
    except Exception as ex:
        LOG.error('Parsing the plist failed: %s', str(ex))

    # FIX: always return the documented list. Previously a parse failure
    # fell off the end of the function and returned None, making the
    # return type Optional[list] for no benefit.
    return missing_files
def parse_collect_plist_info(plist_file):
    """Parse one plist report file and collect information
    about the source files mentioned in the report file.

    :returns (ReportFileInfo, SourceFilesInReport) describing whether the
    plist should be stored and the state of its source files.
    """
    source_files, reports = parse_report_file(plist_file)

    if len(source_files) == 0:
        # If there is no source in the plist we will not upload
        # it to the server.
        LOG.debug("Skip empty plist file: %s", plist_file)
        rli = ReportFileInfo(store_it=False, main_report_positions=[])
        sfir = SourceFilesInReport(source_info={},
                                   missing=set(),
                                   changed_since_report_gen=set())
        return rli, sfir

    source_info = collect_file_info(source_files)

    # FIX: dropped the dead 'missing_files = set()' pre-assignment that was
    # immediately overwritten, and the no-op loop which added the members
    # of 'missing_files' back into itself.
    missing_files = check_missing_files(source_info)
    if missing_files:
        LOG.warning("Skipping '%s' because it refers "
                    "the following missing source files: %s",
                    plist_file, missing_files)
        rli = ReportFileInfo(store_it=False, main_report_positions=[])
        sfir = SourceFilesInReport(source_info=source_info,
                                   missing=missing_files,
                                   changed_since_report_gen=set())
        return rli, sfir

    if overwrite_cppcheck_report_hash(reports, plist_file):
        # If overwrite was needed parse it back again to update the hashes.
        source_files, reports = parse_report_file(plist_file)

    main_report_positions = []
    rdata = get_report_data(reports)
    # Replace the file index values to source file path.
    for rda in rdata:
        rda = rda._replace(filepath=source_files[rda.fileidx])
        main_report_positions.append(rda)

    plist_mtime = util.get_last_mod_time(plist_file)

    changed_files = set()
    # Check if any source file corresponding to a plist
    # file changed since the plist file was generated.
    for src_path, info in source_info.items():
        # Empty info means the file was missing when collected.
        if info and info['mtime'] > plist_mtime:
            changed_files.add(src_path)

    rli = ReportFileInfo(store_it=True,
                         main_report_positions=main_report_positions)
    sfir = SourceFilesInReport(source_info=source_info,
                               missing=missing_files,
                               changed_since_report_gen=changed_files)
    return rli, sfir
def assemble_zip(inputs, zip_file, client):
    """Collect the plist report files and the referenced source files from
    the given input paths and write them into a zlib-compressed zip file
    which can be sent to the server.
    """
    hash_to_file = {}
    # There can be files with same hash,
    # but different path.
    file_to_hash = {}
    file_to_mtime = {}
    missing_source_files = set()
    file_hash_with_review_status = set()

    def collect_file_hashes_from_plist(plist_file):
        """
        Collects file content hashes and last modification times of files
        which can be found in the given plist file.

        :returns List of file paths which are in the processed plist file
        but missing from the user's disk.
        """
        missing_files = []
        try:
            files, reports = plist_parser.parse_plist(plist_file)

            for f in files:
                if not os.path.isfile(f):
                    missing_files.append(f)
                    missing_source_files.add(f)
                    continue

                content_hash = util.get_file_content_hash(f)
                hash_to_file[content_hash] = f
                file_to_hash[f] = content_hash
                file_to_mtime[f] = util.get_last_mod_time(f)

            # Get file hashes which contain source code comments.
            for report in reports:
                last_report_event = report.bug_path[-1]
                file_path = files[last_report_event['location']['file']]
                if not os.path.isfile(file_path):
                    continue

                file_hash = file_to_hash[file_path]
                if file_hash in file_hash_with_review_status:
                    continue

                report_line = last_report_event['location']['line']
                sc_handler = SourceCodeCommentHandler(file_path)
                if sc_handler.has_source_line_comments(report_line):
                    file_hash_with_review_status.add(file_hash)
        except Exception as ex:
            LOG.error('Parsing the plist failed: %s', str(ex))

        # FIX: always return the documented list. Previously a parse
        # failure fell off the end and returned None instead of a list.
        return missing_files

    plist_report_files = []

    changed_files = set()
    for input_path in inputs:
        input_path = os.path.abspath(input_path)

        if not os.path.exists(input_path):
            raise OSError(errno.ENOENT,
                          "Input path does not exist", input_path)

        if os.path.isfile(input_path):
            files = [input_path]
        else:
            _, _, files = next(os.walk(input_path), ([], [], []))

        for f in files:
            plist_file = os.path.join(input_path, f)
            if f.endswith(".plist"):
                missing_files = collect_file_hashes_from_plist(plist_file)
                if not missing_files:
                    LOG.debug("Copying file '%s' to ZIP assembly dir...",
                              plist_file)
                    plist_report_files.append(os.path.join(input_path, f))
                else:
                    LOG.warning("Skipping '%s' because it refers "
                                "the following missing source files: %s",
                                plist_file, missing_files)
            elif f == 'metadata.json':
                plist_report_files.append(os.path.join(input_path, f))
            elif f == 'skip_file':
                plist_report_files.append(os.path.join(input_path, f))

            # NOTE(review): this check runs for every directory entry (not
            # only plists) and compares every source mtime collected so
            # far against the current entry's mtime — confirm this is the
            # intended scope.
            plist_mtime = util.get_last_mod_time(plist_file)

            for k, v in file_to_mtime.items():
                if v > plist_mtime:
                    changed_files.add(k)

    if changed_files:
        changed_files = '\n'.join([' - ' + f for f in changed_files])
        LOG.warning("The following source file contents changed since the "
                    "latest analysis:\n%s\nPlease analyze your project "
                    "again to update the reports!", changed_files)
        sys.exit(1)

    with zipfile.ZipFile(zip_file, 'a', allowZip64=True) as zipf:
        for pl in plist_report_files:
            _, plist_filename = os.path.split(pl)
            zip_target = os.path.join('reports', plist_filename)
            zipf.write(pl, zip_target)

        if len(hash_to_file) == 0:
            LOG.warning("There is no report to store. After uploading these "
                        "results the previous reports become resolved.")

        file_hashes = list(hash_to_file.keys())
        # Only send the file contents the server does not already have.
        necessary_hashes = client.getMissingContentHashes(file_hashes) \
            if file_hashes else []

        for f, h in file_to_hash.items():
            if h in necessary_hashes or h in file_hash_with_review_status:
                LOG.debug("File contents for '%s' needed by the server", f)
                zipf.write(f, os.path.join('root', f.lstrip('/')))

        zipf.writestr('content_hashes.json', json.dumps(file_to_hash))

    # Compressing .zip file
    with open(zip_file, 'rb') as source:
        compressed = zlib.compress(source.read(), zlib.Z_BEST_COMPRESSION)

    with open(zip_file, 'wb') as target:
        target.write(compressed)

    LOG.debug("[ZIP] Mass store zip written at '%s'", zip_file)

    if missing_source_files:
        LOG.warning("Missing source files: \n%s", '\n'.join(
            map(lambda f_: " - " + f_, missing_source_files)))
def assemble_zip(inputs, zip_file, client):
    """Collect the plist report files, a merged metadata.json and the
    referenced source files from the given input paths and write them into
    a zlib-compressed zip file which can be sent to the server.
    """
    hash_to_file = {}
    # There can be files with same hash,
    # but different path.
    file_to_hash = {}
    missing_source_files = set()
    file_hash_with_review_status = set()

    def collect_file_hashes_from_plist(plist_file):
        """
        Collects file content hashes and last modification times for the
        source files which can be found in the given plist file.

        :returns List of file paths which are in the processed plist file
        but missing from the user's disk and the source file modification
        times for the still available source files.
        """
        source_file_mod_times = {}
        missing_files = []
        sc_handler = SourceCodeCommentHandler()

        try:
            files, reports = plist_parser.parse_plist_file(plist_file)

            if not reports:
                return missing_files, source_file_mod_times

            # CppCheck generates a '0' value for the bug hash.
            # In case all of the reports in a plist file contain only
            # a hash with '0' value overwrite the hash values in the
            # plist report files with a context free hash value.
            if all(rep.report_hash == '0' for rep in reports):
                replace_report_hash(plist_file, HashType.CONTEXT_FREE)

            for f in files:
                if not os.path.isfile(f):
                    missing_files.append(f)
                    missing_source_files.add(f)
                    continue

                content_hash = get_file_content_hash(f)
                hash_to_file[content_hash] = f
                file_to_hash[f] = content_hash
                source_file_mod_times[f] = util.get_last_mod_time(f)

            # Get file hashes which contain source code comments.
            for report in reports:
                last_report_event = report.bug_path[-1]
                file_path = files[last_report_event['location']['file']]
                if not os.path.isfile(file_path):
                    continue

                file_hash = file_to_hash[file_path]
                if file_hash in file_hash_with_review_status:
                    continue

                report_line = last_report_event['location']['line']
                if sc_handler.has_source_line_comments(file_path,
                                                       report_line):
                    file_hash_with_review_status.add(file_hash)
        except Exception as ex:
            # FIX: removed leftover debug residue
            # ('import traceback; traceback.print_stack()').
            LOG.error('Parsing the plist failed: %s', str(ex))

        # FIX: always return the documented tuple. Previously a parse
        # failure returned None, so the caller's tuple unpacking below
        # crashed with a TypeError instead of skipping the broken plist.
        return missing_files, source_file_mod_times

    files_to_compress = set()
    metadata_json_to_compress = set()

    changed_files = set()
    for input_path in inputs:
        input_path = os.path.abspath(input_path)

        if not os.path.exists(input_path):
            raise OSError(errno.ENOENT,
                          "Input path does not exist", input_path)

        if os.path.isfile(input_path):
            files = [input_path]
        else:
            _, _, files = next(os.walk(input_path), ([], [], []))

        for f in files:
            plist_file = os.path.join(input_path, f)
            if f.endswith(".plist"):
                missing_files, source_file_mod_times = \
                    collect_file_hashes_from_plist(plist_file)

                if missing_files:
                    LOG.warning(
                        "Skipping '%s' because it refers "
                        "the following missing source files: %s",
                        plist_file, missing_files)
                elif not source_file_mod_times:
                    # If there is no source in the plist we will not upload
                    # it to the server.
                    LOG.debug("Skip empty plist file: %s", plist_file)
                else:
                    LOG.debug("Copying file '%s' to ZIP assembly dir...",
                              plist_file)
                    files_to_compress.add(os.path.join(input_path, f))

                    plist_mtime = util.get_last_mod_time(plist_file)

                    # Check if any source file corresponding to a plist
                    # file changed since the plist file was generated.
                    for k, v in source_file_mod_times.items():
                        if v > plist_mtime:
                            changed_files.add(k)
            elif f == 'metadata.json':
                metadata_json_to_compress.add(os.path.join(input_path, f))
            elif f == 'skip_file':
                files_to_compress.add(os.path.join(input_path, f))

    if changed_files:
        changed_files = '\n'.join([' - ' + f for f in changed_files])
        LOG.warning(
            "The following source file contents changed since the "
            "latest analysis:\n%s\nPlease analyze your project "
            "again to update the reports!", changed_files)
        sys.exit(1)

    with zipfile.ZipFile(zip_file, 'a', allowZip64=True) as zipf:
        # Add the files to the zip which will be sent to the server.
        for ftc in files_to_compress:
            _, filename = os.path.split(ftc)
            zip_target = os.path.join('reports', filename)
            zipf.write(ftc, zip_target)

        merged_metadata = merge_metadata_json(metadata_json_to_compress,
                                              len(inputs))
        zipf.writestr(os.path.join('reports', 'metadata.json'),
                      json.dumps(merged_metadata))

        if not hash_to_file:
            LOG.warning("There is no report to store. After uploading these "
                        "results the previous reports become resolved.")

        file_hashes = list(hash_to_file.keys())

        LOG.debug("Get missing content hashes from the server.")
        # Only send the file contents the server does not already have.
        necessary_hashes = client.getMissingContentHashes(file_hashes) \
            if file_hashes else []

        for f, h in file_to_hash.items():
            if h in necessary_hashes or h in file_hash_with_review_status:
                LOG.debug("File contents for '%s' needed by the server", f)
                zipf.write(f, os.path.join('root', f.lstrip('/')))

        zipf.writestr('content_hashes.json', json.dumps(file_to_hash))

    # Compressing .zip file
    with open(zip_file, 'rb') as source:
        compressed = zlib.compress(source.read(), zlib.Z_BEST_COMPRESSION)

    with open(zip_file, 'wb') as target:
        target.write(compressed)

    LOG.debug("[ZIP] Mass store zip written at '%s'", zip_file)

    if missing_source_files:
        LOG.warning("Missing source files: \n%s", '\n'.join(
            [" - " + f_ for f_ in missing_source_files]))