import io
import json
import os
import pathlib

# Note: LOG, csv_parser, xml_parser, is_generic_package and find_path_prefix
# are assumed to be provided by this module's other imports.


def convert_dataflow(working_dir, tool_args, dataflows):
    """
    Convert dataflow into a simpler source and sink format for better
    representation in SARIF-based viewers

    :param working_dir: Work directory
    :param tool_args: Tool args
    :param dataflows: List of dataflows from Inspect
    :return: List of filename and location
    """
    if not dataflows:
        return None
    file_name_prefix = ""
    location_list = []
    for flow in dataflows:
        fn = flow["location"].get("fileName")
        if not fn or fn == "N/A":
            continue
        # Skip locations that point inside generic (library) packages
        if not is_generic_package(fn):
            location = flow["location"]
            fileName = location.get("fileName")
            if not file_name_prefix:
                file_name_prefix = find_path_prefix(working_dir, fileName)
            location_list.append(
                {
                    "filename": os.path.join(file_name_prefix, fileName),
                    "line_number": location.get("lineNumber"),
                }
            )
    # Keep only the source and the sink; collapse to a single entry
    # when they refer to the same file and line
    if len(location_list) >= 2:
        first = location_list[0]
        last = location_list[-1]
        if (
            first["filename"] == last["filename"]
            and first["line_number"] == last["line_number"]
        ):
            location_list = [first]
        else:
            location_list = [first, last]
    return location_list
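
# A minimal usage sketch, not part of the original module: it shows the input
# shape convert_dataflow reads ("location" with "fileName"/"lineNumber") and
# how a multi-hop flow collapses to source and sink. The file names and line
# numbers are made up, and the call depends on the module-level helpers
# is_generic_package and find_path_prefix referenced above.
def _demo_convert_dataflow():
    sample_flows = [
        {"location": {"fileName": "app/routes.py", "lineNumber": 12}},
        {"location": {"fileName": "app/service.py", "lineNumber": 40}},
        {"location": {"fileName": "app/db.py", "lineNumber": 102}},
    ]
    # With three hops, only the first (source) and last (sink) locations
    # survive, each as {"filename": ..., "line_number": ...}
    return convert_dataflow("/tmp/work", None, sample_flows)
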
def extract_from_file(tool_name, working_dir, report_file, file_path_list=None):
    """Extract properties from reports

    :param tool_name: Tool name
    :param working_dir: Working directory
    :param report_file: Report file
    :param file_path_list: Full file path for any manipulation
    :return: issues, metrics and skips information
    """
    issues = []
    metrics = None
    skips = []
    # If the tool did not produce any result, do not crash
    if not os.path.isfile(report_file):
        return issues, metrics, skips
    extn = pathlib.PurePosixPath(report_file).suffix
    with io.open(report_file, "r") as rfile:
        # staticcheck uses the jsonlines format
        if tool_name == "staticcheck":
            contents = rfile.read()
            try:
                issues = [
                    json.loads(str(item)) for item in contents.strip().split("\n")
                ]
            except json.decoder.JSONDecodeError:
                LOG.warning(
                    "staticcheck produced no result since the project was not built before analysis!"
                )
            return issues, metrics, skips
        if extn == ".json":
            try:
                report_data = json.loads(rfile.read())
            except json.decoder.JSONDecodeError:
                return issues, metrics, skips
            # Inspect stores issues under the vulnerabilities key
            if tool_name == "inspect":
                file_name_prefix = ""
                for v in report_data.get("vulnerabilities", []):
                    if not v:
                        continue
                    vuln = v["vulnerability"]
                    location = {}
                    # Use the first non-generic location from the dataflow
                    if vuln.get("dataFlow") and vuln.get("dataFlow").get("dataFlow"):
                        for dflow in vuln["dataFlow"]["dataFlow"]["list"]:
                            if not is_generic_package(
                                dflow["location"].get("fileName")
                            ):
                                location = dflow["location"]
                                break
                    fileName = location.get("fileName")
                    if not fileName or fileName == "N/A":
                        continue
                    if not file_name_prefix:
                        file_name_prefix = find_path_prefix(working_dir, fileName)
                    issues.append(
                        {
                            "rule_id": vuln["category"],
                            "title": vuln["title"],
                            "description": vuln["description"],
                            "score": vuln["score"],
                            "severity": vuln["severity"],
                            "line_number": location.get("lineNumber"),
                            "filename": os.path.join(file_name_prefix, fileName),
                            "first_found": vuln["firstVersionDetected"],
                            "issue_confidence": "HIGH",
                        }
                    )
            elif isinstance(report_data, list):
                issues = report_data
            else:
                if tool_name == "checkov":
                    issues = report_data.get("results", {}).get("failed_checks")
                elif "sec_issues" in report_data:
                    # NodeJsScan uses sec_issues
                    sec_data = report_data["sec_issues"]
                    for key, value in sec_data.items():
                        if isinstance(value, list):
                            issues += value
                        else:
                            issues.append(value)
                elif "Issues" in report_data:
                    tmpL = report_data.get("Issues", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result", tool_name)
                elif "results" in report_data:
                    tmpL = report_data.get("results", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result", tool_name)
        if extn == ".csv":
            headers, issues = csv_parser.get_report_data(rfile)
        if extn == ".xml":
            issues, metrics = xml_parser.get_report_data(rfile, file_path_list)
    return issues, metrics, skips
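
# A minimal usage sketch, not part of the original module: it exercises the
# staticcheck (jsonlines) branch of extract_from_file by writing a one-line
# report to a temporary file and parsing it back. The report content below is
# hypothetical sample data, not real staticcheck output.
def _demo_extract_from_file():
    import tempfile

    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False
    ) as tmp:
        tmp.write('{"code": "SA4006", "severity": "error"}\n')
        report = tmp.name
    try:
        # staticcheck reports return before the extension-based branches,
        # so metrics and skips stay at their defaults (None and [])
        issues, metrics, skips = extract_from_file(
            "staticcheck", "/tmp/work", report
        )
    finally:
        os.unlink(report)
    return issues, metrics, skips
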