def java_build(src, reports_dir, lang_tools):
    """
    Automatically build java project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools

    :return: boolean status from the build. True if the command executed
        successfully. False otherwise
    """
    cmd_args = []
    pom_files = [p.as_posix() for p in Path(src).glob("pom.xml")]
    env = os.environ.copy()
    # Allow opting in to Java 8; otherwise default to the Java 11 home
    if os.environ.get("USE_JAVA_8") or os.environ.get("WITH_JAVA_8"):
        env["SCAN_JAVA_HOME"] = os.environ.get("SCAN_JAVA_8_HOME")
    else:
        env["SCAN_JAVA_HOME"] = os.environ.get("SCAN_JAVA_11_HOME")
    if pom_files:
        cmd_args = lang_tools.get("maven")
    else:
        # Only look for gradle when there is no maven pom
        gradle_files = [p.as_posix() for p in Path(src).glob("build.gradle")]
        if gradle_files:
            cmd_args = lang_tools.get("gradle")
    if not cmd_args:
        LOG.info(
            "Java auto build is supported only for maven or gradle based projects"
        )
        return False
    cp = exec_tool(cmd_args, src, env=env, stdout=subprocess.PIPE)
    # exec_tool returns None when the command could not be executed at all;
    # previously this crashed with AttributeError on cp.stdout
    if not cp:
        return False
    LOG.debug(cp.stdout)
    return cp.returncode == 0
def kotlin_build(src, reports_dir, lang_tools): """ Automatically build kotlin project :param src: Source directory :param reports_dir: Reports directory to store any logs :param lang_tools: Language specific build tools :return: boolean status from the build. True if the command executed successfully. False otherwise """ # Check if this is a android kotlin project gradle_kts_files = [p.as_posix() for p in Path(src).rglob("build.gradle.kts")] if find_files(src, "proguard-rules.pro", False, True) or find_files( src, "AndroidManifest.xml", False, True ): return android_build(src, reports_dir, lang_tools) if gradle_kts_files: cmd_args = get_gradle_cmd(src, lang_tools.get("gradle")) cp = exec_tool( "auto-build", cmd_args, src, env=get_env(), stdout=subprocess.PIPE ) if cp: LOG.debug(cp.stdout) return cp.returncode == 0 else: return java_build(src, reports_dir, lang_tools)
def java_build(src, reports_dir, lang_tools):
    """
    Automatically build java project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools

    :return: boolean status from the build. True if the command executed
        successfully. False otherwise
    """
    cmd_args = []
    env = get_env()
    # Use any() so the recursive globs stop at the first match instead of
    # walking the entire tree three times up front
    if any(Path(src).rglob("pom.xml")):
        cmd_args = lang_tools.get("maven")
    elif any(Path(src).rglob("build.gradle")):
        cmd_args = get_gradle_cmd(src, lang_tools.get("gradle"))
    elif any(Path(src).rglob("build.sbt")):
        cmd_args = lang_tools.get("sbt")
    if not cmd_args:
        # Message previously omitted sbt even though it is supported above
        LOG.info(
            "Java auto build is supported only for maven, gradle or sbt based projects"
        )
        return False
    cp = exec_tool("auto-build", cmd_args, src, env=env, stdout=subprocess.PIPE)
    if cp:
        LOG.debug(cp.stdout)
        return cp.returncode == 0
    return False
def exec_tool(args, cwd=None, env=None, stdout=subprocess.DEVNULL):
    """
    Convenience method to invoke cli tools

    Args:
      args cli command and args
      cwd Current working directory
      env Environment variables (defaults to a fresh copy of the current
          process environment on every call)
      stdout stdout configuration for run command

    Returns:
      CompletedProcess instance, or None when the command could not be run
    """
    # BUG FIX: the default was `env=os.environ.copy()`, a mutable default
    # evaluated once at import time, so environment changes made after import
    # were silently ignored. Copy per call instead.
    if env is None:
        env = os.environ.copy()
    try:
        env = use_java(env)
        LOG.info("=" * 80)
        LOG.debug('⚡︎ Executing "{}"'.format(" ".join(args)))
        # shell=False with a list of args avoids shell-injection issues
        cp = subprocess.run(
            args,
            stdout=stdout,
            stderr=subprocess.STDOUT,
            cwd=cwd,
            env=env,
            check=False,
            shell=False,
            encoding="utf-8",
        )
        return cp
    except Exception as e:
        # Best effort: callers are expected to handle a None return
        LOG.error(e)
        return None
def nodejs_build(src, reports_dir, lang_tools):
    """
    Automatically build nodejs project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools

    :return: boolean status from the build. True if the command executed
        successfully. False otherwise
    """
    cmd_args = lang_tools.get("npm")
    yarn_mode = False
    pjson_files = [p.as_posix() for p in Path(src).glob("package.json")]
    ylock_files = [p.as_posix() for p in Path(src).glob("yarn.lock")]
    if ylock_files:
        # A yarn.lock means the project is yarn managed
        cmd_args = lang_tools.get("yarn")
        yarn_mode = True
    elif not pjson_files:
        LOG.info(
            "Nodejs auto build is supported only for npm or yarn based projects"
        )
        return False
    cp = exec_tool(cmd_args, src)
    # exec_tool returns None when the command could not be executed at all;
    # previously this crashed with AttributeError on cp.stdout
    if not cp:
        return False
    LOG.debug(cp.stdout)
    ret = cp.returncode == 0
    try:
        # Best-effort `run build` step; failure here does not change ret
        cmd_args = ["npm"]
        if yarn_mode:
            cmd_args = ["yarn"]
        cmd_args += ["run", "build"]
        exec_tool(cmd_args, src)
    except Exception:
        LOG.debug("Automatic build has failed for the node.js project")
    return ret
def android_build(src, reports_dir, lang_tools):
    """
    Automatically build android project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools (overridden below with
        the android entry from build_tools_map)

    :return: boolean status from the build. True if the command executed
        successfully. False otherwise
    """
    # An android SDK location is mandatory for gradle android builds
    if not os.getenv("ANDROID_SDK_ROOT") and not os.getenv("ANDROID_HOME"):
        LOG.info(
            "ANDROID_SDK_ROOT or ANDROID_HOME should be set for automatically building android projects"
        )
        return False
    # NOTE: deliberately shadows the lang_tools parameter — android projects
    # always use the android tool configuration regardless of the caller
    lang_tools = build_tools_map.get("android")
    env = get_env()
    gradle_files = [p.as_posix() for p in Path(src).rglob("build.gradle")]
    gradle_kts_files = [
        p.as_posix() for p in Path(src).rglob("build.gradle.kts")
    ]
    if gradle_files or gradle_kts_files:
        # Prefer the project's gradle wrapper when present
        cmd_args = get_gradle_cmd(src, lang_tools.get("gradle"))
        cp = exec_tool("auto-build", cmd_args, src, env=env, stdout=subprocess.PIPE)
        if cp:
            LOG.debug(cp.stdout)
            return cp.returncode == 0
    return False
def auto_build(type_list, src, reports_dir):
    """
    Automatically build project identified by type

    :param type_list: Project types
    :param src: Source directory
    :param reports_dir: Reports directory to store any logs

    :return: boolean status from the build. True if the command executed
        successfully. False otherwise
    """
    ret = True
    for ptype in type_list:
        lang_tools = build_tools_map.get(ptype)
        if not lang_tools:
            continue
        # A plain list means a single command that can be executed directly
        if isinstance(lang_tools, list):
            cp = exec_tool(
                lang_tools, src, env=os.environ.copy(), stdout=subprocess.PIPE
            )
            # exec_tool returns None when the command could not be executed;
            # previously this crashed with AttributeError on cp.stdout
            if cp:
                LOG.debug(cp.stdout)
                ret = ret & (cp.returncode == 0)
            else:
                ret = False
        # Look for any <type>_build function in this module for execution
        try:
            ret = ret & getattr(sys.modules[__name__], "%s_build" % ptype)(
                src, reports_dir, lang_tools
            )
        except Exception:
            LOG.debug("Unable to auto build project of type {}".format(ptype))
    return ret
def annotate_pr(self, repo_context, findings_file, report_summary, build_status):
    """Annotate the GitHub pull request associated with this scan run.

    Creates a commit status, locates the workflow run and, when findings
    exist, posts a review on the associated pull requests. All failures are
    swallowed and logged at debug level (best-effort annotation).

    :param repo_context: Repository context (provider, tokens, refs)
    :param findings_file: Path to the JSON findings file; no-op when empty
    :param report_summary: Per-tool summary dict used in the status/review
    :param build_status: Overall build status string ("pass"/"fail")
    """
    if not findings_file:
        return
    with open(findings_file, mode="r") as fp:
        try:
            github_context = self.get_context(repo_context)
            findings_obj = json.load(fp)
            findings = findings_obj.get("findings")
            if not findings:
                # No return here on purpose: a status is still created below
                LOG.debug("No findings from scan available to report")
            # `g` presumably is the authenticated GitHub client — without a
            # token or client there is nothing we can do
            if not github_context.get("githubToken") or not g:
                LOG.debug("Did not receive GITHUB_TOKEN")
                return
            self.create_status(
                findings, github_context, report_summary, build_status
            )
            workflow_run = self.get_workflow(github_context)
            if not workflow_run:
                LOG.debug("Unable to find the workflow run for this invocation")
                return
            pull_requests = workflow_run.pull_requests
            if not pull_requests:
                LOG.debug("No Pull Requests are associated with this workflow run")
                return
            if findings:
                self.create_review(
                    pull_requests,
                    findings,
                    github_context,
                    report_summary,
                    build_status,
                )
        except Exception as e:
            LOG.debug(e)
def report(vulnerabilities, insights, report_fname):
    """
    Prints issues in JSON format.

    Sanitised and unknown vulnerabilities are dropped, duplicates (same rule
    plus source/sink location) are emitted once, and insights are appended
    after the vulnerabilities.

    Args:
      vulnerabilities: list of vulnerabilities to report
      insights: list of insights
      report_fname: The output file name
    """
    TZ_AGNOSTIC_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
    time_string = datetime.utcnow().strftime(TZ_AGNOSTIC_FORMAT)
    filtered_vulns = []
    seen_keys = set()
    for vuln in vulnerabilities:
        if isinstance(vuln, (SanitisedVulnerability, UnknownVulnerability)):
            continue
        avuln = vuln.as_dict()
        # Dedup on rule + source/sink coordinates
        avuln_key = f"""{avuln["rule_id"]}|{avuln["source"]["line_number"]}|{avuln["source"]["path"]}|{avuln["sink"]["line_number"]}|{avuln["sink"]["path"]}"""
        if avuln_key in seen_keys:
            continue
        seen_keys.add(avuln_key)
        filtered_vulns.append(avuln)
    for ins in insights:
        filtered_vulns.append(
            {
                "rule_id": ins.code,
                "rule_name": ins.name,
                "short_description": ins.short_description,
                "description": ins.short_description,
                "recommendation": ins.recommendation,
                "cwe_category": ins.cwe_category,
                "owasp_category": ins.owasp_category,
                "severity": ins.severity,
                "source": {
                    "trigger_word": ins.source.trigger_word,
                    "line_number": ins.source.line_number,
                    "label": ins.source.label,
                    "path": ins.source.path,
                },
                "sink": {
                    "trigger_word": ins.sink.trigger_word,
                    "line_number": ins.sink.line_number,
                    "label": ins.sink.label,
                    "path": ins.sink.path,
                },
            }
        )
    machine_output = {
        "generated_at": time_string,
        "vulnerabilities": filtered_vulns,
    }
    try:
        with open(report_fname, mode="w") as fileobj:
            json.dump(machine_output, fileobj, indent=2)
    except Exception as e:
        LOG.debug(e)
def get_code(self, max_lines=config.get("CODE_SNIPPET_MAX_LINES"), tabbed=False):
    """Gets lines of code from a file the generated this issue.

    :param max_lines: Max lines of context to return
    :param tabbed: Use tabbing in the output
    :return: strings of code
    """
    if not self.fname:
        return ""
    lines = []
    max_lines = max(max_lines, 1)
    if not self.snippet_based:
        # Window of lines centred around self.lineno
        lmin = max(1, self.lineno - max_lines // 2)
        lmax = lmin + len(self.linerange) + max_lines - 1
        tmplt = "%i\t%s" if tabbed else "%i %s"
        for line in moves.xrange(lmin, lmax):
            text = self._get_code_line(self.fname, line)
            if isinstance(text, bytes):
                text = text.decode("utf-8", "ignore")
            # An empty string means end of file (or unreadable line): stop
            if not len(text):
                break
            lines.append(tmplt % (line, text))
        if lines:
            return "".join(lines)
        elif self.code:
            # Validate if the code snippet is in the right format: the
            # stored snippet is only usable when it already starts with a
            # line number (the "%i %s" shape produced above)
            orig_lines = self.code.split("\n")
            if orig_lines:
                orig_first_line = orig_lines[0]
                firstword = orig_first_line.split(" ", 1)[0]
                if firstword and str(firstword).isdigit():
                    return self.code
            return ""
        else:
            return ""
    else:
        # Snippet based issue: locate the snippet in the file to recover the
        # real line number, then render the stored snippet
        lineno = self.lineno
        try:
            tmplineno = 1
            with open(self.fname, mode="r") as fp:
                for aline in fp:
                    if aline.strip() == self.code.strip():
                        lineno = tmplineno
                        # Fix the line number (side effect on self)
                        self.lineno = lineno
                        break
                    tmplineno = tmplineno + 1
        except Exception as e:
            LOG.debug(e)
        tmplt = "%i\t%s" if tabbed else "%i %s"
        return tmplt % (lineno, self.code)
def find_insights(ast_tree, path):
    """Run every module-level ``_check*`` function against the tree.

    :param ast_tree: Parsed AST to inspect
    :param path: Path of the file being analysed
    :return: list of violations collected from all checkers
    """
    collected = []
    this_module = sys.modules[__name__]
    # Invoke all the _check methods defined in this module
    for attr_name in list(this_module.__dict__.keys()):
        if not attr_name.startswith("_check"):
            continue
        try:
            checker = getattr(this_module, attr_name, None)
            if checker:
                found = checker(ast_tree, path)
                if found:
                    collected += found
        except Exception as e:
            # A broken checker must not abort the whole analysis
            LOG.debug(e)
    return collected
def nodejs_build(src, reports_dir, lang_tools):
    """
    Automatically build nodejs project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools

    :return: boolean status from the build. True if the command executed
        successfully. False otherwise
    """
    yarn_mode = False
    rush_mode = False
    rush_json = [p.as_posix() for p in Path(src).glob("rush.json")]
    package_json = [p.as_posix() for p in Path(src).glob("package.json")]
    yarn_lock = [p.as_posix() for p in Path(src).glob("yarn.lock")]
    # Pick the package manager: yarn.lock wins, then rush.json, then npm
    if yarn_lock:
        cmd_args = lang_tools.get("yarn")
        yarn_mode = True
    elif rush_json:
        cmd_args = lang_tools.get("rush")
        rush_mode = True
    elif package_json:
        cmd_args = lang_tools.get("npm")
    else:
        LOG.debug(
            "Nodejs auto build is supported only for npm or yarn or rush based projects"
        )
        return False
    cp = exec_tool("auto-build", cmd_args, src)
    ret = cp.returncode == 0 if cp else False
    # Best-effort build step; its outcome does not change ret
    try:
        cmd_args = ["yarn"] if yarn_mode else ["npm"]
        if rush_mode:
            cmd_args = ["rush", "rebuild"]
        else:
            cmd_args += ["run", "build"]
        exec_tool("auto-build", cmd_args, src)
    except Exception:
        if rush_mode:
            LOG.warning(
                "Automatic build for rush.js has failed. Try installing the packages manually before invoking scan.\nIf this works then let us know the build steps by filing an issue."
            )
        else:
            LOG.debug("Automatic build has failed for the node.js project")
    return ret
def track(track_obj):
    """
    Method to send a track message to the telemetry api

    :param track_obj: Payload dict posted to the telemetry endpoint
    :return: None
    """
    # Telemetry is opted out when DISABLE_TELEMETRY is "true" or "1"
    opted_out = config.get("DISABLE_TELEMETRY", False) in ("true", "1")
    if not track_obj or opted_out:
        return
    try:
        track_obj["tool"] = "@ShiftLeft/scan"
        requests.post(config.TELEMETRY_URL, json=track_obj)
    except Exception:
        # Telemetry is strictly best-effort
        LOG.debug("Unable to send telemetry")
def get_gradle_cmd(src, cmd_args):
    """Prefer a project-local gradle wrapper over the global gradle binary.

    When ``<src>/gradlew`` exists it is made executable (best effort) and
    substituted as the command; otherwise ``cmd_args`` is returned unchanged.

    :param src: Source directory
    :param cmd_args: gradle command list; element 0 is replaced in place
    :return: the (possibly modified) command list
    """
    wrapper = os.path.join(src, "gradlew")
    # Check for the presence of local gradle wrapper
    if not os.path.exists(wrapper):
        return cmd_args
    perms = (
        stat.S_IRUSR
        | stat.S_IWUSR
        | stat.S_IXUSR
        | stat.S_IRGRP
        | stat.S_IWGRP
        | stat.S_IROTH
    )
    try:
        os.chmod(wrapper, perms)
    except Exception:
        LOG.debug("Ensure {} has execute permissions".format(wrapper))
    cmd_args[0] = wrapper
    return cmd_args
def should_suppress_fingerprint(fingerprint, working_dir):
    """Method to check if a result has to be suppressed based on its fingerprint hash

    :param fingerprint: Fingerprint hash dict keyed by hash type
    :param working_dir: Working directory holding the suppress configuration
    :return: True when any carried hash is in the suppress list, else False
    """
    if not fingerprint:
        return False
    suppress_fps = config.get_suppress_fingerprints(working_dir)
    if not suppress_fps or not isinstance(suppress_fps, dict):
        return False
    # suppress_fps = {"scanPrimaryLocationHash": [], "scanTagsHash": [], "scanFileHash": []}
    for sk, svl in suppress_fps.items():
        if not svl:
            continue
        # Use .get: a result may not carry every hash type; direct indexing
        # previously raised KeyError on a missing key
        if fingerprint.get(sk) in svl:
            LOG.debug(f"Suppressing fingerprint {fingerprint.get(sk)} of type {sk}")
            return True
    return False
def _get_code_line(self, fname, line): """Return the given line from the file. Handles any utf8 error from tokenize :param fname: File name :param line: Line number :return: Exact line as string """ text = "" try: text = linecache.getline(fname, line) except UnicodeDecodeError: LOG.debug( f"Error parsing the file {fname} in utf-8. Falling to binary mode" ) with io.open(fname, "rb") as fp: all_lines = fp.readlines() if line < len(all_lines): text = all_lines[line] return text
def get_code(self, max_lines=3, tabbed=False):
    """Gets lines of code from a file the generated this issue.

    :param max_lines: Max lines of context to return
    :param tabbed: Use tabbing in the output
    :return: strings of code
    """
    if not self.fname:
        return ""
    lines = []
    max_lines = max(max_lines, 1)
    if not self.snippet_based:
        # Window of lines centred around self.lineno
        lmin = max(1, self.lineno - max_lines // 2)
        lmax = lmin + len(self.linerange) + max_lines - 1
        tmplt = "%i\t%s" if tabbed else "%i %s"
        for line in moves.xrange(lmin, lmax):
            text = linecache.getline(self.fname, line)
            if isinstance(text, bytes):
                text = text.decode("utf-8")
            # Empty string means end of file: stop collecting
            if not len(text):
                break
            lines.append(tmplt % (line, text))
        return "".join(lines)
    else:
        # Snippet based issue: scan the file for the snippet to recover the
        # real line number, then render the stored snippet
        lineno = self.lineno
        try:
            tmplineno = 1
            with open(self.fname, mode="r") as fp:
                for aline in fp:
                    if aline.strip() == self.code.strip():
                        lineno = tmplineno
                        # Fix the line number (side effect on self)
                        self.lineno = lineno
                        break
                    tmplineno = tmplineno + 1
        except Exception as e:
            LOG.debug(e)
        tmplt = "%i\t%s" if tabbed else "%i %s"
        return tmplt % (lineno, self.code)
def php_build(src, reports_dir, lang_tools):
    """
    Automatically build php project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools

    :return: boolean status from the build. True if the command executed
        successfully. False otherwise
    """
    ret = False
    cmd_args = lang_tools.get("install")
    cjson_files = [p.as_posix() for p in Path(src).glob("composer.json")]
    # If there is no composer.json try to create one
    if not cjson_files:
        cp = exec_tool(
            "auto-build",
            lang_tools.get("init"),
            src,
            env=os.environ.copy(),
            stdout=subprocess.PIPE,
        )
        if cp:
            LOG.debug(cp.stdout)
    # composer install
    cp = exec_tool(
        "auto-build", cmd_args, src, env=os.environ.copy(), stdout=subprocess.PIPE
    )
    if cp:
        LOG.debug(cp.stdout)
        ret = cp.returncode == 0
    # If composer install fails, try composer update
    if not ret:
        cmd_args = lang_tools.get("update")
        cp = exec_tool(
            "auto-build", cmd_args, src, env=os.environ.copy(), stdout=subprocess.PIPE
        )
        if cp:
            LOG.debug(cp.stdout)
            ret = cp.returncode == 0
    return ret
def inspect_scan(language, src, reports_dir, convert, repo_context):
    """
    Method to perform inspect cloud scan

    Args:
      language Project language
      src Project dir
      reports_dir Directory for output reports
      convert Boolean to enable normalisation of reports json
      repo_context Repo context
    """
    run_uuid = config.get("run_uuid")
    cpg_mode = config.get("SHIFTLEFT_CPG")
    env = os.environ.copy()
    # The sl cli is run under Java 8
    env["SCAN_JAVA_HOME"] = os.environ.get("SCAN_JAVA_8_HOME")
    report_fname = utils.get_report_file(
        "ng-sast", reports_dir, convert, ext_name="json"
    )
    sl_cmd = config.get("SHIFTLEFT_NGSAST_CMD")
    # Check if sl cli is available
    if not utils.check_command(sl_cmd):
        LOG.warning(
            "sl cli is not available. Please check if your build uses shiftleft/scan-java as the image"
        )
        return
    analyze_files = config.get("SHIFTLEFT_ANALYZE_FILE")
    analyze_target_dir = config.get(
        "SHIFTLEFT_ANALYZE_DIR", os.path.join(src, "target")
    )
    extra_args = None
    if not analyze_files:
        # Locate the artifacts to upload based on the project language
        if language == "java":
            analyze_files = utils.find_java_artifacts(analyze_target_dir)
        elif language == "csharp":
            if not utils.check_dotnet():
                LOG.warning(
                    "dotnet is not available. Please check if your build uses shiftleft/scan-csharp as the image"
                )
                return
            analyze_files = utils.find_csharp_artifacts(src)
            cpg_mode = True
        else:
            # ts/nodejs are analyzed as js with ts/babel flags
            if language == "ts" or language == "nodejs":
                language = "js"
                extra_args = ["--", "--ts", "--babel"]
            analyze_files = [src]
            cpg_mode = True
    app_name = find_app_name(src, repo_context)
    branch = repo_context.get("revisionId")
    if not branch:
        branch = "master"
    if not analyze_files:
        LOG.warning(
            "Unable to find any build artifacts. Compile your project first before invoking scan or use the auto build feature."
        )
        return
    # Only a single artifact can be analyzed per invocation
    if isinstance(analyze_files, list) and len(analyze_files) > 1:
        LOG.warning(
            "Multiple files found in {}. Only {} will be analyzed".format(
                analyze_target_dir, analyze_files[0]
            )
        )
        analyze_files = analyze_files[0]
    # None entries are filtered out below, allowing conditional flags here
    sl_args = [
        sl_cmd,
        "analyze",
        "--no-auto-update" if language == "java" else None,
        "--wait",
        "--cpg" if cpg_mode else None,
        "--" + language,
        "--tag",
        "branch=" + branch,
        "--app",
        app_name,
    ]
    sl_args += [analyze_files]
    if extra_args:
        sl_args += extra_args
    sl_args = [arg for arg in sl_args if arg is not None]
    LOG.info(
        "About to perform ShiftLeft NG SAST cloud analysis. This might take a few minutes ..."
    )
    LOG.debug(" ".join(sl_args))
    LOG.debug(repo_context)
    cp = exec_tool("NG SAST", sl_args, src, env=env)
    # NOTE(review): exec_tool can return None on launch failure, in which
    # case cp.returncode below would raise — confirm against exec_tool
    if cp.returncode != 0:
        LOG.warning("NG SAST cloud analyze has failed with the below logs")
        LOG.debug(sl_args)
        LOG.info(cp.stderr)
        return
    findings_data = fetch_findings(app_name, branch, report_fname)
    if findings_data and convert:
        crep_fname = utils.get_report_file(
            "ng-sast", reports_dir, convert, ext_name="sarif"
        )
        convertLib.convert_file("ng-sast", sl_args[1:], src, report_fname, crep_fname)
    # Telemetry about this scan invocation
    track({"id": run_uuid, "scan_mode": "ng-sast", "sl_args": sl_args})
def report(
    tool_name,
    tool_args,
    working_dir,
    metrics,
    skips,
    issues,
    crep_fname,
    file_path_list=None,
):
    """Prints issues in SARIF format

    :param tool_name: tool name
    :param tool_args: Args used for the tool
    :param working_dir: Working directory
    :param metrics: metrics data
    :param skips: skips data
    :param issues: issues data
    :param crep_fname: The output file name
    :param file_path_list: Full file path for any manipulation

    :return serialized_log: SARIF output data
    """
    if not tool_args:
        tool_args = []
    tool_args_str = tool_args
    if isinstance(tool_args, list):
        tool_args_str = " ".join(tool_args)
    repo_details = find_repo_details(working_dir)
    log_uuid = str(uuid.uuid4())
    run_uuid = config.get("run_uuid")
    # working directory to use in the log
    WORKSPACE_PREFIX = config.get("WORKSPACE", None)
    wd_dir_log = WORKSPACE_PREFIX if WORKSPACE_PREFIX is not None else working_dir
    driver_name = config.tool_purpose_message.get(tool_name, tool_name)
    # Construct SARIF log (sarif-schema-2.1.0)
    log = om.SarifLog(
        schema_uri="https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
        version="2.1.0",
        inline_external_properties=[
            om.ExternalProperties(guid=log_uuid, run_guid=run_uuid)
        ],
        runs=[
            om.Run(
                automation_details=om.RunAutomationDetails(
                    guid=log_uuid,
                    description=om.Message(
                        text="Static Analysis Security Test results using @ShiftLeft/sast-scan"
                    ),
                ),
                tool=om.Tool(
                    driver=om.ToolComponent(
                        name=driver_name, full_name=driver_name, version="1.0.0-scan"
                    )
                ),
                invocations=[
                    om.Invocation(
                        end_time_utc=datetime.datetime.utcnow().strftime(TS_FORMAT),
                        execution_successful=True,
                        working_directory=om.ArtifactLocation(uri=to_uri(wd_dir_log)),
                    )
                ],
                # Record how the raw tool output was converted to SARIF
                conversion={
                    "tool": om.Tool(
                        driver=om.ToolComponent(name="@ShiftLeft/sast-scan")
                    ),
                    "invocation": om.Invocation(
                        execution_successful=True,
                        command_line=tool_args_str,
                        arguments=tool_args,
                        working_directory=om.ArtifactLocation(uri=to_uri(wd_dir_log)),
                        end_time_utc=datetime.datetime.utcnow().strftime(TS_FORMAT),
                    ),
                },
                version_control_provenance=[
                    om.VersionControlDetails(
                        repository_uri=repo_details["repositoryUri"],
                        branch=repo_details["branch"],
                        revision_id=repo_details["revisionId"],
                    )
                ],
            )
        ],
    )
    run = log.runs[0]
    invocation = run.invocations[0]
    # Populate notifications and results on the single run
    add_skipped_file_notifications(skips, invocation)
    add_results(tool_name, issues, run, file_path_list, working_dir)
    serialized_log = to_json(log)
    if crep_fname:
        html_file = crep_fname.replace(".sarif", ".html")
        with io.open(crep_fname, "w") as fileobj:
            fileobj.write(serialized_log)
            # empty-scan produces no meaningful HTML report
            if tool_name != "empty-scan":
                render_html(json.loads(serialized_log), html_file)
            if fileobj.name != sys.stdout.name:
                LOG.debug(
                    "SARIF and HTML report written to file: %s, %s :thumbsup:",
                    fileobj.name,
                    html_file,
                )
    return serialized_log
def extract_from_file(
    tool_name, tool_args, working_dir, report_file, file_path_list=None
):
    """Extract properties from reports

    :param tool_name: tool name
    :param tool_args: tool args
    :param working_dir: Working directory
    :param report_file: Report file
    :param file_path_list: Full file path for any manipulation

    :return issues, metrics, skips information
    """
    issues = []
    metrics = None
    skips = []
    # If the tools did not produce any result do not crash
    if not os.path.isfile(report_file):
        return issues, metrics, skips
    extn = pathlib.PurePosixPath(report_file).suffix
    with io.open(report_file, "r") as rfile:
        # Static check use jsonlines format, duh
        if tool_name == "staticcheck":
            contents = rfile.read()
            try:
                issues = [
                    json.loads(str(item)) for item in contents.strip().split("\n")
                ]
            except json.decoder.JSONDecodeError:
                LOG.warning(
                    "staticcheck produced no result since the project was not built before analysis!"
                )
            return issues, metrics, skips
        if extn == ".json":
            try:
                report_data = json.loads(rfile.read())
            except json.decoder.JSONDecodeError:
                # Unparseable report: return the empty defaults
                return issues, metrics, skips
            # NG SAST (Formerly Inspect) uses vulnerabilities
            if tool_name == "ng-sast":
                for v in report_data.get("vulnerabilities"):
                    if not v:
                        continue
                    vuln = v["vulnerability"]
                    location_list = []
                    if vuln.get("dataFlow") and vuln.get("dataFlow", {}).get(
                        "dataFlow"
                    ):
                        location_list = convert_dataflow(
                            working_dir, tool_args, vuln["dataFlow"]["dataFlow"]["list"]
                        )
                    # One issue per dataflow location
                    for location in location_list:
                        issues.append(
                            {
                                "rule_id": vuln["category"],
                                "title": vuln["title"],
                                "description": vuln["description"],
                                "score": vuln["score"],
                                "severity": vuln["severity"],
                                "line_number": location.get("line_number"),
                                "filename": location.get("filename"),
                                "first_found": vuln["firstVersionDetected"],
                                "issue_confidence": "HIGH",
                            }
                        )
            elif tool_name == "taint-php":
                for entry in report_data:
                    taint_trace = entry.get("taint_trace")
                    labels = []
                    if taint_trace:
                        source, sink, labels = get_from_taints(taint_trace)
                    else:
                        source, _, _ = get_from_taints([entry])
                    issues.append(
                        {
                            "rule_id": entry.get("shortcode"),
                            "test_name": entry.get("type"),
                            "description": "{}: {}".format(
                                entry.get("message"), "\\n".join(labels)
                            ),
                            "link": entry.get("link"),
                            "severity": entry.get("severity"),
                            "issue_confidence": "HIGH",
                            "line_number": source.get("line_number"),
                            "filename": source.get("filename"),
                        }
                    )
            elif tool_name == "taint-python":
                taint_list = report_data.get("vulnerabilities")
                for taint in taint_list:
                    source = taint.get("source")
                    sink = taint.get("sink")
                    # Collect any present source/sink metadata as tags
                    tags = {}
                    for taint_props in [
                        "source_trigger_word",
                        "source_label",
                        "source_type",
                        "sink_trigger_word",
                        "sink_label",
                        "sink_type",
                    ]:
                        if taint.get(taint_props):
                            tags[taint_props] = taint.get(taint_props)
                    issues.append(
                        {
                            "rule_id": taint.get("rule_id"),
                            "test_name": taint.get("rule_name"),
                            "short_description": taint.get("short_description"),
                            "cwe_category": taint.get("cwe_category"),
                            "owasp_category": taint.get("owasp_category"),
                            "description": taint.get("description"),
                            "severity": taint.get("severity"),
                            "issue_confidence": "HIGH",
                            "line_from": source.get("line_number"),
                            "line_to": sink.get("line_number"),
                            "filename": source.get("path"),
                            "tags": tags,
                        }
                    )
            elif tool_name == "phpstan" or tool_name == "source-php":
                file_errors = report_data.get("files")
                for filename, messageobj in file_errors.items():
                    messages = messageobj.get("messages")
                    for msg in messages:
                        # Create a rule id for phpstan from the first word
                        rule_word = msg.get("message", "").split(" ")[0]
                        rule_word = "phpstan-" + rule_word.lower()
                        issues.append(
                            {
                                "rule_id": rule_word,
                                "title": msg.get("message"),
                                "line_number": msg.get("line"),
                                "filename": filename,
                                "severity": "LOW",
                                "issue_confidence": "MEDIUM",
                            }
                        )
            elif tool_name == "source-js":
                njs_findings = report_data.get("nodejs", {})
                njs_findings.update(report_data.get("templates", {}))
                for k, v in njs_findings.items():
                    # Password detection by njsscan is full of false positives
                    if k == "node_password":
                        continue
                    files = v.get("files", [])
                    metadata = v.get("metadata", {})
                    if not files or not metadata:
                        continue
                    for afile in files:
                        line_number = 0
                        if afile.get("match_lines"):
                            line_number = afile.get("match_lines")[0]
                        issues.append(
                            {
                                "rule_id": metadata.get("owasp")
                                .replace(":", "-")
                                .replace(" ", "")
                                .lower(),
                                "title": metadata.get("cwe"),
                                "description": metadata.get("description"),
                                "severity": metadata.get("severity"),
                                "line_number": line_number,
                                "filename": afile.get("file_path"),
                                "issue_confidence": "HIGH",
                            }
                        )
            elif tool_name == "checkov":
                # checkov emits either a list of runs or a single run
                if isinstance(report_data, list):
                    for rd in report_data:
                        issues += rd.get("results", {}).get("failed_checks")
                else:
                    issues = report_data.get("results", {}).get("failed_checks")
            elif tool_name == "source-ruby":
                issues = report_data.get("warnings", [])
                issues += report_data.get("errors", [])
            elif isinstance(report_data, list):
                issues = report_data
            else:
                if "sec_issues" in report_data:
                    # NodeJsScan uses sec_issues
                    sec_data = report_data["sec_issues"]
                    for key, value in sec_data.items():
                        if isinstance(value, list):
                            issues = issues + value
                        else:
                            issues.append(value)
                elif "Issues" in report_data:
                    tmpL = report_data.get("Issues", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result" % tool_name)
                elif "results" in report_data:
                    tmpL = report_data.get("results", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result" % tool_name)
        if extn == ".csv":
            headers, issues = csv_parser.get_report_data(rfile)
        if extn == ".xml":
            issues, metrics = xml_parser.get_report_data(
                rfile, file_path_list=file_path_list, working_dir=working_dir
            )
    return issues, metrics, skips
def summary(sarif_files, aggregate_file=None, override_rules={}):
    """Generate overall scan summary based on the generated SARIF file

    :param sarif_files: List of generated sarif report files
    :param aggregate_file: Filename to store aggregate data
    :param override_rules Build break rules to override for testing
        (NOTE: mutable default, but it is only read/spread, never mutated)

    :returns dict representing the summary
    """
    report_summary = {}
    build_status = "pass"
    # This is the list of all runs which will get stored as an aggregate
    run_data_list = []
    for sf in sarif_files:
        with open(sf, mode="r") as report_file:
            report_data = json.loads(report_file.read())
            # skip this file if the data is empty
            if not report_data or not report_data.get("runs"):
                LOG.warn("Report file {} is invalid. Skipping ...".format(sf))
                continue
            # Iterate through all the runs
            for run in report_data["runs"]:
                # Add it to the run data list for aggregation
                run_data_list.append(run)
                tool_desc = run["tool"]["driver"]["name"]
                tool_name = tool_desc
                # Initialise severity counters for this tool
                report_summary[tool_name] = {
                    "tool": tool_desc,
                    "critical": 0,
                    "high": 0,
                    "medium": 0,
                    "low": 0,
                    "status": "✅",
                }
                results = run.get("results", [])
                metrics = run.get("properties", {}).get("metrics", None)
                # If the result includes metrics use it. If not compute it
                if metrics:
                    report_summary[tool_name].update(metrics)
                    report_summary[tool_name].pop("total", None)
                else:
                    for aresult in results:
                        sev = aresult["properties"]["issue_severity"].lower()
                        report_summary[tool_name][sev] += 1
                # Compare against the build break rule to determine status;
                # precedence: override_rules > tool_rules > default_rules
                default_rules = config.get("build_break_rules").get("default")
                tool_rules = config.get("build_break_rules").get(tool_name, {})
                build_break_rules = {
                    **default_rules,
                    **tool_rules,
                    **override_rules,
                }
                for rsev in ["critical", "high", "medium", "low"]:
                    if build_break_rules.get("max_" + rsev) is not None:
                        if (
                            report_summary.get(tool_name).get(rsev)
                            > build_break_rules["max_" + rsev]
                        ):
                            report_summary[tool_name]["status"] = "❌"
                            build_status = "fail"
    # Should we store the aggregate data
    if aggregate_file:
        # agg_sarif_file = aggregate_file.replace(".json", ".sarif")
        # aggregate.sarif_aggregate(run_data_list, agg_sarif_file)
        aggregate.jsonl_aggregate(run_data_list, aggregate_file)
        LOG.debug("Aggregate report written to {}\n".format(aggregate_file))
    return report_summary, build_status
def find_repo_details(src_dir=None):
    """Method to find repo details such as url, sha etc
    This will be populated into versionControlProvenance attribute

    :param src_dir: Source directory
    :return: dict with git/ci provider, repository name/uri, revision,
        branch, invoker and pull-request flags
    """
    # See if repository uri is specified in the config
    repositoryName = None
    repositoryUri = ""
    revisionId = ""
    branch = ""
    invokedBy = ""
    pullRequest = False
    gitProvider = ""
    ciProvider = ""
    """
    Since CI servers typically checkout repo in detached mode, we need to rely on environment
    variables as a starting point to find the repo details. To make matters worse, since we
    run the tools inside a container these variables should be passed as part of the docker run
    command. With native integrations such as GitHub action and cloudbuild this could be taken
    care by our builders.

    Env variables detection for popular CI server is implemented here anyways. But they are effective
    only in few cases.

    Azure pipelines - https://docs.microsoft.com/en-us/azure/devops/pipelines/build/variables?view=azure-devops&tabs=yaml
    BitBucket - https://confluence.atlassian.com/bitbucket/environment-variables-in-bitbucket-pipelines-794502608.html
    GitHub actions - https://help.github.com/en/actions/automating-your-workflow-with-github-actions/using-environment-variables
    Google CloudBuild - https://cloud.google.com/cloud-build/docs/configuring-builds/substitute-variable-values
    CircleCI - https://circleci.com/docs/2.0/env-vars/#built-in-environment-variables
    Travis - https://docs.travis-ci.com/user/environment-variables/#default-environment-variables
    AWS CodeBuild - https://docs.aws.amazon.com/codebuild/latest/userguide/build-env-ref-env-vars.html
    GitLab - https://docs.gitlab.com/ee/ci/variables/predefined_variables.html
    Jenkins - https://jenkins.io/doc/book/pipeline/jenkinsfile/#using-environment-variables
    """
    for key, value in os.environ.items():
        # Check REPOSITORY_URL first followed CI specific vars
        # Some CI such as GitHub pass only the slug instead of the full url :(
        if not gitProvider or not ciProvider:
            if key.startswith("GITHUB_"):
                if key == "GITHUB_REPOSITORY":
                    gitProvider = "github"
                if key == "GITHUB_ACTION":
                    ciProvider = "github"
            elif key.startswith("GITLAB_"):
                gitProvider = "gitlab"
                if key == "GITLAB_CI":
                    ciProvider = "gitlab"
            elif key.startswith("BITBUCKET_"):
                gitProvider = "bitbucket"
                if key == "BITBUCKET_BUILD_NUMBER":
                    ciProvider = "bitbucket"
            elif key.startswith("CIRCLE_"):
                ciProvider = "circle"
            elif key.startswith("TRAVIS_"):
                ciProvider = "travis"
            elif key.startswith("CODEBUILD_"):
                ciProvider = "codebuild"
            elif key.startswith("BUILD_REQUESTEDFOREMAIL"):
                ciProvider = "azure"
            elif key.startswith("JENKINS_"):
                ciProvider = "jenkins"
        if not repositoryName:
            if key in [
                "BUILD_REPOSITORY_NAME",
                "GITHUB_REPOSITORY",
                "BITBUCKET_REPO_SLUG",
                "REPO_NAME",
                "CIRCLE_PROJECT_REPONAME",
                "TRAVIS_REPO_SLUG",
                "CI_PROJECT_NAME",
            ]:
                # Slug values look like owner/repo; keep only the repo part
                if "/" in value:
                    repositoryName = value.split("/")[-1]
                else:
                    repositoryName = value
        if not repositoryUri:
            if key in [
                "REPOSITORY_URL",
                "BUILD_REPOSITORY_URI",
                "GITHUB_REPOSITORY",
                "BITBUCKET_GIT_HTTP_ORIGIN",
                "REPO_NAME",
                "CIRCLE_REPOSITORY_URL",
                "TRAVIS_REPO_SLUG",
                "CODEBUILD_SOURCE_REPO_URL",
                "CI_REPOSITORY_URL",
            ]:
                repositoryUri = value
        if key in [
            "COMMIT_SHA",
            "BUILD_SOURCEVERSION",
            "BITBUCKET_COMMIT",
            "GITHUB_SHA",
            "CIRCLE_SHA1",
            "TRAVIS_COMMIT",
            "CODEBUILD_SOURCE_VERSION",
            "CI_COMMIT_SHA",
        ]:
            revisionId = value
        if key in [
            "BRANCH",
            "BUILD_SOURCEBRANCH",
            "BITBUCKET_BRANCH",
            "GITHUB_REF",
            "BRANCH_NAME",
            "CIRCLE_BRANCH",
            "TRAVIS_BRANCH",
            "CI_COMMIT_REF_NAME",
        ]:
            branch = value
        if key in [
            "BUILD_REQUESTEDFOREMAIL",
            "GITHUB_ACTOR",
            "PROJECT_ID",
            "CIRCLE_USERNAME",
            "GITLAB_USER_EMAIL",
        ]:
            invokedBy = value
        if key.startswith("CI_MERGE_REQUEST"):
            pullRequest = True
    if src_dir and os.path.isdir(os.path.join(src_dir, ".git")):
        # Try interacting with git to fill any gaps left by the env vars
        try:
            repo = Repo(src_dir)
            head = repo.head
            if not branch and not head.is_detached:
                branch = repo.active_branch.name
            if not revisionId and head:
                revisionId = head.commit.hexsha
            if not repositoryUri:
                repositoryUri = next(iter(repo.remote().urls))
            if not invokedBy or "@" not in invokedBy:
                if head and head.commit.author and head.commit.author.email:
                    invokedBy = "{} <{}>".format(
                        head.commit.author.name, head.commit.author.email
                    )
        except Exception:
            LOG.debug("Unable to find repo details from the local repository")
    if branch.startswith("refs/pull"):
        pullRequest = True
        branch = branch.replace("refs/pull/", "")
    # Cleanup the variables
    branch = branch.replace("refs/heads/", "")
    if repositoryUri:
        repositoryUri = repositoryUri.replace(
            "[email protected]:", "https://github.com/"
        ).replace(".git", "")
        # Is it a repo slug?
        repo_slug = True
        repositoryUri = sanitize_url(repositoryUri)
        for pref in repo_url_prefixes:
            if repositoryUri.startswith(pref):
                repo_slug = False
                break
        if not repo_slug:
            if "vs-ssh" in repositoryUri:
                repo_slug = False
        # For repo slug just assume github for now
        if repo_slug:
            repositoryUri = "https://github.com/" + repositoryUri
    if not repositoryName and repositoryUri:
        repositoryName = os.path.basename(repositoryUri)
    if not gitProvider:
        # Infer the provider from the uri as a last resort
        if "github" in repositoryUri:
            gitProvider = "github"
        if "gitlab" in repositoryUri:
            gitProvider = "gitlab"
        if "atlassian" in repositoryUri or "bitbucket" in repositoryUri:
            gitProvider = "bitbucket"
        if "azure" in repositoryUri or "visualstudio" in repositoryUri:
            gitProvider = "azure"
            if not ciProvider:
                ciProvider = "azure"
        if not gitProvider and "tfs" in repositoryUri:
            gitProvider = "tfs"
            ciProvider = "tfs"
    return {
        "gitProvider": gitProvider,
        "ciProvider": ciProvider,
        "repositoryName": "" if not repositoryName else repositoryName,
        "repositoryUri": repositoryUri,
        "revisionId": revisionId,
        "branch": branch,
        "invokedBy": invokedBy,
        "pullRequest": pullRequest,
        "botUser": is_bot(invokedBy),
    }
def annotate_pr(self, repo_context, findings_file, report_summary, build_status):
    """Annotate a Bitbucket pull request with the scan results.

    Reads the findings json, posts a markdown summary comment on the PR
    (when a PR id and token are available in the context), then creates a
    code-insights style security report plus one annotation per finding
    via the Bitbucket reports/annotations endpoints.

    :param repo_context: Repo context used to resolve Bitbucket URLs and workspace
    :param findings_file: Path to the findings json file produced by the scan
    :param report_summary: Dict of per-tool summary rows (tool, critical, high, ...)
    :param build_status: Overall build status string; "fail" marks the report FAILED
    """
    # Nothing to report without a findings file
    if not findings_file:
        return
    with open(findings_file, mode="r") as fp:
        # The whole flow is best-effort: any failure is logged at debug level
        try:
            findings_obj = json.load(fp)
            findings = findings_obj.get("findings")
            if not findings:
                LOG.debug("No findings from scan available to report")
                return
            context = self.get_context(repo_context)
            # Leave a comment on the pull request
            if context.get("prID") and context.get("bitbucketToken"):
                # Build a markdown table, one row per tool in the summary
                summary = "| Tool | Critical | High | Medium | Low | Status |\n"
                summary = (
                    summary + "| ---- | ------- | ------ | ----- | ---- | ---- |\n"
                )
                for rk, rv in report_summary.items():
                    status_emoji = self.to_emoji(rv.get("status"))
                    summary = f'{summary}| {rv.get("tool")} | {rv.get("critical")} | {rv.get("high")} | {rv.get("medium")} | {rv.get("low")} | {status_emoji} |\n'
                # %-style template filled with the Bitbucket pipeline variables below
                template = config.get("PR_COMMENT_BASIC_TEMPLATE")
                recommendation = (
                    f"Please review the scan reports before approving this pull request for {context.get('prTargetBranch')} branch"
                    if build_status == "fail"
                    else "Looks good"
                )
                repoOwner = f"{context.get('BITBUCKET_REPO_OWNER')}"
                repoFullname = f"{context.get('BITBUCKET_REPO_FULL_NAME')}"
                repoWorkspace = f"{context.get('BITBUCKET_WORKSPACE')}"
                repoUUID = f"{context.get('BITBUCKET_REPO_UUID')}"
                prID = f"{context.get('BITBUCKET_PR_ID')}"
                prTargetBranch = f"{context.get('BITBUCKET_PR_DESTINATION_BRANCH')}"
                bitbucketToken = f"{context.get('BITBUCKET_TOKEN')}"
                commitSHA = f"{context.get('BITBUCKET_COMMIT')}"
                repoId = f"{context.get('BITBUCKET_REPO_UUID')}"
                projectUrl = f"{context.get('BITBUCKET_REPO_SLUG')}"
                jobId = f"{context.get('BITBUCKET_BUILD_NUMBER')}"
                body = template % dict(
                    summary=summary,
                    recommendation=recommendation,
                    repoOwner=repoOwner,
                    repoFullname=repoFullname,
                    repoWorkspace=repoWorkspace,
                    repoUUID=repoUUID,
                    prID=prID,
                    prTargetBranch=prTargetBranch,
                    bitbucketToken=bitbucketToken,
                    commitSHA=commitSHA,
                    repoId=repoId,
                    projectUrl=projectUrl,
                    jobId=jobId,
                )
                # Post the comment using workspace + token as basic auth credentials
                rc = requests.post(
                    self.get_pr_comments_url(repo_context),
                    auth=(
                        context.get("repoWorkspace"),
                        context.get("bitbucketToken"),
                    ),
                    headers={"Content-Type": "application/json"},
                    json={"content": {"raw": body}},
                )
                if not rc.ok:
                    LOG.debug(rc.json())
            else:
                LOG.debug(
                    "Either build is not part of a PR or variable BITBUCKET_TOKEN was not set with Pull Request write permission"
                )
            total_count = len(findings)
            # Report data rows: overall merge verdict plus one status row per tool
            data_list = [
                {
                    "title": "Safe to merge?",
                    "type": "BOOLEAN",
                    "value": build_status != "fail",
                },
            ]
            for rk, rv in report_summary.items():
                data_list.append({
                    "title": rv.get("tool"),
                    "type": "TEXT",
                    "value": rv.get("status"),
                })
            scan_id = config.get("run_uuid", "001")
            # Create a PR report based on the total findings
            rr = requests.put(
                f"{self.get_reports_url(repo_context)}-{scan_id}",
                proxies=proxies,
                headers={"Content-Type": "application/json"},
                json={
                    "title": "Scan",
                    "details": f"This pull request contains {total_count} issues",
                    "report_type": "SECURITY",
                    "reporter": f"Scan report for {repo_context.get('repositoryName')}",
                    "link": "https://slscan.io",
                    "logo_url": "https://www.shiftleft.io/static/images/ShiftLeft_logo_white.svg",
                    "result": "FAILED" if build_status == "fail" else "PASSED",
                    "data": data_list,
                },
            )
            if rr.ok:
                # One annotation per finding; the occurrence hash (last segment of
                # internal_id) makes the annotation URL unique per occurrence
                for f in findings:
                    finternal = f.get("internal_id")
                    tmpA = finternal.split("/")
                    title = tmpA[0]
                    occurrenceHash = tmpA[-1]
                    annotation_url = f"{self.get_reports_url(repo_context)}-{scan_id}/annotations/scan-{occurrenceHash}"
                    fileName = ""
                    lineNumber = None
                    if f.get("details"):
                        fileName = f.get("details", {}).get("fileName")
                        lineNumber = f.get("details", {}).get("lineNumber")
                        workspace = utils.get_workspace(repo_context)
                        # Remove the workspace
                        if workspace:
                            workspace = workspace + "/"
                            fileName = fileName.replace(workspace, "")
                    # Cleanup title and description
                    title = f.get("title")
                    description = f.get("description")
                    # Prefer the longer multi-line title as the description
                    if len(title) > len(description) and "\n" in title:
                        description = f.get("title")
                    if "\n" in title:
                        title = title.split("\n")[0]
                    annotation = {
                        "title": "Scan Report",
                        "annotation_type": "VULNERABILITY",
                        "summary": title,
                        "details": description,
                        "severity": self.convert_severity(f.get("severity")),
                        "path": fileName,
                        "line": lineNumber,
                    }
                    ar = requests.put(
                        annotation_url,
                        proxies=proxies,
                        headers={"Content-Type": "application/json"},
                        json=annotation,
                    )
                    # Stop annotating on the first failed request
                    if not ar.ok:
                        break
            else:
                LOG.debug(rr.json())
        except Exception as e:
            LOG.debug(e)
def extract_from_file(tool_name, working_dir, report_file, file_path_list=None):
    """Extract properties from reports

    :param tool_name: tool name
    :param working_dir: Working directory
    :param report_file: Report file
    :param file_path_list: Full file path for any manipulation

    :return issues, metrics, skips information
    """
    issues = []
    metrics = None
    skips = []
    # If the tools did not produce any result do not crash
    if not os.path.isfile(report_file):
        return issues, metrics, skips
    extn = pathlib.PurePosixPath(report_file).suffix
    with io.open(report_file, "r") as rfile:
        # Static check use jsonlines format, duh
        if tool_name == "staticcheck":
            contents = rfile.read()
            try:
                issues = [
                    json.loads(str(item)) for item in contents.strip().split("\n")
                ]
            except json.decoder.JSONDecodeError:
                LOG.warning(
                    "staticcheck produced no result since the project was not built before analysis!"
                )
            return issues, metrics, skips
        if extn == ".json":
            try:
                report_data = json.loads(rfile.read())
            except json.decoder.JSONDecodeError:
                return issues, metrics, skips
            # Inspect uses vulnerabilities
            if tool_name == "inspect":
                file_name_prefix = ""
                # BUG FIX: a missing or null "vulnerabilities" key used to raise
                # TypeError when iterated; treat it as an empty result instead.
                for v in report_data.get("vulnerabilities") or []:
                    if not v:
                        continue
                    vuln = v["vulnerability"]
                    location = {}
                    # Pick the first data-flow location that is not inside a
                    # generic (third party) package
                    if vuln.get("dataFlow") and vuln.get("dataFlow").get("dataFlow"):
                        for flow_node in vuln["dataFlow"]["dataFlow"]["list"]:
                            if not is_generic_package(
                                flow_node["location"].get("fileName")
                            ):
                                location = flow_node["location"]
                                break
                    fileName = location.get("fileName")
                    if fileName == "N/A":
                        continue
                    if not file_name_prefix:
                        file_name_prefix = find_path_prefix(working_dir, fileName)
                    issues.append({
                        "rule_id": vuln["category"],
                        "title": vuln["title"],
                        "description": vuln["description"],
                        "score": vuln["score"],
                        "severity": vuln["severity"],
                        "line_number": location.get("lineNumber"),
                        "filename": os.path.join(file_name_prefix, fileName),
                        "first_found": vuln["firstVersionDetected"],
                        "issue_confidence": "HIGH",
                    })
            elif isinstance(report_data, list):
                issues = report_data
            else:
                if tool_name == "checkov":
                    # BUG FIX: default to [] so the function always returns a
                    # list even when "results"/"failed_checks" is absent or null
                    issues = (report_data.get("results") or {}).get(
                        "failed_checks"
                    ) or []
                elif "sec_issues" in report_data:
                    # NodeJsScan uses sec_issues
                    sec_data = report_data["sec_issues"]
                    for key, value in sec_data.items():
                        if isinstance(value, list):
                            issues = issues + value
                        else:
                            issues.append(value)
                elif "Issues" in report_data:
                    tmpL = report_data.get("Issues", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result" % tool_name)
                elif "results" in report_data:
                    tmpL = report_data.get("results", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result" % tool_name)
        if extn == ".csv":
            headers, issues = csv_parser.get_report_data(rfile)
        if extn == ".xml":
            issues, metrics = xml_parser.get_report_data(rfile, file_path_list)
    return issues, metrics, skips
def convert_sarif(app_name, repo_context, sarif_files, findings_fname):
    """
    Method to convert sarif to findings json

    :param app_name: Application name
    :param repo_context: Repo context embedded into each finding's details
    :param sarif_files: List of sarif report files to convert
    :param findings_fname: Output filename for the aggregated findings json
    :return:
    """
    # NOTE(review): finding_id is incremented per finding but is not embedded
    # in the output — presumably kept for future use; confirm before removing
    finding_id = 1
    findings_list = []
    # Cache of ruleId -> owasp category, to avoid rescanning the config mapping
    rule_id_owasp_cache = {}
    for sf in sarif_files:
        with open(sf, mode="r") as report_file:
            report_data = None
            # Best-effort per file: any malformed sarif is logged and skipped
            try:
                report_data = json.loads(report_file.read())
                # skip this file if the data is empty
                if not report_data or not report_data.get("runs"):
                    continue
                # Iterate through all the runs
                for run in report_data["runs"]:
                    results = run.get("results")
                    if not results:
                        continue
                    tool_name = run.get("tool", {}).get("driver", {}).get("name")
                    # Index the driver rules by id for quick lookup per result
                    rules = {
                        r["id"]: r
                        for r in run.get("tool", {}).get("driver", {}).get("rules")
                        if r and r.get("id")
                    }
                    for result in results:
                        rule_id = result.get("ruleId", "")
                        rule = rules.get(rule_id)
                        # Results without a matching rule cannot be enriched
                        if not rule:
                            continue
                        owasp_category = rule_id_owasp_cache.get(rule_id, "")
                        if not owasp_category:
                            # Check the config for any available owasp category mapping
                            # (exact match or substring match on the rule id)
                            for rok, rov in config.get("rules_owasp_category").items():
                                if (
                                    rok.upper() == rule_id.upper()
                                    or rok.upper() in rule_id.upper()
                                ):
                                    rule_id_owasp_cache[rule_id] = rov
                                    owasp_category = rov
                        # Fall back to the rule id when the rule has no name
                        category = rule.get("name")
                        if not category:
                            category = rule_id
                        desc = get_help(
                            rule_id,
                            rule_obj=rule,
                            tool_name=tool_name,
                            owasp_category=owasp_category,
                        )
                        short_desc = rule.get("shortDescription", {}).get("text")
                        if not short_desc:
                            short_desc = result.get("message", {}).get("text")
                        ngsev = convert_severity(
                            result.get("properties", {})["issue_severity"]
                        )
                        # Populate tags
                        tags = []
                        if "CWE" in rule_id:
                            tags.append(
                                {
                                    "key": "cwe_category",
                                    "value": rule_id.replace("CWE-", ""),
                                    "shiftleft_managed": True,
                                }
                            )
                        # CIS benchmark tagging for checkov/CIS/AWS style rule ids
                        if "CKV_" in rule_id or "CIS_" in rule_id or "AWS" in rule_id:
                            cis_rule = cis.get_rule(rule_id)
                            if cis_rule:
                                tags.append(
                                    {
                                        "key": "cis_category",
                                        "value": cis_rule.get("id", ""),
                                        "shiftleft_managed": False,
                                    }
                                )
                                if cis_rule.get("scored"):
                                    tags.append(
                                        {
                                            "key": "cis_status",
                                            "value": "SCORED",
                                            "shiftleft_managed": False,
                                        }
                                    )
                        # One finding per physical location of the result
                        for location in result.get("locations"):
                            filename = location["physicalLocation"]["artifactLocation"][
                                "uri"
                            ]
                            lineno = location.get("physicalLocation", {})["region"][
                                "startLine"
                            ]
                            end_lineno = location.get("physicalLocation", {})[
                                "contextRegion"
                            ]["endLine"]
                            finding = {
                                "app": app_name,
                                "type": "extscan",
                                "title": result.get("message", {})["text"],
                                "description": desc,
                                # internal_id is ruleId/line-hash, used to
                                # deduplicate occurrences of the same issue
                                "internal_id": "{}/{}".format(
                                    rule_id,
                                    utils.calculate_line_hash(
                                        filename,
                                        lineno,
                                        end_lineno,
                                        location.get("physicalLocation", {})["region"][
                                            "snippet"
                                        ]["text"],
                                        short_desc,
                                    ),
                                ),
                                "severity": ngsev,
                                "owasp_category": owasp_category,
                                "category": category,
                                "details": {
                                    "repoContext": repo_context,
                                    "name": result.get("message", {})["text"],
                                    "tags": ",".join(rule["properties"]["tags"]),
                                    "fileName": filename,
                                    "DATA_TYPE": "OSS_SCAN",
                                    "lineNumber": lineno,
                                    "ruleId": rule_id,
                                    "ruleName": rule.get("name"),
                                    "contextText": location.get("physicalLocation", {})[
                                        "region"
                                    ]["snippet"]["text"],
                                    "snippetText": location.get("physicalLocation", {})[
                                        "contextRegion"
                                    ]["snippet"]["text"],
                                },
                                "tags": tags,
                            }
                            findings_list.append(finding)
                            finding_id = finding_id + 1
            except Exception as e:
                LOG.debug(e)
                continue
    # Write the aggregated findings from all sarif files
    with open(findings_fname, mode="w") as out_file:
        json.dump({"findings": findings_list}, out_file)
def fetch_findings(app_name, version, report_fname): """ Fetch findings from the NG SAST Cloud """ sl_org = config.get("SHIFTLEFT_ORG_ID", config.get("SHIFTLEFT_ORGANIZATION_ID")) sl_org_token = config.get( "SHIFTLEFT_ORG_TOKEN", config.get("SHIFTLEFT_ORGANIZATION_TOKEN") ) if not sl_org_token: sl_org_token = config.get("SHIFTLEFT_API_TOKEN") findings_api = config.get("SHIFTLEFT_VULN_API") findings_list = [] if sl_org and sl_org_token: findings_api = findings_api % dict( sl_org=sl_org, app_name=app_name, version=version ) query_obj = { "query": { "returnRuntimeData": False, "orderByDirection": "VULNERABILITY_ORDER_DIRECTION_DESC", } } headers = { "Content-Type": "application/json", "Authorization": "Bearer " + sl_org_token, } try: r = requests.post(findings_api, headers=headers, json=query_obj) if r.status_code == 200: findings_data = r.json() if findings_data: findings_list += findings_data.get("vulnerabilities", []) nextPageBookmark = findings_data.get("nextPageBookmark") # Recurse and fetch all pages while nextPageBookmark: LOG.debug("Retrieving findings from next page") r = requests.post( findings_api, headers=headers, json={"pageBookmark": nextPageBookmark}, ) if r.status_code == 200: findings_data = r.json() if findings_data: findings_list += findings_data.get( "vulnerabilities", [] ) nextPageBookmark = findings_data.get("nextPageBookmark") else: nextPageBookmark = None with open(report_fname, mode="w") as rp: json.dump({"vulnerabilities": findings_list}, rp) LOG.debug( "Data written to {}, {}".format( report_fname, len(findings_list) ) ) return findings_list else: if not findings_list: LOG.warning( "Unable to retrieve any findings from NG SAST Cloud. Status {}".format( r.status_code ) ) else: LOG.debug( "Unable to retrieve some findings from NG SAST Cloud. Proceeding with partial list. Status {}".format( r.status_code ) ) return findings_list except Exception as e: LOG.error(e) else: return findings_list
def execute_default_cmd(  # scan:ignore
    cmd_map_list,
    type_str,
    tool_name,
    src,
    reports_dir,
    convert,
    scan_mode,
    repo_context,
):
    """
    Method to execute default command for the given type

    Args:
      cmd_map_list Default commands in the form of a dict (multiple) or list
      type_str Project type
      tool_name Tool name
      src Project dir
      reports_dir Directory for output reports
      convert Boolean to enable normalisation of reports json
      scan_mode Scan mode string
      repo_context Repo context
    """
    # Check if there is a default command specified for the given type
    # Create the reports dir
    report_fname_prefix = os.path.join(reports_dir, tool_name + "-report")
    # Look for any additional direct arguments for the tool and inject them
    if config.get(tool_name + "_direct_args"):
        direct_args = config.get(tool_name + "_direct_args").split(" ")
        if direct_args:
            cmd_map_list += direct_args
    # Allow a single-file analysis target to override the source directory
    src_or_file = src
    if config.get("SHIFTLEFT_ANALYZE_FILE"):
        src_or_file = config.get("SHIFTLEFT_ANALYZE_FILE")
    # The command templates use %-style placeholders filled in here
    default_cmd = " ".join(cmd_map_list) % dict(
        src=src,
        src_or_file=src_or_file,
        reports_dir=reports_dir,
        report_fname_prefix=report_fname_prefix,
        type=type_str,
        scan_mode=scan_mode,
    )
    # Try to detect if the output could be json
    outext = ".out"
    if "json" in default_cmd:
        outext = ".json"
    elif "csv" in default_cmd:
        outext = ".csv"
    elif "sarif" in default_cmd:
        outext = ".sarif"
    elif "xml" in default_cmd:
        outext = ".xml"
    report_fname = report_fname_prefix + outext
    # If the command doesn't support file output then redirect stdout automatically
    stdout = None
    if LOG.isEnabledFor(DEBUG):
        stdout = None
    if reports_dir and report_fname_prefix not in default_cmd:
        report_fname = report_fname_prefix + outext
        stdout = io.open(report_fname, "w")
        LOG.debug("Output will be written to {}".format(report_fname))
    # If the command is requesting list of files then construct the argument:
    # "(filelist=ext)" is replaced with a space-separated list of matching files
    filelist_prefix = "(filelist="
    if default_cmd.find(filelist_prefix) > -1:
        si = default_cmd.find(filelist_prefix)
        ei = default_cmd.find(")", si + 10)
        ext = default_cmd[si + 10:ei]
        filelist = utils.find_files(src, ext)
        # Temporary fix for the yaml issue
        if ext == "yaml":
            yml_list = utils.find_files(src, "yml")
            if yml_list:
                filelist.extend(yml_list)
        delim = " "
        default_cmd = default_cmd.replace(
            filelist_prefix + ext + ")", delim.join(filelist)
        )
    cmd_with_args = default_cmd.split(" ")
    # Suppress psalm output
    if should_suppress_output(type_str, cmd_with_args[0]):
        stdout = subprocess.DEVNULL
    exec_tool(tool_name, cmd_with_args, cwd=src, stdout=stdout)
    # Should we attempt to convert the report to sarif format
    if should_convert(convert, tool_name, cmd_with_args[0], report_fname):
        crep_fname = utils.get_report_file(
            tool_name, reports_dir, convert, ext_name="sarif"
        )
        # java/pmd/php tools are converted under the tool name; everything else
        # is converted under the executable name with its arguments
        if (
            cmd_with_args[0] == "java"
            or "pmd-bin" in cmd_with_args[0]
            or "php" in tool_name
        ):
            convertLib.convert_file(
                tool_name,
                cmd_with_args,
                src,
                report_fname,
                crep_fname,
            )
        else:
            convertLib.convert_file(
                cmd_with_args[0],
                cmd_with_args[1:],
                src,
                report_fname,
                crep_fname,
            )
        # Keep the raw report around only in debug mode
        try:
            if not LOG.isEnabledFor(DEBUG):
                os.remove(report_fname)
        except Exception:
            LOG.debug("Unable to remove file {}".format(report_fname))
    elif type_str == "depscan":
        # Convert depscan and license scan files to html
        depscan_files = utils.find_files(reports_dir, "depscan", True)
        for df in depscan_files:
            if not df.endswith(".html"):
                depscan_data = grafeas.parse(df)
                if depscan_data and len(depscan_data):
                    html_fname = df.replace(".json", ".html")
                    grafeas.render_html(depscan_data, html_fname)
                    track({
                        "id": config.get("run_uuid"),
                        "depscan_summary": depscan_data
                    })
                    LOG.debug(
                        "Depscan and HTML report written to file: %s, %s :thumbsup:",
                        df,
                        html_fname,
                    )
        licence_files = utils.find_files(reports_dir, "license", True)
        for lf in licence_files:
            if not lf.endswith(".html"):
                licence_data = licence.parse(lf)
                if licence_data and len(licence_data):
                    html_fname = lf.replace(".json", ".html")
                    licence.render_html(licence_data, html_fname)
                    track({
                        "id": config.get("run_uuid"),
                        "license_summary": licence_data
                    })
                    LOG.debug(
                        "License check and HTML report written to file: %s, %s :thumbsup:",
                        lf,
                        html_fname,
                    )
def summary(sarif_files, depscan_files=None, aggregate_file=None, override_rules=None):
    """Generate overall scan summary based on the generated SARIF file

    :param sarif_files: List of generated sarif report files
    :param depscan_files: List of depscan report files
    :param aggregate_file: Filename to store aggregate data
    :param override_rules: Build break rules to override for testing

    :returns dict representing the summary and the overall build status string
    """
    # BUG FIX: override_rules previously used a mutable default argument ({}),
    # which is shared across calls. None-sentinel avoids that pitfall while
    # remaining backward compatible.
    if override_rules is None:
        override_rules = {}
    report_summary = {}
    build_status = "pass"
    # This is the list of all runs which will get stored as an aggregate
    run_data_list = []
    default_rules = config.get("build_break_rules").get("default")
    depscan_default_rules = config.get("build_break_rules").get("depscan")
    # Collect stats from depscan files if available
    if depscan_files:
        for df in depscan_files:
            with open(df, mode="r") as drep_file:
                dep_data = get_depscan_data(drep_file)
                if not dep_data:
                    continue
                # depscan-java or depscan-nodejs based on filename
                dep_type = (
                    os.path.basename(df).replace(".json", "").replace("-report", "")
                )
                metrics, required_pkgs_found = calculate_depscan_metrics(dep_data)
                report_summary[dep_type] = {
                    "tool": f"""Dependency Scan ({dep_type.replace("depscan-", "")})""",
                    "critical": metrics["critical"],
                    "high": metrics["high"],
                    "medium": metrics["medium"],
                    "low": metrics["low"],
                    "status": ":white_heavy_check_mark:",
                }
                # Drop any aggregate count so only per-severity columns remain
                report_summary[dep_type].pop("total", None)
                # Compare against the build break rule to determine status
                dep_tool_rules = config.get("build_break_rules").get(dep_type, {})
                build_break_rules = {**depscan_default_rules, **dep_tool_rules}
                if override_rules and override_rules.get("depscan"):
                    build_break_rules = {
                        **build_break_rules,
                        **override_rules.get("depscan"),
                    }
                # Default severity categories for build status
                build_status_categories = (
                    "critical",
                    "required_critical",
                    "optional_critical",
                    "high",
                    "required_high",
                    "optional_high",
                    "medium",
                    "required_medium",
                    "optional_medium",
                    "low",
                    "required_low",
                    "optional_low",
                )
                # Issue 233 - Consider only required packages if available
                if required_pkgs_found:
                    build_status_categories = (
                        "required_critical",
                        "required_high",
                        "required_medium",
                        "required_low",
                    )
                for rsev in build_status_categories:
                    if build_break_rules.get("max_" + rsev) is not None:
                        if metrics.get(rsev) > build_break_rules["max_" + rsev]:
                            report_summary[dep_type]["status"] = ":cross_mark:"
                            build_status = "fail"
    for sf in sarif_files:
        with open(sf, mode="r") as report_file:
            report_data = json.load(report_file)
            # skip this file if the data is empty
            if not report_data or not report_data.get("runs"):
                # BUG FIX: Logger.warn is deprecated; use warning
                LOG.warning("Report file {} is invalid. Skipping ...".format(sf))
                continue
            # Iterate through all the runs
            for run in report_data["runs"]:
                # Add it to the run data list for aggregation
                run_data_list.append(run)
                tool_desc = run["tool"]["driver"]["name"]
                tool_name = tool_desc
                # Initialise
                report_summary[tool_name] = {
                    "tool": tool_desc,
                    "critical": 0,
                    "high": 0,
                    "medium": 0,
                    "low": 0,
                    "status": ":white_heavy_check_mark:",
                }
                results = run.get("results", [])
                metrics = run.get("properties", {}).get("metrics", None)
                # If the result includes metrics use it. If not compute it
                if metrics:
                    report_summary[tool_name].update(metrics)
                    report_summary[tool_name].pop("total", None)
                else:
                    for aresult in results:
                        sev = aresult["properties"]["issue_severity"].lower()
                        report_summary[tool_name][sev] += 1
                # Compare against the build break rule to determine status
                tool_rules = config.get("build_break_rules").get(tool_name, {})
                build_break_rules = {
                    **default_rules,
                    **tool_rules,
                    **override_rules,
                }
                for rsev in ("critical", "high", "medium", "low"):
                    if build_break_rules.get("max_" + rsev) is not None:
                        if (
                            report_summary.get(tool_name).get(rsev)
                            > build_break_rules["max_" + rsev]
                        ):
                            report_summary[tool_name]["status"] = ":cross_mark:"
                            build_status = "fail"
    # Should we store the aggregate data
    if aggregate_file:
        # agg_sarif_file = aggregate_file.replace(".json", ".sarif")
        # aggregate.sarif_aggregate(run_data_list, agg_sarif_file)
        aggregate.jsonl_aggregate(run_data_list, aggregate_file)
        LOG.debug("Aggregate report written to {}\n".format(aggregate_file))
    return report_summary, build_status
def exec_tool( # scan:ignore tool_name, args, cwd=None, env=utils.get_env(), stdout=subprocess.DEVNULL): """ Convenience method to invoke cli tools Args: tool_name Tool name args cli command and args cwd Current working directory env Environment variables stdout stdout configuration for run command Returns: CompletedProcess instance """ with Progress( console=console, redirect_stderr=False, redirect_stdout=False, refresh_per_second=1, ) as progress: task = None try: env = use_java(env) LOG.debug('⚡︎ Executing {} "{}"'.format(tool_name, " ".join(args))) stderr = subprocess.DEVNULL if LOG.isEnabledFor(DEBUG): stderr = subprocess.STDOUT tool_verb = "Scanning with" if "init" in tool_name: tool_verb = "Initializing" elif "build" in tool_name: tool_verb = "Building with" task = progress.add_task("[green]" + tool_verb + " " + tool_name, total=100, start=False) cp = subprocess.run( args, stdout=stdout, stderr=stderr, cwd=cwd, env=env, check=False, shell=False, encoding="utf-8", ) if cp and stdout == subprocess.PIPE: for line in cp.stdout: progress.update(task, completed=5) if (cp and LOG.isEnabledFor(DEBUG) and cp.returncode and cp.stdout is not None): LOG.debug(cp.stdout) progress.update(task, completed=100, total=100) return cp except Exception as e: if task: progress.update(task, completed=20, total=10, visible=False) if not LOG.isEnabledFor(DEBUG): LOG.info( f"{tool_name} has reported few errors. To view, pass the environment variable SCAN_DEBUG_MODE=debug" ) LOG.debug(e) return None