def authenticate():
    """
    Authenticate with the ShiftLeft NG SAST cloud using the credentials
    passed via environment variables. Does nothing when a valid session
    already exists or when any required credential is missing.
    """
    if is_authenticated():
        return
    org_id = config.get("SHIFTLEFT_ORG_ID", config.get("SHIFTLEFT_ORGANIZATION_ID"))
    access_token = config.get("SHIFTLEFT_ACCESS_TOKEN")
    cli_cmd = config.get("SHIFTLEFT_NGSAST_CMD")
    run_uuid = config.get("run_uuid")
    # Org id, token and a working sl cli binary are all required to log in
    if not (org_id and access_token and cli_cmd and utils.check_command(cli_cmd)):
        return
    login_cmd = [
        cli_cmd,
        "auth",
        "--no-auto-update",
        "--no-diagnostic",
        "--org",
        org_id,
        "--token",
        access_token,
    ]
    result = exec_tool("NG SAST", login_cmd)
    if result.returncode != 0:
        LOG.warning(
            "ShiftLeft NG SAST authentication has failed. Please check the credentials"
        )
        return
    LOG.info("Successfully authenticated with NG SAST cloud")
    track({"id": run_uuid, "scan_mode": "ng-sast", "sl_org": org_id})
def nodejs_build(src, reports_dir, lang_tools):
    """
    Automatically build nodejs project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools
    :return: boolean status from the build. True if the command executed successfully. False otherwise
    """
    base_dir = Path(src)
    # Detect the package manager from marker files in the project root
    rushjson_files = [p.as_posix() for p in base_dir.glob("rush.json")]
    pjson_files = [p.as_posix() for p in base_dir.glob("package.json")]
    ylock_files = [p.as_posix() for p in base_dir.glob("yarn.lock")]
    yarn_mode = bool(ylock_files)
    rush_mode = not yarn_mode and bool(rushjson_files)
    if yarn_mode:
        install_args = lang_tools.get("yarn")
    elif rush_mode:
        install_args = lang_tools.get("rush")
    elif not pjson_files:
        # No recognizable nodejs project markers at all
        LOG.debug(
            "Nodejs auto build is supported only for npm or yarn or rush based projects"
        )
        return False
    else:
        install_args = lang_tools.get("npm")
    # Install dependencies; the install step determines the return status
    cp = exec_tool("auto-build", install_args, src)
    ret = bool(cp) and cp.returncode == 0
    # Best effort: also run the project's build script
    try:
        if rush_mode:
            build_args = ["rush", "rebuild"]
        else:
            build_args = ["yarn"] if yarn_mode else ["npm"]
            build_args += ["run", "build"]
        exec_tool("auto-build", build_args, src)
    except Exception:
        if rush_mode:
            LOG.warning(
                "Automatic build for rush.js has failed. Try installing the packages manually before invoking scan.\nIf this works then let us know the build steps by filing an issue."
            )
        else:
            LOG.debug("Automatic build has failed for the node.js project")
    return ret
def fetch_findings(app_name, version, report_fname):
    """
    Fetch all findings for an app from the NG SAST Cloud, following
    pagination bookmarks, and write them to a json report file.

    :param app_name: Application name registered with NG SAST
    :param version: App version/branch to query
    :param report_fname: Path of the json report file to write
    :return: List of vulnerability findings (possibly empty). Always a
        list, so callers can iterate without a None check.
    """
    sl_org = config.get("SHIFTLEFT_ORG_ID", config.get("SHIFTLEFT_ORGANIZATION_ID"))
    sl_org_token = config.get(
        "SHIFTLEFT_ORG_TOKEN", config.get("SHIFTLEFT_ORGANIZATION_TOKEN")
    )
    if not sl_org_token:
        # Fall back to the api token when no org token is configured
        sl_org_token = config.get("SHIFTLEFT_API_TOKEN")
    findings_api = config.get("SHIFTLEFT_VULN_API")
    findings_list = []
    if not (sl_org and sl_org_token):
        # Credentials unavailable: nothing to fetch
        return findings_list
    # The api url template carries %(sl_org)s / %(app_name)s / %(version)s slots
    findings_api = findings_api % dict(
        sl_org=sl_org, app_name=app_name, version=version
    )
    query_obj = {
        "query": {
            "returnRuntimeData": False,
            "orderByDirection": "VULNERABILITY_ORDER_DIRECTION_DESC",
        }
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer " + sl_org_token,
    }
    try:
        r = requests.post(findings_api, headers=headers, json=query_obj)
        if r.status_code == 200:
            findings_data = r.json()
            if findings_data:
                findings_list += findings_data.get("vulnerabilities", [])
                nextPageBookmark = findings_data.get("nextPageBookmark")
                # Recurse and fetch all pages
                while nextPageBookmark:
                    LOG.debug("Retrieving findings from next page")
                    r = requests.post(
                        findings_api,
                        headers=headers,
                        json={"pageBookmark": nextPageBookmark},
                    )
                    # FIX: a non-200 page response previously could leave the
                    # bookmark unchanged and loop forever; stop paginating on
                    # any failure or empty payload instead.
                    if r.status_code != 200:
                        break
                    findings_data = r.json()
                    if not findings_data:
                        break
                    findings_list += findings_data.get("vulnerabilities", [])
                    nextPageBookmark = findings_data.get("nextPageBookmark")
                with open(report_fname, mode="w") as rp:
                    json.dump({"vulnerabilities": findings_list}, rp)
                    LOG.debug(
                        "Data written to {}, {}".format(
                            report_fname, len(findings_list)
                        )
                    )
        else:
            if not findings_list:
                LOG.warning(
                    "Unable to retrieve any findings from NG SAST Cloud. Status {}".format(
                        r.status_code
                    )
                )
            else:
                LOG.debug(
                    "Unable to retrieve some findings from NG SAST Cloud. Proceeding with partial list. Status {}".format(
                        r.status_code
                    )
                )
    except Exception as e:
        LOG.error(e)
    # FIX: previously several paths (empty 200 payload, non-200 first
    # response, exceptions) implicitly returned None; always return the list.
    return findings_list
def inspect_scan(language, src, reports_dir, convert, repo_context):
    """
    Method to perform inspect cloud scan

    Args:
      language Project language
      src Project dir
      reports_dir Directory for output reports
      convert Boolean to enable normalisation of reports json
      repo_context Repo context
    """
    run_uuid = config.get("run_uuid")
    cpg_mode = config.get("SHIFTLEFT_CPG")
    env = os.environ.copy()
    # NOTE(review): SCAN_JAVA_8_HOME may be unset, leaving a None value in
    # env which would break the subprocess env later — confirm the image
    # always defines it.
    env["SCAN_JAVA_HOME"] = os.environ.get("SCAN_JAVA_8_HOME")
    report_fname = utils.get_report_file(
        "ng-sast", reports_dir, convert, ext_name="json"
    )
    sl_cmd = config.get("SHIFTLEFT_NGSAST_CMD")
    # Check if sl cli is available
    if not utils.check_command(sl_cmd):
        LOG.warning(
            "sl cli is not available. Please check if your build uses shiftleft/scan-java as the image"
        )
        return
    analyze_files = config.get("SHIFTLEFT_ANALYZE_FILE")
    analyze_target_dir = config.get(
        "SHIFTLEFT_ANALYZE_DIR", os.path.join(src, "target")
    )
    extra_args = None
    # When no explicit file is configured, locate build artifacts per language
    if not analyze_files:
        if language == "java":
            analyze_files = utils.find_java_artifacts(analyze_target_dir)
        elif language == "csharp":
            if not utils.check_dotnet():
                LOG.warning(
                    "dotnet is not available. Please check if your build uses shiftleft/scan-csharp as the image"
                )
                return
            analyze_files = utils.find_csharp_artifacts(src)
            cpg_mode = True
        else:
            # ts/nodejs are analyzed as js with babel/ts support flags
            if language == "ts" or language == "nodejs":
                language = "js"
                extra_args = ["--", "--ts", "--babel"]
            analyze_files = [src]
            cpg_mode = True
    app_name = find_app_name(src, repo_context)
    branch = repo_context.get("revisionId")
    if not branch:
        branch = "master"
    if not analyze_files:
        LOG.warning(
            "Unable to find any build artifacts. Compile your project first before invoking scan or use the auto build feature."
        )
        return
    # Only a single artifact is submitted for analysis
    if isinstance(analyze_files, list) and len(analyze_files) > 1:
        LOG.warning(
            "Multiple files found in {}. Only {} will be analyzed".format(
                analyze_target_dir, analyze_files[0]
            )
        )
        analyze_files = analyze_files[0]
    # Build the sl analyze command; None placeholders are filtered out below
    sl_args = [
        sl_cmd,
        "analyze",
        "--no-auto-update" if language == "java" else None,
        "--wait",
        "--cpg" if cpg_mode else None,
        "--" + language,
        "--tag",
        "branch=" + branch,
        "--app",
        app_name,
    ]
    # NOTE(review): when analyze_files is still a one-element list (the js
    # path), this nests a list inside sl_args — confirm exec_tool/join
    # handles that, or that the single-element case never reaches here.
    sl_args += [analyze_files]
    if extra_args:
        sl_args += extra_args
    sl_args = [arg for arg in sl_args if arg is not None]
    LOG.info(
        "About to perform ShiftLeft NG SAST cloud analysis. This might take a few minutes ..."
    )
    LOG.debug(" ".join(sl_args))
    LOG.debug(repo_context)
    cp = exec_tool("NG SAST", sl_args, src, env=env)
    if cp.returncode != 0:
        LOG.warning("NG SAST cloud analyze has failed with the below logs")
        LOG.debug(sl_args)
        LOG.info(cp.stderr)
        return
    # Pull the findings back from the cloud and optionally convert to sarif
    findings_data = fetch_findings(app_name, branch, report_fname)
    if findings_data and convert:
        crep_fname = utils.get_report_file(
            "ng-sast", reports_dir, convert, ext_name="sarif"
        )
        convertLib.convert_file("ng-sast", sl_args[1:], src, report_fname, crep_fname)
    track({"id": run_uuid, "scan_mode": "ng-sast", "sl_args": sl_args})
def extract_from_file(
    tool_name, tool_args, working_dir, report_file, file_path_list=None
):
    """Extract properties from reports

    Dispatches on the tool name and the report file extension
    (.json / .csv / .xml) to the matching parsing logic.

    :param tool_name: tool name
    :param tool_args: tool args (forwarded to convert_dataflow for ng-sast)
    :param working_dir: Working directory
    :param report_file: Report file
    :param file_path_list: Full file path for any manipulation

    :return issues, metrics, skips information
    """
    issues = []
    metrics = None
    skips = []
    # If the tools did not produce any result do not crash
    if not os.path.isfile(report_file):
        return issues, metrics, skips
    extn = pathlib.PurePosixPath(report_file).suffix
    with io.open(report_file, "r") as rfile:
        # Static check use jsonlines format, duh
        if tool_name == "staticcheck":
            contents = rfile.read()
            try:
                # One JSON document per line
                issues = [
                    json.loads(str(item)) for item in contents.strip().split("\n")
                ]
            except json.decoder.JSONDecodeError:
                LOG.warning(
                    "staticcheck produced no result since the project was not built before analysis!"
                )
            return issues, metrics, skips
        if extn == ".json":
            try:
                report_data = json.loads(rfile.read())
            except json.decoder.JSONDecodeError:
                # Unparseable json report: return empty results, do not crash
                return issues, metrics, skips
            # NG SAST (Formerly Inspect) uses vulnerabilities
            if tool_name == "ng-sast":
                # NOTE(review): .get("vulnerabilities") may return None for an
                # empty report, which would raise here — confirm the key is
                # always present upstream.
                for v in report_data.get("vulnerabilities"):
                    if not v:
                        continue
                    vuln = v["vulnerability"]
                    location_list = []
                    if vuln.get("dataFlow") and vuln.get("dataFlow", {}).get(
                        "dataFlow"
                    ):
                        location_list = convert_dataflow(
                            working_dir, tool_args, vuln["dataFlow"]["dataFlow"]["list"]
                        )
                    # One issue per dataflow location
                    for location in location_list:
                        issues.append(
                            {
                                "rule_id": vuln["category"],
                                "title": vuln["title"],
                                "description": vuln["description"],
                                "score": vuln["score"],
                                "severity": vuln["severity"],
                                "line_number": location.get("line_number"),
                                "filename": location.get("filename"),
                                "first_found": vuln["firstVersionDetected"],
                                "issue_confidence": "HIGH",
                            }
                        )
            elif tool_name == "taint-php":
                for entry in report_data:
                    taint_trace = entry.get("taint_trace")
                    labels = []
                    if taint_trace:
                        source, sink, labels = get_from_taints(taint_trace)
                    else:
                        # No trace: treat the entry itself as the source
                        source, _, _ = get_from_taints([entry])
                    issues.append(
                        {
                            "rule_id": entry.get("shortcode"),
                            "test_name": entry.get("type"),
                            "description": "{}: {}".format(
                                entry.get("message"), "\\n".join(labels)
                            ),
                            "link": entry.get("link"),
                            "severity": entry.get("severity"),
                            "issue_confidence": "HIGH",
                            "line_number": source.get("line_number"),
                            "filename": source.get("filename"),
                        }
                    )
            elif tool_name == "taint-python":
                taint_list = report_data.get("vulnerabilities")
                for taint in taint_list:
                    source = taint.get("source")
                    sink = taint.get("sink")
                    # Collect whichever source/sink attributes are present
                    tags = {}
                    for taint_props in [
                        "source_trigger_word",
                        "source_label",
                        "source_type",
                        "sink_trigger_word",
                        "sink_label",
                        "sink_type",
                    ]:
                        if taint.get(taint_props):
                            tags[taint_props] = taint.get(taint_props)
                    issues.append(
                        {
                            "rule_id": taint.get("rule_id"),
                            "test_name": taint.get("rule_name"),
                            "short_description": taint.get("short_description"),
                            "cwe_category": taint.get("cwe_category"),
                            "owasp_category": taint.get("owasp_category"),
                            "description": taint.get("description"),
                            "severity": taint.get("severity"),
                            "issue_confidence": "HIGH",
                            "line_from": source.get("line_number"),
                            "line_to": sink.get("line_number"),
                            "filename": source.get("path"),
                            "tags": tags,
                        }
                    )
            elif tool_name == "phpstan" or tool_name == "source-php":
                file_errors = report_data.get("files")
                for filename, messageobj in file_errors.items():
                    messages = messageobj.get("messages")
                    for msg in messages:
                        # Create a rule id for phpstan
                        rule_word = msg.get("message", "").split(" ")[0]
                        rule_word = "phpstan-" + rule_word.lower()
                        issues.append(
                            {
                                "rule_id": rule_word,
                                "title": msg.get("message"),
                                "line_number": msg.get("line"),
                                "filename": filename,
                                "severity": "LOW",
                                "issue_confidence": "MEDIUM",
                            }
                        )
            elif tool_name == "source-js":
                njs_findings = report_data.get("nodejs", {})
                njs_findings.update(report_data.get("templates", {}))
                for k, v in njs_findings.items():
                    # Password detection by njsscan is full of false positives
                    if k == "node_password":
                        continue
                    files = v.get("files", [])
                    metadata = v.get("metadata", {})
                    if not files or not metadata:
                        continue
                    for afile in files:
                        line_number = 0
                        if afile.get("match_lines"):
                            line_number = afile.get("match_lines")[0]
                        issues.append(
                            {
                                # Derive a rule id from the owasp category label
                                "rule_id": metadata.get("owasp")
                                .replace(":", "-")
                                .replace(" ", "")
                                .lower(),
                                "title": metadata.get("cwe"),
                                "description": metadata.get("description"),
                                "severity": metadata.get("severity"),
                                "line_number": line_number,
                                "filename": afile.get("file_path"),
                                "issue_confidence": "HIGH",
                            }
                        )
            elif tool_name == "checkov":
                # checkov may emit a list of result groups or a single object
                if isinstance(report_data, list):
                    for rd in report_data:
                        issues += rd.get("results", {}).get("failed_checks")
                else:
                    issues = report_data.get("results", {}).get("failed_checks")
            elif tool_name == "source-ruby":
                issues = report_data.get("warnings", [])
                issues += report_data.get("errors", [])
            elif isinstance(report_data, list):
                issues = report_data
            else:
                if "sec_issues" in report_data:
                    # NodeJsScan uses sec_issues
                    sec_data = report_data["sec_issues"]
                    for key, value in sec_data.items():
                        if isinstance(value, list):
                            issues = issues + value
                        else:
                            issues.append(value)
                elif "Issues" in report_data:
                    tmpL = report_data.get("Issues", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result" % tool_name)
                elif "results" in report_data:
                    tmpL = report_data.get("results", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result" % tool_name)
        if extn == ".csv":
            # headers are parsed but not returned to the caller
            headers, issues = csv_parser.get_report_data(rfile)
        if extn == ".xml":
            issues, metrics = xml_parser.get_report_data(
                rfile, file_path_list=file_path_list, working_dir=working_dir
            )
    return issues, metrics, skips
def convert_sarif(app_name, repo_context, sarif_files, findings_fname):
    """
    Method to convert sarif to findings json

    :param app_name: Application name
    :param repo_context: Repository context dict included in each finding
    :param sarif_files: List of sarif report files to convert
    :param findings_fname: Output file receiving the findings json
    :return: None — findings are written to findings_fname
    """
    finding_id = 1
    with open(findings_fname, mode="w") as out_file:
        for sf in sarif_files:
            with open(sf, mode="r") as report_file:
                report_data = json.loads(report_file.read())
                # skip this file if the data is empty
                if not report_data or not report_data.get("runs"):
                    continue
                # Iterate through all the runs
                for run in report_data["runs"]:
                    try:
                        # Index the driver rules by id for per-result lookup
                        rules = {
                            r["id"]: r for r in run["tool"]["driver"]["rules"]
                        }
                        results = run["results"]
                        for result in results:
                            rule = rules.get(result["ruleId"])
                            # Emit one finding per reported location
                            for location in result["locations"]:
                                finding = {
                                    "app": app_name,
                                    "type": "vuln",
                                    "title": result["message"]["text"],
                                    "description": rule["fullDescription"]["text"],
                                    # internal_id = ruleId + hash of the snippet
                                    "internal_id": "{}/{}".format(
                                        result["ruleId"],
                                        utils.calculate_line_hash(
                                            location["physicalLocation"]
                                            ["region"]["snippet"]["text"]),
                                    ),
                                    "severity": convert_severity(result["properties"]
                                        ["issue_severity"]),
                                    "owasp_category": "",
                                    "category": result["ruleId"],
                                    "details": {
                                        "repoContext": repo_context,
                                        "name": result["message"]["text"],
                                        "tags": ",".join(rule["properties"]["tags"]),
                                        "fileName": location["physicalLocation"]
                                        ["artifactLocation"]["uri"],
                                        "DATA_TYPE": "OSS_SCAN",
                                        "lineNumber": location["physicalLocation"]["region"]
                                        ["startLine"],
                                    },
                                }
                                # NOTE(review): findings are written back to back
                                # with no separator — confirm the consumer accepts
                                # concatenated JSON objects.
                                out_file.write(json.dumps(finding))
                                # NOTE(review): finding_id is incremented but never
                                # included in the finding — confirm intent.
                                finding_id = finding_id + 1
                    except Exception as e:
                        # Best-effort: a malformed run is skipped; the caught
                        # exception e is not included in the log message
                        LOG.warning(
                            "Unable to convert the run to findings format")
def extract_from_file(tool_name, working_dir, report_file, file_path_list=None):
    """Extract properties from reports

    Dispatches on the tool name and the report file extension
    (.json / .csv / .xml) to the matching parsing logic.

    NOTE(review): another extract_from_file with an extra tool_args parameter
    appears earlier in SOURCE — if both live in the same module the later
    definition shadows the earlier one; confirm they belong to different files.

    :param tool_name: tool name
    :param working_dir: Working directory
    :param report_file: Report file
    :param file_path_list: Full file path for any manipulation

    :return issues, metrics, skips information
    """
    issues = []
    metrics = None
    skips = []
    # If the tools did not produce any result do not crash
    if not os.path.isfile(report_file):
        return issues, metrics, skips
    extn = pathlib.PurePosixPath(report_file).suffix
    with io.open(report_file, "r") as rfile:
        # Static check use jsonlines format, duh
        if tool_name == "staticcheck":
            contents = rfile.read()
            try:
                # One JSON document per line
                issues = [
                    json.loads(str(item)) for item in contents.strip().split("\n")
                ]
            except json.decoder.JSONDecodeError:
                LOG.warning(
                    "staticcheck produced no result since the project was not built before analysis!"
                )
            return issues, metrics, skips
        if extn == ".json":
            try:
                report_data = json.loads(rfile.read())
            except json.decoder.JSONDecodeError:
                # Unparseable json report: return empty results, do not crash
                return issues, metrics, skips
            # Inspect uses vulnerabilities
            if tool_name == "inspect":
                file_name_prefix = ""
                # NOTE(review): .get("vulnerabilities") may return None for an
                # empty report, which would raise here — confirm upstream.
                for v in report_data.get("vulnerabilities"):
                    if not v:
                        continue
                    vuln = v["vulnerability"]
                    location = {}
                    # Pick the first dataflow location outside generic packages
                    if vuln.get("dataFlow") and vuln.get("dataFlow").get(
                            "dataFlow"):
                        for l in vuln["dataFlow"]["dataFlow"]["list"]:
                            if not is_generic_package(
                                    l["location"].get("fileName")):
                                location = l["location"]
                                break
                    fileName = location.get("fileName")
                    if fileName == "N/A":
                        continue
                    # Resolve the path prefix once and reuse for every finding
                    if not file_name_prefix:
                        file_name_prefix = find_path_prefix(
                            working_dir, fileName)
                    issues.append({
                        "rule_id": vuln["category"],
                        "title": vuln["title"],
                        "description": vuln["description"],
                        "score": vuln["score"],
                        "severity": vuln["severity"],
                        "line_number": location.get("lineNumber"),
                        "filename": os.path.join(file_name_prefix, fileName),
                        "first_found": vuln["firstVersionDetected"],
                        "issue_confidence": "HIGH",
                    })
            elif isinstance(report_data, list):
                issues = report_data
            else:
                if tool_name == "checkov":
                    issues = report_data.get("results", {}).get("failed_checks")
                elif "sec_issues" in report_data:
                    # NodeJsScan uses sec_issues
                    sec_data = report_data["sec_issues"]
                    for key, value in sec_data.items():
                        if isinstance(value, list):
                            issues = issues + value
                        else:
                            issues.append(value)
                elif "Issues" in report_data:
                    tmpL = report_data.get("Issues", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result" % tool_name)
                elif "results" in report_data:
                    tmpL = report_data.get("results", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result" % tool_name)
        if extn == ".csv":
            # headers are parsed but not returned to the caller
            headers, issues = csv_parser.get_report_data(rfile)
        if extn == ".xml":
            issues, metrics = xml_parser.get_report_data(rfile, file_path_list)
    return issues, metrics, skips