def scan(db, project_type, pkg_list, suggest_mode):
    """Method to search packages in our vulnerability database

    :param db: Reference to db
    :param project_type: Project type
    :param pkg_list: List of packages
    :param suggest_mode: True if package fix version should be normalized across findings
    """
    if not pkg_list:
        LOG.debug("Empty package search attempted!")
    else:
        LOG.info("Scanning {} oss dependencies for issues".format(len(pkg_list)))
    results, pkg_aliases = utils.search_pkgs(db, project_type, pkg_list)
    # pkg_aliases is a dict that can be used to find the original vendor and package name
    # This way we consistently use the same names used by the caller irrespective of how
    # the result was obtained
    sug_version_dict = {}
    if suggest_mode:
        # From the results identify optimal max version
        sug_version_dict = suggest_version(results, pkg_aliases)
        if sug_version_dict:
            LOG.debug(
                "Adjusting fix version based on the initial suggestion {}".format(
                    sug_version_dict
                )
            )
            # Recheck packages
            sug_pkg_list = []
            for k, v in sug_version_dict.items():
                if not v:
                    continue
                vendor = ""
                name = None
                version = v
                tmpA = k.split(":")
                if len(tmpA) == 2:
                    vendor = tmpA[0]
                    name = tmpA[1]
                else:
                    name = tmpA[0]
                # De-alias the vendor and package name
                full_pkg = "{}:{}".format(vendor, name)
                full_pkg = pkg_aliases.get(full_pkg, full_pkg)
                vendor, name = full_pkg.split(":")
                sug_pkg_list.append(
                    {"vendor": vendor, "name": name, "version": version}
                )
            LOG.debug(
                "Re-checking our suggestion to ensure there are no further vulnerabilities"
            )
            override_results, _ = utils.search_pkgs(db, project_type, sug_pkg_list)
            if override_results:
                new_sug_dict = suggest_version(override_results)
                LOG.debug("Received override results: {}".format(new_sug_dict))
                for nk, nv in new_sug_dict.items():
                    sug_version_dict[nk] = nv
    return results, pkg_aliases, sug_version_dict
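# Illustrative usage sketch (not part of the original module): how scan() is
# typically driven. The package fields mirror those built during the
# suggest_mode recheck above; the names and versions are made up.
def _example_scan_usage(db):
    pkg_list = [{"vendor": "pypi", "name": "django", "version": "3.0.1"}]
    results, pkg_aliases, sug_version_dict = scan(db, "python", pkg_list, True)
    # sug_version_dict maps "vendor:name" to a suggested fix version,
    # e.g. {"pypi:django": "3.0.14"} (hypothetical value)
    return results, pkg_aliases, sug_version_dict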
def analyse(project_type, results):
    """Method to tally the results by severity

    :param project_type: Project type
    :param results: Scan or audit results
    :return: Dict mapping severity to a count, or None if there are no results
    """
    if not results:
        LOG.info("No oss vulnerabilities detected ✅")
        return None
    summary = {"UNSPECIFIED": 0, "LOW": 0, "MEDIUM": 0, "HIGH": 0, "CRITICAL": 0}
    for res in results:
        summary[res.severity] += 1
    return summary
def analyse_licenses(project_type, licenses_results, license_report_file=None):
    """Method to print and report license scan findings

    :param project_type: Project type
    :param licenses_results: Dict mapping "name@version" to a list of license dicts
    :param license_report_file: Output report file in jsonl format
    """
    if not licenses_results:
        return
    table = Table(
        title=f"License Scan Summary ({project_type})",
        box=box.DOUBLE_EDGE,
        header_style="bold magenta",
    )
    headers = ["Package", "Version", "License Id", "License conditions"]
    for h in headers:
        table.add_column(header=h)
    report_data = []
    for pkg, ll in licenses_results.items():
        pkg_ver = pkg.split("@")
        for lic in ll:
            if not lic:
                data = [*pkg_ver, "Unknown license"]
                table.add_row(*data)
                report_data.append(dict(zip(headers, data)))
            elif lic["condition_flag"]:
                conditions_str = ", ".join(lic["conditions"])
                if "http" not in conditions_str:
                    conditions_str = (
                        conditions_str.replace("--", " for ").replace("-", " ").title()
                    )
                data = [
                    *pkg_ver,
                    "{}{}".format(
                        "[cyan]"
                        if "GPL" in lic["spdx-id"]
                        or "CC-BY-" in lic["spdx-id"]
                        or "Facebook" in lic["spdx-id"]
                        or "WTFPL" in lic["spdx-id"]
                        else "",
                        lic["spdx-id"],
                    ),
                    conditions_str,
                ]
                table.add_row(*data)
                report_data.append(dict(zip(headers, data)))
    if report_data:
        console.print(table)
        # Store the license scan findings in jsonl format
        if license_report_file:
            with open(license_report_file, "w") as outfile:
                for row in report_data:
                    json.dump(row, outfile)
                    outfile.write("\n")
    else:
        LOG.info("No license violation detected ✅")
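# Illustrative sketch (not part of the original module): the input shape
# analyse_licenses() expects. Keys are "name@version" strings; each value is
# a list of license dicts carrying the spdx-id / conditions / condition_flag
# fields read above. The sample entries are made up.
def _example_analyse_licenses():
    licenses_results = {
        "left-pad@1.3.0": [
            {
                "spdx-id": "WTFPL",
                "conditions": ["include-copyright"],
                "condition_flag": True,
            }
        ],
        # A package whose license could not be determined takes the
        # "Unknown license" branch above
        "mystery-pkg@0.0.1": [None],
    }
    analyse_licenses("nodejs", licenses_results)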
def exec_tool(args, cwd=None, stdout=subprocess.PIPE):
    """Convenience method to invoke cli tools

    :param args: Command line arguments
    :param cwd: Working directory
    :param stdout: Destination for standard output
    """
    try:
        LOG.debug('⚡︎ Executing "{}"'.format(" ".join(args)))
        if os.environ.get("FETCH_LICENSE"):
            LOG.debug(
                "License information will be fetched from the registry. This will take several minutes ..."
            )
        subprocess.run(
            args,
            stdout=stdout,
            stderr=subprocess.STDOUT,
            cwd=cwd,
            env=os.environ.copy(),
            check=False,
            shell=False,
            encoding="utf-8",
        )
    except Exception as e:
        LOG.exception(e)
def create_bom(project_type, bom_file, src_dir="."):
    """Method to create BOM file by executing the cdxgen command

    :param project_type: Project type
    :param bom_file: BOM file
    :param src_dir: Source directory
    :return: True if the command was executed, False if the executable was not found
    """
    cdxgen_cmd = os.environ.get("CDXGEN_CMD", "cdxgen")
    if not shutil.which(cdxgen_cmd):
        LOG.warning(
            "{} command not found. Please install using npm install @appthreat/cdxgen or set the PATH variable".format(
                cdxgen_cmd
            )
        )
        return False
    if project_type in ("docker",):
        LOG.info(
            f"Generating Software Bill-of-Materials for container image {src_dir}. This might take a few mins ..."
        )
    args = [cdxgen_cmd, "-r", "-t", project_type, "-o", bom_file, src_dir]
    exec_tool(args)
    return os.path.exists(bom_file)
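# Illustrative sketch (not part of the original module): the typical
# create_bom() + get_pkg_list() flow, assuming the cdxgen binary is installed
# and on PATH. The paths and project type are placeholders.
def _example_create_bom():
    bom_file = os.path.join(os.getcwd(), "reports", "bom-nodejs.json")
    if create_bom("nodejs", bom_file, src_dir="."):
        pkg_list = get_pkg_list(bom_file)
        LOG.info("Parsed {} packages from the BOM".format(len(pkg_list)))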
def summarise(
    project_type,
    results,
    pkg_aliases,
    sug_version_dict,
    scoped_pkgs={},
    report_file=None,
    console_print=True,
):
    """Method to summarise the results

    :param project_type: Project type
    :param results: Scan or audit results
    :param pkg_aliases: Package aliases used
    :param sug_version_dict: Dictionary containing version suggestions
    :param scoped_pkgs: Dict containing package scopes
    :param report_file: Output report file
    :param console_print: Boolean to indicate if the results should get printed to the console
    :return: Summary of the results
    """
    if not results:
        LOG.info(f"No oss vulnerabilities detected for type {project_type} ✅")
        return None
    if report_file:
        jsonl_report(
            project_type,
            results,
            pkg_aliases,
            sug_version_dict,
            scoped_pkgs,
            report_file,
        )
    if console_print:
        print_results(
            project_type, results, pkg_aliases, sug_version_dict, scoped_pkgs
        )
    summary = analyse(project_type, results)
    return summary
def get_pkg_list(xmlfile):
    """Method to parse the bom xml file and convert it into a list of packages

    :param xmlfile: BOM xml file to parse
    :return: List of package dicts
    """
    if xmlfile.endswith(".json"):
        return get_pkg_list_json(xmlfile)
    pkgs = []
    try:
        et = parse(xmlfile)
        root = et.getroot()
        for child in root:
            if child.tag.endswith("components"):
                for ele in child.iter():
                    if ele.tag.endswith("component"):
                        licenses = get_licenses(ele)
                        pkgs.append(get_package(ele, licenses))
    except xml.etree.ElementTree.ParseError as pe:
        LOG.debug("Unable to parse {} {}".format(xmlfile, pe))
        LOG.warning(
            "Unable to produce Software Bill-of-Materials for this project. Execute the scan after installing the dependencies!"
        )
    return pkgs
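# For reference, a minimal CycloneDX BOM fragment of the kind get_pkg_list()
# walks. The XML namespace is omitted here, which is why the code above
# matches tags with endswith() rather than by exact name:
#
#   <bom>
#     <components>
#       <component>
#         <name>django</name>
#         <version>3.0.1</version>
#         <licenses><license><id>BSD-3-Clause</id></license></licenses>
#       </component>
#     </components>
#   </bom>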
def main():
    args = build_args()
    if not args.no_banner:
        print(at_logo)
    src_dir = args.src_dir_image
    if not src_dir:
        src_dir = os.getcwd()
    reports_base_dir = src_dir
    # Detect the project types and perform the right type of scan
    if args.project_type:
        project_types_list = args.project_type.split(",")
    elif args.bom:
        project_types_list = ["bom"]
    else:
        project_types_list = utils.detect_project_type(src_dir)
    if (
        "docker" in project_types_list
        or "podman" in project_types_list
        or "container" in project_types_list
        or "binary" in project_types_list
    ):
        reports_base_dir = os.getcwd()
    db = dbLib.get()
    run_cacher = args.cache
    areport_file = (
        args.report_file
        if args.report_file
        else os.path.join(reports_base_dir, "reports", "depscan.json")
    )
    reports_dir = os.path.dirname(areport_file)
    # Create reports directory
    if not os.path.exists(reports_dir):
        os.makedirs(reports_dir)
    if len(project_types_list) > 1:
        LOG.debug("Multiple project types found: {}".format(project_types_list))
    # Enable license scanning
    if "license" in project_types_list:
        os.environ["FETCH_LICENSE"] = "true"
        project_types_list.remove("license")
        console.print(
            Panel(
                "License audit is enabled for this scan. This would increase the time by up to 10 minutes.",
                title="License Audit",
                expand=False,
            )
        )
    for project_type in project_types_list:
        sug_version_dict = {}
        pkg_aliases = {}
        results = []
        report_file = areport_file.replace(".json", "-{}.json".format(project_type))
        risk_report_file = areport_file.replace(
            ".json", "-risk.{}.json".format(project_type)
        )
        LOG.info("=" * 80)
        creation_status = False
        if args.bom and os.path.exists(args.bom):
            bom_file = args.bom
            creation_status = True
        else:
            bom_file = os.path.join(reports_dir, "bom-" + project_type + ".json")
            creation_status = create_bom(project_type, bom_file, src_dir)
        if not creation_status:
            LOG.debug("Bom file {} was not created successfully".format(bom_file))
            continue
        LOG.debug("Scanning using the bom file {}".format(bom_file))
        pkg_list = get_pkg_list(bom_file)
        if not pkg_list:
            LOG.debug("No packages found in the project!")
            continue
        scoped_pkgs = {}
        if project_type in ["python"]:
            all_imports = utils.get_all_imports(src_dir)
            LOG.debug(f"Identified {len(all_imports)} imports in your project")
            scoped_pkgs = utils.get_scope_from_imports(
                project_type, pkg_list, all_imports
            )
        else:
            scoped_pkgs = utils.get_pkgs_by_scope(project_type, pkg_list)
        if os.getenv("FETCH_LICENSE", "") in ("1", "true"):
            licenses_results = bulk_lookup(
                build_license_data(license_data_dir, spdx_license_list),
                pkg_list=pkg_list,
            )
            license_report_file = os.path.join(
                reports_dir, "license-" + project_type + ".json"
            )
            analyse_licenses(project_type, licenses_results, license_report_file)
        if project_type in risk_audit_map.keys():
            if args.risk_audit:
                console.print(
                    Panel(
                        f"Performing OSS Risk Audit for packages from {src_dir}\nNo of packages [bold]{len(pkg_list)}[/bold]. This will take a while ...",
                        title="OSS Risk Audit",
                        expand=False,
                    )
                )
                try:
                    risk_results = risk_audit(
                        project_type,
                        scoped_pkgs,
                        args.private_ns,
                        pkg_list,
                        risk_report_file,
                    )
                    analyse_pkg_risks(
                        project_type,
                        scoped_pkgs,
                        args.private_ns,
                        risk_results,
                        risk_report_file,
                    )
                except Exception as e:
                    LOG.error(e)
                    LOG.error("Risk audit was not successful")
            else:
                console.print(
                    Panel(
                        "Depscan supports OSS Risk audit for this project.\nTo enable set the environment variable [bold]ENABLE_OSS_RISK=true[/bold]",
                        title="New Feature",
                        expand=False,
                    )
                )
        if project_type in type_audit_map.keys():
            LOG.info(
                "Performing remote audit for {} of type {}".format(
                    src_dir, project_type
                )
            )
            LOG.debug(f"No of packages {len(pkg_list)}")
            try:
                audit_results = audit(project_type, pkg_list, report_file)
                if audit_results:
                    LOG.debug(f"Remote audit yielded {len(audit_results)} results")
                    results = results + audit_results
            except Exception as e:
                LOG.error("Remote audit was not successful")
                LOG.error(e)
                # Reset to an empty list so the later concatenation with the
                # vdb results does not fail
                results = []
        # In case of docker, check if there are any npm packages that can be audited remotely
        if project_type in ("podman", "docker"):
            npm_pkg_list = get_pkg_by_type(pkg_list, "npm")
            if npm_pkg_list:
                LOG.debug(f"No of packages {len(npm_pkg_list)}")
                try:
                    audit_results = audit("nodejs", npm_pkg_list, report_file)
                    if audit_results:
                        LOG.debug(
                            f"Remote audit yielded {len(audit_results)} results"
                        )
                        results = results + audit_results
                except Exception as e:
                    LOG.error("Remote audit was not successful")
                    LOG.error(e)
        if not dbLib.index_count(db["index_file"]):
            run_cacher = True
        else:
            LOG.debug(
                "Vulnerability database loaded from {}".format(config.vdb_bin_file)
            )
        sources_list = [OSVSource(), NvdSource()]
        if os.environ.get("GITHUB_TOKEN"):
            sources_list.insert(0, GitHubSource())
        if run_cacher:
            for s in sources_list:
                LOG.debug("Refreshing {}".format(s.__class__.__name__))
                s.refresh()
            run_cacher = False
        elif args.sync:
            for s in sources_list:
                LOG.debug("Syncing {}".format(s.__class__.__name__))
                s.download_recent()
            run_cacher = False
        LOG.debug(
            "Vulnerability database contains {} records".format(
                dbLib.index_count(db["index_file"])
            )
        )
        LOG.info(
            "Performing regular scan for {} using plugin {}".format(
                src_dir, project_type
            )
        )
        vdb_results, pkg_aliases, sug_version_dict = scan(
            db, project_type, pkg_list, args.suggest
        )
        if vdb_results:
            results = results + vdb_results
        # Summarise and print results
        summary = summarise(
            project_type,
            results,
            pkg_aliases,
            sug_version_dict,
            scoped_pkgs,
            report_file,
            True,
        )
        if summary and not args.noerror and len(project_types_list) == 1:
            # Hard coded build break logic for now
            if summary.get("CRITICAL") > 0:
                sys.exit(1)
def analyse_pkg_risks(
    project_type, scoped_pkgs, private_ns, risk_results, risk_report_file=None
):
    """Method to print and report package risk audit findings

    :param project_type: Project type
    :param scoped_pkgs: Dict containing package scopes
    :param private_ns: Private namespace prefixes
    :param risk_results: Risk audit results keyed by package
    :param risk_report_file: Output report file in jsonl format
    """
    if not risk_results:
        return
    table = Table(
        title=f"Risk Audit Summary ({project_type})",
        box=box.DOUBLE_EDGE,
        header_style="bold magenta",
    )
    report_data = []
    required_pkgs = scoped_pkgs.get("required", [])
    optional_pkgs = scoped_pkgs.get("optional", [])
    excluded_pkgs = scoped_pkgs.get("excluded", [])
    headers = ["Package", "Used?", "Risk Score", "Identified Risks"]
    for h in headers:
        justify = "left"
        if h == "Risk Score":
            justify = "right"
        table.add_column(header=h, justify=justify)
    for pkg, risk_obj in risk_results.items():
        if not risk_obj:
            continue
        risk_metrics = risk_obj.get("risk_metrics")
        scope = risk_obj.get("scope")
        project_type_pkg = "{}:{}".format(project_type, pkg).lower()
        if project_type_pkg in required_pkgs:
            scope = "required"
        elif project_type_pkg in optional_pkgs:
            scope = "optional"
        elif project_type_pkg in excluded_pkgs:
            scope = "excluded"
        package_usage = "N/A"
        package_usage_simple = "N/A"
        if scope == "required":
            package_usage = "[bright_green][bold]Yes"
            package_usage_simple = "Yes"
        if scope == "optional":
            package_usage = "[magenta]No"
            package_usage_simple = "No"
        if not risk_metrics:
            continue
        if risk_metrics.get("risk_score") and (
            risk_metrics.get("risk_score") > config.pkg_max_risk_score
            or risk_metrics.get("pkg_private_on_public_registry_risk")
        ):
            risk_score = f"""{round(risk_metrics.get("risk_score"), 2)}"""
            data = [
                pkg,
                package_usage,
                risk_score,
            ]
            edata = [
                pkg,
                package_usage_simple,
                risk_score,
            ]
            risk_categories = []
            risk_categories_simple = []
            for rk, rv in risk_metrics.items():
                if rk.endswith("_risk") and rv is True:
                    rcat = rk.replace("_risk", "")
                    help_text = config.risk_help_text.get(rcat)
                    # Only add texts that are available.
                    if help_text:
                        if rcat in (
                            "pkg_deprecated",
                            "pkg_private_on_public_registry",
                        ):
                            risk_categories.append(f":cross_mark: {help_text}")
                        else:
                            risk_categories.append(f":warning: {help_text}")
                        risk_categories_simple.append(help_text)
            data.append("\n".join(risk_categories))
            edata.append(", ".join(risk_categories_simple))
            table.add_row(*data)
            report_data.append(dict(zip(headers, edata)))
    if report_data:
        console.print(table)
        # Store the risk audit findings in jsonl format
        if risk_report_file:
            with open(risk_report_file, "w") as outfile:
                for row in report_data:
                    json.dump(row, outfile)
                    outfile.write("\n")
    else:
        LOG.info("No package risks detected ✅")
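# Illustrative sketch (not part of the original module): the shapes
# analyse_pkg_risks() consumes. The risk_metrics keys mirror those checked
# above; the score and scope values are made up, and whether a row is printed
# depends on config.pkg_max_risk_score and config.risk_help_text.
def _example_analyse_pkg_risks():
    scoped_pkgs = {"required": ["npm:lodash"], "optional": [], "excluded": []}
    risk_results = {
        "lodash": {
            "scope": "required",
            "risk_metrics": {"risk_score": 5.2, "pkg_deprecated_risk": True},
        }
    }
    analyse_pkg_risks("npm", scoped_pkgs, "mycorp", risk_results)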
def metadata_from_registry(registry_type, pkg_list, private_ns=None):
    """Method to query registry for the package metadata

    :param registry_type: Registry type such as npm or pypi
    :param pkg_list: List of packages
    :param private_ns: Private namespace
    """
    metadata_dict = {}
    # Circuit breaker flag to break the risk audit in case of many api errors
    circuit_breaker = False
    # Track the api failures count
    failure_count = 0
    done_count = 0
    with Progress(
        console=console,
        transient=True,
        redirect_stderr=False,
        redirect_stdout=False,
        refresh_per_second=1,
    ) as progress:
        task = progress.add_task(
            "[green] Auditing packages", total=len(pkg_list), start=True
        )
        for pkg in pkg_list:
            if circuit_breaker:
                LOG.info(
                    "Risk audit has been interrupted due to frequent api errors. Please try again later."
                )
                progress.stop()
                return {}
            scope = pkg.get("scope", "").lower()
            key, lookup_url = get_lookup_url(registry_type, pkg)
            if not key or key.startswith("https://"):
                progress.advance(task)
                continue
            progress.update(task, description=f"Checking {key}")
            try:
                r = requests.get(url=lookup_url, timeout=config.request_timeout_sec)
                json_data = r.json()
                # Npm returns this error if the package is not found
                if (
                    json_data.get("code") == "MethodNotAllowedError"
                    or r.status_code > 400
                ):
                    continue
                is_private_pkg = False
                if private_ns:
                    namespace_prefixes = private_ns.split(",")
                    for ns in namespace_prefixes:
                        if key.lower().startswith(ns.lower()) or key.lower().startswith(
                            "@" + ns.lower()
                        ):
                            is_private_pkg = True
                            break
                risk_metrics = {}
                if registry_type == "npm":
                    risk_metrics = npm_pkg_risk(json_data, is_private_pkg, scope)
                elif registry_type == "pypi":
                    risk_metrics = pypi_pkg_risk(json_data, is_private_pkg, scope)
                metadata_dict[key] = {
                    "scope": scope,
                    "pkg_metadata": json_data,
                    "risk_metrics": risk_metrics,
                    "is_private_pkg": is_private_pkg,
                }
            except Exception as e:
                LOG.debug(e)
                failure_count = failure_count + 1
            progress.advance(task)
            done_count = done_count + 1
            if failure_count >= config.max_request_failures:
                circuit_breaker = True
    LOG.debug(
        f"Retrieved package metadata for {done_count}/{len(pkg_list)} packages. Failures count {failure_count}"
    )
    return metadata_dict
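# Illustrative sketch (not part of the original module): querying npm registry
# metadata for two packages. The scope values mirror those produced upstream;
# the package names and private namespace are arbitrary.
def _example_metadata_lookup():
    pkg_list = [
        {"vendor": "", "name": "lodash", "version": "4.17.20", "scope": "required"},
        {"vendor": "@ngcorp", "name": "core", "version": "1.0.0", "scope": "optional"},
    ]
    metadata = metadata_from_registry("npm", pkg_list, private_ns="ngcorp")
    for key, info in metadata.items():
        LOG.debug("{} -> risk metrics {}".format(key, info.get("risk_metrics")))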