def parse_filename(self, filename):
    """
    Split a package filename into its product name and version number.

    Example: package_filename = openssh-client_8.4p1-5ubuntu1_amd64.deb
        here, product_name = openssh-client and version_number = 8.4p1-5ubuntu1

    Returns a ``(product_name, version_number)`` tuple when the extractor
    reports that it can handle the file. Otherwise UnknownArchiveType is
    raised inside an ErrorHandler context.
    """
    # resolving directory names: keep only the last path component
    separator = "\\" if sys.platform == "win32" else "/"
    filename = filename.split(separator)[-1]

    if not self.extractor.can_extract(filename):
        # raise error for unknown archive types
        with ErrorHandler(mode=ErrorMode.NoTrace, logger=LOGGER):
            raise UnknownArchiveType(filename)
        # only reached if the handler swallows the exception
        return None

    # the archive type decides which separator delimits name and version
    if filename.endswith(".tar.xz"):
        # example: libarchive-3.5.1-1-aarch64.pkg.tar.xz
        fields = filename.rsplit("-", 3)
    elif filename.endswith((".deb", ".ipk")):
        # example: varnish_6.4.0-3_amd64.deb
        fields = filename.rsplit("_")
    else:
        fields = filename.rsplit("-", 2)

    product_name = fields[0]
    version_number = fields[1]

    LOGGER.debug(
        f"Parsing file '{self.filename}': Results: product_name='{product_name}', version_number='{version_number}'"
    )
    return product_name, version_number
def parse_spdx_rdf(self, sbom_file: str) -> List[List[str]]:
    """Parse an SPDX RDF BOM file and collect ``[package, version]`` pairs.

    The file is scanned line by line. A line starting with ``<spdx:name>``
    sets the current package name; a line starting with
    ``<spdx:versionInfo>`` emits an entry for the current package. Lines
    whose tag cannot be matched are logged at debug level and skipped.
    """
    with open(sbom_file) as handle:
        rdf_lines = handle.readlines()

    modules: List[List[str]] = []
    package = ""
    for raw_line in rdf_lines:
        text = raw_line.strip()
        try:
            if text.startswith("<spdx:name>"):
                found = re.search("<spdx:name>(.+?)</spdx:name>", text)
                if found is None:
                    raise KeyError(f"Could not find package in {text}")
                package = found.group(1)
            elif text.startswith("<spdx:versionInfo>"):
                found = re.search(
                    "<spdx:versionInfo>(.+?)</spdx:versionInfo>", text
                )
                if found is None:
                    raise KeyError(f"Could not find version in {text}")
                modules.append([package, found.group(1)])
        except KeyError as err:
            LOGGER.debug(err, exc_info=True)
    return modules
def parse_spdx_xml(self, sbom_file: str) -> List[List[str]]:
    """Parse an SPDX XML BOM file and collect ``[package, version]`` pairs.

    Every ``packages`` child of the root element must provide non-empty
    ``name`` and ``versionInfo`` children; components lacking either are
    logged at debug level and skipped.
    """
    # XML is experimental in SPDX 2.2
    root = ET.parse(sbom_file).getroot()

    # Everything up to and including "}" in the root tag is the namespace
    # prefix; it is empty when the document declares no namespace.
    namespace = root.tag[: root.tag.find("}") + 1]

    modules: List[List[str]] = []
    for component in root.findall(namespace + "packages"):
        try:
            name_node = component.find(namespace + "name")
            package = None if name_node is None else name_node.text
            if package is None:
                raise KeyError(f"Could not find package in {component}")

            version_node = component.find(namespace + "versionInfo")
            version = None if version_node is None else version_node.text
            if version is None:
                raise KeyError(f"Could not find version in {component}")

            modules.append([package, version])
        except KeyError as err:
            LOGGER.debug(err, exc_info=True)
    return modules
def search_version_string(self, matched_list):
    """Find version strings in a list of matched strings.

    A string qualifies when it contains ``self.product_name`` followed,
    anywhere later in the string, by ``self.version_number`` (compared
    case-insensitively) and does not end with ``.debug``.

    Args:
        matched_list: iterable of candidate strings.

    Returns:
        List of the qualifying strings, in their original order.
    """
    # TODO: add multiline string finding

    # Both values are literal text, not regular expressions. Escape them so
    # that "." in a version only matches a dot and names such as "c++" do
    # not produce an invalid pattern.
    product = re.escape(str(self.product_name))
    version = re.escape(str(self.version_number))
    pattern1 = re.compile(rf"{product}(.*){version}", re.IGNORECASE)
    # ^ this does not work for debian packages

    # pattern2 = rf"{self.product_name}(.*)([0-9]+[.-][0-9]+([.-][0-9]+)?)"
    # this matches patterns like:
    #   product1.2.3
    #   product 1.2.3
    #   product-1.2.3
    #   product.1.2.3
    #   product version 1.2.3
    #   product v1.2.3(1)

    version_strings = [
        candidate
        for candidate in matched_list
        if pattern1.search(candidate)
        # removes .debug, so, this does not get printed
        and not candidate.endswith(".debug")
    ]
    LOGGER.debug(f"found version-string matches = {version_strings}")
    # TODO: regex highlight in these matched strings?
    return version_strings
def get_data(self, cve_number: str, product: str):
    """Download and decode the JSON record for ``cve_number``.

    The record is requested from ``{RH_CVE_API}/{cve_number}.json``.

    Args:
        cve_number: identifier placed in the request URL.
        product: accepted for interface compatibility; not used here.

    Returns:
        The decoded JSON document, or None when the server answers with an
        HTTP error (the error is logged at debug level).
    """
    full_query = f"{RH_CVE_API}/{cve_number}.json"
    try:
        # static https url above
        # The context manager closes the response (and its socket) even if
        # reading or decoding fails.
        with request.urlopen(full_query) as response:  # nosec
            payload = response.read().decode("utf-8")
    except error.HTTPError as e:
        LOGGER.debug(e)
        return None
    return loads(payload)
def search_pattern(self, file_content, pattern):
    """Collect the lines of ``file_content`` that match ``pattern``.

    ``file_content`` is split on newlines and each line is searched
    case-insensitively; matching lines are returned stripped of surrounding
    whitespace. Used to find strings for CONTAINS_PATTERNS with the
    product name in them.
    """
    matches = []
    for line in file_content.split("\n"):
        if re.search(pattern, line, re.IGNORECASE):
            matches.append(line.strip())

    LOGGER.debug(f"found matches = {matches}")
    # TODO: regex highlight in these matched strings?
    return matches
def find_vendor_product(self):
    """Find vendor-product pairs for ``self.product_name`` in the database.

    The ``cve_range`` table is queried for distinct ``(vendor, product)``
    rows whose product equals ``self.product_name``. When nothing is found
    and the name contains digits, the digits are removed from
    ``self.product_name`` and the lookup is repeated.

    Returns:
        The list of ``(vendor, product)`` rows when at least one was found
        (a warning is logged if there are several); ``[]`` when a non-empty,
        digit-free name has no match; ``None`` when there is no match and
        ``self.product_name`` is empty.
    """
    LOGGER.debug(
        f"checking for product_name='{self.product_name}' and version_name='{self.version_number}' in the database"
    )

    # finding out all distinct (vendor, product) pairs with the help of product_name
    query = """
        SELECT distinct vendor, product FROM cve_range
        WHERE product=(:product);
    """

    CVEDB.db_open(self)
    try:
        cursor = self.connection.cursor()
        cursor.execute(query, {"product": self.product_name})
        data = cursor.fetchall()
    finally:
        # Close on every path, and before any recursive retry. The close
        # call used to sit after the return statements and was skipped.
        CVEDB.db_close(self)

    # checking if (vendor, product) was found in the database
    if data:
        # warning the user to select the vendor-product pairs manually if multiple pairs are found
        if len(data) != 1:
            LOGGER.warning(
                textwrap.dedent(f"""
                    ===============================================================
                    Multiple ("vendor", "product") pairs found for "{self.product_name}"
                    Please manually select the appropriate pair.
                    ===============================================================
                    """))
        return data  # [('vendor', 'product')]

    if not self.product_name:
        # nothing left to search with; keeps the historical None result
        return None

    # removing numeric characters from the product_name
    if any(char.isdigit() for char in self.product_name):
        LOGGER.debug(f"removing digits from product_name={self.product_name}")
        self.product_name = "".join(
            char for char in self.product_name if not char.isdigit()
        )
        return self.find_vendor_product()

    # warn and ask for product_name
    LOGGER.warning(
        textwrap.dedent(f"""
            =================================================================
            No match was found for "{self.product_name}" in database.
            Please check your file or try specifying the "product_name" also.
            =================================================================
            """))
    return []
def parse_spdx_json(self, sbom_file: str) -> List[List[str]]:
    """Parse an SPDX JSON BOM file and collect ``[package, version]`` pairs.

    Args:
        sbom_file: path of the JSON document; it must contain a top-level
            ``packages`` list.

    Returns:
        One ``[name, versionInfo]`` entry per package that has both keys.
        Packages missing either key are logged at debug level and skipped.

    Raises:
        KeyError: if the document has no ``packages`` key.
    """
    # the context manager guarantees the file handle is released
    with open(sbom_file) as sbom:
        data = json.load(sbom)

    modules: List[List[str]] = []
    for d in data["packages"]:
        try:
            # both lookups are guarded so that one incomplete package does
            # not abort the whole document
            modules.append([d["name"], d["versionInfo"]])
        except KeyError as e:
            LOGGER.debug(e, exc_info=True)
    return modules
def extract_and_parse_file(self, filename):
    """Extract ``filename`` and mine its executables for checker patterns.

    Every file yielded by the walker over the extracted content is checked
    with the version scanner. For executables, the strings mentioning
    ``self.product_name`` are collected, version strings among them are
    added to ``self.version_pattern``, the file's base name is added to
    ``self.filename_pattern`` when a version string was found, and matches
    longer than ``self.string_length`` that contain neither "/" nor "!"
    are added to ``self.contains_patterns``.

    Returns ``self.contains_patterns`` when it is non-empty, otherwise the
    list of all matched strings; returns None when the file cannot be
    extracted.
    """
    with self.extractor as ectx:
        if not ectx.can_extract(filename):
            return None

        scanner = self.version_scanner
        path_separator = "\\" if sys.platform == "win32" else "/"
        binary_string_list = []

        for filepath in self.walker([ectx.extract(filename)]):
            clean_path = scanner.clean_file_path(filepath)
            LOGGER.debug(f"checking whether {clean_path} is binary")

            # see if the file is ELF binary file and parse for strings
            if not scanner.is_executable(filepath)[0]:
                continue

            LOGGER.debug(f"{clean_path} <--- this is an ELF binary")
            file_content = scanner.parse_strings(filepath)
            matches = self.search_pattern(file_content, self.product_name)

            # searching for version strings in the found matches
            version_string = self.search_version_string(matches)
            self.version_pattern += version_string

            # if version string is found in file, append it to filename_pattern
            if version_string:
                self.filename_pattern.append(filepath.split(path_separator)[-1])
                LOGGER.info(
                    f"matches for {self.product_name} found in {clean_path}"
                )

            binary_string_list += matches

            self.contains_patterns.extend(
                match
                for match in matches
                if "/" not in match
                and "!" not in match
                and len(match) > self.string_length
            )

        LOGGER.debug(f"{self.filename_pattern}")

        # to resolve case when there are no strings common with product_name in them
        if self.contains_patterns:
            return self.contains_patterns
        return binary_string_list
def scan_file(self) -> Dict[ProductInfo, TriageData]:
    """Parse the SBOM in ``self.filename`` and record its products.

    The parser is chosen from ``self.type`` ("spdx", "cyclonedx" or
    "swid"); any other type, or a KeyError / FileNotFoundError /
    ET.ParseError while parsing, yields no modules. Each module with a
    non-empty version and a known vendor is stored in ``self.sbom_data``
    with default triage values.

    Returns ``self.sbom_data``.
    """
    LOGGER.info(f"Processing SBOM {self.filename} of type {self.type.upper()}")

    try:
        parser_factories = {
            "spdx": SPDXParser,
            "cyclonedx": CycloneParser,
            "swid": SWIDParser,
        }
        factory = parser_factories.get(self.type)
        modules = factory().parse(self.filename) if factory is not None else []
    except (KeyError, FileNotFoundError, ET.ParseError) as e:
        LOGGER.debug(e, exc_info=True)
        modules = []

    LOGGER.debug(
        f"The number of modules identified in SBOM - {len(modules)}\n{modules}"
    )

    # Now process list of modules to create [vendor, product, version] tuples
    parsed_data: List[ProductInfo] = []
    for module in modules:
        product, version = module[0], module[1]
        if version == "":
            continue
        # Now add vendor to create product record....
        vendor = self.get_vendor(product)
        if vendor is None:
            continue
        parsed_data.append(ProductInfo(vendor, product, version))

    for row in parsed_data:
        self.sbom_data[row]["default"] = {
            "remarks": Remarks.NewFound,
            "comments": "",
            "severity": "",
        }
        # no path information is available for SBOM entries: the path set
        # holds a single empty string
        self.sbom_data[row]["paths"] = {""}

    LOGGER.debug(f"SBOM Data {self.sbom_data}")
    return self.sbom_data
if not self.years(): raise EmptyCache(self.cachedir) self.LOGGER.debug(f"Years present: {self.years()}") return self def __exit__(self, _exc_type, _exc_value, _traceback): pass def clear_cached_data(self): if os.path.exists(self.cachedir): self.LOGGER.warning(f"Deleting cachedir {self.cachedir}") shutil.rmtree(self.cachedir) def refresh(): with CVEDB(): pass if __name__ == "__main__": LOGGER.debug("Experimenting...") cvedb = CVEDB(os.path.join(os.path.expanduser("~"), ".cache", "cvedb")) # cvedb.refresh() # print(cvedb.years()) # connection = cvedb.init_database() # cvedb.populate_db(connection) # cvedb.supplement_curl() LOGGER.setLevel(logging.INFO) LOGGER.info("Getting cves for curl 7.34.0") LOGGER.info(cvedb.get_cves("haxx", "curl", "7.34.0"))
def main(argv=None):
    """Scan a binary file for certain open source libraries that may have CVEs

    Parses the command line (``argv`` falls back to ``sys.argv``), layers
    the supplied options over the optional config file and the built-in
    defaults, prepares the CVE database according to the update policy,
    validates the inputs, then looks up CVEs for the products found in the
    input file and/or the scanned directory. Results are passed to
    OutputEngine when CVEs were found or an html/pdf report was requested.

    Returns:
        ``cve_scanner.products_with_cve`` - the number of products with
        known CVEs, intended to be used as the process exit code.
    """
    argv = argv or sys.argv

    # Reset logger level to info
    LOGGER.setLevel(logging.INFO)

    parser = argparse.ArgumentParser(
        prog="cve-bin-tool",
        description=textwrap.dedent("""
            The CVE Binary Tool scans for a number of common, vulnerable open source
            components (openssl, libpng, libxml2, expat and a few others) to let you know
            if a given directory or binary file includes common libraries with known
            vulnerabilities.
            """),
        epilog=textwrap.fill(
            f'Available checkers: {", ".join(VersionScanner.available_checkers())}'
        ) + "\n\nPlease disclose issues responsibly!",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # --- input options ---
    input_group = parser.add_argument_group("Input")
    input_group.add_argument("directory", help="directory to scan", nargs="?", default=None)
    input_group.add_argument(
        "-e",
        "--exclude",
        action=StringToListAction,
        help="Comma separated Exclude directory path",
        default=None,
    )
    input_group.add_argument(
        "-i",
        "--input-file",
        action="store",
        default="",
        help="provide input filename",
    )
    input_group.add_argument("-C", "--config", action="store", default="", help="provide config file")

    # --- output options ---
    output_group = parser.add_argument_group("Output")
    output_group.add_argument("-q", "--quiet", action="store_true", help="suppress output")
    output_group.add_argument(
        "-l",
        "--log",
        help="log level (default: info)",
        dest="log_level",
        action="store",
        choices=["debug", "info", "warning", "error", "critical"],
    )
    output_group.add_argument(
        "-o",
        "--output-file",
        action="store",
        help="provide output filename (default: output to stdout)",
    )
    output_group.add_argument(
        "--html-theme",
        action="store",
        help="provide custom theme directory for HTML Report",
    )
    output_group.add_argument(
        "-f",
        "--format",
        action="store",
        choices=["csv", "json", "console", "html", "pdf"],
        help="update output format (default: console)",
    )
    output_group.add_argument(
        "-c",
        "--cvss",
        action="store",
        help=
        "minimum CVSS score (as integer in range 0 to 10) to report (default: 0)",
    )
    output_group.add_argument(
        "-S",
        "--severity",
        action="store",
        choices=["low", "medium", "high", "critical"],
        help="minimum CVE severity to report (default: low)",
    )

    # --- general options ---
    parser.add_argument("-V", "--version", action="version", version=VERSION)
    parser.add_argument(
        "-u",
        "--update",
        action="store",
        choices=["now", "daily", "never", "latest"],
        help="update schedule for NVD database (default: daily)",
    )
    parser.add_argument(
        "-x",
        "--extract",
        action="store_true",
        help="autoextract compressed files",
    )
    parser.add_argument(
        "--disable-version-check",
        action="store_true",
        help="skips checking for a new version",
    )

    # --- checker selection ---
    checker_group = parser.add_argument_group("Checkers")
    checker_group.add_argument(
        "-s",
        "--skips",
        dest="skips",
        action=StringToListAction,
        type=str,
        help="comma-separated list of checkers to disable",
    )
    checker_group.add_argument(
        "-r",
        "--runs",
        dest="runs",
        action=StringToListAction,
        type=str,
        help="comma-separated list of checkers to enable",
    )

    # Lowest-priority layer of the option lookup built below.
    # NOTE(review): "extract" defaults to True and falsy command-line values
    # are dropped below, so -x/--extract appears unable to switch extraction
    # off - confirm this is intended.
    defaults = {
        "directory": "",
        "exclude": [],
        "input_file": "",
        "log_level": "info",
        "format": "console",
        "cvss": 0,
        "severity": "low",
        "update": "daily",
        "extract": True,
        "disable_version_check": False,
        "skips": "",
        "runs": "",
        "quiet": False,
        "output_file": "",
        "html_theme": "",
    }

    with ErrorHandler(mode=ErrorMode.NoTrace):
        raw_args = parser.parse_args(argv[1:])
        # keep only the options that carry a truthy value so that unset ones
        # fall through to the config file and then to the defaults
        args = {key: value for key, value in vars(raw_args).items() if value}

    configs = {}
    if args.get("config"):
        conf = ConfigParser(args["config"])
        configs = conf.parse_config()

    # lookup order: command line, then config file, then defaults
    args = ChainMap(args, configs, defaults)

    # logging and error related settings
    if args["log_level"]:
        LOGGER.setLevel(args["log_level"].upper())

    if args["quiet"]:
        LOGGER.setLevel(logging.CRITICAL)

    # the trace detail follows the logger level: DEBUG (10) or lower gives a
    # full trace, CRITICAL (50) or higher none, anything else a truncated one
    if 0 < LOGGER.level <= 10:
        error_mode = ErrorMode.FullTrace
    elif LOGGER.level >= 50:
        error_mode = ErrorMode.NoTrace
    else:
        error_mode = ErrorMode.TruncTrace

    if platform.system() != "Linux":
        warning_nolinux = """
        **********************************************
        Warning: this utility was developed for Linux.
        You may need to install additional utilities
        to use it on other operating systems.
        **********************************************
        """
        LOGGER.warning(warning_nolinux)

    # Database update related settings
    # Connect to the database
    cvedb_orig = CVEDB(version_check=not args["disable_version_check"], error_mode=error_mode)

    # if OLD_CACHE_DIR (from cvedb.py) exists, print warning
    if os.path.exists(OLD_CACHE_DIR):
        LOGGER.warning(
            f"Obsolete cache dir {OLD_CACHE_DIR} is no longer needed and can be removed."
        )

    # Clear data if -u now is set
    if args["update"] == "now":
        cvedb_orig.clear_cached_data()

    if args["update"] == "latest":
        cvedb_orig.refresh_cache_and_update_db()

    # update db if needed
    if args["update"] != "never":
        cvedb_orig.get_cvelist_if_stale()
    else:
        LOGGER.warning("Not verifying CVE DB cache")
        # without an update there must already be cached data to work with
        if not cvedb_orig.nvd_years():
            with ErrorHandler(mode=error_mode, logger=LOGGER):
                raise EmptyCache(cvedb_orig.cachedir)

    # CVE Database validation
    if not cvedb_orig.check_cve_entries():
        with ErrorHandler(mode=error_mode, logger=LOGGER):
            raise CVEDataMissing("No data in CVE Database")

    # Input validation
    if not args["directory"] and not args["input_file"]:
        parser.print_usage()
        with ErrorHandler(logger=LOGGER, mode=ErrorMode.NoTrace):
            raise InsufficientArgs(
                "Please specify a directory to scan or an input file required")

    if args["directory"] and not os.path.exists(args["directory"]):
        parser.print_usage()
        with ErrorHandler(logger=LOGGER, mode=ErrorMode.NoTrace):
            raise FileNotFoundError("Directory/File doesn't exist")

    # Checkers related settings
    skips = args["skips"]
    if args["runs"]:
        runs = args["runs"]
        # --runs replaces the skip list: skip every registered checker that
        # was not explicitly requested
        skips = list(
            map(
                lambda checker: checker.name,
                filter(
                    lambda checker: checker.name not in runs,
                    pkg_resources.iter_entry_points("cve_bin_tool.checker"),
                ),
            ))

    # CVEScanner related settings
    score = 0
    if args["severity"]:
        # Set minimum CVSS score based on severity
        cvss_score = {"low": 0, "medium": 4, "high": 7, "critical": 9}
        score = cvss_score[args["severity"]]
    # an explicit positive --cvss value takes precedence over --severity
    if int(args["cvss"]) > 0:
        score = int(args["cvss"])

    with CVEScanner(score=score) as cve_scanner:
        triage_data: TriageData
        total_files: int = 0
        parsed_data: Dict[ProductInfo, TriageData] = {}

        if args["input_file"]:
            input_engine = InputEngine(args["input_file"], logger=LOGGER, error_mode=error_mode)
            parsed_data = input_engine.parse_input()
            # with no directory to scan, the input file alone is the product list
            if not args["directory"]:
                for product_info, triage_data in parsed_data.items():
                    LOGGER.warning(f"{product_info}, {triage_data}")
                    cve_scanner.get_cves(product_info, triage_data)
        if args["directory"]:
            version_scanner = VersionScanner(
                should_extract=args["extract"],
                exclude_folders=args["exclude"],
                error_mode=error_mode,
            )
            version_scanner.remove_skiplist(skips)
            version_scanner.print_checkers()
            for scan_info in version_scanner.recursive_scan(args["directory"]):
                if scan_info:
                    product_info, path = scan_info
                    LOGGER.debug(f"{product_info}: {path}")
                    # reuse triage data from the input file when it knows the product
                    triage_data = parsed_data.get(product_info, {"default": {}})
                    # Ignore paths from triage_data if we are scanning directory
                    triage_data["paths"] = {path}
                    cve_scanner.get_cves(product_info, triage_data)
            total_files = version_scanner.total_scanned_files

        LOGGER.info("")
        LOGGER.info("Overall CVE summary: ")
        if args["input_file"]:
            LOGGER.info(
                f"There are {cve_scanner.products_with_cve} products with known CVEs detected"
            )
        else:
            LOGGER.info(
                f"There are {cve_scanner.products_with_cve} files with known CVEs detected"
            )
        # html and pdf reports are produced even when nothing was found
        if cve_scanner.products_with_cve > 0 or (args["format"] == "html" or args["format"] == "pdf"):
            affected_string = ", ".join(
                map(
                    lambda product_version: "".join(str(product_version)),
                    cve_scanner.affected(),
                ))
            LOGGER.info(f"Known CVEs in {affected_string}:")

            # Creates an object for OutputEngine
            output = OutputEngine(
                all_cve_data=cve_scanner.all_cve_data,
                scanned_dir=args["directory"],
                filename=args["output_file"],
                themes_dir=args["html_theme"],
                products_with_cve=cve_scanner.products_with_cve,
                products_without_cve=cve_scanner.products_without_cve,
                total_files=total_files,
            )

            if not args["quiet"]:
                output.output_file(args["format"])

        # Use the number of products with known cves as error code
        # as requested by folk planning to automate use of this script.
        # If no cves found, then the program exits cleanly.
        return cve_scanner.products_with_cve
def main(argv=None):
    """Command-line entry point of the checker helper script.

    Parses the options (``argv`` falls back to ``sys.argv``), sets the log
    level, then builds a HelperScript for the first given filename, runs
    its extraction/parsing step and prints its output. Only the first
    filename is processed.
    """
    cli_arguments = (argv or sys.argv)[1:]

    parser = argparse.ArgumentParser(
        prog="helper-script",
        description=textwrap.dedent(""" Helps contributors who want to write a new cve-bin-tool checker find common filenames, version strings, and other necessary data for building a binary checker """),
    )

    # (flags, keyword arguments) for every option, in registration order
    option_table = [
        # scan directory args
        (
            ("filenames",),
            {"help": "files to scan", "nargs": "+", "default": []},
        ),
        # product-name args
        (
            ("-p", "--product"),
            {
                "help": "provide product-name that would be searched",
                "dest": "product_name",
                "action": "store",
                "default": None,
            },
        ),
        # version-name args
        (
            ("-v", "--version"),
            {
                "help": "provide version that would be searched",
                "dest": "version_number",
                "action": "store",
                "default": None,
            },
        ),
        # log level args
        (
            ("-l", "--log"),
            {
                "help": "log level (default: warning)",
                "dest": "log_level",
                "action": "store",
                "choices": ["debug", "info", "warning", "error", "critical"],
                "default": "warning",
            },
        ),
        # contains-patterns string length args
        (
            ("--string-length",),
            {
                "help": "changes the output string-length for CONTAINS_PATTERNS (default: %(default)s)",
                "type": int,
                "action": "store",
                "default": 40,
            },
        ),
    ]
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    with ErrorHandler(mode=ErrorMode.NoTrace):
        namespace = vars(parser.parse_args(cli_arguments))
        # truthy values given on the command line win over parser defaults
        supplied = {key: value for key, value in namespace.items() if value}
        fallbacks = {key: parser.get_default(key) for key in namespace}

    args = ChainMap(supplied, fallbacks)

    LOGGER.setLevel(args["log_level"].upper())

    LOGGER.debug(f"Given filenames: {args['filenames']}")
    LOGGER.info(f"Scanning only the first filename: '{args['filenames'][0]}'")

    target = args["filenames"][0]
    helper = HelperScript(
        target,
        product_name=args["product_name"],
        version_number=args["version_number"],
        string_length=args["string_length"],
    )

    # Parsing, Extracting and Searching for version-strings
    helper.extract_and_parse_file(args["filenames"][0])

    # output on console
    helper.output()