def __init__(self, filename, product_name=None, version_number=None, string_length=40):
    """Set up the helper for one package file.

    The product name and version are first derived from ``filename`` via
    ``parse_filename``; a truthy ``product_name`` / ``version_number``
    argument takes precedence over the derived value. ``string_length`` is
    stored as-is for later filtering of candidate strings.
    """
    self.filename = filename
    self.extractor = Extractor()

    # explicit arguments win over whatever the filename suggests
    guessed_name, guessed_version = self.parse_filename(filename)
    self.product_name = product_name or guessed_name
    self.version_number = version_number or guessed_version
    self.string_length = string_length

    # database handle and location
    self.connection = None
    self.dbpath = os.path.join(DISK_LOCATION_DEFAULT, DBNAME)

    # directory walker used after extraction
    self.walker = DirWalk().walk

    # collected results for output (would use in future)
    self.contains_patterns = list()
    self.filename_pattern = list()
    self.version_pattern = list()

    self.vendor_product = self.find_vendor_product()

    # scanner used to inspect files and pull strings out of them
    self.version_scanner = VersionScanner()
def test_exclude(self, caplog):
    """Test that the exclude paths are not scanned.

    Runs the CLI over the test directory with the ``assets/`` sub-directory
    passed to ``-e`` and hands the captured log to ``check_exclude_log``.
    """
    test_path = os.path.abspath(os.path.dirname(__file__))
    exclude_path = os.path.join(test_path, "assets/")
    checkers = list(VersionScanner().checkers.keys())
    with caplog.at_level(logging.INFO):
        # "-e" takes a comma-separated list of paths. exclude_path is a single
        # str, so it is passed unchanged: ",".join(exclude_path) would iterate
        # the string and insert a comma between every character, which means
        # the intended directory would never be excluded.
        main(["cve-bin-tool", test_path, "-e", exclude_path])
        self.check_exclude_log(caplog, exclude_path, checkers)
def setup_class(cls):
    """Create the fixtures shared by the tests in this class.

    Sets ``cls.cvedb``, ``cls.scanner`` and two temporary directories. The
    CVE list is only refreshed when the ``UPDATE_DB`` environment variable
    is exactly ``"1"``.
    """
    database = CVEDB()
    cls.cvedb = database
    if os.getenv("UPDATE_DB") != "1":
        print("Skip NVD database updates.")
    else:
        database.get_cvelist_if_stale()

    # scanner with extraction switched on
    cls.scanner = VersionScanner(should_extract=True)

    # scratch space: one directory for mapping tests ...
    mapping_dir = tempfile.mkdtemp(prefix="mapping-test-")
    cls.mapping_test_dir = mapping_dir
    # ... and one for tests that need to download packages
    package_dir = tempfile.mkdtemp(prefix="package_test-")
    cls.package_test_dir = package_dir
def main(argv=None):
    """Scan a binary file for certain open source libraries that may have CVEs

    Command-line entry point. Parses arguments, layers them over an optional
    config file and the built-in defaults, configures logging, refreshes and
    validates the CVE database, then looks up CVEs for an input file and/or
    the products found while scanning a directory, and finally hands the
    results to the output engine.

    Args:
        argv: full argument vector including the program name at index 0
            (only ``argv[1:]`` is parsed). Falls back to ``sys.argv`` when
            omitted or empty.

    Returns:
        ``cve_scanner.products_with_cve`` - the number of products with known
        CVEs, meant to be used as the process exit code (0 when none found).
    """
    argv = argv or sys.argv

    # Reset logger level to info
    LOGGER.setLevel(logging.INFO)

    parser = argparse.ArgumentParser(
        prog="cve-bin-tool",
        description=textwrap.dedent("""
            The CVE Binary Tool scans for a number of common, vulnerable open source
            components (openssl, libpng, libxml2, expat and a few others) to let you know
            if a given directory or binary file includes common libraries with known
            vulnerabilities.
            """),
        epilog=textwrap.fill(
            f'Available checkers: {", ".join(VersionScanner.available_checkers())}'
        ) + "\n\nPlease disclose issues responsibly!",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # --- Input options ---
    input_group = parser.add_argument_group("Input")
    input_group.add_argument("directory", help="directory to scan", nargs="?", default=None)
    input_group.add_argument(
        "-e",
        "--exclude",
        action=StringToListAction,
        help="Comma separated Exclude directory path",
        default=None,
    )
    input_group.add_argument(
        "-i",
        "--input-file",
        action="store",
        default="",
        help="provide input filename",
    )
    input_group.add_argument("-C", "--config", action="store", default="", help="provide config file")

    # --- Output options ---
    output_group = parser.add_argument_group("Output")
    output_group.add_argument("-q", "--quiet", action="store_true", help="suppress output")
    output_group.add_argument(
        "-l",
        "--log",
        help="log level (default: info)",
        dest="log_level",
        action="store",
        choices=["debug", "info", "warning", "error", "critical"],
    )
    output_group.add_argument(
        "-o",
        "--output-file",
        action="store",
        help="provide output filename (default: output to stdout)",
    )
    output_group.add_argument(
        "--html-theme",
        action="store",
        help="provide custom theme directory for HTML Report",
    )
    output_group.add_argument(
        "-f",
        "--format",
        action="store",
        choices=["csv", "json", "console", "html", "pdf"],
        help="update output format (default: console)",
    )
    output_group.add_argument(
        "-c",
        "--cvss",
        action="store",
        help="minimum CVSS score (as integer in range 0 to 10) to report (default: 0)",
    )
    output_group.add_argument(
        "-S",
        "--severity",
        action="store",
        choices=["low", "medium", "high", "critical"],
        help="minimum CVE severity to report (default: low)",
    )

    # --- General options ---
    parser.add_argument("-V", "--version", action="version", version=VERSION)
    parser.add_argument(
        "-u",
        "--update",
        action="store",
        choices=["now", "daily", "never", "latest"],
        help="update schedule for NVD database (default: daily)",
    )
    parser.add_argument(
        "-x",
        "--extract",
        action="store_true",
        help="autoextract compressed files",
    )
    parser.add_argument(
        "--disable-version-check",
        action="store_true",
        help="skips checking for a new version",
    )

    # --- Checker selection ---
    checker_group = parser.add_argument_group("Checkers")
    checker_group.add_argument(
        "-s",
        "--skips",
        dest="skips",
        action=StringToListAction,
        type=str,
        help="comma-separated list of checkers to disable",
    )
    checker_group.add_argument(
        "-r",
        "--runs",
        dest="runs",
        action=StringToListAction,
        type=str,
        help="comma-separated list of checkers to enable",
    )

    # Lowest-priority layer of the settings lookup below.
    defaults = {
        "directory": "",
        "exclude": [],
        "input_file": "",
        "log_level": "info",
        "format": "console",
        "cvss": 0,
        "severity": "low",
        "update": "daily",
        "extract": True,
        "disable_version_check": False,
        "skips": "",
        "runs": "",
        "quiet": False,
        "output_file": "",
        "html_theme": "",
    }

    with ErrorHandler(mode=ErrorMode.NoTrace):
        raw_args = parser.parse_args(argv[1:])
        # Keep only options that carry a truthy value, so anything the user
        # did not set (None, "", False) falls through to config / defaults.
        args = {key: value for key, value in vars(raw_args).items() if value}

    configs = {}
    if args.get("config"):
        conf = ConfigParser(args["config"])
        configs = conf.parse_config()

    # Lookup order: command line, then config file, then defaults.
    # Because falsy CLI values were dropped above, an omitted "-x" resolves
    # to the config value or, failing that, to the default of True.
    args = ChainMap(args, configs, defaults)

    # logging and error related settings
    if args["log_level"]:
        LOGGER.setLevel(args["log_level"].upper())

    # --quiet overrides whatever log level was requested
    if args["quiet"]:
        LOGGER.setLevel(logging.CRITICAL)

    # Map the effective log level to how much traceback error handlers show:
    # level 1..10 -> full trace, level >= 50 -> no trace, otherwise truncated.
    if 0 < LOGGER.level <= 10:
        error_mode = ErrorMode.FullTrace
    elif LOGGER.level >= 50:
        error_mode = ErrorMode.NoTrace
    else:
        error_mode = ErrorMode.TruncTrace

    if platform.system() != "Linux":
        warning_nolinux = """
                          **********************************************
                          Warning: this utility was developed for Linux.
                          You may need to install additional utilities
                          to use it on other operating systems.
                          **********************************************
                          """
        LOGGER.warning(warning_nolinux)

    # Database update related settings

    # Connect to the database
    cvedb_orig = CVEDB(version_check=not args["disable_version_check"], error_mode=error_mode)

    # if OLD_CACHE_DIR (from cvedb.py) exists, print warning
    if os.path.exists(OLD_CACHE_DIR):
        LOGGER.warning(
            f"Obsolete cache dir {OLD_CACHE_DIR} is no longer needed and can be removed."
        )

    # Clear data if -u now is set
    if args["update"] == "now":
        cvedb_orig.clear_cached_data()

    if args["update"] == "latest":
        cvedb_orig.refresh_cache_and_update_db()

    # update db if needed
    if args["update"] != "never":
        cvedb_orig.get_cvelist_if_stale()
    else:
        LOGGER.warning("Not verifying CVE DB cache")
        # With updates disabled, an empty cache cannot be filled: report it.
        if not cvedb_orig.nvd_years():
            with ErrorHandler(mode=error_mode, logger=LOGGER):
                raise EmptyCache(cvedb_orig.cachedir)

    # CVE Database validation
    if not cvedb_orig.check_cve_entries():
        with ErrorHandler(mode=error_mode, logger=LOGGER):
            raise CVEDataMissing("No data in CVE Database")

    # Input validation
    # NOTE(review): the code after each raise below only makes sense if
    # ErrorHandler ends the program in NoTrace mode - confirm in ErrorHandler.
    if not args["directory"] and not args["input_file"]:
        parser.print_usage()
        with ErrorHandler(logger=LOGGER, mode=ErrorMode.NoTrace):
            raise InsufficientArgs(
                "Please specify a directory to scan or an input file required")

    if args["directory"] and not os.path.exists(args["directory"]):
        parser.print_usage()
        with ErrorHandler(logger=LOGGER, mode=ErrorMode.NoTrace):
            raise FileNotFoundError("Directory/File doesn't exist")

    # Checkers related settings
    skips = args["skips"]
    if args["runs"]:
        # --runs replaces --skips: skip every registered checker entry point
        # whose name was not explicitly requested.
        runs = args["runs"]
        skips = list(
            map(
                lambda checker: checker.name,
                filter(
                    lambda checker: checker.name not in runs,
                    pkg_resources.iter_entry_points("cve_bin_tool.checker"),
                ),
            ))

    # CVEScanner related settings
    score = 0
    if args["severity"]:
        # Set minimum CVSS score based on severity
        cvss_score = {"low": 0, "medium": 4, "high": 7, "critical": 9}
        score = cvss_score[args["severity"]]
    # An explicit positive --cvss value takes precedence over --severity
    if int(args["cvss"]) > 0:
        score = int(args["cvss"])

    with CVEScanner(score=score) as cve_scanner:
        triage_data: TriageData
        total_files: int = 0
        parsed_data: Dict[ProductInfo, TriageData] = {}

        if args["input_file"]:
            input_engine = InputEngine(args["input_file"], logger=LOGGER, error_mode=error_mode)
            parsed_data = input_engine.parse_input()
            # Input file only: look up CVEs for every product it lists.
            if not args["directory"]:
                for product_info, triage_data in parsed_data.items():
                    LOGGER.warning(f"{product_info}, {triage_data}")
                    cve_scanner.get_cves(product_info, triage_data)

        if args["directory"]:
            version_scanner = VersionScanner(
                should_extract=args["extract"],
                exclude_folders=args["exclude"],
                error_mode=error_mode,
            )
            version_scanner.remove_skiplist(skips)
            version_scanner.print_checkers()
            for scan_info in version_scanner.recursive_scan(args["directory"]):
                if scan_info:
                    product_info, path = scan_info
                    LOGGER.debug(f"{product_info}: {path}")
                    # Reuse triage data from the input file when the product
                    # is listed there; otherwise start from an empty default.
                    triage_data = parsed_data.get(product_info, {"default": {}})
                    # Ignore paths from triage_data if we are scanning directory
                    triage_data["paths"] = {path}
                    cve_scanner.get_cves(product_info, triage_data)
            total_files = version_scanner.total_scanned_files

        LOGGER.info("")
        LOGGER.info("Overall CVE summary: ")
        if args["input_file"]:
            LOGGER.info(
                f"There are {cve_scanner.products_with_cve} products with known CVEs detected"
            )
        else:
            LOGGER.info(
                f"There are {cve_scanner.products_with_cve} files with known CVEs detected"
            )

        # A report is produced when something was found, and always for the
        # html / pdf formats.
        if cve_scanner.products_with_cve > 0 or (args["format"] == "html"
                                                 or args["format"] == "pdf"):
            affected_string = ", ".join(
                map(
                    lambda product_version: "".join(str(product_version)),
                    cve_scanner.affected(),
                ))
            LOGGER.info(f"Known CVEs in {affected_string}:")

            # Creates a Object for OutputEngine
            output = OutputEngine(
                all_cve_data=cve_scanner.all_cve_data,
                scanned_dir=args["directory"],
                filename=args["output_file"],
                themes_dir=args["html_theme"],
                products_with_cve=cve_scanner.products_with_cve,
                products_without_cve=cve_scanner.products_without_cve,
                total_files=total_files,
            )

            if not args["quiet"]:
                output.output_file(args["format"])

        # Use the number of products with known cves as error code
        # as requested by folk planning to automate use of this script.
        # If no cves found, then the program exits cleanly.
        return cve_scanner.products_with_cve
class HelperScript:
    """Helps contributors who want to write a new cve-bin-tool checker find
    common filenames, version strings, and other necessary data for building
    a binary checker"""

    CONSOLE = Console()
    LOGGER = LOGGER.getChild("HelperScript")

    def __init__(self, filename, product_name=None, version_number=None, string_length=40):
        """Prepare the helper for one package file.

        Args:
            filename: path of the package to analyse.
            product_name: overrides the name parsed from ``filename`` when truthy.
            version_number: overrides the version parsed from ``filename`` when truthy.
            string_length: candidate strings must be longer than this to be
                kept in ``contains_patterns``.
        """
        self.filename = filename
        self.extractor = Extractor()
        self.product_name, self.version_number = self.parse_filename(filename)
        if product_name:
            self.product_name = product_name
        if version_number:
            self.version_number = version_number
        self.string_length = string_length

        # for setting the database
        self.connection = None
        self.dbpath = os.path.join(DISK_LOCATION_DEFAULT, DBNAME)

        # for extraction
        self.walker = DirWalk().walk

        # for output (would use in future)
        self.contains_patterns = []
        self.filename_pattern = []
        self.version_pattern = []
        self.vendor_product = self.find_vendor_product()

        # for scanning files versions
        self.version_scanner = VersionScanner()

    def extract_and_parse_file(self, filename):
        """extracts and parses the file for common patterns, version strings and common filename patterns

        Fills ``self.version_pattern``, ``self.filename_pattern`` and
        ``self.contains_patterns`` as a side effect.

        Returns:
            ``self.contains_patterns`` when it is non-empty, otherwise every
            string that matched the product name. ``None`` when ``filename``
            cannot be extracted.
        """
        with self.extractor as ectx:
            if ectx.can_extract(filename):
                binary_string_list = []
                for filepath in self.walker([ectx.extract(filename)]):
                    clean_path = self.version_scanner.clean_file_path(filepath)
                    LOGGER.debug(f"checking whether {clean_path} is binary")

                    # see if the file is ELF binary file and parse for strings
                    is_exec = self.version_scanner.is_executable(filepath)[0]
                    if not is_exec:
                        continue

                    LOGGER.debug(f"{clean_path} <--- this is an ELF binary")
                    file_content = self.version_scanner.parse_strings(filepath)
                    matches = self.search_pattern(file_content, self.product_name)

                    # searching for version strings in the found matches
                    version_string = self.search_version_string(matches)
                    self.version_pattern += version_string

                    # if version string is found in file, append it to filename_pattern
                    if version_string:
                        self.filename_pattern.append(os.path.basename(filepath))
                        LOGGER.info(
                            f"matches for {self.product_name} found in {clean_path}"
                        )

                    binary_string_list += matches

                    # keep long strings that do not look like paths ("/")
                    # and do not contain "!"
                    self.contains_patterns.extend(
                        i for i in matches
                        if "/" not in i and "!" not in i
                        and len(i) > self.string_length
                    )

                LOGGER.debug(f"{self.filename_pattern}")

                # to resolve case when there are no strings common with product_name in them
                if self.contains_patterns:
                    return self.contains_patterns
                return binary_string_list

    def search_pattern(self, file_content, pattern):
        """find strings for CONTAINS_PATTERNS with product_name in them

        Args:
            file_content: newline-separated strings.
            pattern: regular expression searched case-insensitively in each line.

        Returns:
            The matching lines, stripped of surrounding whitespace.
        """
        # NOTE(review): callers pass the raw product name as ``pattern``, so a
        # name containing regex metacharacters (e.g. "++") raises re.error or
        # over-matches - confirm whether re.escape is wanted at the call site.
        file_content_list = file_content.split("\n")
        matches = [
            i.strip() for i in file_content_list
            if re.search(pattern, i, re.IGNORECASE)
        ]
        LOGGER.debug(f"found matches = {matches}"
                     )  # TODO: regex highlight in these matched strings?
        return matches

    def search_version_string(self, matched_list):
        """finds version strings from matched list

        A string qualifies when the product name is followed, anywhere later
        in the same string, by the version number (case-insensitive) and the
        string does not end with ``.debug``.
        """
        # TODO: add multiline string finding

        # NOTE(review): product_name and version_number are interpolated
        # unescaped, so "." in the version matches any character - confirm
        # whether that looseness is intended before adding re.escape.
        pattern1 = rf"{self.product_name}(.*){self.version_number}"
        # ^ this does not work for debian packages

        # pattern2 = rf"{self.product_name}(.*)([0-9]+[.-][0-9]+([.-][0-9]+)?)"
        # this matches patterns like:
        #   product1.2.3
        #   product 1.2.3
        #   product-1.2.3
        #   product.1.2.3
        #   product version 1.2.3
        #   product v1.2.3(1)

        version_strings = [
            i for i in matched_list
            if re.search(pattern1, i, re.IGNORECASE)
            if not i.endswith(
                ".debug")  # removes .debug, so, this does not gets printed
        ]
        LOGGER.debug(f"found version-string matches = {version_strings}"
                     )  # TODO: regex highlight in these matched strings?
        return version_strings

    def parse_filename(self, filename):
        """
        returns package_name/product_name from package_filename of types .rpm, .deb, etc.
        Example: package_filename = openssh-client_8.4p1-5ubuntu1_amd64.deb
            here, package_name = openssh-client

        Returns:
            ``(product_name, version_number)`` taken from the file's base name.

        Raises:
            UnknownArchiveType: (inside an ``ErrorHandler``) when the
                extractor cannot handle the file.
        """
        # resolving directory names
        filename = os.path.basename(filename)

        # if extractable, then parsing for different types of files accordingly
        if self.extractor.can_extract(filename):
            if filename.endswith(".tar.xz"):
                # example: libarchive-3.5.1-1-aarch64.pkg.tar.xz
                parts = filename.rsplit("-", 3)
            elif filename.endswith(".deb") or filename.endswith(".ipk"):
                # example: varnish_6.4.0-3_amd64.deb
                parts = filename.rsplit("_")
            else:
                parts = filename.rsplit("-", 2)
            product_name, version_number = parts[0], parts[1]

            LOGGER.debug(
                f"Parsing file '{self.filename}': Results: product_name='{product_name}', version_number='{version_number}'"
            )
            return product_name, version_number
        else:
            # raise error for unknown archive types
            with ErrorHandler(mode=ErrorMode.NoTrace, logger=LOGGER):
                raise UnknownArchiveType(filename)

    def find_vendor_product(self):
        """find vendor-product pairs from database

        Looks up ``self.product_name`` in the ``cve_range`` table. When
        nothing matches and the name contains digits, the digits are removed
        from ``self.product_name`` and the lookup is repeated.

        Returns:
            A list of ``(vendor, product)`` rows, ``[]`` when no match exists
            for a digit-free name, or ``None`` when the product name is empty.
        """
        # finding out all distinct (vendor, product) pairs with the help of product_name
        query = """
            SELECT distinct vendor, product FROM cve_range
            WHERE product=(:product);
        """

        CVEDB.db_open(self)
        try:
            cursor = self.connection.cursor()
            while True:
                LOGGER.debug(
                    f"checking for product_name='{self.product_name}' and version_name='{self.version_number}' in the database"
                )
                cursor.execute(query, {"product": self.product_name})
                data = cursor.fetchall()

                # checking if (vendor, product) was found in the database
                if data:
                    # warning the user to select the vendor-product pairs manually if multiple pairs are found
                    if len(data) != 1:
                        LOGGER.warning(
                            textwrap.dedent(f"""
                                ===============================================================
                                Multiple ("vendor", "product") pairs found for "{self.product_name}"
                                Please manually select the appropriate pair.
                                ===============================================================
                                """))
                    return data  # [('vendor', 'product')]

                if not self.product_name:
                    return None

                if not any(char.isdigit() for char in self.product_name):
                    # raise error and ask for product_name
                    LOGGER.warning(
                        textwrap.dedent(f"""
                            =================================================================
                            No match was found for "{self.product_name}" in database.
                            Please check your file or try specifying the "product_name" also.
                            =================================================================
                            """))
                    return []

                # removing numeric characters from the product_name, then retry
                LOGGER.debug(
                    f"removing digits from product_name={self.product_name}"
                )
                self.product_name = "".join(
                    filter(lambda x: not x.isdigit(), self.product_name))
        finally:
            # Close on every exit path; previously the close call sat after
            # the return statements and was skipped on all common paths.
            CVEDB.db_close(self)

    def output(self):
        """display beautiful output for Helper-Script

        Prints a checker class skeleton (CONTAINS_PATTERNS, FILENAME_PATTERNS,
        VERSION_PATTERNS, VENDOR_PRODUCT) built from the collected data.
        """
        self.CONSOLE.rule(
            f"[bold dark_magenta]{self.product_name.capitalize()}Checker")

        rprint(
            textwrap.dedent(f"""
                [bright_black]# Copyright (C) 2021 Intel Corporation
                # SPDX-License-Identifier: GPL-3.0-or-later[/]


                [yellow]\"\"\"
                CVE checker for {self.product_name}:

                <provide reference links here>
                \"\"\"[/]
                [magenta]from[/] cve_bin_tool.checkers [magenta]import[/] Checker


                [red]class[/] [blue]{(self.product_name).capitalize()}Checker[/](Checker):"""
                            ))

        # output: long human readable strings
        print("\tCONTAINS_PATTERNS = [")
        for common_strings in sorted(self.contains_patterns):
            if ".debug" in common_strings or ".so" in common_strings:
                rprint(
                    f'\t\t[red]r"{common_strings}"[/] <--- not recommended to use this form of strings'
                )
            else:
                rprint(f'\t\t[green]r"{common_strings}"[/],')
        print("\t]")

        # Using filenames (containing patterns like '.so' etc.) in the binaries
        # as VERSION_PATTERNS aren't ideal. The reason behind this is that these
        # might depend on who packages the file (like it might work on fedora
        # but not on ubuntu)

        # output: filenames, that we search for binary strings
        print("\tFILENAME_PATTERNS = [")
        for filename in self.filename_pattern:
            if self.product_name == filename:
                rprint(
                    f'\t\t[cyan]r"{filename}"[/], <--- this is a really common filename pattern'
                )
            elif self.product_name in filename:
                if ".so" in filename:
                    rprint(f'\t\t[green]r"{filename}"[/],')
                else:
                    # to single-handedly match filenames of type varnishd, varnishlog, varnishtop, etc.
                    rprint(
                        f'\t\t[bright_green]r"{filename}"[/], <--- you could just use "{self.product_name}" to match this file'
                    )
            else:
                rprint(f'\t\t[green]r"{filename}"[/],')
        print("\t]")

        # output: version-strings
        print("\tVERSION_PATTERNS = [")
        for version_string in self.version_pattern:
            rprint(f'\t\t[green]r"{version_string}"[/],')
        print("\t]")

        # output: vendor-product pair
        print("\tVENDOR_PRODUCT = ", end="")
        rprint(self.vendor_product)

        self.CONSOLE.rule()