async def cache_update(self, session, url, sha, chunk_size=16 * 1024):
    """
    Update the cache for a single year of NVD data.

    Downloads the gzipped feed at *url* into self.cachedir and verifies
    it against the expected SHA-256 digest *sha*.  When a cached copy
    already exists and its digest matches, no download is performed.

    Raises (via ErrorHandler, honoring self.error_mode):
        AttemptedToWriteOutsideCachedir: target path escapes cachedir.
        NVDRateLimit: HTTP 403 from NVD (likely rate limiting).
        SHAMismatch: downloaded data does not match *sha*.
    """
    filename = url.split("/")[-1]
    # Ensure we only write to files within the cachedir
    filepath = os.path.abspath(os.path.join(self.cachedir, filename))
    if not filepath.startswith(os.path.abspath(self.cachedir)):
        with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
            raise AttemptedToWriteOutsideCachedir(filepath)
    # Validate the contents of the cached file
    if os.path.isfile(filepath):
        # Hash the decompressed contents in chunks so large feeds do
        # not need to be held in memory all at once.
        sha = sha.upper()
        calculate = hashlib.sha256()
        async with GzipFile(filepath, "rb") as f:
            chunk = await f.read(chunk_size)
            while chunk:
                calculate.update(chunk)
                chunk = await f.read(chunk_size)
        # Validate the sha and exit if it is correct, otherwise update
        gotsha = calculate.hexdigest().upper()
        if gotsha != sha:
            os.unlink(filepath)
            # BUGFIX: log messages previously printed "(unknown)"
            # instead of the file actually being checked.
            self.LOGGER.warning(
                f"SHA mismatch for {filename} (have: {gotsha}, want: {sha})"
            )
        else:
            self.LOGGER.debug(f"Correct SHA for {filename}")
            return
    self.LOGGER.debug(f"Updating CVE cache for {filename}")
    async with session.get(url) as response:
        # Raise better error message on ratelimit by NVD
        if response.status == 403:
            with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                raise NVDRateLimit(
                    f"{url} : download failed, you may have been rate limited."
                )
        # Raise for all other 4xx errors
        response.raise_for_status()
        gzip_data = await response.read()
        json_data = gzip.decompress(gzip_data)
        gotsha = hashlib.sha256(json_data).hexdigest().upper()
        async with FileIO(filepath, "wb") as filepath_handle:
            await filepath_handle.write(gzip_data)
        # Raise error if there was an issue with the sha
        if gotsha != sha:
            # Remove the corrupt download before reporting the mismatch
            os.unlink(filepath)
            with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                raise SHAMismatch(f"{url} (have: {gotsha}, want: {sha})")
def parse_data(self, fields: Set[str], data: Iterable) -> None:
    """
    Populate self.parsed_data from rows of triage data.

    Args:
        fields: field names present in the input.
        data: iterable of row mappings; every row must provide
            "vendor", "product" and "version" and may provide
            "cve_number", "remarks", "comments", "severity", "paths".

    Raises (via ErrorHandler, honoring self.error_mode):
        MissingFieldsError: when a required field is absent.
    """
    required_fields = {"vendor", "product", "version"}
    missing_fields = required_fields - fields
    # Idiomatic emptiness test (was the non-idiomatic `!= set()`).
    if missing_fields:
        with ErrorHandler(mode=self.error_mode):
            raise MissingFieldsError(
                f"{missing_fields} are required fields")
    for row in data:
        product_info = ProductInfo(row["vendor"].strip(),
                                   row["product"].strip(),
                                   row["version"].strip())
        # Rows without a CVE number are filed under the "default" key.
        self.parsed_data[product_info][
            row.get("cve_number", "").strip() or "default"] = {
                "remarks": Remarks(str(row.get("remarks", "")).strip()),
                "comments": row.get("comments", "").strip(),
                "severity": row.get("severity", "").strip(),
            }
        # NOTE(review): "paths" is overwritten on every row for the same
        # product, so the last row wins — behavior preserved as-is.
        self.parsed_data[product_info]["paths"] = {
            path.strip() for path in row.get("paths", "").split(",")
        }
def parse_filename(self, filename):
    """
    returns package_name/product_name from package_filename of types .rpm, .deb, etc.
    Example: package_filename = openssh-client_8.4p1-5ubuntu1_amd64.deb
    here, package_name = openssh-client
    """
    # Strip any leading directory components (platform-specific separator).
    separator = "\\" if sys.platform == "win32" else "/"
    filename = filename.split(separator)[-1]

    # if extractable, then parsing for different types of files accordingly
    if self.extractor.can_extract(filename):
        if filename.endswith(".tar.xz"):
            # example: libarchive-3.5.1-1-aarch64.pkg.tar.xz
            pieces = filename.rsplit("-", 3)
        elif filename.endswith((".deb", ".ipk")):
            # example: varnish_6.4.0-3_amd64.deb
            pieces = filename.rsplit("_")
        else:
            pieces = filename.rsplit("-", 2)
        product_name, version_number = pieces[0], pieces[1]
        LOGGER.debug(
            f"Parsing file '{self.filename}': Results: product_name='{product_name}', version_number='{version_number}'"
        )
        return product_name, version_number
    else:
        # raise error for unknown archive types
        with ErrorHandler(mode=ErrorMode.NoTrace, logger=LOGGER):
            raise UnknownArchiveType(filename)
def input_json(self) -> None:
    """Read triage data from self.filename as JSON and feed it to parse_data.

    The file must contain a non-empty JSON array of row objects;
    anything else raises InvalidJsonError via ErrorHandler.
    """
    with open(self.filename) as json_file:
        records = json.load(json_file)
        # De Morgan form of the original validity check.
        if not (isinstance(records, list) and records):
            with ErrorHandler(mode=self.error_mode):
                raise InvalidJsonError(self.filename)
        # Field names are taken from the first record's keys.
        self.parse_data(set(records[0].keys()), records)
def input_csv(self) -> None:
    """Read triage data from self.filename as CSV and feed it to parse_data.

    A header row is required so field names are known; otherwise
    InvalidCsvError is raised via ErrorHandler.
    """
    with open(self.filename) as csv_file:
        reader = csv.DictReader(csv_file)
        if reader is None or reader.fieldnames is None:
            with ErrorHandler(mode=self.error_mode):
                raise InvalidCsvError(self.filename)
        self.parse_data(set(reader.fieldnames), reader)
def main(argv=None):
    """Rewrite bare .csv arguments into -i= options and delegate to cli.main.

    Raises InsufficientArgs (via ErrorHandler) when no csv file is given.
    """
    logger = LOGGER.getChild("CSV2CVE")
    argv = argv or sys.argv
    if len(argv) < 2:
        with ErrorHandler(logger=logger):
            raise InsufficientArgs("csv file required")
    found_csv = False
    for position, token in enumerate(argv):
        if token.endswith(".csv"):
            # Rewrite in place so cli.main sees it as an input-file option.
            argv[position] = f"-i={token}"
            found_csv = True
    if found_csv:
        return cli.main(argv)
    with ErrorHandler(logger=logger):
        raise InsufficientArgs("csv file required")
def parse_config(self) -> Mapping[str, Any]:
    """
    Parse the configuration file named by self.filename (.toml or .yaml).

    All top-level sections are flattened into one ChainMap, stored in
    self.config_data and returned.  Missing files raise FileNotFoundError
    and unknown extensions raise UnknownConfigType, both via ErrorHandler.
    """
    if not os.path.isfile(self.filename):
        with ErrorHandler(mode=self.error_mode):
            raise FileNotFoundError(self.filename)
    if self.filename.endswith(".toml"):
        with open(self.filename) as config_file:
            sections = toml.load(config_file)
            # Merge every section so options resolve from one mapping.
            self.config_data = ChainMap(*sections.values())
    elif self.filename.endswith(".yaml"):
        with open(self.filename) as config_file:
            sections = yaml.safe_load(config_file)
            self.config_data = ChainMap(*sections.values())
    else:
        with ErrorHandler(mode=self.error_mode):
            raise UnknownConfigType(
                f"config file: {self.filename} is not supported.")
    return self.config_data
def parse_input(self) -> DefaultDict[ProductInfo, TriageData]: if not os.path.isfile(self.filename): with ErrorHandler(mode=self.error_mode): raise FileNotFoundError(self.filename) if self.filename.endswith(".csv"): self.input_csv() elif self.filename.endswith(".json"): self.input_json() return self.parsed_data
def load_curl_version(self, version):
    """
    Return the dict of CVE data for the given curl version.

    Raises CVEDataForCurlVersionNotInCache (via ErrorHandler) when no
    dataset for *version* has been cached yet.
    """
    cache_file = os.path.join(
        self.cachedir, self.CURL_CVE_FILENAME_TEMPLATE.format(version))

    # A missing dataset is reported, never silently treated as empty.
    if not os.path.isfile(cache_file):
        with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
            raise CVEDataForCurlVersionNotInCache(version)

    # Load the JSON payload and note how many CVEs it carries.
    with open(cache_file, "rb") as handle:
        curl_cves = json.load(handle)
    self.LOGGER.debug(
        f"Curl Version {version} has {len(curl_cves)} CVEs in dataset"
    )
    return curl_cves
def load_nvd_year(self, year):
    """
    Return the dict of CVE data for the given year.

    Raises CVEDataForYearNotInCache (via ErrorHandler) when the yearly
    feed has not been downloaded into the cache directory.
    """
    cache_file = os.path.join(self.cachedir,
                              self.NVDCVE_FILENAME_TEMPLATE.format(year))

    if not os.path.isfile(cache_file):
        with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
            raise CVEDataForYearNotInCache(year)

    # The yearly feeds are stored gzip-compressed; decompress on the fly.
    with gzip.open(cache_file, "rb") as handle:
        year_data = json.load(handle)
    self.LOGGER.debug(
        f'Year {year} has {len(year_data["CVE_Items"])} CVEs in dataset'
    )
    return year_data
def main(argv=None):
    """Scan a binary file for certain open source libraries that may have CVEs"""
    # Returns the number of products with known CVEs (used as the exit code).
    argv = argv or sys.argv
    # Reset logger level to info
    LOGGER.setLevel(logging.INFO)
    parser = argparse.ArgumentParser(
        prog="cve-bin-tool",
        description=textwrap.dedent("""
            The CVE Binary Tool scans for a number of common, vulnerable open source
            components (openssl, libpng, libxml2, expat and a few others) to let you know
            if a given directory or binary file includes common libraries with known
            vulnerabilities.
            """),
        epilog=textwrap.fill(
            f'Available checkers: {", ".join(VersionScanner.available_checkers())}'
        ) + "\n\nPlease disclose issues responsibly!",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # Input options: what to scan and where triage/config data comes from.
    input_group = parser.add_argument_group("Input")
    input_group.add_argument("directory",
                             help="directory to scan",
                             nargs="?",
                             default=None)
    input_group.add_argument(
        "-e",
        "--exclude",
        action=StringToListAction,
        help="Comma separated Exclude directory path",
        default=None,
    )
    input_group.add_argument(
        "-i",
        "--input-file",
        action="store",
        default="",
        help="provide input filename",
    )
    input_group.add_argument("-C",
                             "--config",
                             action="store",
                             default="",
                             help="provide config file")
    # Output options: verbosity, report format and destination.
    output_group = parser.add_argument_group("Output")
    output_group.add_argument("-q",
                              "--quiet",
                              action="store_true",
                              help="suppress output")
    output_group.add_argument(
        "-l",
        "--log",
        help="log level (default: info)",
        dest="log_level",
        action="store",
        choices=["debug", "info", "warning", "error", "critical"],
    )
    output_group.add_argument(
        "-o",
        "--output-file",
        action="store",
        help="provide output filename (default: output to stdout)",
    )
    output_group.add_argument(
        "--html-theme",
        action="store",
        help="provide custom theme directory for HTML Report",
    )
    output_group.add_argument(
        "-f",
        "--format",
        action="store",
        choices=["csv", "json", "console", "html", "pdf"],
        help="update output format (default: console)",
    )
    output_group.add_argument(
        "-c",
        "--cvss",
        action="store",
        help=
        "minimum CVSS score (as integer in range 0 to 10) to report (default: 0)",
    )
    output_group.add_argument(
        "-S",
        "--severity",
        action="store",
        choices=["low", "medium", "high", "critical"],
        help="minimum CVE severity to report (default: low)",
    )
    parser.add_argument("-V", "--version", action="version", version=VERSION)
    parser.add_argument(
        "-u",
        "--update",
        action="store",
        choices=["now", "daily", "never", "latest"],
        help="update schedule for NVD database (default: daily)",
    )
    parser.add_argument(
        "-x",
        "--extract",
        action="store_true",
        help="autoextract compressed files",
    )
    parser.add_argument(
        "--disable-version-check",
        action="store_true",
        help="skips checking for a new version",
    )
    # Checker selection: explicit allow/deny lists of checker names.
    checker_group = parser.add_argument_group("Checkers")
    checker_group.add_argument(
        "-s",
        "--skips",
        dest="skips",
        action=StringToListAction,
        type=str,
        help="comma-separated list of checkers to disable",
    )
    checker_group.add_argument(
        "-r",
        "--runs",
        dest="runs",
        action=StringToListAction,
        type=str,
        help="comma-separated list of checkers to enable",
    )
    # Lowest-priority layer of the ChainMap below: built-in defaults used
    # when neither the command line nor the config file supplies a value.
    defaults = {
        "directory": "",
        "exclude": [],
        "input_file": "",
        "log_level": "info",
        "format": "console",
        "cvss": 0,
        "severity": "low",
        "update": "daily",
        "extract": True,
        "disable_version_check": False,
        "skips": "",
        "runs": "",
        "quiet": False,
        "output_file": "",
        "html_theme": "",
    }
    with ErrorHandler(mode=ErrorMode.NoTrace):
        raw_args = parser.parse_args(argv[1:])
    # Keep only truthy CLI values so they override configs/defaults below.
    args = {key: value for key, value in vars(raw_args).items() if value}
    configs = {}
    if args.get("config"):
        conf = ConfigParser(args["config"])
        configs = conf.parse_config()
    # Priority: command line > config file > defaults.
    args = ChainMap(args, configs, defaults)
    # logging and error related settings
    if args["log_level"]:
        LOGGER.setLevel(args["log_level"].upper())
    if args["quiet"]:
        LOGGER.setLevel(logging.CRITICAL)
    # Map the effective log level onto a traceback verbosity:
    # debug -> full traces, critical -> none, otherwise truncated.
    if 0 < LOGGER.level <= 10:
        error_mode = ErrorMode.FullTrace
    elif LOGGER.level >= 50:
        error_mode = ErrorMode.NoTrace
    else:
        error_mode = ErrorMode.TruncTrace
    if platform.system() != "Linux":
        warning_nolinux = """
                          **********************************************
                          Warning: this utility was developed for Linux.
                          You may need to install additional utilities
                          to use it on other operating systems.
                          **********************************************
                          """
        LOGGER.warning(warning_nolinux)
    # Database update related settings
    # Connect to the database
    cvedb_orig = CVEDB(version_check=not args["disable_version_check"],
                       error_mode=error_mode)
    # if OLD_CACHE_DIR (from cvedb.py) exists, print warning
    if os.path.exists(OLD_CACHE_DIR):
        LOGGER.warning(
            f"Obsolete cache dir {OLD_CACHE_DIR} is no longer needed and can be removed."
        )
    # Clear data if -u now is set
    if args["update"] == "now":
        cvedb_orig.clear_cached_data()
    if args["update"] == "latest":
        cvedb_orig.refresh_cache_and_update_db()
    # update db if needed
    if args["update"] != "never":
        cvedb_orig.get_cvelist_if_stale()
    else:
        LOGGER.warning("Not verifying CVE DB cache")
        # With updates disabled, an empty cache is a hard error.
        if not cvedb_orig.nvd_years():
            with ErrorHandler(mode=error_mode, logger=LOGGER):
                raise EmptyCache(cvedb_orig.cachedir)
    # CVE Database validation
    if not cvedb_orig.check_cve_entries():
        with ErrorHandler(mode=error_mode, logger=LOGGER):
            raise CVEDataMissing("No data in CVE Database")
    # Input validation
    if not args["directory"] and not args["input_file"]:
        parser.print_usage()
        with ErrorHandler(logger=LOGGER, mode=ErrorMode.NoTrace):
            raise InsufficientArgs(
                "Please specify a directory to scan or an input file required")
    if args["directory"] and not os.path.exists(args["directory"]):
        parser.print_usage()
        with ErrorHandler(logger=LOGGER, mode=ErrorMode.NoTrace):
            raise FileNotFoundError("Directory/File doesn't exist")
    # Checkers related settings
    skips = args["skips"]
    if args["runs"]:
        # --runs wins: skip every registered checker NOT in the runs list.
        runs = args["runs"]
        skips = list(
            map(
                lambda checker: checker.name,
                filter(
                    lambda checker: checker.name not in runs,
                    pkg_resources.iter_entry_points("cve_bin_tool.checker"),
                ),
            ))
    # CSVScanner related settings
    score = 0
    if args["severity"]:
        # Set minimum CVSS score based on severity
        cvss_score = {"low": 0, "medium": 4, "high": 7, "critical": 9}
        score = cvss_score[args["severity"]]
    # An explicit --cvss threshold overrides the severity-derived score.
    if int(args["cvss"]) > 0:
        score = int(args["cvss"])
    with CVEScanner(score=score) as cve_scanner:
        triage_data: TriageData
        total_files: int = 0
        parsed_data: Dict[ProductInfo, TriageData] = {}
        if args["input_file"]:
            input_engine = InputEngine(args["input_file"],
                                       logger=LOGGER,
                                       error_mode=error_mode)
            parsed_data = input_engine.parse_input()
            # Input-file-only mode: look up CVEs for the triaged products.
            if not args["directory"]:
                for product_info, triage_data in parsed_data.items():
                    LOGGER.warning(f"{product_info}, {triage_data}")
                    cve_scanner.get_cves(product_info, triage_data)
        if args["directory"]:
            version_scanner = VersionScanner(
                should_extract=args["extract"],
                exclude_folders=args["exclude"],
                error_mode=error_mode,
            )
            version_scanner.remove_skiplist(skips)
            version_scanner.print_checkers()
            for scan_info in version_scanner.recursive_scan(args["directory"]):
                if scan_info:
                    product_info, path = scan_info
                    LOGGER.debug(f"{product_info}: {path}")
                    triage_data = parsed_data.get(product_info,
                                                  {"default": {}})
                    # Ignore paths from triage_data if we are scanning directory
                    triage_data["paths"] = {path}
                    cve_scanner.get_cves(product_info, triage_data)
            total_files = version_scanner.total_scanned_files
        LOGGER.info("")
        LOGGER.info("Overall CVE summary: ")
        if args["input_file"]:
            LOGGER.info(
                f"There are {cve_scanner.products_with_cve} products with known CVEs detected"
            )
        else:
            LOGGER.info(
                f"There are {cve_scanner.products_with_cve} files with known CVEs detected"
            )
        # html/pdf reports are produced even with zero findings.
        if cve_scanner.products_with_cve > 0 or (args["format"] == "html"
                                                 or args["format"] == "pdf"):
            affected_string = ", ".join(
                map(
                    lambda product_version: "".join(str(product_version)),
                    cve_scanner.affected(),
                ))
            LOGGER.info(f"Known CVEs in {affected_string}:")
        # Creates a Object for OutputEngine
        output = OutputEngine(
            all_cve_data=cve_scanner.all_cve_data,
            scanned_dir=args["directory"],
            filename=args["output_file"],
            themes_dir=args["html_theme"],
            products_with_cve=cve_scanner.products_with_cve,
            products_without_cve=cve_scanner.products_without_cve,
            total_files=total_files,
        )
        if not args["quiet"]:
            output.output_file(args["format"])
        # Use the number of products with known cves as error code
        # as requested by folk planning to automate use of this script.
        # If no cves found, then the program exits cleanly.
        return cve_scanner.products_with_cve
def check_file(self):
    """
    Validate the package-list input file before it is parsed.

    Checks existence, non-emptiness and the .txt extension, then — for
    non-requirements.txt lists — verifies the listed packages against
    the local package manager of the detected distro (apt/rpm/pacman),
    warning about any packages that cannot be located/are not installed.

    Raises (via ErrorHandler, honoring self.error_mode):
        FileNotFoundError, EmptyTxtError, InvalidListError.
    """
    input_file = self.input_file
    error_mode = self.error_mode
    if not isfile(input_file):
        with ErrorHandler(mode=error_mode):
            raise FileNotFoundError(input_file)
    if getsize(input_file) == 0:
        with ErrorHandler(mode=error_mode):
            raise EmptyTxtError(input_file)
    if not input_file.endswith(".txt"):
        with ErrorHandler(mode=error_mode):
            raise InvalidListError(
                "Invalid Package list file format (should be .txt)")
    if not input_file.endswith("requirements.txt"):
        # Distro package list: only specific distro families are supported.
        if distro.id() not in SUPPORTED_DISTROS:
            LOGGER.warning(
                f"Package list support only available for {','.join(SUPPORTED_DISTROS)}!"
            )
            with ErrorHandler(mode=error_mode):
                raise InvalidListError(
                    f"{distro.id().capitalize()} is not supported")
        elif distro.id() in DEB_DISTROS:
            # Simulate installation on Debian based system using apt-get to check if the file is valid
            output = run(
                ["xargs", "-a", input_file, "apt-get", "install", "-s"],
                capture_output=True,
            )
            if output.returncode != 0:
                invalid_packages = re.findall(
                    r"E: Unable to locate package (.+)",
                    output.stderr.decode("utf-8"),
                )
                LOGGER.warning(
                    f"Invalid Package found: {','.join(invalid_packages)}")
        elif distro.id() in RPM_DISTROS:
            # rpm -qi reports "is not installed" for unknown packages.
            output = run(
                ["xargs", "-a", input_file, "rpm", "-qi"],
                capture_output=True,
            )
            not_installed_packages = re.findall(
                r"package (.+) is not installed",
                output.stdout.decode("utf-8"))
            if not_installed_packages:
                LOGGER.warning(
                    f"The packages {','.join(not_installed_packages)} seems to be not installed.\nIt is either an invalid package or not installed.\nUse `sudo yum install $(cat package-list)` to install all packages"
                )
        elif distro.id() in PACMAN_DISTROS:
            # pacman -Qk errors on packages that were never installed.
            output = run(
                ["xargs", "-a", input_file, "pacman", "-Qk"],
                capture_output=True,
            )
            not_installed_packages = re.findall(
                r"error: package '(.+)' was not found",
                output.stderr.decode("utf-8"),
            )
            if not_installed_packages:
                LOGGER.warning(
                    f"The packages {','.join(not_installed_packages)} seems to be not installed.\nIt is either an invalid package or not installed.\nUse `sudo pacman -S $(cat package-list)` to install all packages"
                )
    else:
        # NOTE(review): this branch handles files named requirements.txt;
        # there is currently no offline validation for pip requirement
        # lists — confirm intended behavior against the parse() caller.
        # TODO: Replace below error handling with a proper pip install dry run
        # See: https://github.com/pypa/pip/issues/53
        with ErrorHandler(mode=error_mode):
            raise InvalidListError("Invalid Package list")
def main(argv=None):
    """Entry point of the checker helper script.

    Parses command-line options, then scans the first given file for
    filenames, version strings and other data useful when writing a
    new cve-bin-tool checker, printing the results to the console.
    """
    argv = argv or sys.argv
    parser = argparse.ArgumentParser(
        prog="helper-script",
        description=textwrap.dedent("""
            Helps contributors who want to write a new cve-bin-tool checker
            find common filenames, version strings, and other necessary
            data for building a binary checker
            """),
    )
    # scan directory args
    parser.add_argument(
        "filenames",
        help="files to scan",
        nargs="+",
        default=[],
    )
    # product-name args
    parser.add_argument(
        "-p",
        "--product",
        help="provide product-name that would be searched",
        dest="product_name",
        action="store",
        default=None,
    )
    # version-name args
    parser.add_argument(
        "-v",
        "--version",
        help="provide version that would be searched",
        dest="version_number",
        action="store",
        default=None,
    )
    # log level args
    parser.add_argument(
        "-l",
        "--log",
        help="log level (default: warning)",
        dest="log_level",
        action="store",
        choices=["debug", "info", "warning", "error", "critical"],
        default="warning",
    )
    # contains-patterns string length args
    parser.add_argument(
        "--string-length",
        help=
        "changes the output string-length for CONTAINS_PATTERNS (default: %(default)s)",
        type=int,
        action="store",
        default=40,
    )
    with ErrorHandler(mode=ErrorMode.NoTrace):
        raw_args = parser.parse_args(argv[1:])
    # Truthy CLI values override the parser's declared defaults.
    cli_values = {name: value for name, value in vars(raw_args).items() if value}
    fallback = {name: parser.get_default(name) for name in vars(raw_args)}
    args = ChainMap(cli_values, fallback)

    LOGGER.setLevel(args["log_level"].upper())
    LOGGER.debug(f"Given filenames: {args['filenames']}")
    LOGGER.info(f"Scanning only the first filename: '{args['filenames'][0]}'")

    helper = HelperScript(
        args["filenames"][0],
        product_name=args["product_name"],
        version_number=args["version_number"],
        string_length=args["string_length"],
    )
    # Parsing, Extracting and Searching for version-strings
    helper.extract_and_parse_file(args["filenames"][0])
    # output on console
    helper.output()