def find_vendor_product(self):
    """find vendor-product pairs from the database"""

    LOGGER.debug(
        f"checking for product_name='{self.product_name}' and version_name='{self.version_number}' in the database"
    )

    CVEDB.db_open(self)
    cursor = self.connection.cursor()

    # finding out all distinct (vendor, product) pairs with the help of product_name
    query = """
        SELECT distinct vendor, product FROM cve_range
        WHERE product=(:product);
    """

    cursor.execute(query, {"product": self.product_name})
    data = cursor.fetchall()
    # close the database as soon as the query is done; the original close sat
    # after the return statements and was never reached
    CVEDB.db_close(self)

    # checking if (vendor, product) was found in the database
    if data:
        # warn the user to select the vendor-product pair manually if multiple pairs are found
        if len(data) != 1:
            LOGGER.warning(
                textwrap.dedent(
                    f"""
                    ===============================================================
                    Multiple ("vendor", "product") pairs found for "{self.product_name}"
                    Please manually select the appropriate pair.
                    ===============================================================
                    """
                )
            )
        return data  # [('vendor', 'product')]

    if self.product_name:
        # remove numeric characters from the product_name and retry
        if any(char.isdigit() for char in self.product_name):
            LOGGER.debug(f"removing digits from product_name={self.product_name}")
            self.product_name = "".join(
                filter(lambda x: not x.isdigit(), self.product_name)
            )
            return self.find_vendor_product()

    # no match: ask the user to check the file or specify product_name
    LOGGER.warning(
        textwrap.dedent(
            f"""
            =================================================================
            No match was found for "{self.product_name}" in database.
            Please check your file or try specifying the "product_name" also.
            =================================================================
            """
        )
    )
    return []
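# A minimal standalone sketch (not part of the tool) of the digit-stripping
# retry used by find_vendor_product above: if "libxml2" has no match in the
# database, the lookup is retried with "libxml".
def _strip_digits_example(product_name):
    return "".join(filter(lambda x: not x.isdigit(), product_name))

assert _strip_digits_example("libxml2") == "libxml"
assert _strip_digits_example("zlib") == "zlib"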
async def test_nvd_incremental_update(self):
    """Test that we are able to fetch and save the NVD entries using time_of_last_update"""
    nvd_api = NVD_API(incremental_update=True)
    await nvd_api.get_nvd_params(
        time_of_last_update=datetime.now() - timedelta(days=4)
    )
    await nvd_api.get()
    cvedb = CVEDB(cachedir=self.outdir, nvd_type="api")
    cvedb.all_cve_entries = nvd_api.all_cve_entries
    cvedb.init_database()
    cvedb.populate_db()
    cvedb.check_cve_entries()
    assert cvedb.cve_count == nvd_api.total_results
def setup_class(cls):
    cls.cvedb = CVEDB()
    if os.getenv("UPDATE_DB") == "1":
        cls.cvedb.get_cvelist_if_stale()
    else:
        print("Skip NVD database updates.")
    # Instantiate a scanner
    cls.scanner = VersionScanner(should_extract=True)
    # temp dir for mapping tests
    cls.mapping_test_dir = tempfile.mkdtemp(prefix="mapping-test-")
    # temp dir for tests that require downloads
    cls.package_test_dir = tempfile.mkdtemp(prefix="package_test-")
def setUpClass(cls):
    # Run the makefile to build the fake test binaries
    if platform == "linux" or platform == "linux2":
        subprocess.call(["make", "clean-linux"], cwd=BINARIES_PATH)
    elif platform == "win32":
        subprocess.call(["make", "clean-windows"], cwd=BINARIES_PATH)
    subprocess.call(["make", "all"], cwd=BINARIES_PATH)
    # Instantiate the NVD database
    cls.cvedb = CVEDB()
    if os.getenv("UPDATE_DB") == "1":
        cls.cvedb.get_cvelist_if_stale()
    else:
        print("Skip NVD database updates.")
    # Instantiate a scanner
    cls.scanner = Scanner(cls.cvedb)
    # temp dir for tests that require downloads
    cls.tempdir = tempfile.mkdtemp(prefix="cve-bin-tool-")
def setup_class(cls):
    cls.cvedb = CVEDB(cachedir=tempfile.mkdtemp(prefix="cvedb-"))
def main(argv=None):
    """Scan a binary file for certain open source libraries that may have CVEs"""
    argv = argv or sys.argv

    # Reset logger level to info
    LOGGER.setLevel(logging.INFO)

    parser = argparse.ArgumentParser(
        prog="cve-bin-tool",
        description=textwrap.dedent(
            """
            The CVE Binary Tool scans for a number of common, vulnerable open source
            components (openssl, libpng, libxml2, expat and a few others) to let you
            know if a given directory or binary file includes common libraries with
            known vulnerabilities.
            """
        ),
        epilog=textwrap.fill(
            f'Available checkers: {", ".join(VersionScanner.available_checkers())}'
        )
        + "\n\nPlease disclose issues responsibly!",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    input_group = parser.add_argument_group("Input")
    input_group.add_argument(
        "directory", help="directory to scan", nargs="?", default=None
    )
    input_group.add_argument(
        "-e",
        "--exclude",
        action=StringToListAction,
        help="comma-separated list of directory paths to exclude",
        default=None,
    )
    input_group.add_argument(
        "-i",
        "--input-file",
        action="store",
        default="",
        help="provide input filename",
    )
    input_group.add_argument(
        "-C", "--config", action="store", default="", help="provide config file"
    )

    output_group = parser.add_argument_group("Output")
    output_group.add_argument(
        "-q", "--quiet", action="store_true", help="suppress output"
    )
    output_group.add_argument(
        "-l",
        "--log",
        help="log level (default: info)",
        dest="log_level",
        action="store",
        choices=["debug", "info", "warning", "error", "critical"],
    )
    output_group.add_argument(
        "-o",
        "--output-file",
        action="store",
        help="provide output filename (default: output to stdout)",
    )
    output_group.add_argument(
        "--html-theme",
        action="store",
        help="provide custom theme directory for HTML report",
    )
    output_group.add_argument(
        "-f",
        "--format",
        action="store",
        choices=["csv", "json", "console", "html", "pdf"],
        help="output format (default: console)",
    )
    output_group.add_argument(
        "-c",
        "--cvss",
        action="store",
        help="minimum CVSS score (as integer in range 0 to 10) to report (default: 0)",
    )
    output_group.add_argument(
        "-S",
        "--severity",
        action="store",
        choices=["low", "medium", "high", "critical"],
        help="minimum CVE severity to report (default: low)",
    )
    parser.add_argument("-V", "--version", action="version", version=VERSION)
    parser.add_argument(
        "-u",
        "--update",
        action="store",
        choices=["now", "daily", "never", "latest"],
        help="update schedule for NVD database (default: daily)",
    )
    parser.add_argument(
        "-x",
        "--extract",
        action="store_true",
        help="autoextract compressed files",
    )
    parser.add_argument(
        "--disable-version-check",
        action="store_true",
        help="skips checking for a new version",
    )

    checker_group = parser.add_argument_group("Checkers")
    checker_group.add_argument(
        "-s",
        "--skips",
        dest="skips",
        action=StringToListAction,
        type=str,
        help="comma-separated list of checkers to disable",
    )
    checker_group.add_argument(
        "-r",
        "--runs",
        dest="runs",
        action=StringToListAction,
        type=str,
        help="comma-separated list of checkers to enable",
    )

    defaults = {
        "directory": "",
        "exclude": [],
        "input_file": "",
        "log_level": "info",
        "format": "console",
        "cvss": 0,
        "severity": "low",
        "update": "daily",
        "extract": True,
        "disable_version_check": False,
        "skips": "",
        "runs": "",
        "quiet": False,
        "output_file": "",
        "html_theme": "",
    }

    with ErrorHandler(mode=ErrorMode.NoTrace):
        raw_args = parser.parse_args(argv[1:])
        args = {key: value for key, value in vars(raw_args).items() if value}

    configs = {}
    if args.get("config"):
        conf = ConfigParser(args["config"])
        configs = conf.parse_config()

    args = ChainMap(args, configs, defaults)

    # logging and error related settings
    if args["log_level"]:
        LOGGER.setLevel(args["log_level"].upper())

    if args["quiet"]:
        LOGGER.setLevel(logging.CRITICAL)

    if 0 < LOGGER.level <= 10:
        error_mode = ErrorMode.FullTrace
    elif LOGGER.level >= 50:
        error_mode = ErrorMode.NoTrace
    else:
        error_mode = ErrorMode.TruncTrace

    if platform.system() != "Linux":
        warning_nolinux = """
            **********************************************
            Warning: this utility was developed for Linux.
            You may need to install additional utilities
            to use it on other operating systems.
            **********************************************
        """
        LOGGER.warning(warning_nolinux)

    # Database update related settings
    # Connect to the database
    cvedb_orig = CVEDB(
        version_check=not args["disable_version_check"], error_mode=error_mode
    )

    # if OLD_CACHE_DIR (from cvedb.py) exists, print a warning
    if os.path.exists(OLD_CACHE_DIR):
        LOGGER.warning(
            f"Obsolete cache dir {OLD_CACHE_DIR} is no longer needed and can be removed."
        )

    # Clear data if -u now is set
    if args["update"] == "now":
        cvedb_orig.clear_cached_data()

    if args["update"] == "latest":
        cvedb_orig.refresh_cache_and_update_db()

    # update db if needed
    if args["update"] != "never":
        cvedb_orig.get_cvelist_if_stale()
    else:
        LOGGER.warning("Not verifying CVE DB cache")
        if not cvedb_orig.nvd_years():
            with ErrorHandler(mode=error_mode, logger=LOGGER):
                raise EmptyCache(cvedb_orig.cachedir)

    # CVE Database validation
    if not cvedb_orig.check_cve_entries():
        with ErrorHandler(mode=error_mode, logger=LOGGER):
            raise CVEDataMissing("No data in CVE Database")

    # Input validation
    if not args["directory"] and not args["input_file"]:
        parser.print_usage()
        with ErrorHandler(logger=LOGGER, mode=ErrorMode.NoTrace):
            raise InsufficientArgs(
                "Please specify a directory to scan or an input file"
            )

    if args["directory"] and not os.path.exists(args["directory"]):
        parser.print_usage()
        with ErrorHandler(logger=LOGGER, mode=ErrorMode.NoTrace):
            raise FileNotFoundError("Directory/File doesn't exist")

    # Checkers related settings
    skips = args["skips"]
    if args["runs"]:
        runs = args["runs"]
        skips = list(
            map(
                lambda checker: checker.name,
                filter(
                    lambda checker: checker.name not in runs,
                    pkg_resources.iter_entry_points("cve_bin_tool.checker"),
                ),
            )
        )

    # CVEScanner related settings
    score = 0
    if args["severity"]:
        # Set minimum CVSS score based on severity
        cvss_score = {"low": 0, "medium": 4, "high": 7, "critical": 9}
        score = cvss_score[args["severity"]]
    if int(args["cvss"]) > 0:
        score = int(args["cvss"])

    with CVEScanner(score=score) as cve_scanner:
        triage_data: TriageData
        total_files: int = 0
        parsed_data: Dict[ProductInfo, TriageData] = {}

        if args["input_file"]:
            input_engine = InputEngine(
                args["input_file"], logger=LOGGER, error_mode=error_mode
            )
            parsed_data = input_engine.parse_input()

            if not args["directory"]:
                for product_info, triage_data in parsed_data.items():
                    LOGGER.warning(f"{product_info}, {triage_data}")
                    cve_scanner.get_cves(product_info, triage_data)

        if args["directory"]:
            version_scanner = VersionScanner(
                should_extract=args["extract"],
                exclude_folders=args["exclude"],
                error_mode=error_mode,
            )
            version_scanner.remove_skiplist(skips)
            version_scanner.print_checkers()
            for scan_info in version_scanner.recursive_scan(args["directory"]):
                if scan_info:
                    product_info, path = scan_info
                    LOGGER.debug(f"{product_info}: {path}")
                    triage_data = parsed_data.get(product_info, {"default": {}})
                    # Ignore paths from triage_data if we are scanning a directory
                    triage_data["paths"] = {path}
                    cve_scanner.get_cves(product_info, triage_data)
            total_files = version_scanner.total_scanned_files

        LOGGER.info("")
        LOGGER.info("Overall CVE summary: ")
        if args["input_file"]:
            LOGGER.info(
                f"There are {cve_scanner.products_with_cve} products with known CVEs detected"
            )
        else:
            LOGGER.info(
                f"There are {cve_scanner.products_with_cve} files with known CVEs detected"
            )
        if cve_scanner.products_with_cve > 0 or (
            args["format"] == "html" or args["format"] == "pdf"
        ):
            affected_string = ", ".join(
                map(
                    lambda product_version: "".join(str(product_version)),
                    cve_scanner.affected(),
                )
            )
            LOGGER.info(f"Known CVEs in {affected_string}:")

        # Create an OutputEngine object
        output = OutputEngine(
            all_cve_data=cve_scanner.all_cve_data,
            scanned_dir=args["directory"],
            filename=args["output_file"],
            themes_dir=args["html_theme"],
            products_with_cve=cve_scanner.products_with_cve,
            products_without_cve=cve_scanner.products_without_cve,
            total_files=total_files,
        )

        if not args["quiet"]:
            output.output_file(args["format"])

        # Use the number of products with known CVEs as the exit code,
        # as requested by folk planning to automate use of this script.
        # If no CVEs are found, the program exits cleanly.
        return cve_scanner.products_with_cve
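# Hedged usage sketch: since main() returns the number of products with known
# CVEs, a wrapper can surface that count as the process exit status (0 means
# no known CVEs were found), matching the comment above:
if __name__ == "__main__":
    sys.exit(main())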
class SBOMManager:

    SBOMtype = ["spdx", "cyclonedx", "swid"]

    sbom_data: DefaultDict[ProductInfo, TriageData]

    def __init__(
        self, filename: str, sbom_type: str = "spdx", logger: Optional[Logger] = None
    ):
        self.filename = filename
        self.sbom_data = defaultdict(dict)
        self.type = "unknown"
        if sbom_type in self.SBOMtype:
            self.type = sbom_type
        self.logger = logger or LOGGER.getChild(self.__class__.__name__)

        # Connect to the database
        self.cvedb = CVEDB(version_check=False)

    def scan_file(self) -> Dict[ProductInfo, TriageData]:
        LOGGER.info(f"Processing SBOM {self.filename} of type {self.type.upper()}")
        try:
            if self.type == "spdx":
                spdx = SPDXParser()
                modules = spdx.parse(self.filename)
            elif self.type == "cyclonedx":
                cyclone = CycloneParser()
                modules = cyclone.parse(self.filename)
            elif self.type == "swid":
                swid = SWIDParser()
                modules = swid.parse(self.filename)
            else:
                modules = []
        except (KeyError, FileNotFoundError, ET.ParseError) as e:
            LOGGER.debug(e, exc_info=True)
            modules = []

        LOGGER.debug(
            f"The number of modules identified in SBOM - {len(modules)}\n{modules}"
        )

        # Now process the list of modules to create [vendor, product, version] tuples
        parsed_data: List[ProductInfo] = []
        for m in modules:
            product, version = m[0], m[1]
            if version != "":
                # Add a vendor to create the full product record
                vendor = self.get_vendor(product)
                if vendor is not None:
                    parsed_data.append(ProductInfo(vendor, product, version))

        for row in parsed_data:
            self.sbom_data[row]["default"] = {
                "remarks": Remarks.NewFound,
                "comments": "",
                "severity": "",
            }
            # An SBOM carries no path information, so this records a single
            # empty path entry for each product
            self.sbom_data[row]["paths"] = set(map(lambda x: x.strip(), "".split(",")))

        LOGGER.debug(f"SBOM Data {self.sbom_data}")
        return self.sbom_data

    def get_vendor(self, product: str) -> Optional[str]:
        vendor_package_pair = self.cvedb.get_vendor_product_pairs(product)
        if vendor_package_pair != []:
            vendor = vendor_package_pair[0]["vendor"]
            return vendor
        return None
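# Hedged usage sketch for SBOMManager ("sbom.spdx" is a hypothetical input
# file): scan_file() returns a ProductInfo -> TriageData mapping that can be
# fed to the CVE scanner the same way main() feeds parsed input data:
#
#     sbom = SBOMManager("sbom.spdx", sbom_type="spdx")
#     for product_info, triage_data in sbom.scan_file().items():
#         cve_scanner.get_cves(product_info, triage_data)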
def parse_list(self):
    input_file = self.input_file
    self.check_file()

    if not input_file.endswith("requirements.txt"):
        if distro.id() not in SUPPORTED_DISTROS:
            LOGGER.warning(
                f"Package list support is only available on {', '.join(SUPPORTED_DISTROS)}!"
            )
            return {}

        system_packages = []
        LOGGER.info(f"Scanning {distro.id().capitalize()} package list.")

        if distro.id() in DEB_DISTROS:
            installed_packages = run(
                [
                    "dpkg-query",
                    "--show",
                    '--showformat={"name": "${binary:Package}", "version": "${Version}"}, ',
                ],
                stdout=PIPE,
            )
            installed_packages = json.loads(
                f"[{installed_packages.stdout.decode('utf-8')[0:-2]}]"
            )
        elif distro.id() in RPM_DISTROS:
            installed_packages = run(
                [
                    "rpm",
                    "--query",
                    "--all",
                    "--queryformat",
                    '{"name": "%{NAME}", "version": "%{VERSION}"\\}, ',
                ],
                stdout=PIPE,
            )
            installed_packages = json.loads(
                f"[{installed_packages.stdout.decode('utf-8')[0:-2]}]"
            )
        elif distro.id() in PACMAN_DISTROS:
            installed_packages = []
            installed_packages_output = run(
                ["pacman", "--query", "--explicit"],
                stdout=PIPE,
            )
            installed_packages_output = installed_packages_output.stdout.decode(
                "utf-8"
            ).splitlines()
            dict_keys = ["name", "version"]
            for installed_package in installed_packages_output:
                package_details = installed_package.split(" ")
                installed_package_dict = dict(zip(dict_keys, package_details))
                installed_packages.append(installed_package_dict)

        with open(input_file) as req:
            lines = req.readlines()
        for line in lines:
            system_packages.append(re.split("\n", line)[0])

        for installed_package in installed_packages:
            if installed_package["name"] in system_packages:
                self.package_names_without_vendor.append(installed_package)
    else:
        LOGGER.info("Scanning python package list.")
        txt_package_names = []
        installed_packages_json = run(
            ["pip", "list", "--format", "json"],
            stdout=PIPE,
        )
        installed_packages = json.loads(
            installed_packages_json.stdout.decode("utf-8")
        )

        with open(input_file) as txtfile:
            lines = txtfile.readlines()
        for line in lines:
            txt_package_names.append(re.split(">|\\[|;|=|\n", line)[0])

        for installed_package in installed_packages:
            package_name = installed_package["name"].lower()
            if package_name in txt_package_names:
                self.package_names_without_vendor.append(installed_package)

    cve_db = CVEDB()
    vendor_package_pairs = cve_db.get_vendor_product_pairs(
        self.package_names_without_vendor
    )

    self.add_vendor(vendor_package_pairs)
    self.parse_data()
    return self.parsed_data_with_vendor
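# Standalone sketch (the sample bytes are illustrative, not real dpkg-query
# output) of the JSON trick parse_list uses for dpkg-query and rpm: each
# package is emitted as a JSON object followed by ", ", so dropping the
# trailing two characters and wrapping the result in brackets yields a valid
# JSON list.
import json

sample_stdout = b'{"name": "zlib1g", "version": "1.2.11"}, {"name": "openssl", "version": "1.1.1f"}, '
packages = json.loads(f"[{sample_stdout.decode('utf-8')[0:-2]}]")
assert packages[0] == {"name": "zlib1g", "version": "1.2.11"}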
class VersionScanner:
    """Scans files for CVEs using CVE checkers"""

    CHECKER_ENTRYPOINT = "cve_bin_tool.checker"

    def __init__(
        self,
        should_extract=False,
        exclude_folders=None,
        checkers=None,
        logger=None,
        error_mode=ErrorMode.TruncTrace,
        score=0,
    ):
        self.logger = logger or LOGGER.getChild(self.__class__.__name__)
        # Update egg if installed in development mode
        if IS_DEVELOP():
            self.logger.debug("Updating egg_info")
            update_egg()

        # Use None as the default to avoid the mutable-default-argument pitfall
        exclude_folders = exclude_folders or []

        # Load checkers if not given
        self.checkers = checkers or self.load_checkers()
        self.score = score
        self.total_scanned_files = 0
        self.exclude_folders = exclude_folders + [".git"]

        self.walker = DirWalk(
            folder_exclude_pattern=";".join(
                exclude if exclude.endswith("*") else exclude + "*"
                for exclude in exclude_folders
            )
        ).walk
        self.should_extract = should_extract
        self.file_stack = []
        self.error_mode = error_mode
        self.cve_db = CVEDB()
        # self.logger.info("Checkers loaded: %s" % (", ".join(self.checkers.keys())))

    @classmethod
    def load_checkers(cls):
        """Loads CVE checkers"""
        checkers = dict(
            map(
                lambda checker: (checker.name, checker.load()),
                importlib_metadata.entry_points()[cls.CHECKER_ENTRYPOINT],
            )
        )
        return checkers

    @classmethod
    def available_checkers(cls):
        checkers = importlib_metadata.entry_points()[cls.CHECKER_ENTRYPOINT]
        checker_list = [item.name for item in checkers]
        return checker_list

    def remove_skiplist(self, skips):
        # Take out any checkers that are on the skip list
        # (string of comma-delimited checker names)
        skiplist = skips
        for skipme in skiplist:
            if skipme in self.checkers:
                del self.checkers[skipme]
                self.logger.debug(f"Skipping checker: {skipme}")
            else:
                self.logger.error(f"Checker {skipme} is not a valid checker name")

    def print_checkers(self):
        self.logger.info(f'Checkers: {", ".join(self.checkers.keys())}')

    def number_of_checkers(self):
        return len(self.checkers)

    def is_executable(self, filename):
        """check if the file is an executable/binary file (ELF, PE32, Mach-O, etc.)"""
        output = None
        if inpath("file"):
            # use system file if available (for performance reasons)
            output = subprocess.check_output(["file", filename])
            output = output.decode(sys.stdout.encoding)

            if "cannot open" in output:
                self.logger.warning(f"Unopenable file {filename} cannot be scanned")
                return False, None

            if (
                ("LSB " not in output)
                and ("LSB shared" not in output)
                and ("LSB executable" not in output)
                and ("PE32 executable" not in output)
                and ("PE32+ executable" not in output)
                and ("Mach-O" not in output)
                and ("PKG-INFO: " not in output)
                and ("METADATA: " not in output)
                and ("pom.xml" not in output)
            ):
                return False, None
        # otherwise use the python implementation of file
        elif not is_binary(filename):
            return False, None

        return True, output

    def parse_strings(self, filename):
        """parse the binary file's strings"""
        if inpath("strings"):
            # use "strings" on the system if available (for performance)
            lines = subprocess.check_output(["strings", filename]).decode("utf-8")
        else:
            # otherwise, use the python implementation
            s = Strings(filename)
            lines = s.parse()
        return lines

    def scan_file(self, filename):
        """Scans a file to see if it contains any of the target libraries,
        and whether any of those contain CVEs"""
        self.logger.debug(f"Scanning file: {filename}")
        self.total_scanned_files += 1

        # Do not try to scan symlinks
        if os.path.islink(filename):
            return None

        # Ensure filename is a file
        if not os.path.isfile(filename):
            self.logger.debug(f"Invalid file {filename} cannot be scanned")
            return None

        # check if it's an executable/binary file
        is_exec, output = self.is_executable(filename)

        if not is_exec:
            return None

        # parse the binary file's strings
        lines = self.parse_strings(filename)

        # Check for Java package
        if output and "pom.xml" in output:
            java_lines = "\n".join(lines.splitlines())
            yield from self.run_java_checker(filename, java_lines)

        # If it is a python package, strip the lines to avoid detecting other product strings
        if output and ("PKG-INFO: " in output or "METADATA: " in output):
            py_lines = "\n".join(lines.splitlines()[:3])
            yield from self.run_python_package_checkers(filename, py_lines)

        yield from self.run_checkers(filename, lines)

    def find_java_vendor(self, product, version):
        """Find vendor for Java product"""
        vendor_package_pair = self.cve_db.get_vendor_product_pairs(product)
        # If no match, try an alternative product name.
        # Apache product names are stored as A_B in the NVD database but are often called A-B.
        # Some packages have -parent appended to the product which is not in the NVD database.
        if vendor_package_pair == [] and "-" in product:
            self.logger.debug(f"Try alternative product {product}")
            # Remove parent appendage
            if "-parent" in product:
                product = product.replace("-parent", "")
            product = product.replace("-", "_")
            vendor_package_pair = self.cve_db.get_vendor_product_pairs(product)
        if vendor_package_pair != []:
            vendor = vendor_package_pair[0]["vendor"]
            file_path = "".join(self.file_stack)
            self.logger.debug(f"{file_path} {product} {version} by {vendor}")
            return ProductInfo(vendor, product, version), file_path
        return None, None

    def run_java_checker(self, filename, lines):
        """Process a maven pom.xml file and extract product and dependency details"""
        tree = ET.parse(filename)
        # Find root element
        root = tree.getroot()
        # Extract schema
        schema = root.tag[: root.tag.find("}") + 1]
        parent = root.find(schema + "parent")
        version = None
        product = None
        file_path = "".join(self.file_stack)
        # Parent tag is optional.
        if parent is None:
            product = root.find(schema + "artifactId").text
            version = root.find(schema + "version").text
        if version is None:
            version = parent.find(schema + "version").text

        # Check for a valid version identifier (i.e. starts with a digit)
        if not version[0].isdigit():
            self.logger.debug(f"Invalid {version} detected in {filename}")
            version = None

        if product is None:
            product = parent.find(schema + "artifactId").text

        if product is not None and version is not None:
            product_info, file_path = self.find_java_vendor(product, version)
            if file_path is not None:
                yield product_info, file_path

        # Scan for any dependencies referenced in the file
        dependencies = root.find(schema + "dependencies")
        if dependencies is not None:
            for dependency in dependencies.findall(schema + "dependency"):
                product = dependency.find(schema + "artifactId")
                if product is not None:
                    version = dependency.find(schema + "version")
                    if version is not None:
                        version = version.text
                        self.logger.debug(f"{file_path} {product.text} {version}")
                        if version[0].isdigit():
                            # Valid version identifier
                            product_info, file_path = self.find_java_vendor(
                                product.text, version
                            )
                            if file_path is not None:
                                yield product_info, file_path

        self.logger.debug(f"Done scanning file: {filename}")

    def run_python_package_checkers(self, filename, lines):
        """
        This generator runs only for python packages.
        There are no actual checkers.
        The ProductInfo is computed without the help of any checkers from PKG-INFO or METADATA.
        """
        try:
            product = search(compile(r"^Name: (.+)$", MULTILINE), lines).group(1)
            version = search(compile(r"^Version: (.+)$", MULTILINE), lines).group(1)
            cve_db = CVEDB()
            vendor_package_pair = cve_db.get_vendor_product_pairs(product)

            if vendor_package_pair != []:
                vendor = vendor_package_pair[0]["vendor"]
                file_path = "".join(self.file_stack)

                self.logger.info(f"{file_path} is {product} {version}")
                yield ProductInfo(vendor, product, version), file_path

        # Some packages contain a METADATA file with data different from what the tool expects
        except AttributeError:
            self.logger.debug(f"{filename} is an invalid METADATA/PKG-INFO")

        self.logger.debug(f"Done scanning file: {filename}")

    def run_checkers(self, filename, lines):
        for dummy_checker_name, checker in self.checkers.items():
            checker = checker()
            result = checker.get_version(lines, filename)
            # normalize single-hit results so we can iterate over all results
            if "is_or_contains" in result:
                results = [result]
            else:
                results = result

            for result in results:
                if "is_or_contains" in result:
                    version = "UNKNOWN"
                    if "version" in result and result["version"] != "UNKNOWN":
                        version = result["version"]
                    # .get() avoids a KeyError when a checker returns no
                    # "version" key at all
                    elif result.get("version") == "UNKNOWN":
                        file_path = "".join(self.file_stack)
                        self.logger.debug(
                            f"{dummy_checker_name} was detected with version UNKNOWN in file {file_path}"
                        )
                    else:
                        self.logger.error(f"No version info for {dummy_checker_name}")

                    if version != "UNKNOWN":
                        file_path = "".join(self.file_stack)
                        self.logger.debug(
                            f'{file_path} {result["is_or_contains"]} {dummy_checker_name} {version}'
                        )
                        for vendor, product in checker.VENDOR_PRODUCT:
                            yield ProductInfo(vendor, product, version), file_path

        self.logger.debug(f"Done scanning file: {filename}")

    @staticmethod
    def clean_file_path(filepath):
        """Returns a cleaner filepath by removing the temp path from the filepath"""

        # We'll receive a filepath similar to
        # /temp/anything/extractable_filename.extracted/folders/inside/file
        # and return /folders/inside/file to be scanned.
        # start_point is the point from which we want to start trimming
        # len("extracted") = 9
        start_point = filepath.find("extracted") + 9
        return filepath[start_point:]

    def scan_and_or_extract_file(self, ectx, filepath):
        """Runs extraction if possible and desired, otherwise scans."""
        # Scan the file
        yield from self.scan_file(filepath)
        # Attempt to extract the file and scan the contents
        if ectx.can_extract(filepath):
            if not self.should_extract:
                LOGGER.warning(
                    f"{filepath} is an archive. Pass -x option to auto-extract"
                )
                return None
            for filename in self.walker([ectx.extract(filepath)]):
                clean_path = self.clean_file_path(filename)
                self.file_stack.append(f" contains {clean_path}")
                yield from self.scan_and_or_extract_file(ectx, filename)
                self.file_stack.pop()

    def recursive_scan(self, scan_path):
        with Extractor(logger=self.logger, error_mode=self.error_mode) as ectx:
            if os.path.isdir(scan_path):
                for filepath in self.walker([scan_path]):
                    self.file_stack.append(filepath)
                    yield from self.scan_and_or_extract_file(ectx, filepath)
                    self.file_stack.pop()
            elif os.path.isfile(scan_path):
                self.file_stack.append(scan_path)
                yield from self.scan_and_or_extract_file(ectx, scan_path)
                self.file_stack.pop()