def get_version_map():
    """Scrape the SQLite changelog page and build a map of release
    versions to their SQLITE_SOURCE_ID strings.

    Returns a list of [version, source_id] pairs; empty on fetch failure.
    """
    version_map = []
    changeurl = "https://www.sqlite.org/changes.html"
    # <h3>YYYY-MM-DD (X.Y[.Z...])</h3> headings mark each release
    version_pattern = re.compile(
        r"<h3>\d{4}-\d{2}-\d{2} \((\d+\.\d+[.\d]*)\)</h3>")
    # Either an explicit SQLITE_SOURCE_ID line or a bare timestamp+hash line
    id_patterns = [
        re.compile(r'SQLITE_SOURCE_ID: "([^"]+)"'),
        re.compile(r'"*(\d{4}-\d{2}-\d{2} \d+:\d+:\d+ [\w]+)"*'),
    ]
    try:
        response = request.urlopen(changeurl)
        last_version = "UNKNOWN"
        for raw_line in response.readlines():
            text = raw_line.decode("UTF-8")
            version_hit = version_pattern.search(text)
            if version_hit:
                # Remember the most recent release heading seen
                last_version = version_hit.group(1)
            for candidate in id_patterns:
                source_id_hit = candidate.search(text)
                if source_id_hit:
                    version_map.append([last_version, source_id_hit.group(1)])
                    break
    except error.URLError as err:
        LOGGER.error("Could not fetch " + changeurl + ", " + str(err))
    return version_map
def parse_spdx_xml(self, sbom_file: str) -> List[List[str]]:
    """parses SPDX XML BOM file extracting package name and version"""
    # XML is experimental in SPDX 2.2
    root = ET.parse(sbom_file).getroot()
    # Namespace prefix, e.g. "{http://spdx.org/...}" (empty when no namespace)
    namespace = root.tag[: root.tag.find("}") + 1]
    modules: List[List[str]] = []
    for package_elem in root.findall(namespace + "packages"):
        try:
            name_elem = package_elem.find(namespace + "name")
            if name_elem is None or name_elem.text is None:
                raise KeyError(f"Could not find package in {package_elem}")
            version_elem = package_elem.find(namespace + "versionInfo")
            if version_elem is None or version_elem.text is None:
                raise KeyError(f"Could not find version in {package_elem}")
            modules.append([name_elem.text, version_elem.text])
        except KeyError as e:
            # Skip malformed package entries but keep parsing the rest
            LOGGER.debug(e, exc_info=True)
    return modules
def search_version_string(self, matched_list):
    """finds version strings from matched list"""
    # TODO: add multiline string finding
    pattern1 = rf"{self.product_name}(.*){self.version_number}"
    # ^ this does not work for debian packages
    # pattern2 = rf"{self.product_name}(.*)([0-9]+[.-][0-9]+([.-][0-9]+)?)"
    # this matches patterns like:
    # product1.2.3, product 1.2.3, product-1.2.3, product.1.2.3,
    # product version 1.2.3, product v1.2.3(1)
    version_strings = []
    for candidate in matched_list:
        # drop debug-symbol entries so they are not reported
        if candidate.endswith(".debug"):
            continue
        if re.search(pattern1, candidate, re.IGNORECASE):
            version_strings.append(candidate)
    LOGGER.debug(f"found version-string matches = {version_strings}"
                 )  # TODO: regex highlight in these matched strings?
    return version_strings
def parse_spdx_rdf(self, sbom_file: str) -> List[List[str]]:
    """parses SPDX RDF BOM file extracting package name and version"""
    with open(sbom_file) as f:
        lines = f.readlines()
    modules: List[List[str]] = []
    package = ""
    for line in lines:
        stripped = line.strip()
        try:
            if stripped.startswith("<spdx:name>"):
                # A new package record starts here
                name_match = re.search(
                    "<spdx:name>(.+?)</spdx:name>", stripped.rstrip("\n")
                )
                if not name_match:
                    raise KeyError(f"Could not find package in {stripped}")
                package = name_match.group(1)
            elif stripped.startswith("<spdx:versionInfo>"):
                version_match = re.search(
                    "<spdx:versionInfo>(.+?)</spdx:versionInfo>",
                    stripped.rstrip("\n"),
                )
                if not version_match:
                    raise KeyError(f"Could not find version in {stripped}")
                # Pair the version with the most recently seen package name
                modules.append([package, version_match.group(1)])
        except KeyError as e:
            LOGGER.debug(e, exc_info=True)
    return modules
def __init__(
    self,
    logger: Logger = LOGGER.getChild("NVD_API"),
    feed=FEED,
    session=None,
    page_size: int = PAGESIZE,
    max_fail: int = MAX_FAIL,
    interval: int = INTERVAL_PERIOD,
    error_mode: ErrorMode = ErrorMode.TruncTrace,
    incremental_update=False,
    api_key: str = "",
):
    """Store configuration for talking to the NVD API."""
    self.logger = logger or LOGGER.getChild(self.__class__.__name__)
    self.feed = feed
    self.session = session
    self.error_mode = error_mode
    # Paging / retry configuration
    self.page_size = page_size
    self.max_fail = max_fail
    self.interval = interval
    self.incremental_update = incremental_update
    # Query parameters sent with every request; apiKey only when provided
    self.params: Dict = {}
    if api_key:
        self.params["apiKey"] = api_key
    # -1 means "total not known yet"
    self.total_results = -1
    self.failed_count = 0
    self.all_cve_entries: List = []
def parse_filename(self, filename):
    """
    returns package_name/product_name from package_filename of types .rpm, .deb, etc.
    Example: package_filename = openssh-client_8.4p1-5ubuntu1_amd64.deb
    here, package_name = openssh-client

    Raises UnknownArchiveType (via ErrorHandler) for unrecognized archives.
    """
    # resolving directory names: keep only the basename
    if sys.platform == "win32":
        filename = filename.split("\\")[-1]
    else:
        filename = filename.split("/")[-1]
    # if extractable, then parsing for different types of files accordingly
    if self.extractor.can_extract(filename):
        if filename.endswith(".tar.xz"):
            product_name = filename.rsplit("-", 3)[0]
            version_number = filename.rsplit("-", 3)[1]
            # example: libarchive-3.5.1-1-aarch64.pkg.tar.xz
        elif filename.endswith(".deb") or filename.endswith(".ipk"):
            product_name = filename.rsplit("_")[0]
            version_number = filename.rsplit("_")[1]
            # example: varnish_6.4.0-3_amd64.deb
        else:
            product_name = filename.rsplit("-", 2)[0]
            version_number = filename.rsplit("-", 2)[1]
        # BUG FIX: log the filename actually being parsed (the argument),
        # not self.filename — they differ when parsing extracted inner files.
        LOGGER.debug(
            f"Parsing file '{filename}': Results: product_name='{product_name}', version_number='{version_number}'"
        )
        return product_name, version_number
    else:
        # raise error for unknown archive types
        with ErrorHandler(mode=ErrorMode.NoTrace, logger=LOGGER):
            raise UnknownArchiveType(filename)
def get_data(self, cve_number: str, product: str):
    """Fetch Red Hat security data for a CVE.

    Returns the parsed JSON payload, or None on HTTP error.
    """
    # static https url above
    full_query = f"{RH_CVE_API}/{cve_number}.json"
    try:
        raw = request.urlopen(full_query).read()  # nosec
        return loads(raw.decode("utf-8"))
    except error.HTTPError as e:
        LOGGER.debug(e)
def update_json():
    """Update the Debian CVE JSON file"""
    LOGGER.info("Updating Debian CVE JSON file for checking available fixes.")
    raw = request.urlopen(JSON_URL).read().decode("utf-8")  # nosec - static url
    data = loads(raw)
    # Persist the freshly downloaded tracker data for later lookups
    with open(DEB_CVE_JSON_PATH, "w") as debian_json:
        dump(data, debian_json, indent=4)
    LOGGER.info(
        "Debian CVE JSON file for checking available fixes is updated.")
def search_pattern(self, file_content, pattern):
    """find strings for CONTAINS_PATTERNS with product_name in them"""
    matches = []
    for line in file_content.split("\n"):
        if re.search(pattern, line, re.IGNORECASE):
            matches.append(line.strip())
    LOGGER.debug(f"found matches = {matches}"
                 )  # TODO: regex highlight in these matched strings?
    return matches
def parse_spdx_json(self, sbom_file: str) -> List[List[str]]:
    """parses SPDX JSON BOM file extracting package name and version

    Returns a list of [package, version] pairs; packages without a
    versionInfo field are skipped.
    """
    # BUG FIX: use a context manager so the file handle is closed promptly;
    # the original json.load(open(...)) left it open until garbage collection.
    with open(sbom_file) as f:
        data = json.load(f)
    modules: List[List[str]] = []
    for d in data["packages"]:
        package = d["name"]
        try:
            version = d["versionInfo"]
            modules.append([package, version])
        except KeyError as e:
            # versionInfo is optional in SPDX documents; log and skip
            LOGGER.debug(e, exc_info=True)
    return modules
def check_latest_version():
    """Checks for the latest version available at PyPI."""
    name = "cve-bin-tool"
    url = f"https://pypi.org/pypi/{name}/json"
    try:
        with request.urlopen(url) as resp:
            package_json = json.load(resp)
        pypi_version = package_json["info"]["version"]
        if pypi_version == VERSION:
            LOGGER.info(
                textwrap.dedent("""
                        *********************************************************
                        Yay! you are running the latest version.
                        But you can try the latest development version at GitHub.
                        URL: https://github.com/intel/cve-bin-tool
                        *********************************************************
                        """))
        else:
            # TODO In future mark me with some color ( prefer yellow or red )
            LOGGER.info(
                f"You are running version {VERSION} of {name} but the latest PyPI Version is {pypi_version}."
            )
            if version.parse(VERSION) < version.parse(pypi_version):
                LOGGER.info(
                    "Alert: We recommend using the latest stable release.")
    except Exception as exc:
        # Renamed from `error` to avoid shadowing the urllib `error` module
        LOGGER.warning(
            textwrap.dedent(f"""
                    --------------------------  Can't check for the latest version ---------------------------
                    warning: unable to access 'https://pypi.org/pypi/{name}'
                    Exception details: {exc}
                    Please make sure you have a working internet connection or try again later.
                    """))
def cve_info(
    self,
    all_cve_data: Dict[ProductInfo, CVEData],
):
    """Produces the Backported fixes' info"""
    cve_data = format_output(all_cve_data)
    json_data = self.get_data()
    for cve in cve_data:
        product = cve["product"]
        cve_number = cve["cve_number"]
        try:
            cve_fix = json_data[product][cve_number]["releases"][
                self.compute_distro()]
            if cve_fix["status"] != "resolved":
                continue
            if not self.is_backport:
                LOGGER.info(
                    f'{product}: {cve_number} has available fix in v{cve_fix["fixed_version"]} release.'
                )
            elif cve_fix["fixed_version"].startswith(cve["version"]):
                LOGGER.info(
                    f'{product}: {cve_number} has backported fix in v{cve_fix["fixed_version"]} release.'
                )
            else:
                LOGGER.info(
                    f'{product}: No known backported fix for {cve_number}.'
                )
        except KeyError:
            # Missing product/CVE/release entry in the tracker data
            if cve_number != "UNKNOWN":
                LOGGER.info(
                    f'{product}: No known fix for {cve_number}.'
                )
class TestJSON:
    """Validates yearly NVD JSON feed files on disk against the published NVD schema."""

    # Download the schema -- NOTE: this performs network I/O at
    # class-definition (import) time.
    SCHEMA = json.loads(urlopen(NVD_SCHEMA).read().decode("utf-8"))
    LOGGER.info("Schema loaded successfully")

    @unittest.skipUnless(LONG_TESTS() > 0, "Skipping long tests")
    @pytest.mark.parametrize("year",
                             list(range(2002,
                                        datetime.datetime.now().year + 1)))
    # NVD database started in 2002, so range then to now.
    def test_json_validation(self, year):
        """ Validate latest nvd json file against their published schema """
        # Open the latest nvd file on disk
        with gzip.open(
                os.path.join(DISK_LOCATION_DEFAULT,
                             f"nvdcve-1.1-{year}.json.gz"),
                "rb",
        ) as json_file:
            nvd_json = json.loads(json_file.read())
        LOGGER.info(
            f"Loaded json for year {year}: nvdcve-1.1-{year}.json.gz")
        # Validate -- will raise a ValidationError if not valid
        try:
            validate(nvd_json, self.SCHEMA)
            LOGGER.info("Validation complete")
        except ValidationError as ve:
            LOGGER.error(ve)
            pytest.fail("Validation error occurred")
def scan_and_or_extract_file(self, ectx, filepath):
    """ Runs extraction if possible and desired otherwise scans."""
    # Always scan the file itself first
    yield from self.scan_file(filepath)
    # Attempt to extract the file and scan the contents
    if not ectx.can_extract(filepath):
        return
    if not self.should_extract:
        LOGGER.warning(
            f"{filepath} is an archive. Pass -x option to auto-extract"
        )
        return None
    for inner_file in self.walker([ectx.extract(filepath)]):
        clean_path = self.clean_file_path(inner_file)
        # Track the containment chain for reporting, then recurse
        self.file_stack.append(f" contains {clean_path}")
        yield from self.scan_and_or_extract_file(ectx, inner_file)
        self.file_stack.pop()
def __init__(
    self,
    should_extract=False,
    exclude_folders=None,
    checkers=None,
    logger=None,
    error_mode=ErrorMode.TruncTrace,
    score=0,
):
    """Set up the scanner: logger, checkers, directory walker and options.

    BUG FIX: exclude_folders previously defaulted to a mutable list ([]),
    which is shared across all calls; default to None and create a fresh
    list per instance instead (backward compatible for all callers).
    """
    if exclude_folders is None:
        exclude_folders = []
    self.logger = logger or LOGGER.getChild(self.__class__.__name__)
    # Update egg if installed in development mode
    if IS_DEVELOP():
        self.logger.info("Updating egg_info")
        update_egg()
    # Load checkers if not given
    self.checkers = checkers or self.load_checkers()
    self.score = score
    self.total_scanned_files = 0
    # .git folders are always excluded
    self.exclude_folders = exclude_folders + [".git"]
    self.walker = DirWalk(folder_exclude_pattern=";".join(
        exclude if exclude.endswith("*") else exclude + "*"
        for exclude in exclude_folders)).walk
    self.should_extract = should_extract
    self.file_stack = []
    self.error_mode = error_mode
def __init__(self, filename: str, logger: Logger = None, error_mode=ErrorMode.TruncTrace):
    """Initialize the parser with the absolute path of the target file."""
    self.filename = os.path.abspath(filename)
    self.logger = logger if logger else LOGGER.getChild(self.__class__.__name__)
    self.error_mode = error_mode
    # product -> {version: data} mapping filled in during parsing
    self.parsed_data = defaultdict(dict)
def find_vendor_product(self):
    """find vendor-product pairs from database

    Returns a list of (vendor, product) tuples, possibly after retrying
    with digits stripped from product_name; empty list when nothing matches.

    BUG FIX: the original placed CVEDB.db_close(self) after the return
    statements, so it was unreachable and the connection was never closed.
    A try/finally now guarantees the close (db_close is a no-op once the
    connection is already closed, so the recursive retry is safe).
    """
    LOGGER.debug(
        f"checking for product_name='{self.product_name}' and version_name='{self.version_number}' in the database"
    )
    CVEDB.db_open(self)
    try:
        cursor = self.connection.cursor()
        # finding out all distinct (vendor, product) pairs with the help of product_name
        query = """
            SELECT distinct vendor, product FROM cve_range
            WHERE product=(:product);
        """
        cursor.execute(query, {"product": self.product_name})
        data = cursor.fetchall()
        # checking if (vendor, product) was found in the database
        if data:
            # warning the user to select the vendor-product pairs manually if multiple pairs are found
            if len(data) != 1:
                LOGGER.warning(
                    textwrap.dedent(f"""
                            ===============================================================
                            Multiple ("vendor", "product") pairs found for "{self.product_name}"
                            Please manually select the appropriate pair.
                            ===============================================================
                        """))
            return data  # [('vendor', 'product')]
        if self.product_name:
            # removing numeric characters from the product_name
            if any(char.isdigit() for char in self.product_name):
                LOGGER.debug(
                    f"removing digits from product_name={self.product_name}"
                )
                self.product_name = "".join(
                    filter(lambda x: not x.isdigit(), self.product_name))
                return self.find_vendor_product()
        # raise error and ask for product_name
        LOGGER.warning(
            textwrap.dedent(f"""
                    =================================================================
                    No match was found for "{self.product_name}" in database.
                    Please check your file or try specifying the "product_name" also.
                    =================================================================
                """))
        return []
    finally:
        CVEDB.db_close(self)
def __init__(self, input_file: str, logger: Logger = None, error_mode=ErrorMode.TruncTrace) -> None:
    """Initialize input-file parsing state and result containers."""
    self.input_file = input_file
    self.logger = logger if logger else LOGGER.getChild(self.__class__.__name__)
    self.error_mode = error_mode
    # Parsed results, split by whether a vendor could be determined
    self.parsed_data_without_vendor = defaultdict(dict)
    self.parsed_data_with_vendor = defaultdict(dict)
    self.package_names_with_vendor = []
    self.package_names_without_vendor = []
def cve_info(
    self,
    all_cve_data: Dict[ProductInfo, CVEData],
):
    """Produces the available fixes' info"""
    cve_data = format_output(all_cve_data)
    for cve in cve_data:
        if cve["cve_number"] == "UNKNOWN":
            continue
        json_data = self.get_data(cve["cve_number"], cve["product"])
        try:
            if not json_data:
                raise KeyError
            package_state = json_data["package_state"]
            affected_releases = json_data["affected_release"]
            rhel_product = f"Red Hat Enterprise Linux {self.distro_codename}"
            no_fix = True
            # Released fixes for this distro release
            for release in affected_releases:
                if release["product_name"] == rhel_product:
                    package_data = self.parse_package_data(
                        release["package"])
                    LOGGER.info(
                        f'{cve["product"]}: {cve["cve_number"]} - Status: Fixed - Fixed package: {package_data}'
                    )
                    no_fix = False
            # Packages with a known (possibly unresolved) fix state
            for state in package_state:
                if state["product_name"] == rhel_product:
                    package_data = self.parse_package_data(
                        state["package_name"])
                    LOGGER.info(
                        f'{cve["product"]}: {cve["cve_number"]} - Status: {state["fix_state"]} - Related package: {package_data}'
                    )
                    no_fix = False
            if no_fix:
                LOGGER.info(
                    f'{cve["product"]}: No known fix for {cve["cve_number"]}.'
                )
        except (KeyError, TypeError):
            # cve_number is never "UNKNOWN" here (filtered above)
            LOGGER.info(
                f'{cve["product"]}: No known fix for {cve["cve_number"]}.'
            )
def __init__(
    self,
    score: int = 0,
    logger: Logger = None,
    error_mode: ErrorMode = ErrorMode.TruncTrace,
):
    """Initialize counters and storage for collected CVE data."""
    self.logger = logger if logger else LOGGER.getChild(self.__class__.__name__)
    self.error_mode = error_mode
    self.score = score
    # Running tallies of scanned products
    self.products_with_cve = 0
    self.products_without_cve = 0
    self.all_cve_data = defaultdict(CVEData)
def __init__(
    self, filename: str, sbom_type: str = "spdx", logger: Optional[Logger] = None
):
    """Set up the SBOM scanner for the given file and SBOM flavour."""
    self.filename = filename
    self.sbom_data = defaultdict(dict)
    # Fall back to "unknown" for unsupported SBOM types
    self.type = sbom_type if sbom_type in self.SBOMtype else "unknown"
    self.logger = logger or LOGGER.getChild(self.__class__.__name__)
    # Connect to the database
    self.cvedb = CVEDB(version_check=False)
def check_latest_version():
    """Checks for the latest version available at PyPI."""
    name: str = "cve-bin-tool"
    url: str = f"https://pypi.org/pypi/{name}/json"
    try:
        with request.urlopen(url) as resp:  # nosec - static url above
            package_json = json.load(resp)
        pypi_version = package_json["info"]["version"]
        if pypi_version != VERSION:
            LOGGER.info(
                f"[bold red]You are running version {VERSION} of {name} but the latest PyPI Version is {pypi_version}.[/]",
                extra={"markup": True},
            )
            if version.parse(VERSION) < version.parse(pypi_version):
                LOGGER.info(
                    "[bold yellow]Alert: We recommend using the latest stable release.[/]",
                    extra={"markup": True},
                )
    except Exception as exc:
        # Renamed from `error` to avoid shadowing the urllib `error` module
        LOGGER.warning(
            textwrap.dedent(
                f"""
                -------------------------- Can't check for the latest version ---------------------------
                warning: unable to access 'https://pypi.org/pypi/{name}'
                Exception details: {exc}
                Please make sure you have a working internet connection or try again later.
                """
            )
        )
def extract_and_parse_file(self, filename):
    """extracts and parses the file for common patterns, version strings and common filename patterns

    Walks every file extracted from the archive, scans ELF binaries for
    strings matching product_name, accumulates version strings and
    filename patterns on self, and returns either self.contains_patterns
    (preferred) or the raw matched strings.
    NOTE: implicitly returns None when the file is not extractable.
    """
    with self.extractor as ectx:
        if ectx.can_extract(filename):
            binary_string_list = []
            for filepath in self.walker([ectx.extract(filename)]):
                clean_path = self.version_scanner.clean_file_path(filepath)
                LOGGER.debug(f"checking whether {clean_path} is binary")
                # see if the file is ELF binary file and parse for strings
                is_exec = self.version_scanner.is_executable(filepath)[0]
                if is_exec:
                    LOGGER.debug(
                        f"{clean_path} <--- this is an ELF binary")
                    file_content = self.version_scanner.parse_strings(
                        filepath)
                    matches = self.search_pattern(file_content,
                                                  self.product_name)
                    # searching for version strings in the found matches
                    version_string = self.search_version_string(matches)
                    self.version_pattern += version_string
                    # if version string is found in file, append it to filename_pattern
                    if version_string:
                        if sys.platform == "win32":
                            self.filename_pattern.append(
                                filepath.split("\\")[-1])
                        else:
                            self.filename_pattern.append(
                                filepath.split("/")[-1])
                        LOGGER.info(
                            f"matches for {self.product_name} found in {clean_path}"
                        )
                    binary_string_list += matches
                    # keep only "clean" strings long enough to be useful
                    for i in matches:
                        if ("/" not in i and "!" not in i
                                ) and len(i) > self.string_length:
                            self.contains_patterns.append(i)
            LOGGER.debug(f"{self.filename_pattern}")
            # to resolve case when there are no strings common with product_name in them
            if self.contains_patterns:
                return self.contains_patterns
            return binary_string_list
def check_available_fix(self):
    """Dispatch to the distro-specific fix tracker for the configured distro."""
    if self.distro_info != "local":
        distro_name, distro_codename = self.distro_info.split("-")
    else:
        # Detect the host distro when running locally
        distro_name = distro.id()
        distro_codename = distro.codename()
    if distro_name in DEBIAN_DISTROS:
        DebianCVETracker(
            distro_name, distro_codename, self.is_backport
        ).cve_info(self.all_cve_data)
    elif distro_name in REDHAT_DISTROS:
        RedhatCVETracker(distro_name, distro_codename).cve_info(
            self.all_cve_data)
    elif self.is_backport:
        LOGGER.info(
            f"CVE Binary Tool doesn't support Backported Fix Utility for {distro_name.capitalize()} at the moment."
        )
    else:
        LOGGER.info(
            f"CVE Binary Tool doesn't support Available Fix Utility for {distro_name.capitalize()} at the moment."
        )
def __init__(self, logger=None):
    """Sets up logger and if we should extract files or just report"""
    self.logger = (
        logger if logger is not None
        else LOGGER.getChild(self.__class__.__name__)
    )
    # Map each extraction routine to the archive suffixes it handles
    self.file_extractors = {
        self.extract_file_tar: [".tgz", ".tar.gz", ".tar", ".tar.xz", ".tar.bz2"],
        self.extract_file_rpm: [".rpm"],
        self.extract_file_deb: [".deb", ".ipk"],
        self.extract_file_cab: [".cab"],
        self.extract_file_zip: [".exe", ".zip", ".jar", ".apk"],
    }
def scan_file(self) -> Dict[ProductInfo, TriageData]:
    """Parse the SBOM file and return triage data keyed by ProductInfo."""
    LOGGER.info(f"Processing SBOM {self.filename} of type {self.type.upper()}")
    modules = []
    try:
        if self.type == "spdx":
            modules = SPDXParser().parse(self.filename)
        elif self.type == "cyclonedx":
            modules = CycloneParser().parse(self.filename)
        elif self.type == "swid":
            modules = SWIDParser().parse(self.filename)
    except (KeyError, FileNotFoundError, ET.ParseError) as e:
        LOGGER.debug(e, exc_info=True)
        modules = []
    LOGGER.debug(
        f"The number of modules identified in SBOM - {len(modules)}\n{modules}"
    )
    # Now process list of modules to create [vendor, product, version] tuples
    parsed_data: List[ProductInfo] = []
    for product, version in modules:
        if version != "":
            # Now add vendor to create product record....
            vendor = self.get_vendor(product)
            if vendor is not None:
                parsed_data.append(ProductInfo(vendor, product, version))
    for row in parsed_data:
        self.sbom_data[row]["default"] = {
            "remarks": Remarks.NewFound,
            "comments": "",
            "severity": "",
        }
        # Matches the original behavior: "".split(",") yields {""}
        self.sbom_data[row]["paths"] = set(map(lambda x: x.strip(), "".split(",")))
    LOGGER.debug(f"SBOM Data {self.sbom_data}")
    return self.sbom_data
def main(argv=None):
    """CSV-to-CVE entry point: rewrite .csv args as -i= options and run cli."""
    logger = LOGGER.getChild("CSV2CVE")
    argv = argv or sys.argv
    if len(argv) < 2:
        with ErrorHandler(logger=logger):
            raise InsufficientArgs("csv file required")
    found_csv = False
    for idx, arg in enumerate(argv):
        if arg.endswith(".csv"):
            # Translate bare csv paths into cli's -i input option
            argv[idx] = f"-i={arg}"
            found_csv = True
    if found_csv:
        return cli.main(argv)
    with ErrorHandler(logger=logger):
        raise InsufficientArgs("csv file required")
def test_json_validation(self, year):
    """ Validate latest nvd json file against their published schema """
    # Open the latest nvd file on disk
    json_path = os.path.join(DISK_LOCATION_DEFAULT,
                             f"nvdcve-1.1-{year}.json.gz")
    with gzip.open(json_path, "rb") as json_file:
        nvd_json = json.loads(json_file.read())
    LOGGER.info(f"Loaded json for year {year}: nvdcve-1.1-{year}.json.gz")
    # Validate -- will raise a ValidationError if not valid
    try:
        validate(nvd_json, self.SCHEMA)
        LOGGER.info("Validation complete")
    except ValidationError as ve:
        LOGGER.error(ve)
        pytest.fail("Validation error occurred")
class CVEDB:
    """
    Downloads NVD data in json form and stores it on disk in a cache.
    """

    # Default on-disk location for downloaded feeds and the sqlite db
    CACHEDIR = DISK_LOCATION_DEFAULT
    # NVD data-feed index page, scraped for links to per-year .meta files
    FEED = "https://nvd.nist.gov/vuln/data-feeds"
    LOGGER = LOGGER.getChild("CVEDB")
    NVDCVE_FILENAME_TEMPLATE = "nvdcve-1.1-{}.json.gz"
    CURL_CVE_FILENAME_TEMPLATE = "curlcve-{}.json"
    META_LINK = "https://nvd.nist.gov"
    # Matches relative links to yearly feed .meta files on the feed page
    META_REGEX = re.compile(r"\/feeds\/json\/.*-[0-9]*\.[0-9]*-[0-9]*\.meta")
    # Sentinel stored in version-range columns when no bound was supplied
    RANGE_UNSET = ""

    def __init__(
        self,
        feed=None,
        cachedir=None,
        version_check=True,
        session=None,
        error_mode=ErrorMode.TruncTrace,
    ):
        """Configure feed URL, cache location and error handling; does not open the db."""
        self.feed = feed if feed is not None else self.FEED
        self.cachedir = cachedir if cachedir is not None else self.CACHEDIR
        self.error_mode = error_mode
        # Will be true if refresh was successful
        self.was_updated = False
        # version update
        self.version_check = version_check
        # set up the db if needed
        self.dbpath = os.path.join(self.cachedir, DBNAME)
        self.connection = None
        self.session = session
        # -1 means "not counted yet"; see get_cve_count()
        self.cve_count = -1

    def get_cve_count(self):
        """Return the number of CVE entries, counting them on first use."""
        if self.cve_count == -1:
            # Force update
            self.check_cve_entries()
        return self.cve_count

    def get_db_update_date(self):
        # mtime of the sqlite file doubles as the "last updated" timestamp
        return os.path.getmtime(self.dbpath)

    async def getmeta(self, session, meta_url):
        """Fetch one .meta file; return (json.gz url, parsed key/value dict)."""
        async with session.get(meta_url) as response:
            response.raise_for_status()
            return (
                meta_url.replace(".meta", ".json.gz"),
                dict([
                    line.split(":", maxsplit=1)
                    for line in (await response.text()).splitlines()
                    if ":" in line
                ]),
            )

    async def nist_scrape(self, session):
        """Scrape the NVD feed index and gather metadata for every yearly feed."""
        async with session.get(self.feed) as response:
            response.raise_for_status()
            page = await response.text()
            json_meta_links = self.META_REGEX.findall(page)
            return dict(await asyncio.gather(*[
                self.getmeta(session, f"{self.META_LINK}{meta_url}")
                for meta_url in json_meta_links
            ]))

    async def cache_update(self, session, url, sha, chunk_size=16 * 1024):
        """
        Update the cache for a single year of NVD data.
        """
        filename = url.split("/")[-1]
        # Ensure we only write to files within the cachedir
        filepath = os.path.abspath(os.path.join(self.cachedir, filename))
        if not filepath.startswith(os.path.abspath(self.cachedir)):
            with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                raise AttemptedToWriteOutsideCachedir(filepath)
        # Validate the contents of the cached file
        if os.path.isfile(filepath):
            # Validate the sha and write out
            sha = sha.upper()
            calculate = hashlib.sha256()
            # The published sha covers the *decompressed* JSON, hence
            # reading the cached .gz through GzipFile here.
            async with GzipFile(filepath, "rb") as f:
                chunk = await f.read(chunk_size)
                while chunk:
                    calculate.update(chunk)
                    chunk = await f.read(chunk_size)
            # Validate the sha and exit if it is correct, otherwise update
            gotsha = calculate.hexdigest().upper()
            if gotsha != sha:
                os.unlink(filepath)
                self.LOGGER.warning(
                    f"SHA mismatch for (unknown) (have: {gotsha}, want: {sha})"
                )
            else:
                # NOTE(review): f-string has no placeholders; "(unknown)" was
                # probably meant to be the filename — confirm upstream.
                self.LOGGER.debug(f"Correct SHA for (unknown)")
                return
        self.LOGGER.debug(f"Updating CVE cache for (unknown)")
        async with session.get(url) as response:
            # Raise better error message on ratelimit by NVD
            if response.status == 403:
                with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                    raise NVDRateLimit(
                        f"{url} : download failed, you may have been rate limited."
                    )
            # Raise for all other 4xx errors
            response.raise_for_status()
            gzip_data = await response.read()
            json_data = gzip.decompress(gzip_data)
            # sha is computed over the decompressed payload (see above)
            gotsha = hashlib.sha256(json_data).hexdigest().upper()
            async with FileIO(filepath, "wb") as filepath_handle:
                await filepath_handle.write(gzip_data)
            # Raise error if there was an issue with the sha
            if gotsha != sha:
                # Remove the file if there was an issue
                # exit(100)
                os.unlink(filepath)
                with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                    raise SHAMismatch(f"{url} (have: {gotsha}, want: {sha})")

    @staticmethod
    async def get_curl_versions(session):
        """Scrape curl.haxx.se for curl versions that have vulnerability pages."""
        regex = re.compile(r"vuln-(\d+.\d+.\d+)\.html")
        async with session.get(
                "https://curl.haxx.se/docs/vulnerabilities.html") as response:
            response.raise_for_status()
            html = await response.text()
            matches = regex.finditer(html)
            return [match.group(1) for match in matches]

    async def download_curl_version(self, session, version):
        """Download one curl vuln page and cache its HTML table rows as JSON."""
        async with session.get(
                f"https://curl.haxx.se/docs/vuln-{version}.html") as response:
            response.raise_for_status()
            html = await response.text()
        soup = BeautifulSoup(html, "html.parser")
        table = soup.find("table")
        if not table:
            return
        headers = table.find_all("th")
        headers = list(map(lambda x: x.text.strip().lower(), headers))
        self.LOGGER.debug(headers)
        rows = table.find_all("tr")
        json_data = []
        for row in rows:
            cols = row.find_all("td")
            values = (ele.text.strip() for ele in cols)
            data = dict(zip(headers, values))
            if data:
                json_data.append(data)
        filepath = os.path.abspath(
            os.path.join(self.cachedir, f"curlcve-{version}.json"))
        async with FileIO(filepath, "w") as f:
            await f.write(json.dumps(json_data, indent=4))

    async def refresh(self):
        """
        Refresh the cve database and check for new version.
        """
        # refresh the database
        if not os.path.isdir(self.cachedir):
            os.makedirs(self.cachedir)
        # check for the latest version
        if self.version_check:
            self.LOGGER.info("Checking if there is a newer version.")
            check_latest_version()
        if not self.session:
            connector = aiohttp.TCPConnector(limit_per_host=19)
            self.session = aiohttp.ClientSession(connector=connector,
                                                 trust_env=True)
        self.LOGGER.info("Downloading CVE data...")
        nvd_metadata, curl_metadata = await asyncio.gather(
            self.nist_scrape(self.session),
            self.get_curl_versions(self.session))
        tasks = [
            self.cache_update(self.session, url, meta["sha256"])
            for url, meta in nvd_metadata.items() if meta is not None
        ]
        # We use gather to create a single task from a set of tasks
        # which download CVEs for each version of curl. Otherwise
        # the progress bar would show that we are closer to
        # completion than we think, because lots of curl CVEs (for
        # each version) have been downloaded
        tasks.append(
            asyncio.gather(*[
                self.download_curl_version(self.session, version)
                for version in curl_metadata
            ]))
        total_tasks = len(tasks)
        # error_mode.value will only be greater than 1 if quiet mode.
        if self.error_mode.value > 1:
            iter_tasks = track(
                asyncio.as_completed(tasks),
                description="Downloading CVEs...",
                total=total_tasks,
            )
        else:
            iter_tasks = asyncio.as_completed(tasks)
        for task in iter_tasks:
            await task
        self.was_updated = True
        await self.session.close()
        self.session = None

    def refresh_cache_and_update_db(self):
        """Download fresh feed data, then (re)build the sqlite database."""
        self.LOGGER.info("Updating CVE data. This will take a few minutes.")
        # refresh the nvd cache
        run_coroutine(self.refresh())
        # if the database isn't open, open it
        self.init_database()
        self.populate_db()

    def get_cvelist_if_stale(self):
        """Update if the local db is more than one day old.
        This avoids the full slow update with every execution.
        """
        if not os.path.isfile(self.dbpath) or (
                datetime.datetime.today() -
                datetime.datetime.fromtimestamp(os.path.getmtime(
                    self.dbpath))) > datetime.timedelta(hours=24):
            self.refresh_cache_and_update_db()
        else:
            self.LOGGER.info(
                "Using cached CVE data (<24h old). Use -u now to update immediately."
            )

    def latest_schema(self, cursor):
        """ Check database is using latest schema """
        self.LOGGER.info("Check database is using latest schema")
        # WHERE 1=0 returns no rows but still exposes column metadata
        schema_check = "SELECT * FROM cve_severity WHERE 1=0"
        result = cursor.execute(schema_check)
        schema_latest = False
        # Look through column names and check for column added in latest schema
        for col_name in result.description:
            if col_name[0] == "description":
                schema_latest = True
        return schema_latest

    def check_cve_entries(self):
        """ Report if database has some CVE entries """
        self.db_open()
        cursor = self.connection.cursor()
        cve_entries_check = "SELECT COUNT(*) FROM cve_severity"
        cursor.execute(cve_entries_check)
        # Find number of entries
        cve_entries = cursor.fetchone()[0]
        self.LOGGER.info(
            f"There are {cve_entries} CVE entries in the database")
        self.db_close()
        self.cve_count = cve_entries
        return cve_entries > 0

    def init_database(self):
        """ Initialize db tables used for storing cve/version data """
        self.db_open()
        cursor = self.connection.cursor()
        cve_data_create = """
        CREATE TABLE IF NOT EXISTS cve_severity (
            cve_number TEXT,
            severity TEXT,
            description TEXT,
            score INTEGER,
            cvss_version INTEGER,
            PRIMARY KEY(cve_number)
        )
        """
        version_range_create = """
        CREATE TABLE IF NOT EXISTS cve_range (
            cve_number TEXT,
            vendor TEXT,
            product TEXT,
            version TEXT,
            versionStartIncluding TEXT,
            versionStartExcluding TEXT,
            versionEndIncluding TEXT,
            versionEndExcluding TEXT,
            FOREIGN KEY(cve_number) REFERENCES cve_severity(cve_number)
        )
        """
        index_range = "CREATE INDEX IF NOT EXISTS product_index ON cve_range (cve_number, vendor, product)"
        cursor.execute(cve_data_create)
        cursor.execute(version_range_create)
        cursor.execute(index_range)
        # Check that latest schema is being used
        if not self.latest_schema(cursor):
            # Recreate table using latest schema
            self.LOGGER.info("Upgrading database to latest schema")
            cursor.execute("DROP TABLE cve_severity")
            cursor.execute(cve_data_create)
            self.clear_cached_data()
        self.connection.commit()

    def populate_db(self):
        """Function that populates the database from the JSON.

        WARNING: After some inspection of the data, we are assuming that
        start/end ranges are kept together in single nodes.  This isn't
        *required* by the json so may not be true everywhere.  If that's
        the case, we'll need a better parser to match those together.
        """
        self.db_open()
        cursor = self.connection.cursor()
        insert_severity = """
        INSERT or REPLACE INTO cve_severity(
            CVE_number,
            severity,
            description,
            score,
            cvss_version
        )
        VALUES (?, ?, ?, ?, ?)
        """
        insert_cve_range = """
        INSERT or REPLACE INTO cve_range(
            cve_number,
            vendor,
            product,
            version,
            versionStartIncluding,
            versionStartExcluding,
            versionEndIncluding,
            versionEndExcluding
        )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """
        del_cve_range = "DELETE from cve_range where CVE_number=?"
        # error_mode.value will only be greater than 1 if quiet mode.
        if self.error_mode.value > 1:
            years = track(self.nvd_years(),
                          description="Updating CVEs from NVD...")
        else:
            years = self.nvd_years()
        for year in years:
            cve_data = self.load_nvd_year(year)
            self.LOGGER.debug(
                f'Time = {datetime.datetime.today().strftime("%H:%M:%S")}')
            for cve_item in cve_data["CVE_Items"]:
                # the information we want:
                # CVE ID, Severity, Score ->
                # affected {Vendor(s), Product(s), Version(s)}
                cve = {
                    "ID": cve_item["cve"]["CVE_data_meta"]["ID"],
                    "description": cve_item["cve"]["description"]
                    ["description_data"][0]["value"],
                    "severity": "unknown",
                    "score": "unknown",
                    "CVSS_version": "unknown",
                }
                # Get CVSSv3 or CVSSv2 score for output.
                # Details are left as an exercise to the user.
                if "baseMetricV3" in cve_item["impact"]:
                    cve["severity"] = cve_item["impact"]["baseMetricV3"][
                        "cvssV3"]["baseSeverity"]
                    cve["score"] = cve_item["impact"]["baseMetricV3"][
                        "cvssV3"]["baseScore"]
                    cve["CVSS_version"] = 3
                elif "baseMetricV2" in cve_item["impact"]:
                    # NOTE: v2 severity lives at baseMetricV2 level, not
                    # under cvssV2 (unlike the score below)
                    cve["severity"] = cve_item["impact"]["baseMetricV2"][
                        "severity"]
                    cve["score"] = cve_item["impact"]["baseMetricV2"][
                        "cvssV2"]["baseScore"]
                    cve["CVSS_version"] = 2
                # self.LOGGER.debug(
                #     "Severity: {} ({}) v{}".format(
                #         CVE["severity"], CVE["score"], CVE["CVSS_version"]
                #     )
                # )
                cursor.execute(
                    insert_severity,
                    [
                        cve["ID"],
                        cve["severity"],
                        cve["description"],
                        cve["score"],
                        cve["CVSS_version"],
                    ],
                )
                # Delete any old range entries for this CVE_number
                cursor.execute(del_cve_range, (cve["ID"], ))
                # walk the nodes with version data
                # return list of versions
                affects_list = []
                if "configurations" in cve_item:
                    for node in cve_item["configurations"]["nodes"]:
                        # self.LOGGER.debug("NODE: {}".format(node))
                        affects_list.extend(self.parse_node(node))
                        if "children" in node:
                            for child in node["children"]:
                                affects_list.extend(self.parse_node(child))
                # self.LOGGER.debug("Affects: {}".format(affects_list))
                cursor.executemany(
                    insert_cve_range,
                    [(
                        cve["ID"],
                        affected["vendor"],
                        affected["product"],
                        affected["version"],
                        affected["versionStartIncluding"],
                        affected["versionStartExcluding"],
                        affected["versionEndIncluding"],
                        affected["versionEndExcluding"],
                    ) for affected in affects_list],
                )
            self.connection.commit()
        # supplemental data gets added here
        self.supplement_curl()
        self.db_close()

    def parse_node(self, node):
        """Extract (vendor, product, version[, range bounds]) dicts from one
        NVD configuration node's cpe_match entries."""
        affects_list = []
        if "cpe_match" in node:
            for cpe_match in node["cpe_match"]:
                # self.LOGGER.debug(cpe_match["cpe23Uri"])
                # cpe23Uri format: cpe:2.3:part:vendor:product:version:...
                cpe_split = cpe_match["cpe23Uri"].split(":")
                affects = {
                    "vendor": cpe_split[3],
                    "product": cpe_split[4],
                    "version": cpe_split[5],
                }
                # self.LOGGER.debug(
                #     "Vendor: {} Product: {} Version: {}".format(
                #         affects["vendor"], affects["product"], affects["version"]
                #     )
                # )
                # if we have a range (e.g. version is *) fill it out, and
                # put blanks where needed
                range_fields = [
                    "versionStartIncluding",
                    "versionStartExcluding",
                    "versionEndIncluding",
                    "versionEndExcluding",
                ]
                for field in range_fields:
                    if field in cpe_match:
                        affects[field] = cpe_match[field]
                    else:
                        affects[field] = self.RANGE_UNSET
                affects_list.append(affects)
        return affects_list

    def supplement_curl(self):
        """
        Get additional CVE data directly from the curl website amd add it to the cvedb
        """
        self.db_open()
        insert_cve_range = """
        INSERT or REPLACE INTO cve_range(
            cve_number,
            vendor,
            product,
            version,
            versionStartIncluding,
            versionStartExcluding,
            versionEndIncluding,
            versionEndExcluding
        )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """
        cursor = self.connection.cursor()
        # No need to track this. It is very fast!
        for version in self.curl_versions():
            cve_list = self.load_curl_version(version)
            # for cve in cve_list:
            cursor.executemany(
                insert_cve_range,
                [(
                    cve["cve"],
                    "haxx",
                    "curl",
                    version,
                    cve["from version"],
                    "",
                    cve["to and including"],
                    "",
                ) for cve in cve_list],
            )
            self.connection.commit()

    def load_nvd_year(self, year):
        """ Return the dict of CVE data for the given year. """
        filename = os.path.join(self.cachedir,
                                self.NVDCVE_FILENAME_TEMPLATE.format(year))
        # Check if file exists
        if not os.path.isfile(filename):
            with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                raise CVEDataForYearNotInCache(year)
        # Open the file and load the JSON data, log the number of CVEs loaded
        with gzip.open(filename, "rb") as fileobj:
            cves_for_year = json.load(fileobj)
            self.LOGGER.debug(
                f'Year {year} has {len(cves_for_year["CVE_Items"])} CVEs in dataset'
            )
            return cves_for_year

    def nvd_years(self):
        """ Return the years we have NVD data for. """
        # Year is parsed out of filenames like nvdcve-1.1-2020.json.gz
        return sorted([
            int(filename.split(".")[-3].split("-")[-1])
            for filename in glob.glob(
                os.path.join(self.cachedir, "nvdcve-1.1-*.json.gz"))
        ])

    def load_curl_version(self, version):
        """
        Return the dict of CVE data for the given curl version.
        """
        filename = os.path.join(
            self.cachedir, self.CURL_CVE_FILENAME_TEMPLATE.format(version))
        # Check if file exists
        if not os.path.isfile(filename):
            with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                raise CVEDataForCurlVersionNotInCache(version)
        # Open the file and load the JSON data, log the number of CVEs loaded
        with open(filename, "rb") as fileobj:
            cves_for_version = json.load(fileobj)
            self.LOGGER.debug(
                f"Curl Version {version} has {len(cves_for_version)} CVEs in dataset"
            )
            return cves_for_version

    def curl_versions(self):
        """ Return the versions we have Curl data for. """
        regex = re.compile(r"curlcve-(\d+.\d+.\d).json")
        return [
            regex.search(filename).group(1) for filename in glob.glob(
                os.path.join(self.cachedir, "curlcve-*.json"))
        ]

    def clear_cached_data(self):
        """Delete the cache directory (and any pre-1.0 cache directory)."""
        if os.path.exists(self.cachedir):
            self.LOGGER.warning(f"Deleting cachedir {self.cachedir}")
            shutil.rmtree(self.cachedir)
        # Remove files associated with pre-1.0 development tree
        if os.path.exists(OLD_CACHE_DIR):
            self.LOGGER.warning(f"Deleting old cachedir {OLD_CACHE_DIR}")
            shutil.rmtree(OLD_CACHE_DIR)

    def db_open(self):
        """ Opens connection to sqlite database."""
        if not self.connection:
            self.connection = sqlite3.connect(self.dbpath)

    def db_close(self):
        """ Closes connection to sqlite database."""
        if self.connection:
            self.connection.close()
            self.connection = None
class CVEDB(object):
    """
    Downloads NVD data in json form and stores it on disk in a cache.
    """

    # Default location of the on-disk feed cache.
    CACHEDIR = os.path.join(os.path.expanduser("~"), ".cache", "cvedb")
    # NVD page that links to the yearly JSON feed files.
    FEED = "https://nvd.nist.gov/vuln/data-feeds"
    LOGGER = LOGGER.getChild("CVEDB")
    NVDCVE_FILENAME_TEMPLATE = "nvdcve-1.1-{}.json"
    # Matches links to the per-feed .meta files on the FEED page.
    META_REGEX = re.compile(
        r"https:\/\/.*\/json\/.*-[0-9]*\.[0-9]*-[0-9]*\.meta")
    # Sentinel stored in range columns when the CPE match gave no bound.
    RANGE_UNSET = ""

    def __init__(self, verify=True, feed=None, cachedir=None):
        self.verify = verify
        self.feed = feed if feed is not None else self.FEED
        self.cachedir = cachedir if cachedir is not None else self.CACHEDIR
        # Will be true if refresh was successful
        self.was_updated = False
        # set up the db if needed
        self.disk_location = DISK_LOCATION_DEFAULT
        self.dbname = os.path.join(self.disk_location, DBNAME)
        self.connection = None

    def nist_scrape(self, feed):
        """Scrape the NVD feed page and fetch each feed's .meta info.

        Returns a dict mapping feed URL -> metadata (as produced by
        ``getmeta``), fetched in parallel with a process pool.
        """
        with contextlib.closing(request.urlopen(feed)) as response:
            page = response.read().decode()
            jsonmetalinks = self.META_REGEX.findall(page)
            pool = multiprocessing.Pool()
            try:
                metadata = dict(
                    pool.map(functools.partial(log_traceback, getmeta),
                             tuple(jsonmetalinks)))
                pool.close()
                return metadata
            # NOTE(review): bare except also catches KeyboardInterrupt /
            # SystemExit; it does re-raise, but `except BaseException:`
            # would state the intent explicitly.
            except:
                pool.terminate()
                raise
            finally:
                pool.join()

    def init_database(self):
        """ Initialize db tables used for storing cve/version data """
        conn = sqlite3.connect(self.dbname)
        db_cursor = conn.cursor()
        cve_data_create = """CREATE TABLE IF NOT EXISTS cve_severity (
            cve_number TEXT,
            severity TEXT,
            score INTEGER,
            cvss_version INTEGER,
            PRIMARY KEY(cve_number)
        )
        """
        db_cursor.execute(cve_data_create)
        version_range_create = """ CREATE TABLE IF NOT EXISTS cve_range (
            cve_number TEXT,
            vendor TEXT,
            product TEXT,
            version TEXT,
            versionStartIncluding TEXT,
            versionStartExcluding TEXT,
            versionEndIncluding TEXT,
            versionEndExcluding TEXT
        )
        """
        db_cursor.execute(version_range_create)
        # Index to speed up the vendor/product lookups done by get_cves().
        index_range = """CREATE INDEX IF NOT EXISTS product_index ON cve_range (cve_number, vendor, product)"""
        db_cursor.execute(index_range)
        conn.commit()
        return conn

    # NOTE(review): this open/close + __enter__/__exit__ group is shadowed
    # by the second __enter__/__exit__ pair defined later in this class —
    # the later definitions win at class-creation time.
    def open(self):
        """ Opens connection to sqlite database."""
        self.connection = sqlite3.connect(self.dbname,
                                          check_same_thread=False)

    def close(self):
        """ Closes connection to sqlite database."""
        self.connection.close()
        self.connection = None

    def __enter__(self):
        """ Opens connection to sqlite database."""
        self.open()

    def __exit__(self, exc_type, exc, exc_tb):
        """ Closes connection to sqlite database."""
        self.close()

    def get_cvelist_if_stale(self):
        """ Update if the local db is more than one day old.
        This avoids the full slow update with every execution.
        """
        if not os.path.isfile(self.dbname) or (
                datetime.datetime.today() -
                datetime.datetime.fromtimestamp(os.path.getmtime(
                    self.dbname))) > datetime.timedelta(hours=24):
            self.refresh_cache_and_update_db()
        else:
            self.LOGGER.info(
                "Using cached CVE data (<24h old). Use -u now to update immediately."
            )

    def refresh_cache_and_update_db(self):
        """Download the latest NVD feeds and rebuild the database tables."""
        self.LOGGER.info("Updating CVE data. This will take a few minutes.")
        # refresh the nvd cache
        self.refresh()
        # if the database isn't open, open it
        if self.connection is None:
            self.connection = self.init_database()
        self.populate_db()

    def get_cves(self, vendor, product, version):
        """ Get CVEs against a specific version of a package.

        Example:
            nvd.get_cves('haxx', 'curl', '7.34.0')

        Returns a dict of {cve_number: severity} when matches are found,
        otherwise an empty list.
        """
        if self.connection is None:
            self.open()
        cursor = self.connection.cursor()

        # Check for anything directly marked
        query = """SELECT CVE_number FROM cve_range
        WHERE vendor=? AND product=? AND version=?"""
        cursor.execute(query, [vendor, product, version])
        cve_list = list(map(lambda x: x[0], cursor.fetchall()))

        # Check for any ranges: rows stored with version='*' carry the
        # start/end bounds instead of a concrete version.
        query = """SELECT CVE_number, versionStartIncluding, versionStartExcluding, versionEndIncluding, versionEndExcluding FROM cve_range
        WHERE vendor=? AND product=? AND version=?"""
        cursor.execute(query, [vendor, product, "*"])
        for cve_range in cursor:
            (
                cve_number,
                versionStartIncluding,
                versionStartExcluding,
                versionEndIncluding,
                versionEndExcluding,
            ) = cve_range

            # pep-440 doesn't include versions of the type 1.1.0g used by openssl
            # so if this is openssl, convert the last letter to a .number
            if product == "openssl":
                # if last character is a letter, convert it to .number
                version = self.openssl_convert(version)
                versionStartIncluding = self.openssl_convert(
                    versionStartIncluding)
                versionStartExcluding = self.openssl_convert(
                    versionStartExcluding)
                versionEndIncluding = self.openssl_convert(versionEndIncluding)
                versionEndExcluding = self.openssl_convert(versionEndExcluding)

            parsed_version = parse_version(version)

            # NOTE(review): the `is` / `is not` comparisons below rely on
            # values read back from sqlite being the identical interned ""
            # object as RANGE_UNSET; `== / !=` would be the robust check —
            # TODO confirm before changing.
            # check the start range
            passes_start = False
            if (versionStartIncluding is not self.RANGE_UNSET
                    and parsed_version >= parse_version(versionStartIncluding)):
                passes_start = True
            if (versionStartExcluding is not self.RANGE_UNSET
                    and parsed_version > parse_version(versionStartExcluding)):
                passes_start = True
            if (versionStartIncluding is self.RANGE_UNSET
                    and versionStartExcluding is self.RANGE_UNSET):
                # then there is no start range so just say true
                passes_start = True

            # check the end range
            passes_end = False
            if (versionEndIncluding is not self.RANGE_UNSET
                    and parsed_version <= parse_version(versionEndIncluding)):
                passes_end = True
            if (versionEndExcluding is not self.RANGE_UNSET
                    and parsed_version < parse_version(versionEndExcluding)):
                passes_end = True
            if (versionEndIncluding is self.RANGE_UNSET
                    and versionEndExcluding is self.RANGE_UNSET):
                # then there is no end range so it passes
                passes_end = True

            # if it fits into both ends of the range, add the cve number
            if passes_start and passes_end:
                cve_list.append(cve_number)

        # Go through and get all the severities
        if cve_list:
            query = f'SELECT CVE_number, severity from cve_severity where CVE_number IN ({",".join(["?"]*len(cve_list))}) ORDER BY CVE_number ASC'
            cursor.execute(query, cve_list)
            # Everything expects a data structure of cve[number] = severity so you can search through keys
            # and do other easy manipulations
            return dict(cursor)

        return cve_list

    def openssl_convert(self, version):
        """ pkg_resources follows pep-440 which doesn't expect openssl style
        1.1.0g version numbering
        So to fake it, if the last character is a letter, replace it with
        .number before comparing
        """
        if len(version) < 1:
            return version
        lastchar = version[len(version) - 1]
        # Map 'a'->0 .. 'z'->25; uppercase suffixes are left untouched.
        letters = dict(zip(ascii_lowercase, range(26)))
        if lastchar in letters:
            version = f"{version[0 : len(version) - 1]}.{letters[lastchar]}"
        return version

    def populate_db(self):
        """ Function that populates the database from the JSON.

        WARNING: After some inspection of the data, we are assuming that
        start/end ranges are kept together in single nodes.  This isn't
        *required* by the json so may not be true everywhere.  If that's
        the case, we'll need a better parser to match those together.
        """
        if self.connection is None:
            # NOTE(review): open() returns None, so this assignment leaves
            # self.connection None here; it works only because open() also
            # sets self.connection as a side effect — TODO confirm intent.
            self.connection = self.open()
        cursor = self.connection.cursor()

        # Do only years with updates?
        for year in self.years():
            cve_data = self.year(year)
            self.LOGGER.debug(
                f'Time = {datetime.datetime.today().strftime("%H:%M:%S")}')
            for cve_item in cve_data["CVE_Items"]:
                # the information we want:
                # CVE ID, Severity, Score ->
                # affected {Vendor(s), Product(s), Version(s)}
                CVE = dict()
                CVE["ID"] = cve_item["cve"]["CVE_data_meta"]["ID"]

                # Get CVSSv3 or CVSSv2 score for output.
                # Details are left as an exercise to the user.
                CVE["severity"] = "unknown"
                CVE["score"] = "unknown"
                CVE["CVSS_version"] = "unknown"
                # v3 takes precedence over v2 when both are present.
                if "baseMetricV3" in cve_item["impact"]:
                    CVE["severity"] = cve_item["impact"]["baseMetricV3"][
                        "cvssV3"]["baseSeverity"]
                    CVE["score"] = cve_item["impact"]["baseMetricV3"][
                        "cvssV3"]["baseScore"]
                    CVE["CVSS_version"] = 3
                elif "baseMetricV2" in cve_item["impact"]:
                    CVE["severity"] = cve_item["impact"]["baseMetricV2"][
                        "severity"]
                    CVE["score"] = cve_item["impact"]["baseMetricV2"][
                        "cvssV2"]["baseScore"]
                    CVE["CVSS_version"] = 2

                # self.LOGGER.debug(
                #     "Severity: {} ({}) v{}".format(
                #         CVE["severity"], CVE["score"], CVE["CVSS_version"]
                #     )
                # )
                q = "INSERT or REPLACE INTO cve_severity(CVE_number, severity, score, cvss_version) \
                VALUES (?, ?, ?, ?)"
                cursor.execute(q, [
                    CVE["ID"], CVE["severity"], CVE["score"],
                    CVE["CVSS_version"]
                ])

                # Delete any old range entries for this CVE_number
                # (cve_range has no primary key, so REPLACE alone would not
                # remove ranges that disappeared from the feed).
                q_del = "DELETE from cve_range where CVE_number=?"
                cursor.execute(q_del, (CVE["ID"], ))

                # walk the nodes with version data
                # return list of versions
                affects_list = []
                if "configurations" in cve_item:
                    for node in cve_item["configurations"]["nodes"]:
                        # self.LOGGER.debug("NODE: {}".format(node))
                        affects_list.extend(self.parse_node(node))
                        # Only one level of children is walked here.
                        if "children" in node:
                            for child in node["children"]:
                                affects_list.extend(self.parse_node(child))
                # self.LOGGER.debug("Affects: {}".format(affects_list))
                q = "INSERT or REPLACE INTO cve_range(cve_number, vendor, product, version, versionStartIncluding, versionStartExcluding, versionEndIncluding, versionEndExcluding) \
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
                for affected in affects_list:
                    cursor.execute(
                        q,
                        [
                            CVE["ID"],
                            affected["vendor"],
                            affected["product"],
                            affected["version"],
                            affected["versionStartIncluding"],
                            affected["versionStartExcluding"],
                            affected["versionEndIncluding"],
                            affected["versionEndExcluding"],
                        ],
                    )
            # Commit once per year of data to bound transaction size.
            self.connection.commit()

        # supplemental data gets added here
        self.supplement_curl()

    def parse_node(self, node):
        """Extract (vendor, product, version, range) dicts from one
        NVD configuration node's cpe_match list.
        """
        affects_list = []
        if "cpe_match" in node:
            for cpe_match in node["cpe_match"]:
                # self.LOGGER.debug(cpe_match["cpe23Uri"])
                # cpe23Uri format: cpe:2.3:part:vendor:product:version:...
                cpe_split = cpe_match["cpe23Uri"].split(":")
                affects = dict()
                affects["vendor"] = cpe_split[3]
                affects["product"] = cpe_split[4]
                affects["version"] = cpe_split[5]
                # self.LOGGER.debug(
                #     "Vendor: {} Product: {} Version: {}".format(
                #         affects["vendor"], affects["product"], affects["version"]
                #     )
                # )
                # if we have a range (e.g. version is *) fill it out, and put
                # blanks where needed
                range_fields = [
                    "versionStartIncluding",
                    "versionStartExcluding",
                    "versionEndIncluding",
                    "versionEndExcluding",
                ]
                for field in range_fields:
                    if field in cpe_match:
                        affects[field] = cpe_match[field]
                    else:
                        affects[field] = self.RANGE_UNSET
                affects_list.append(affects)
        return affects_list

    def refresh(self):
        """Download any updated NVD feed files into the cache directory,
        verifying each against its published sha256, in parallel."""
        if not os.path.isdir(self.cachedir):
            os.makedirs(self.cachedir)
        update = self.nist_scrape(self.feed)
        pool = multiprocessing.Pool()
        try:
            # result.get() re-raises any exception from the worker.
            for result in [
                    pool.apply_async(
                        functools.partial(log_traceback, cache_update),
                        (self.cachedir, url, meta["sha256"]),
                    ) for url, meta in update.items()
            ]:
                result.get()
            pool.close()
            self.was_updated = True
        # NOTE(review): bare except (re-raised) — see nist_scrape.
        except:
            pool.terminate()
            raise
        finally:
            pool.join()

    def supplement_curl(self):
        """
        Get additional CVE data directly from the curl website amd add it to
        the cvedb
        """
        if not self.connection:
            self.open()
        cursor = self.connection.cursor()
        cve_pattern = re.compile('name=(CVE-[^"]*)')
        nextver_pattern = re.compile(r"the subsequent release: ([\d.]+)")
        # 6.0 is the oldest available so start there
        version = "6.0"
        cve_dict = {}
        while version:
            # get data from curl.haxx.se and parse
            url = f"https://curl.haxx.se/docs/vuln-{version}.html"
            response = request.urlopen(url)
            html = response.read()
            text = html.decode("utf-8")
            # insert each CVE separately into the range table
            # note: no deduplication against existing data
            cves = re.findall(cve_pattern, text)
            query = "INSERT INTO cve_range (CVE_Number, vendor, product, version) VALUES (?, ?, ?, ?)"
            for cve_number in cves:
                cursor.execute(query, [cve_number, "haxx", "curl", version])
            # check for next page of vulnerabilities
            # (each page links to the next release's page; stop when absent)
            nextversion = re.findall(nextver_pattern, text)
            if nextversion:
                version = nextversion[0]
            else:
                version = None
        self.connection.commit()

    def year(self, year):
        """
        Return the dict of CVE data for the given year.

        Raises CVEDataForYearNotInCache when the feed file is missing.
        """
        filename = os.path.join(self.cachedir,
                                self.NVDCVE_FILENAME_TEMPLATE.format(year))
        # Check if file exists
        if not os.path.isfile(filename):
            raise CVEDataForYearNotInCache(year)
        # Open the file and load the JSON data, log the number of CVEs loaded
        with open(filename, "rb") as fileobj:
            cves_for_year = json.load(fileobj)
            self.LOGGER.debug(
                f'Year {year} has {len(cves_for_year["CVE_Items"])} CVEs in dataset'
            )
        return cves_for_year

    def years(self):
        """
        Return the years we have NVD data for.
        """
        # Filenames look like nvdcve-1.1-<year>.json; the year is the tail
        # of the second-from-last dot-separated piece.
        return sorted([
            int(filename.split(".")[-2].split("-")[-1])
            for filename in glob.glob(
                os.path.join(self.cachedir, "nvdcve-1.1-*.json"))
        ])

    # NOTE(review): second definition of __enter__/__exit__ — these replace
    # the open/close pair defined earlier in this class.
    def __enter__(self):
        if not self.verify:
            self.LOGGER.error("Not verifying CVE DB cache")
        if not self.years():
            raise EmptyCache(self.cachedir)
        self.LOGGER.debug(f"Years present: {self.years()}")
        return self

    def __exit__(self, _exc_type, _exc_value, _traceback):
        pass

    def clear_cached_data(self):
        """Delete the feed cache directory and everything in it."""
        if os.path.exists(self.cachedir):
            self.LOGGER.warning(f"Deleting cachedir {self.cachedir}")
            shutil.rmtree(self.cachedir)