Code example #1
File: nvd_api.py  Project: intel/cve-bin-tool
 def __init__(
     self,
     logger: Logger = LOGGER.getChild("NVD_API"),
     feed=FEED,
     session=None,
     page_size: int = PAGESIZE,
     max_fail: int = MAX_FAIL,
     interval: int = INTERVAL_PERIOD,
     error_mode: ErrorMode = ErrorMode.TruncTrace,
     incremental_update=False,
     api_key: str = "",
 ):
     self.logger = logger or LOGGER.getChild(self.__class__.__name__)
     self.feed = feed
     self.session = session
     self.params: Dict = dict()
     self.page_size = page_size
     self.max_fail = max_fail
     self.interval = interval
     self.error_mode = error_mode
     self.incremental_update = incremental_update
     self.total_results = -1
     self.failed_count = 0
     self.all_cve_entries: List = []
     if api_key:
         self.params["apiKey"] = api_key
Code example #2
    def __init__(
        self,
        should_extract=False,
        exclude_folders=[],
        checkers=None,
        logger=None,
        error_mode=ErrorMode.TruncTrace,
        score=0,
    ):
        self.logger = logger or LOGGER.getChild(self.__class__.__name__)
        # Update egg if installed in development mode
        if IS_DEVELOP():
            self.logger.info("Updating egg_info")
            update_egg()

        # Load checkers if not given
        self.checkers = checkers or self.load_checkers()
        self.score = score
        self.total_scanned_files = 0
        self.exclude_folders = exclude_folders + [".git"]

        self.walker = DirWalk(folder_exclude_pattern=";".join(
            exclude if exclude.endswith("*") else exclude + "*"
            for exclude in exclude_folders)).walk
        self.should_extract = should_extract
        self.file_stack = []
        self.error_mode = error_mode
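
Note the mutable default argument exclude_folders=[]. It happens to be safe here because the body never mutates it (exclude_folders + [".git"] builds a new list), but the conventional None-sentinel spelling avoids the shared-list pitfall outright. A minimal sketch of that safer form (Scanner is an illustrative name):

    class Scanner:  # illustrative name
        def __init__(self, exclude_folders=None):
            # a None sentinel avoids one list object being shared across calls
            exclude_folders = exclude_folders if exclude_folders is not None else []
            self.exclude_folders = exclude_folders + [".git"]

    print(Scanner().exclude_folders)  # -> ['.git']

Also worth noticing: the DirWalk pattern is built from the exclude_folders parameter, not from self.exclude_folders, so ".git" is appended to the stored list but not to the walk's exclude pattern.
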
Code example #3
 def __init__(self,
              filename: str,
              logger: Logger = None,
              error_mode=ErrorMode.TruncTrace):
     self.filename = os.path.abspath(filename)
     self.logger = logger or LOGGER.getChild(self.__class__.__name__)
     self.error_mode = error_mode
     self.parsed_data = defaultdict(dict)
Code example #4
 def __init__(self,
              input_file: str,
              logger: Logger = None,
              error_mode=ErrorMode.TruncTrace) -> None:
     self.input_file = input_file
     self.logger = logger or LOGGER.getChild(self.__class__.__name__)
     self.error_mode = error_mode
     self.parsed_data_without_vendor = defaultdict(dict)
     self.parsed_data_with_vendor = defaultdict(dict)
     self.package_names_with_vendor = []
     self.package_names_without_vendor = []
Code example #5
 def __init__(
     self,
     score: int = 0,
     logger: Logger = None,
     error_mode: ErrorMode = ErrorMode.TruncTrace,
 ):
     self.logger = logger or LOGGER.getChild(self.__class__.__name__)
     self.error_mode = error_mode
     self.score = score
     self.products_with_cve = 0
     self.products_without_cve = 0
     self.all_cve_data = defaultdict(CVEData)
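
Examples #3 through #5 all open with the same two lines: accept an optional Logger and fall back to a child of the module-level LOGGER named after the class. Because self.__class__ is the runtime class, subclasses automatically get their own child logger. A runnable sketch (BaseParser and VersionReport are hypothetical names):

    import logging

    LOGGER = logging.getLogger("cve_bin_tool")

    class BaseParser:  # hypothetical base capturing the shared two-line idiom
        def __init__(self, logger: logging.Logger = None):
            self.logger = logger or LOGGER.getChild(self.__class__.__name__)

    class VersionReport(BaseParser):  # hypothetical subclass
        pass

    # self.__class__ is the runtime class, so each subclass gets its own child
    print(BaseParser().logger.name)     # -> "cve_bin_tool.BaseParser"
    print(VersionReport().logger.name)  # -> "cve_bin_tool.VersionReport"
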
Code example #6
File: __init__.py  Project: intel/cve-bin-tool
    def __init__(
        self, filename: str, sbom_type: str = "spdx", logger: Optional[Logger] = None
    ):
        self.filename = filename
        self.sbom_data = defaultdict(dict)
        self.type = "unknown"
        if sbom_type in self.SBOMtype:
            self.type = sbom_type
        self.logger = logger or LOGGER.getChild(self.__class__.__name__)

        # Connect to the database
        self.cvedb = CVEDB(version_check=False)
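
The sbom_type guard above degrades unrecognised types to "unknown" rather than raising. A standalone sketch of that validate-with-default step; the contents of SBOMtype are my assumption (the "spdx" default suggests SPDX plus other common SBOM formats), and SBOMSketch is not the real class:

    from collections import defaultdict

    class SBOMSketch:  # minimal stand-in, not the real parser class
        SBOMtype = {"spdx", "cyclonedx", "swid"}  # assumed supported types

        def __init__(self, filename: str, sbom_type: str = "spdx"):
            self.filename = filename
            self.sbom_data = defaultdict(dict)
            # unrecognised types quietly degrade to "unknown" instead of raising
            self.type = sbom_type if sbom_type in self.SBOMtype else "unknown"

    print(SBOMSketch("bom.json", "cyclonedx").type)   # -> "cyclonedx"
    print(SBOMSketch("bom.json", "not-a-type").type)  # -> "unknown"
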
Code example #7
 def __init__(self, logger=None):
     """Sets up logger and if we should extract files or just report"""
     if logger is None:
         logger = LOGGER.getChild(self.__class__.__name__)
     self.logger = logger
     self.file_extractors = {
         self.extract_file_tar:
         [".tgz", ".tar.gz", ".tar", ".tar.xz", ".tar.bz2"],
         self.extract_file_rpm: [".rpm"],
         self.extract_file_deb: [".deb", ".ipk"],
         self.extract_file_cab: [".cab"],
         self.extract_file_zip: [".exe", ".zip", ".jar", ".apk"],
     }
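
The dict maps each bound extractor method to the list of extensions it handles, so choosing an extractor is a scan over items rather than a direct key lookup. A runnable sketch of that dispatch with stubbed-out extractor bodies (the extract() method here is my illustration, not the real Extractor API):

    class ExtractorSketch:  # stubbed illustration of the dispatch
        def __init__(self):
            self.file_extractors = {
                self.extract_file_tar: [".tgz", ".tar.gz", ".tar", ".tar.xz", ".tar.bz2"],
                self.extract_file_zip: [".exe", ".zip", ".jar", ".apk"],
            }

        def extract_file_tar(self, filename):  # stub
            return f"tar: {filename}"

        def extract_file_zip(self, filename):  # stub
            return f"zip: {filename}"

        def extract(self, filename):
            # scan the mapping and call the first extractor whose extension matches
            for extractor, extensions in self.file_extractors.items():
                if any(filename.endswith(ext) for ext in extensions):
                    return extractor(filename)
            raise ValueError(f"no extractor for {filename}")

    print(ExtractorSketch().extract("pkg.tar.gz"))  # -> "tar: pkg.tar.gz"
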
Code example #8
File: csv2cve.py  Project: westonsteimel/cve-bin-tool
def main(argv=None):
    logger = LOGGER.getChild("CSV2CVE")
    argv = argv or sys.argv
    if len(argv) < 2:
        with ErrorHandler(logger=logger):
            raise InsufficientArgs("csv file required")

    flag = False
    for idx, arg in enumerate(argv):
        if arg.endswith(".csv"):
            argv[idx] = f"-i={arg}"
            flag = True
    if flag:
        return cli.main(argv)
    else:
        with ErrorHandler(logger=logger):
            raise InsufficientArgs("csv file required")
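
The loop's effect is to rewrite every argument ending in .csv into an -i=<file> flag before delegating to cli.main, so "csv2cve deps.csv" becomes a cli.main call carrying -i=deps.csv. The rewrite step in isolation:

    argv = ["csv2cve", "deps.csv", "--quiet"]
    argv = [f"-i={arg}" if arg.endswith(".csv") else arg for arg in argv]
    print(argv)  # -> ['csv2cve', '-i=deps.csv', '--quiet']
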
Code example #9
File: cvedb.py  Project: anthonyharrison/cve-bin-tool
class CVEDB:
    """
    Downloads NVD data in json form and stores it on disk in a cache.
    """

    CACHEDIR = DISK_LOCATION_DEFAULT
    FEED = "https://nvd.nist.gov/vuln/data-feeds"
    LOGGER = LOGGER.getChild("CVEDB")
    NVDCVE_FILENAME_TEMPLATE = "nvdcve-1.1-{}.json.gz"
    CURL_CVE_FILENAME_TEMPLATE = "curlcve-{}.json"
    META_LINK = "https://nvd.nist.gov"
    META_REGEX = re.compile(r"\/feeds\/json\/.*-[0-9]*\.[0-9]*-[0-9]*\.meta")
    RANGE_UNSET = ""

    def __init__(
        self,
        feed=None,
        cachedir=None,
        version_check=True,
        session=None,
        error_mode=ErrorMode.TruncTrace,
    ):
        self.feed = feed if feed is not None else self.FEED
        self.cachedir = cachedir if cachedir is not None else self.CACHEDIR
        self.error_mode = error_mode
        # Will be true if refresh was successful
        self.was_updated = False

        # version update
        self.version_check = version_check

        # set up the db if needed
        self.dbpath = os.path.join(self.cachedir, DBNAME)
        self.connection = None
        self.session = session
        self.cve_count = -1

    def get_cve_count(self):
        if self.cve_count == -1:
            # Force update
            self.check_cve_entries()
        return self.cve_count

    def get_db_update_date(self):
        return os.path.getmtime(self.dbpath)

    async def getmeta(self, session, meta_url):
        async with session.get(meta_url) as response:
            response.raise_for_status()
            return (
                meta_url.replace(".meta", ".json.gz"),
                dict([
                    line.split(":", maxsplit=1)
                    for line in (await response.text()).splitlines()
                    if ":" in line
                ]),
            )

    async def nist_scrape(self, session):
        async with session.get(self.feed) as response:
            response.raise_for_status()
            page = await response.text()
            json_meta_links = self.META_REGEX.findall(page)
            return dict(await asyncio.gather(*[
                self.getmeta(session, f"{self.META_LINK}{meta_url}")
                for meta_url in json_meta_links
            ]))

    async def cache_update(self, session, url, sha, chunk_size=16 * 1024):
        """
        Update the cache for a single year of NVD data.
        """
        filename = url.split("/")[-1]
        # Ensure we only write to files within the cachedir
        filepath = os.path.abspath(os.path.join(self.cachedir, filename))
        if not filepath.startswith(os.path.abspath(self.cachedir)):
            with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                raise AttemptedToWriteOutsideCachedir(filepath)
        # Validate the contents of the cached file
        if os.path.isfile(filepath):
            # Validate the sha and write out
            sha = sha.upper()
            calculate = hashlib.sha256()
            async with GzipFile(filepath, "rb") as f:
                chunk = await f.read(chunk_size)
                while chunk:
                    calculate.update(chunk)
                    chunk = await f.read(chunk_size)
            # Validate the sha and exit if it is correct, otherwise update
            gotsha = calculate.hexdigest().upper()
            if gotsha != sha:
                os.unlink(filepath)
                self.LOGGER.warning(
                    f"SHA mismatch for {filename} (have: {gotsha}, want: {sha})"
                )
            else:
                self.LOGGER.debug(f"Correct SHA for {filename}")
                return
        self.LOGGER.debug(f"Updating CVE cache for {filename}")

        async with session.get(url) as response:
            # Raise a clearer error message when rate limited by NVD
            if response.status == 403:
                with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                    raise NVDRateLimit(
                        f"{url} : download failed, you may have been rate limited."
                    )
            # Raise for any other HTTP error status
            response.raise_for_status()
            gzip_data = await response.read()
        json_data = gzip.decompress(gzip_data)
        gotsha = hashlib.sha256(json_data).hexdigest().upper()
        async with FileIO(filepath, "wb") as filepath_handle:
            await filepath_handle.write(gzip_data)
        # Raise error if there was an issue with the sha
        if gotsha != sha:
            # Remove the file if there was an issue
            # exit(100)
            os.unlink(filepath)
            with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                raise SHAMismatch(f"{url} (have: {gotsha}, want: {sha})")

    @staticmethod
    async def get_curl_versions(session):
        regex = re.compile(r"vuln-(\d+.\d+.\d+)\.html")
        async with session.get(
                "https://curl.haxx.se/docs/vulnerabilities.html") as response:
            response.raise_for_status()
            html = await response.text()
        matches = regex.finditer(html)
        return [match.group(1) for match in matches]

    async def download_curl_version(self, session, version):
        async with session.get(
                f"https://curl.haxx.se/docs/vuln-{version}.html") as response:
            response.raise_for_status()
            html = await response.text()
        soup = BeautifulSoup(html, "html.parser")
        table = soup.find("table")
        if not table:
            return
        headers = table.find_all("th")
        headers = list(map(lambda x: x.text.strip().lower(), headers))
        self.LOGGER.debug(headers)
        rows = table.find_all("tr")
        json_data = []
        for row in rows:
            cols = row.find_all("td")
            values = (ele.text.strip() for ele in cols)
            data = dict(zip(headers, values))
            if data:
                json_data.append(data)
        filepath = os.path.abspath(
            os.path.join(self.cachedir, f"curlcve-{version}.json"))
        async with FileIO(filepath, "w") as f:
            await f.write(json.dumps(json_data, indent=4))

    async def refresh(self):
        """ Refresh the cve database and check for new version. """
        # refresh the database
        if not os.path.isdir(self.cachedir):
            os.makedirs(self.cachedir)
        # check for the latest version
        if self.version_check:
            self.LOGGER.info("Checking if there is a newer version.")
            check_latest_version()
        if not self.session:
            connector = aiohttp.TCPConnector(limit_per_host=19)
            self.session = aiohttp.ClientSession(connector=connector,
                                                 trust_env=True)
        self.LOGGER.info("Downloading CVE data...")
        nvd_metadata, curl_metadata = await asyncio.gather(
            self.nist_scrape(self.session),
            self.get_curl_versions(self.session))
        tasks = [
            self.cache_update(self.session, url, meta["sha256"])
            for url, meta in nvd_metadata.items() if meta is not None
        ]
        # We use gather to fold the per-curl-version downloads into a
        # single task. Otherwise the progress bar would overstate how
        # close we are to completion, because each curl version would
        # count as its own finished download.
        tasks.append(
            asyncio.gather(*[
                self.download_curl_version(self.session, version)
                for version in curl_metadata
            ]))
        total_tasks = len(tasks)

        # error_mode.value is only greater than 1 when quiet mode is off.
        if self.error_mode.value > 1:
            iter_tasks = track(
                asyncio.as_completed(tasks),
                description="Downloading CVEs...",
                total=total_tasks,
            )
        else:
            iter_tasks = asyncio.as_completed(tasks)

        for task in iter_tasks:
            await task
        self.was_updated = True
        await self.session.close()
        self.session = None

    def refresh_cache_and_update_db(self):
        self.LOGGER.info("Updating CVE data. This will take a few minutes.")
        # refresh the nvd cache
        run_coroutine(self.refresh())

        # if the database isn't open, open it
        self.init_database()
        self.populate_db()

    def get_cvelist_if_stale(self):
        """Update if the local db is more than one day old.
        This avoids the full slow update with every execution.
        """
        if not os.path.isfile(self.dbpath) or (
                datetime.datetime.today() -
                datetime.datetime.fromtimestamp(os.path.getmtime(
                    self.dbpath))) > datetime.timedelta(hours=24):
            self.refresh_cache_and_update_db()
        else:
            self.LOGGER.info(
                "Using cached CVE data (<24h old). Use -u now to update immediately."
            )

    def latest_schema(self, cursor):
        """ Check database is using latest schema """
        self.LOGGER.info("Check database is using latest schema")
        schema_check = "SELECT * FROM cve_severity WHERE 1=0"
        result = cursor.execute(schema_check)
        schema_latest = False
        # Look through column names and check for column added in latest schema
        for col_name in result.description:
            if col_name[0] == "description":
                schema_latest = True
        return schema_latest

    def check_cve_entries(self):
        """ Report if database has some CVE entries """
        self.db_open()
        cursor = self.connection.cursor()
        cve_entries_check = "SELECT COUNT(*) FROM cve_severity"
        cursor.execute(cve_entries_check)
        # Find number of entries
        cve_entries = cursor.fetchone()[0]
        self.LOGGER.info(
            f"There are {cve_entries} CVE entries in the database")
        self.db_close()
        self.cve_count = cve_entries
        return cve_entries > 0

    def init_database(self):
        """ Initialize db tables used for storing cve/version data """
        self.db_open()
        cursor = self.connection.cursor()
        cve_data_create = """
        CREATE TABLE IF NOT EXISTS cve_severity (
            cve_number TEXT,
            severity TEXT,
            description TEXT,
            score INTEGER,
            cvss_version INTEGER,
            PRIMARY KEY(cve_number)
        )
        """
        version_range_create = """
        CREATE TABLE IF NOT EXISTS cve_range (
            cve_number TEXT,
            vendor TEXT,
            product TEXT,
            version TEXT,
            versionStartIncluding TEXT,
            versionStartExcluding TEXT,
            versionEndIncluding TEXT,
            versionEndExcluding TEXT,
            FOREIGN KEY(cve_number) REFERENCES cve_severity(cve_number)
        )
        """
        index_range = "CREATE INDEX IF NOT EXISTS product_index ON cve_range (cve_number, vendor, product)"
        cursor.execute(cve_data_create)
        cursor.execute(version_range_create)
        cursor.execute(index_range)

        # Check that latest schema is being used
        if not self.latest_schema(cursor):
            # Recreate table using latest schema
            self.LOGGER.info("Upgrading database to latest schema")
            cursor.execute("DROP TABLE cve_severity")
            cursor.execute(cve_data_create)
            self.clear_cached_data()
        self.connection.commit()

    def populate_db(self):
        """Function that populates the database from the JSON.

        WARNING: After some inspection of the data, we are assuming that start/end ranges are kept together
        in single nodes.  This isn't *required* by the json so may not be true everywhere.  If that's the case,
        we'll need a better parser to match those together.
        """
        self.db_open()
        cursor = self.connection.cursor()

        insert_severity = """
        INSERT or REPLACE INTO cve_severity(
            CVE_number,
            severity,
            description,
            score,
            cvss_version
        )
        VALUES (?, ?, ?, ?, ?)
        """
        insert_cve_range = """
        INSERT or REPLACE INTO cve_range(
            cve_number,
            vendor,
            product,
            version,
            versionStartIncluding,
            versionStartExcluding,
            versionEndIncluding,
            versionEndExcluding
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """
        del_cve_range = "DELETE from cve_range where CVE_number=?"

        # error_mode.value is only greater than 1 when quiet mode is off.
        if self.error_mode.value > 1:
            years = track(self.nvd_years(),
                          description="Updating CVEs from NVD...")
        else:
            years = self.nvd_years()

        for year in years:
            cve_data = self.load_nvd_year(year)
            self.LOGGER.debug(
                f'Time = {datetime.datetime.today().strftime("%H:%M:%S")}')
            for cve_item in cve_data["CVE_Items"]:
                # the information we want:
                # CVE ID, Severity, Score ->
                # affected {Vendor(s), Product(s), Version(s)}
                cve = {
                    "ID": cve_item["cve"]["CVE_data_meta"]["ID"],
                    "description": cve_item["cve"]["description"][
                        "description_data"][0]["value"],
                    "severity": "unknown",
                    "score": "unknown",
                    "CVSS_version": "unknown",
                }
                # Get CVSSv3 or CVSSv2 score for output.
                # Details are left as an exercise to the user.
                if "baseMetricV3" in cve_item["impact"]:
                    cve["severity"] = cve_item["impact"]["baseMetricV3"][
                        "cvssV3"]["baseSeverity"]
                    cve["score"] = cve_item["impact"]["baseMetricV3"][
                        "cvssV3"]["baseScore"]
                    cve["CVSS_version"] = 3
                elif "baseMetricV2" in cve_item["impact"]:
                    cve["severity"] = cve_item["impact"]["baseMetricV2"][
                        "severity"]
                    cve["score"] = cve_item["impact"]["baseMetricV2"][
                        "cvssV2"]["baseScore"]
                    cve["CVSS_version"] = 2

                # self.LOGGER.debug(
                #    "Severity: {} ({}) v{}".format(
                #        CVE["severity"], CVE["score"], CVE["CVSS_version"]
                #    )
                # )

                cursor.execute(
                    insert_severity,
                    [
                        cve["ID"],
                        cve["severity"],
                        cve["description"],
                        cve["score"],
                        cve["CVSS_version"],
                    ],
                )

                # Delete any old range entries for this CVE_number
                cursor.execute(del_cve_range, (cve["ID"], ))

                # walk the nodes with version data
                # return list of versions
                affects_list = []
                if "configurations" in cve_item:
                    for node in cve_item["configurations"]["nodes"]:
                        # self.LOGGER.debug("NODE: {}".format(node))
                        affects_list.extend(self.parse_node(node))
                        if "children" in node:
                            for child in node["children"]:
                                affects_list.extend(self.parse_node(child))
                # self.LOGGER.debug("Affects: {}".format(affects_list))
                cursor.executemany(
                    insert_cve_range,
                    [(
                        cve["ID"],
                        affected["vendor"],
                        affected["product"],
                        affected["version"],
                        affected["versionStartIncluding"],
                        affected["versionStartExcluding"],
                        affected["versionEndIncluding"],
                        affected["versionEndExcluding"],
                    ) for affected in affects_list],
                )
            self.connection.commit()

        # supplemental data gets added here
        self.supplement_curl()

        self.db_close()

    def parse_node(self, node):
        affects_list = []
        if "cpe_match" in node:
            for cpe_match in node["cpe_match"]:
                # self.LOGGER.debug(cpe_match["cpe23Uri"])
                cpe_split = cpe_match["cpe23Uri"].split(":")
                affects = {
                    "vendor": cpe_split[3],
                    "product": cpe_split[4],
                    "version": cpe_split[5],
                }

                # self.LOGGER.debug(
                #    "Vendor: {} Product: {} Version: {}".format(
                #        affects["vendor"], affects["product"], affects["version"]
                #    )
                # )
                # if we have a range (e.g. version is *) fill it out, and put blanks where needed
                range_fields = [
                    "versionStartIncluding",
                    "versionStartExcluding",
                    "versionEndIncluding",
                    "versionEndExcluding",
                ]
                for field in range_fields:
                    if field in cpe_match:
                        affects[field] = cpe_match[field]
                    else:
                        affects[field] = self.RANGE_UNSET

                affects_list.append(affects)
        return affects_list

    def supplement_curl(self):
        """
        Get additional CVE data directly from the curl website and add it to the cvedb
        """
        self.db_open()
        insert_cve_range = """
        INSERT or REPLACE INTO cve_range(
            cve_number,
            vendor,
            product,
            version,
            versionStartIncluding,
            versionStartExcluding,
            versionEndIncluding,
            versionEndExcluding
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """
        cursor = self.connection.cursor()
        # No need to track this. It is very fast!
        for version in self.curl_versions():
            cve_list = self.load_curl_version(version)
            # for cve in cve_list:
            cursor.executemany(
                insert_cve_range,
                [(
                    cve["cve"],
                    "haxx",
                    "curl",
                    version,
                    cve["from version"],
                    "",
                    cve["to and including"],
                    "",
                ) for cve in cve_list],
            )
            self.connection.commit()

    def load_nvd_year(self, year):
        """
        Return the dict of CVE data for the given year.
        """
        filename = os.path.join(self.cachedir,
                                self.NVDCVE_FILENAME_TEMPLATE.format(year))
        # Check if file exists
        if not os.path.isfile(filename):
            with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                raise CVEDataForYearNotInCache(year)
        # Open the file and load the JSON data, log the number of CVEs loaded
        with gzip.open(filename, "rb") as fileobj:
            cves_for_year = json.load(fileobj)
            self.LOGGER.debug(
                f'Year {year} has {len(cves_for_year["CVE_Items"])} CVEs in dataset'
            )
            return cves_for_year

    def nvd_years(self):
        """
        Return the years we have NVD data for.
        """
        return sorted([
            int(filename.split(".")[-3].split("-")[-1])
            for filename in glob.glob(
                os.path.join(self.cachedir, "nvdcve-1.1-*.json.gz"))
        ])

    def load_curl_version(self, version):
        """
        Return the dict of CVE data for the given curl version.
        """
        filename = os.path.join(
            self.cachedir, self.CURL_CVE_FILENAME_TEMPLATE.format(version))
        # Check if file exists
        if not os.path.isfile(filename):
            with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                raise CVEDataForCurlVersionNotInCache(version)
        # Open the file and load the JSON data, log the number of CVEs loaded
        with open(filename, "rb") as fileobj:
            cves_for_version = json.load(fileobj)
            self.LOGGER.debug(
                f"Curl Version {version} has {len(cves_for_version)} CVEs in dataset"
            )
            return cves_for_version

    def curl_versions(self):
        """
        Return the versions we have Curl data for.
        """
        regex = re.compile(r"curlcve-(\d+.\d+.\d).json")
        return [
            regex.search(filename).group(1) for filename in glob.glob(
                os.path.join(self.cachedir, "curlcve-*.json"))
        ]

    def clear_cached_data(self):
        if os.path.exists(self.cachedir):
            self.LOGGER.warning(f"Deleting cachedir {self.cachedir}")
            shutil.rmtree(self.cachedir)
        # Remove files associated with pre-1.0 development tree
        if os.path.exists(OLD_CACHE_DIR):
            self.LOGGER.warning(f"Deleting old cachedir {OLD_CACHE_DIR}")
            shutil.rmtree(OLD_CACHE_DIR)

    def db_open(self):
        """ Opens connection to sqlite database."""
        if not self.connection:
            self.connection = sqlite3.connect(self.dbpath)

    def db_close(self):
        """ Closes connection to sqlite database."""
        if self.connection:
            self.connection.close()
            self.connection = None
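
Typical use of this class is construct, refresh only if the cached database is stale, then read counts. A hedged usage sketch, assuming cve-bin-tool is installed and that CVEDB and ErrorMode import from cve_bin_tool.cvedb and cve_bin_tool.error_handler respectively:

    from cve_bin_tool.cvedb import CVEDB
    from cve_bin_tool.error_handler import ErrorMode

    db = CVEDB(version_check=False, error_mode=ErrorMode.TruncTrace)
    db.get_cvelist_if_stale()  # full download/rebuild only if the cache is >24h old
    print(db.get_cve_count())  # first call forces check_cve_entries()
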
Code example #10
class CVEDB(object):
    """
    Downloads NVD data in json form and stores it on disk in a cache.
    """

    CACHEDIR = os.path.join(os.path.expanduser("~"), ".cache", "cvedb")
    FEED = "https://nvd.nist.gov/vuln/data-feeds"
    LOGGER = LOGGER.getChild("CVEDB")
    NVDCVE_FILENAME_TEMPLATE = "nvdcve-1.1-{}.json"
    META_REGEX = re.compile(
        r"https:\/\/.*\/json\/.*-[0-9]*\.[0-9]*-[0-9]*\.meta")
    RANGE_UNSET = ""

    def __init__(self, verify=True, feed=None, cachedir=None):
        self.verify = verify
        self.feed = feed if feed is not None else self.FEED
        self.cachedir = cachedir if cachedir is not None else self.CACHEDIR
        # Will be true if refresh was successful
        self.was_updated = False

        # set up the db if needed
        self.disk_location = DISK_LOCATION_DEFAULT
        self.dbname = os.path.join(self.disk_location, DBNAME)
        self.connection = None

    def nist_scrape(self, feed):
        with contextlib.closing(request.urlopen(feed)) as response:
            page = response.read().decode()
            jsonmetalinks = self.META_REGEX.findall(page)
            pool = multiprocessing.Pool()
            try:
                metadata = dict(
                    pool.map(functools.partial(log_traceback, getmeta),
                             tuple(jsonmetalinks)))
                pool.close()
                return metadata
            except:
                pool.terminate()
                raise
            finally:
                pool.join()

    def init_database(self):
        """ Initialize db tables used for storing cve/version data """
        conn = sqlite3.connect(self.dbname)
        db_cursor = conn.cursor()
        cve_data_create = """CREATE TABLE IF NOT EXISTS cve_severity (
        cve_number TEXT,
        severity TEXT,
        score INTEGER,
        cvss_version INTEGER,
        PRIMARY KEY(cve_number)
        )
        """
        db_cursor.execute(cve_data_create)

        version_range_create = """ CREATE TABLE IF NOT EXISTS cve_range (
        cve_number TEXT,
        vendor TEXT,
        product TEXT,
        version TEXT,
        versionStartIncluding TEXT,
        versionStartExcluding TEXT,
        versionEndIncluding TEXT,
        versionEndExcluding TEXT
        )
        """
        db_cursor.execute(version_range_create)

        index_range = """CREATE INDEX IF NOT EXISTS product_index ON cve_range (cve_number, vendor, product)"""
        db_cursor.execute(index_range)
        conn.commit()
        return conn

    def open(self):
        """ Opens connection to sqlite database."""
        self.connection = sqlite3.connect(self.dbname, check_same_thread=False)

    def close(self):
        """ Closes connection to sqlite database."""
        self.connection.close()
        self.connection = None

    def __enter__(self):
        """ Opens connection to sqlite database."""
        self.open()

    def __exit__(self, exc_type, exc, exc_tb):
        """ Closes connection to sqlite database."""
        self.close()

    def get_cvelist_if_stale(self):
        """ Update if the local db is more than one day old.
        This avoids the full slow update with every execution.
        """
        if not os.path.isfile(self.dbname) or (
                datetime.datetime.today() -
                datetime.datetime.fromtimestamp(os.path.getmtime(
                    self.dbname))) > datetime.timedelta(hours=24):
            self.refresh_cache_and_update_db()
        else:
            self.LOGGER.info(
                "Using cached CVE data (<24h old). Use -u now to update immediately."
            )

    def refresh_cache_and_update_db(self):
        self.LOGGER.info("Updating CVE data. This will take a few minutes.")
        # refresh the nvd cache
        self.refresh()
        # if the database isn't open, open it
        if self.connection is None:
            self.connection = self.init_database()
        self.populate_db()

    def get_cves(self, vendor, product, version):
        """ Get CVEs against a specific version of a package.

        Example:
            nvd.get_cves('haxx', 'curl', '7.34.0')
        """
        if self.connection is None:
            self.open()
        cursor = self.connection.cursor()

        # Check for anything directly marked
        query = """SELECT CVE_number FROM cve_range WHERE
        vendor=? AND product=? AND version=?"""
        cursor.execute(query, [vendor, product, version])
        cve_list = list(map(lambda x: x[0], cursor.fetchall()))

        # Check for any ranges
        query = """SELECT CVE_number, versionStartIncluding, versionStartExcluding, versionEndIncluding, versionEndExcluding FROM cve_range WHERE
        vendor=? AND product=? AND version=?"""
        cursor.execute(query, [vendor, product, "*"])
        for cve_range in cursor:
            (
                cve_number,
                versionStartIncluding,
                versionStartExcluding,
                versionEndIncluding,
                versionEndExcluding,
            ) = cve_range

            # pep-440 doesn't include versions of the type 1.1.0g used by openssl
            # so if this is openssl, convert the last letter to a .number
            if product == "openssl":
                # if last character is a letter, convert it to .number
                version = self.openssl_convert(version)
                versionStartIncluding = self.openssl_convert(
                    versionStartIncluding)
                versionStartExcluding = self.openssl_convert(
                    versionStartExcluding)
                versionEndIncluding = self.openssl_convert(versionEndIncluding)
                versionEndExcluding = self.openssl_convert(versionEndExcluding)

            parsed_version = parse_version(version)

            # check the start range
            passes_start = False
            if (versionStartIncluding is not self.RANGE_UNSET and
                    parsed_version >= parse_version(versionStartIncluding)):
                passes_start = True
            if (versionStartExcluding is not self.RANGE_UNSET
                    and parsed_version > parse_version(versionStartExcluding)):
                passes_start = True

            if (versionStartIncluding is self.RANGE_UNSET
                    and versionStartExcluding is self.RANGE_UNSET):
                # then there is no start range so just say true
                passes_start = True

            # check the end range
            passes_end = False
            if (versionEndIncluding is not self.RANGE_UNSET
                    and parsed_version <= parse_version(versionEndIncluding)):
                passes_end = True

            if (versionEndExcluding is not self.RANGE_UNSET
                    and parsed_version < parse_version(versionEndExcluding)):
                passes_end = True
            if (versionEndIncluding is self.RANGE_UNSET
                    and versionEndExcluding is self.RANGE_UNSET):
                # then there is no end range so it passes
                passes_end = True
            # if it fits into both ends of the range, add the cve number
            if passes_start and passes_end:
                cve_list.append(cve_number)

        # Go through and get all the severities
        if cve_list:
            query = f'SELECT CVE_number, severity from cve_severity where CVE_number IN ({",".join(["?"]*len(cve_list))}) ORDER BY CVE_number ASC'
            cursor.execute(query, cve_list)
            # Everything expects a data structure of cve[number] = severity so you can search through keys
            # and do other easy manipulations
            return dict(cursor)

        return cve_list

    def openssl_convert(self, version):
        """ pkg_resources follows pep-440 which doesn't expect openssl style 1.1.0g version numbering
        So to fake it, if the last character is a letter, replace it with .number before comparing """
        if len(version) < 1:
            return version

        lastchar = version[len(version) - 1]
        letters = dict(zip(ascii_lowercase, range(26)))

        if lastchar in letters:
            version = f"{version[0 : len(version) - 1]}.{letters[lastchar]}"
        return version

    def populate_db(self):
        """ Function that populates the database from the JSON.

        WARNING: After some inspection of the data, we are assuming that start/end ranges are kept together
        in single nodes.  This isn't *required* by the json so may not be true everywhere.  If that's the case,
        we'll need a better parser to match those together.
        """
        if self.connection is None:
            self.open()  # open() sets self.connection; its return value is None

        cursor = self.connection.cursor()

        # Do only years with updates?
        for year in self.years():
            cve_data = self.year(year)
            self.LOGGER.debug(
                f'Time = {datetime.datetime.today().strftime("%H:%M:%S")}')
            for cve_item in cve_data["CVE_Items"]:
                # the information we want:
                # CVE ID, Severity, Score ->
                # affected {Vendor(s), Product(s), Version(s)}
                CVE = dict()
                CVE["ID"] = cve_item["cve"]["CVE_data_meta"]["ID"]

                # Get CVSSv3 or CVSSv2 score for output.
                # Details are left as an exercise to the user.
                CVE["severity"] = "unknown"
                CVE["score"] = "unknown"
                CVE["CVSS_version"] = "unknown"
                if "baseMetricV3" in cve_item["impact"]:
                    CVE["severity"] = cve_item["impact"]["baseMetricV3"][
                        "cvssV3"]["baseSeverity"]
                    CVE["score"] = cve_item["impact"]["baseMetricV3"][
                        "cvssV3"]["baseScore"]
                    CVE["CVSS_version"] = 3
                elif "baseMetricV2" in cve_item["impact"]:
                    CVE["severity"] = cve_item["impact"]["baseMetricV2"][
                        "severity"]
                    CVE["score"] = cve_item["impact"]["baseMetricV2"][
                        "cvssV2"]["baseScore"]
                    CVE["CVSS_version"] = 2

                # self.LOGGER.debug(
                #    "Severity: {} ({}) v{}".format(
                #        CVE["severity"], CVE["score"], CVE["CVSS_version"]
                #    )
                # )

                q = "INSERT or REPLACE INTO cve_severity(CVE_number, severity, score, cvss_version) \
                VALUES (?, ?, ?, ?)"

                cursor.execute(q, [
                    CVE["ID"], CVE["severity"], CVE["score"],
                    CVE["CVSS_version"]
                ])

                # Delete any old range entries for this CVE_number
                q_del = "DELETE from cve_range where CVE_number=?"
                cursor.execute(q_del, (CVE["ID"], ))

                # walk the nodes with version data
                # return list of versions
                affects_list = []
                if "configurations" in cve_item:
                    for node in cve_item["configurations"]["nodes"]:
                        # self.LOGGER.debug("NODE: {}".format(node))
                        affects_list.extend(self.parse_node(node))
                        if "children" in node:
                            for child in node["children"]:
                                affects_list.extend(self.parse_node(child))
                # self.LOGGER.debug("Affects: {}".format(affects_list))

                q = "INSERT or REPLACE INTO cve_range(cve_number, vendor, product, version, versionStartIncluding, versionStartExcluding, versionEndIncluding, versionEndExcluding) \
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)"

                for affected in affects_list:
                    cursor.execute(
                        q,
                        [
                            CVE["ID"],
                            affected["vendor"],
                            affected["product"],
                            affected["version"],
                            affected["versionStartIncluding"],
                            affected["versionStartExcluding"],
                            affected["versionEndIncluding"],
                            affected["versionEndExcluding"],
                        ],
                    )
            self.connection.commit()

        # supplemental data gets added here
        self.supplement_curl()

    def parse_node(self, node):
        affects_list = []
        if "cpe_match" in node:
            for cpe_match in node["cpe_match"]:
                # self.LOGGER.debug(cpe_match["cpe23Uri"])
                cpe_split = cpe_match["cpe23Uri"].split(":")
                affects = dict()
                affects["vendor"] = cpe_split[3]
                affects["product"] = cpe_split[4]
                affects["version"] = cpe_split[5]

                # self.LOGGER.debug(
                #    "Vendor: {} Product: {} Version: {}".format(
                #        affects["vendor"], affects["product"], affects["version"]
                #    )
                # )
                # if we have a range (e.g. version is *) fill it out, and put blanks where needed
                range_fields = [
                    "versionStartIncluding",
                    "versionStartExcluding",
                    "versionEndIncluding",
                    "versionEndExcluding",
                ]
                for field in range_fields:
                    if field in cpe_match:
                        affects[field] = cpe_match[field]
                    else:
                        affects[field] = self.RANGE_UNSET

                affects_list.append(affects)
        return affects_list

    def refresh(self):
        if not os.path.isdir(self.cachedir):
            os.makedirs(self.cachedir)
        update = self.nist_scrape(self.feed)
        pool = multiprocessing.Pool()
        try:
            for result in [
                    pool.apply_async(
                        functools.partial(log_traceback, cache_update),
                        (self.cachedir, url, meta["sha256"]),
                    ) for url, meta in update.items()
            ]:
                result.get()
            pool.close()
            self.was_updated = True
        except:
            pool.terminate()
            raise
        finally:
            pool.join()

    def supplement_curl(self):
        """
        Get additional CVE data directly from the curl website amd add it to the cvedb
        """
        if not self.connection:
            self.open()

        cursor = self.connection.cursor()

        cve_pattern = re.compile('name=(CVE-[^"]*)')
        nextver_pattern = re.compile(r"the subsequent release: ([\d.]+)")

        # 6.0 is the oldest available so start there
        version = "6.0"
        cve_dict = {}
        while version:
            # get data from curl.haxx.se and parse
            url = f"https://curl.haxx.se/docs/vuln-{version}.html"
            response = request.urlopen(url)
            html = response.read()
            text = html.decode("utf-8")

            # insert each CVE separately into the range table
            # note: no deduplication against existing data
            cves = re.findall(cve_pattern, text)
            query = "INSERT INTO cve_range (CVE_Number, vendor, product, version) VALUES (?, ?, ?, ?)"
            for cve_number in cves:
                cursor.execute(query, [cve_number, "haxx", "curl", version])
            # check for next page of vulnerabilities
            nextversion = re.findall(nextver_pattern, text)
            if nextversion:
                version = nextversion[0]
            else:
                version = None
        self.connection.commit()

    def year(self, year):
        """
        Return the dict of CVE data for the given year.
        """
        filename = os.path.join(self.cachedir,
                                self.NVDCVE_FILENAME_TEMPLATE.format(year))
        # Check if file exists
        if not os.path.isfile(filename):
            raise CVEDataForYearNotInCache(year)
        # Open the file and load the JSON data, log the number of CVEs loaded
        with open(filename, "rb") as fileobj:
            cves_for_year = json.load(fileobj)
            self.LOGGER.debug(
                f'Year {year} has {len(cves_for_year["CVE_Items"])} CVEs in dataset'
            )
            return cves_for_year

    def years(self):
        """
        Return the years we have NVD data for.
        """
        return sorted([
            int(filename.split(".")[-2].split("-")[-1])
            for filename in glob.glob(
                os.path.join(self.cachedir, "nvdcve-1.1-*.json"))
        ])

    def __enter__(self):
        if not self.verify:
            self.LOGGER.error("Not verifying CVE DB cache")
            if not self.years():
                raise EmptyCache(self.cachedir)
        self.LOGGER.debug(f"Years present: {self.years()}")
        return self

    def __exit__(self, _exc_type, _exc_value, _traceback):
        pass

    def clear_cached_data(self):
        if os.path.exists(self.cachedir):
            self.LOGGER.warning(f"Deleting cachedir {self.cachedir}")
            shutil.rmtree(self.cachedir)
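
One detail in this older variant deserves a worked example: openssl_convert maps a trailing letter to ".index" (a -> .0, b -> .1, ..., g -> .6) so that OpenSSL-style versions such as 1.1.0g become orderable under PEP 440 parsing. A standalone copy of the logic:

    from string import ascii_lowercase

    def openssl_convert(version: str) -> str:
        """Standalone copy of CVEDB.openssl_convert for illustration."""
        if not version:
            return version
        letters = dict(zip(ascii_lowercase, range(26)))
        lastchar = version[-1]
        if lastchar in letters:
            version = f"{version[:-1]}.{letters[lastchar]}"
        return version

    print(openssl_convert("1.1.0g"))  # -> "1.1.0.6" (g is the 7th letter, index 6)
    print(openssl_convert("1.1.0"))   # -> "1.1.0" (unchanged)
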
Code example #11
File: helper_script.py  Project: intel/cve-bin-tool
class HelperScript:
    """Helps contributors who want to write a new cve-bin-tool checker find common filenames, version strings, and other necessary data for building a binary checker"""

    CONSOLE = Console()
    LOGGER = LOGGER.getChild("HelperScript")

    def __init__(self,
                 filename,
                 product_name=None,
                 version_number=None,
                 string_length=40):
        self.filename = filename
        self.extractor = Extractor()
        self.product_name, self.version_number = self.parse_filename(filename)
        if product_name:
            self.product_name = product_name
        if version_number:
            self.version_number = version_number
        self.string_length = string_length

        # for setting the database
        self.connection = None
        self.dbpath = os.path.join(DISK_LOCATION_DEFAULT, DBNAME)

        # for extraction
        self.walker = DirWalk().walk

        # for output (planned for future use)
        self.contains_patterns = []
        self.filename_pattern = []
        self.version_pattern = []
        self.vendor_product = self.find_vendor_product()

        # for scanning file versions
        self.version_scanner = VersionScanner()

    def extract_and_parse_file(self, filename):
        """extracts and parses the file for common patterns, version strings and common filename patterns"""

        with self.extractor as ectx:
            if ectx.can_extract(filename):
                binary_string_list = []
                for filepath in self.walker([ectx.extract(filename)]):
                    clean_path = self.version_scanner.clean_file_path(filepath)
                    LOGGER.debug(f"checking whether {clean_path} is binary")

                    # see if the file is ELF binary file and parse for strings
                    is_exec = self.version_scanner.is_executable(filepath)[0]
                    if is_exec:
                        LOGGER.debug(
                            f"{clean_path} <--- this is an ELF binary")
                        file_content = self.version_scanner.parse_strings(
                            filepath)

                        matches = self.search_pattern(file_content,
                                                      self.product_name)

                        # searching for version strings in the found matches
                        version_string = self.search_version_string(matches)
                        self.version_pattern += version_string

                        # if version string is found in file, append it to filename_pattern
                        if version_string:
                            if sys.platform == "win32":
                                self.filename_pattern.append(
                                    filepath.split("\\")[-1])
                            else:
                                self.filename_pattern.append(
                                    filepath.split("/")[-1])
                            LOGGER.info(
                                f"matches for {self.product_name} found in {clean_path}"
                            )

                            binary_string_list += matches

                            for i in matches:
                                if ("/" not in i and "!" not in i
                                    ) and len(i) > self.string_length:
                                    self.contains_patterns.append(i)

                        LOGGER.debug(f"{self.filename_pattern}")

                # fall back to the raw match list when no long product_name strings were found
                if self.contains_patterns:
                    return self.contains_patterns
                return binary_string_list

    def search_pattern(self, file_content, pattern):
        """find strings for CONTAINS_PATTERNS with product_name in them"""

        file_content_list = file_content.split("\n")
        matches = [
            i.strip() for i in file_content_list
            if re.search(pattern, i, re.IGNORECASE)
        ]
        LOGGER.debug(f"found matches = {matches}"
                     )  # TODO: regex highlight in these matched strings?
        return matches

    def search_version_string(self, matched_list):
        """finds version strings from matched list"""

        # TODO: add multiline string finding

        pattern1 = rf"{self.product_name}(.*){self.version_number}"
        # ^ this does not work for debian packages

        # pattern2 = rf"{self.product_name}(.*)([0-9]+[.-][0-9]+([.-][0-9]+)?)"
        # this matches patterns like:
        # product1.2.3
        # product 1.2.3
        # product-1.2.3
        # product.1.2.3
        # product version 1.2.3
        # product v1.2.3(1)

        version_strings = [
            i for i in matched_list
            if re.search(pattern1, i, re.IGNORECASE) if not i.endswith(
                ".debug")  # removes .debug, so, this does not gets printed
        ]
        LOGGER.debug(f"found version-string matches = {version_strings}"
                     )  # TODO: regex highlight in these matched strings?
        return version_strings

    def parse_filename(self, filename):
        """
        returns package_name/product_name from package_filename of types .rpm, .deb, etc.
        Example: package_filename = openssh-client_8.4p1-5ubuntu1_amd64.deb
            here, package_name = openssh-client
        """

        # resolving directory names
        if sys.platform == "win32":
            filename = filename.split("\\")[-1]
        else:
            filename = filename.split("/")[-1]

        # if extractable, then parsing for different types of files accordingly
        if self.extractor.can_extract(filename):
            if filename.endswith(".tar.xz"):
                product_name = filename.rsplit("-", 3)[0]
                version_number = filename.rsplit("-", 3)[1]
                # example: libarchive-3.5.1-1-aarch64.pkg.tar.xz
            elif filename.endswith(".deb") or filename.endswith(".ipk"):
                product_name = filename.rsplit("_")[0]
                version_number = filename.rsplit("_")[1]
                # example: varnish_6.4.0-3_amd64.deb
            else:
                product_name = filename.rsplit("-", 2)[0]
                version_number = filename.rsplit("-", 2)[1]

            LOGGER.debug(
                f"Parsing file '{self.filename}': Results: product_name='{product_name}', version_number='{version_number}'"
            )
            return product_name, version_number
        else:
            # raise error for unknown archive types
            with ErrorHandler(mode=ErrorMode.NoTrace, logger=LOGGER):
                raise UnknownArchiveType(filename)

    def find_vendor_product(self):
        """find vendor-product pairs from database"""

        LOGGER.debug(
            f"checking for product_name='{self.product_name}' and version_name='{self.version_number}' in the database"
        )

        CVEDB.db_open(self)
        cursor = self.connection.cursor()

        # finding out all distinct (vendor, product) pairs with the help of product_name
        query = """
            SELECT distinct vendor, product FROM cve_range
            WHERE product=(:product);
        """

        cursor.execute(query, {"product": self.product_name})
        data = cursor.fetchall()

        # checking if (vendor, product) was found in the database
        if data:
            # warning the user to select the vendor-product pairs manually if multiple pairs are found
            if len(data) != 1:
                LOGGER.warning(
                    textwrap.dedent(f"""
                            ===============================================================
                            Multiple ("vendor", "product") pairs found for "{self.product_name}"
                            Please manually select the appropriate pair.
                            ===============================================================
                        """))
            return data  # [('vendor', 'product')]
        else:
            if self.product_name:
                # removing numeric characters from the product_name
                if any(char.isdigit() for char in self.product_name):
                    LOGGER.debug(
                        f"removing digits from product_name={self.product_name}"
                    )
                    self.product_name = "".join(
                        filter(lambda x: not x.isdigit(), self.product_name))
                    return self.find_vendor_product()
                else:
                    # raise error and ask for product_name
                    LOGGER.warning(
                        textwrap.dedent(f"""
                                =================================================================
                                No match was found for "{self.product_name}" in database.
                                Please check your file or try specifying "product_name" explicitly.
                                =================================================================
                            """))
                    return []

        CVEDB.db_close(self)

    def output(self):
        """display beautiful output for Helper-Script"""

        self.CONSOLE.rule(
            f"[bold dark_magenta]{self.product_name.capitalize()}Checker")

        rprint(
            textwrap.dedent(f"""
                [bright_black]# Copyright (C) 2021 Intel Corporation
                # SPDX-License-Identifier: GPL-3.0-or-later[/]


                [yellow]\"\"\"
                CVE checker for {self.product_name}:

                <provide reference links here>
                \"\"\"[/]
                [magenta]from[/] cve_bin_tool.checkers [magenta]import[/] Checker


                [red]class[/] [blue]{(self.product_name).capitalize()}Checker[/](Checker):"""
                            ))

        # output: long human readable strings
        print("\tCONTAINS_PATTERNS = [")
        for common_strings in sorted(self.contains_patterns):
            if ".debug" in common_strings:
                rprint(
                    f'\t\t[red]r"{common_strings}"[/] <--- not recommended to use this form of strings'
                )
                continue  # without this, the else branch below would also print this string
            if ".so" in common_strings:
                rprint(
                    f'\t\t[red]r"{common_strings}"[/] <--- not recommended to use this form of strings'
                )
            else:
                rprint(f'\t\t[green]r"{common_strings}"[/],')
        print("\t]")
        """
        Filenames found inside the binaries (patterns containing '.so' etc.) aren't
        ideal as VERSION_PATTERNS, because they can depend on who packaged the file
        (a pattern might work on fedora but not on ubuntu).
        """

        # output: filenames, that we search for binary strings
        print("\tFILENAME_PATTERNS = [")
        for filename in self.filename_pattern:
            if self.product_name == filename:
                rprint(
                    f'\t\t[cyan]r"{filename}"[/], <--- this is a really common filename pattern'
                )
            elif self.product_name in filename:
                if ".so" in filename:
                    rprint(f'\t\t[green]r"{filename}"[/],')
                else:
                    rprint(
                        f'\t\t[bright_green]r"{filename}"[/], <--- you could just use "{self.product_name}" to match this file'
                    )  # to single-handedly match filenames of type varnishd, varnishlog, varnishtop, etc.
            else:
                rprint(f'\t\t[green]r"{filename}"[/],')
        print("\t]")

        # output: version-strings
        print("\tVERSION_PATTERNS = [")
        for version_string in self.version_pattern:
            rprint(f'\t\t[green]r"{version_string}"[/],')
        print("\t]")

        # output: vendor-product pair
        print("\tVENDOR_PRODUCT = ", end="")
        rprint(self.vendor_product)

        self.CONSOLE.rule()
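
End to end, the helper is pointed at a package file, mines the extracted binaries for strings, and prints a checker skeleton. A hedged usage sketch (the .deb filename is illustrative and would need to exist locally for extraction to work):

    hs = HelperScript("varnish_6.4.0-3_amd64.deb", string_length=40)
    hs.extract_and_parse_file(hs.filename)  # fills contains_patterns, version_pattern, ...
    hs.output()  # prints the checker skeleton via the rprint calls above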