async def cache_update(self, session, url, sha, chunk_size=16 * 1024):
        """
        Update the cache for a single year of NVD data.

        The file named by the last path component of ``url`` is kept in
        ``self.cachedir``.  If a cached copy exists and its decompressed
        contents hash to ``sha`` nothing is downloaded; otherwise the stale
        copy is removed and the file is fetched again.

        session: object whose ``get(url)`` result is used as an async context
            manager yielding the HTTP response (presumably an aiohttp client
            session -- confirm against the caller).
        url: location of the gzip-compressed feed.
        sha: expected SHA-256 hex digest of the *decompressed* data; letter
            case is ignored.
        chunk_size: number of bytes read per iteration while hashing an
            existing cached file.

        Errors are raised inside ``ErrorHandler``:
        ``AttemptedToWriteOutsideCachedir`` when the target path escapes the
        cache directory, ``NVDRateLimit`` on HTTP 403 and ``SHAMismatch`` when
        the downloaded data does not match ``sha``.
        """
        # Normalise once so that both the cached-file check and the
        # post-download check compare digests case-insensitively.
        sha = sha.upper()
        filename = url.split("/")[-1]
        # Ensure we only write to files within the cachedir
        cachedir = os.path.abspath(self.cachedir)
        filepath = os.path.abspath(os.path.join(cachedir, filename))
        # Compare against the directory *with* a trailing separator so a
        # sibling such as "<cachedir>-old" (or the cachedir itself) is rejected.
        if not filepath.startswith(os.path.join(cachedir, "")):
            with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                raise AttemptedToWriteOutsideCachedir(filepath)
        # Validate the contents of the cached file
        if os.path.isfile(filepath):
            calculate = hashlib.sha256()
            async with GzipFile(filepath, "rb") as f:
                chunk = await f.read(chunk_size)
                while chunk:
                    calculate.update(chunk)
                    chunk = await f.read(chunk_size)
            # Exit if the cached copy is correct, otherwise drop it and update
            gotsha = calculate.hexdigest().upper()
            if gotsha == sha:
                self.LOGGER.debug(f"Correct SHA for {filename}")
                return
            os.unlink(filepath)
            self.LOGGER.warning(
                f"SHA mismatch for {filename} (have: {gotsha}, want: {sha})"
            )
        self.LOGGER.debug(f"Updating CVE cache for {filename}")

        async with session.get(url) as response:
            # Raise better error message on ratelimit by NVD
            if response.status == 403:
                with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                    raise NVDRateLimit(
                        f"{url} : download failed, you may have been rate limited."
                    )
            # Raise for all other 4xx errors
            response.raise_for_status()
            gzip_data = await response.read()
        json_data = gzip.decompress(gzip_data)
        gotsha = hashlib.sha256(json_data).hexdigest().upper()
        # Verify before touching the disk so unverified data is never cached.
        if gotsha != sha:
            with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
                raise SHAMismatch(f"{url} (have: {gotsha}, want: {sha})")
            # Only reached if the handler suppressed the error: leave no file.
            return
        async with FileIO(filepath, "wb") as filepath_handle:
            await filepath_handle.write(gzip_data)
Exemple #2
0
    def parse_data(self, fields: Set[str], data: Iterable) -> None:
        """
        Populate ``self.parsed_data`` from rows of triage input.

        fields: names of the columns/keys available in the input.  "vendor",
            "product" and "version" must all be present, otherwise
            MissingFieldsError is raised inside an ErrorHandler.
        data: iterable of rows supporting ``row[key]`` and
            ``row.get(key, default)``.

        Each row is stored under its ProductInfo key.  The per-CVE entry is
        keyed by the stripped "cve_number" (or "default" when that is empty)
        and the "paths" entry is replaced by the set of comma-separated,
        stripped paths of the row.
        """
        missing_fields = {"vendor", "product", "version"} - fields
        if missing_fields:
            with ErrorHandler(mode=self.error_mode):
                raise MissingFieldsError(
                    f"{missing_fields} are required fields")

        for entry in data:
            vendor = entry["vendor"].strip()
            product = entry["product"].strip()
            version = entry["version"].strip()
            key = ProductInfo(vendor, product, version)

            # Build the record before touching parsed_data so a bad row does
            # not leave a half-created entry behind.
            details = {
                "remarks": Remarks(str(entry.get("remarks", "")).strip()),
                "comments": entry.get("comments", "").strip(),
                "severity": entry.get("severity", "").strip(),
            }
            bucket = self.parsed_data[key]
            bucket[entry.get("cve_number", "").strip() or "default"] = details
            bucket["paths"] = {
                part.strip() for part in entry.get("paths", "").split(",")
            }
Exemple #3
0
    def parse_filename(self, filename):
        """
        Split a package file name into (product_name, version_number).

        Example: openssh-client_8.4p1-5ubuntu1_amd64.deb
            gives product_name = openssh-client

        Any leading directories are dropped first.  Files that
        self.extractor cannot extract raise UnknownArchiveType inside an
        ErrorHandler.
        """

        # keep only the last path component, using the platform separator
        separator = "\\" if sys.platform == "win32" else "/"
        filename = filename.split(separator)[-1]

        if not self.extractor.can_extract(filename):
            # raise error for unknown archive types
            with ErrorHandler(mode=ErrorMode.NoTrace, logger=LOGGER):
                raise UnknownArchiveType(filename)
            return None

        # pick the split that matches the naming scheme of the package type
        if filename.endswith(".tar.xz"):
            # example: libarchive-3.5.1-1-aarch64.pkg.tar.xz
            pieces = filename.rsplit("-", 3)
        elif filename.endswith((".deb", ".ipk")):
            # example: varnish_6.4.0-3_amd64.deb
            pieces = filename.rsplit("_")
        else:
            pieces = filename.rsplit("-", 2)
        product_name, version_number = pieces[0], pieces[1]

        LOGGER.debug(
            f"Parsing file '{self.filename}': Results: product_name='{product_name}', version_number='{version_number}'"
        )
        return product_name, version_number
Exemple #4
0
    def input_json(self) -> None:
        """
        Load self.filename as JSON and hand its rows to self.parse_data.

        The document must be a non-empty list; anything else raises
        InvalidJsonError inside an ErrorHandler.  The keys of the first
        element are passed on as the available field names.
        """
        with open(self.filename) as handle:
            records = json.load(handle)
            usable = bool(records) and isinstance(records, list)
            if not usable:
                with ErrorHandler(mode=self.error_mode):
                    raise InvalidJsonError(self.filename)

            header = set(records[0].keys())
            self.parse_data(header, records)
Exemple #5
0
    def input_csv(self) -> None:
        """
        Read self.filename as CSV and hand its rows to self.parse_data.

        The header row supplies the field names.  A file without a header
        raises InvalidCsvError inside an ErrorHandler.
        """
        with open(self.filename) as handle:
            reader = csv.DictReader(handle)
            header = None if reader is None else reader.fieldnames
            if header is None:
                with ErrorHandler(mode=self.error_mode):
                    raise InvalidCsvError(self.filename)

            # The reader is lazy, so it must be consumed while the file is open.
            self.parse_data(set(header), reader)
def main(argv=None):
    """
    Entry point of the CSV2CVE wrapper.

    Every argument ending in ".csv" is rewritten to ``-i=<file>`` and the
    resulting argument list is forwarded to ``cli.main``, whose return value
    is returned.

    argv: argument sequence including the program name; defaults to
        ``sys.argv``.  The sequence passed in is never modified.

    Raises InsufficientArgs (inside an ErrorHandler) when fewer than two
    arguments are given or when no argument names a csv file.
    """
    logger = LOGGER.getChild("CSV2CVE")
    # Work on a copy: rewriting the arguments in place would silently alter
    # the caller's list, or sys.argv itself when no list was supplied.
    argv = list(argv or sys.argv)
    if len(argv) < 2:
        with ErrorHandler(logger=logger):
            raise InsufficientArgs("csv file required")

    found_csv = False
    for idx, arg in enumerate(argv):
        if arg.endswith(".csv"):
            argv[idx] = f"-i={arg}"
            found_csv = True
    if found_csv:
        return cli.main(argv)

    with ErrorHandler(logger=logger):
        raise InsufficientArgs("csv file required")
Exemple #7
0
 def parse_config(self) -> Mapping[str, Any]:
     """
     Load self.filename and flatten its sections into self.config_data.

     ".toml" files are read with toml.load and ".yaml" files with
     yaml.safe_load; the values of the top-level mapping are combined in a
     ChainMap.  A missing file raises FileNotFoundError and any other
     extension raises UnknownConfigType, both inside an ErrorHandler.
     Returns self.config_data.
     """
     if not os.path.isfile(self.filename):
         with ErrorHandler(mode=self.error_mode):
             raise FileNotFoundError(self.filename)

     # choose the parser from the file extension
     if self.filename.endswith(".toml"):
         loader = toml.load
     elif self.filename.endswith(".yaml"):
         loader = yaml.safe_load
     else:
         loader = None
         with ErrorHandler(mode=self.error_mode):
             raise UnknownConfigType(
                 f"config file: {self.filename} is not supported.")

     if loader is not None:
         with open(self.filename) as f:
             sections = loader(f)
             self.config_data = ChainMap(*sections.values())
     return self.config_data
Exemple #8
0
 def parse_input(self) -> DefaultDict[ProductInfo, TriageData]:
     """
     Parse self.filename and return self.parsed_data.

     ".csv" files go through input_csv and ".json" files through
     input_json; other extensions are not parsed.  A missing file raises
     FileNotFoundError inside an ErrorHandler.
     """
     if not os.path.isfile(self.filename):
         with ErrorHandler(mode=self.error_mode):
             raise FileNotFoundError(self.filename)

     # (extension, name of the method that reads it), first match wins
     readers = ((".csv", "input_csv"), (".json", "input_json"))
     for suffix, method_name in readers:
         if self.filename.endswith(suffix):
             getattr(self, method_name)()
             break
     return self.parsed_data
 def load_curl_version(self, version):
     """
     Return the CVE data cached for the given curl version.

     The file name is built from CURL_CVE_FILENAME_TEMPLATE inside
     self.cachedir.  A missing file raises
     CVEDataForCurlVersionNotInCache inside an ErrorHandler.
     """
     cache_name = self.CURL_CVE_FILENAME_TEMPLATE.format(version)
     cache_path = os.path.join(self.cachedir, cache_name)

     # the data must already have been downloaded
     if not os.path.isfile(cache_path):
         with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
             raise CVEDataForCurlVersionNotInCache(version)

     # parse the cached JSON, then report how many CVEs it holds
     with open(cache_path, "rb") as handle:
         cves_for_version = json.load(handle)
     self.LOGGER.debug(
         f"Curl Version {version} has {len(cves_for_version)} CVEs in dataset"
     )
     return cves_for_version
Exemple #10
0
 def load_nvd_year(self, year):
     """
     Return the CVE data cached for the given year.

     The gzip-compressed JSON file is named by NVDCVE_FILENAME_TEMPLATE
     inside self.cachedir.  A missing file raises CVEDataForYearNotInCache
     inside an ErrorHandler.
     """
     cache_name = self.NVDCVE_FILENAME_TEMPLATE.format(year)
     cache_path = os.path.join(self.cachedir, cache_name)

     # the data must already have been downloaded
     if not os.path.isfile(cache_path):
         with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
             raise CVEDataForYearNotInCache(year)

     # decompress and parse, then report how many CVEs the year holds
     with gzip.open(cache_path, "rb") as handle:
         cves_for_year = json.load(handle)
     self.LOGGER.debug(
         f'Year {year} has {len(cves_for_year["CVE_Items"])} CVEs in dataset'
     )
     return cves_for_year
Exemple #11
0
def main(argv=None):
    """
    Scan a binary file for certain open source libraries that may have CVEs.

    Parses the command line (optionally merged with a config file and
    built-in defaults), prepares the CVE database, scans the given directory
    and/or reads the given input file, reports the findings and returns the
    number of products with known CVEs.

    argv: argument list including the program name; defaults to sys.argv.
    """
    argv = argv or sys.argv

    # Reset logger level to info
    LOGGER.setLevel(logging.INFO)

    parser = argparse.ArgumentParser(
        prog="cve-bin-tool",
        description=textwrap.dedent("""
            The CVE Binary Tool scans for a number of common, vulnerable open source
            components (openssl, libpng, libxml2, expat and a few others) to let you know
            if a given directory or binary file includes common libraries with known
            vulnerabilities.
            """),
        epilog=textwrap.fill(
            f'Available checkers: {", ".join(VersionScanner.available_checkers())}'
        ) + "\n\nPlease disclose issues responsibly!",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # Options describing what to scan
    input_group = parser.add_argument_group("Input")
    input_group.add_argument("directory",
                             help="directory to scan",
                             nargs="?",
                             default=None)
    input_group.add_argument(
        "-e",
        "--exclude",
        action=StringToListAction,
        help="Comma separated Exclude directory path",
        default=None,
    )

    input_group.add_argument(
        "-i",
        "--input-file",
        action="store",
        default="",
        help="provide input filename",
    )
    input_group.add_argument("-C",
                             "--config",
                             action="store",
                             default="",
                             help="provide config file")

    # Options describing how results are reported
    output_group = parser.add_argument_group("Output")
    output_group.add_argument("-q",
                              "--quiet",
                              action="store_true",
                              help="suppress output")
    output_group.add_argument(
        "-l",
        "--log",
        help="log level (default: info)",
        dest="log_level",
        action="store",
        choices=["debug", "info", "warning", "error", "critical"],
    )
    output_group.add_argument(
        "-o",
        "--output-file",
        action="store",
        help="provide output filename (default: output to stdout)",
    )
    output_group.add_argument(
        "--html-theme",
        action="store",
        help="provide custom theme directory for HTML Report",
    )
    output_group.add_argument(
        "-f",
        "--format",
        action="store",
        choices=["csv", "json", "console", "html", "pdf"],
        help="update output format (default: console)",
    )
    output_group.add_argument(
        "-c",
        "--cvss",
        action="store",
        help=
        "minimum CVSS score (as integer in range 0 to 10) to report (default: 0)",
    )
    output_group.add_argument(
        "-S",
        "--severity",
        action="store",
        choices=["low", "medium", "high", "critical"],
        help="minimum CVE severity to report (default: low)",
    )
    # Options that belong to no named group
    parser.add_argument("-V", "--version", action="version", version=VERSION)
    parser.add_argument(
        "-u",
        "--update",
        action="store",
        choices=["now", "daily", "never", "latest"],
        help="update schedule for NVD database (default: daily)",
    )
    parser.add_argument(
        "-x",
        "--extract",
        action="store_true",
        help="autoextract compressed files",
    )
    parser.add_argument(
        "--disable-version-check",
        action="store_true",
        help="skips checking for a new version",
    )

    # Options selecting which checkers run
    checker_group = parser.add_argument_group("Checkers")
    checker_group.add_argument(
        "-s",
        "--skips",
        dest="skips",
        action=StringToListAction,
        type=str,
        help="comma-separated list of checkers to disable",
    )
    checker_group.add_argument(
        "-r",
        "--runs",
        dest="runs",
        action=StringToListAction,
        type=str,
        help="comma-separated list of checkers to enable",
    )
    # Lowest-priority values: consulted last by the ChainMap built below.
    # NOTE(review): "extract" defaults to True and an unset -x is dropped from
    # the parsed arguments below, so extraction looks enabled unless a config
    # file supplies a falsy "extract" -- confirm this is intended.
    defaults = {
        "directory": "",
        "exclude": [],
        "input_file": "",
        "log_level": "info",
        "format": "console",
        "cvss": 0,
        "severity": "low",
        "update": "daily",
        "extract": True,
        "disable_version_check": False,
        "skips": "",
        "runs": "",
        "quiet": False,
        "output_file": "",
        "html_theme": "",
    }

    # Keep only truthy parsed values so that options left unset (None, "",
    # False) fall through to the config file and then to the defaults.
    with ErrorHandler(mode=ErrorMode.NoTrace):
        raw_args = parser.parse_args(argv[1:])
        args = {key: value for key, value in vars(raw_args).items() if value}

    configs = {}
    if args.get("config"):
        conf = ConfigParser(args["config"])
        configs = conf.parse_config()

    # Lookup precedence: command line, then config file, then defaults
    args = ChainMap(args, configs, defaults)

    # logging and error related settings
    if args["log_level"]:
        LOGGER.setLevel(args["log_level"].upper())

    # --quiet overrides any requested log level
    if args["quiet"]:
        LOGGER.setLevel(logging.CRITICAL)

    # Derive traceback verbosity from the effective log level: levels 1-10
    # (DEBUG and below) give full traces, 50 and above (CRITICAL) give none,
    # everything else a truncated trace.
    if 0 < LOGGER.level <= 10:
        error_mode = ErrorMode.FullTrace
    elif LOGGER.level >= 50:
        error_mode = ErrorMode.NoTrace
    else:
        error_mode = ErrorMode.TruncTrace

    if platform.system() != "Linux":
        warning_nolinux = """
                          **********************************************
                          Warning: this utility was developed for Linux.
                          You may need to install additional utilities
                          to use it on other operating systems.
                          **********************************************
                          """
        LOGGER.warning(warning_nolinux)

    # Database update related settings
    # Connect to the database
    cvedb_orig = CVEDB(version_check=not args["disable_version_check"],
                       error_mode=error_mode)

    # if OLD_CACHE_DIR (from cvedb.py) exists, print warning
    if os.path.exists(OLD_CACHE_DIR):
        LOGGER.warning(
            f"Obsolete cache dir {OLD_CACHE_DIR} is no longer needed and can be removed."
        )

    # Clear data if -u now is set
    if args["update"] == "now":
        cvedb_orig.clear_cached_data()

    if args["update"] == "latest":
        cvedb_orig.refresh_cache_and_update_db()

    # update db if needed
    # ("now" and "latest" also reach this staleness check; only "never" skips
    # it, in which case an empty cache is treated as an error)
    if args["update"] != "never":
        cvedb_orig.get_cvelist_if_stale()
    else:
        LOGGER.warning("Not verifying CVE DB cache")
        if not cvedb_orig.nvd_years():
            with ErrorHandler(mode=error_mode, logger=LOGGER):
                raise EmptyCache(cvedb_orig.cachedir)

    # CVE Database validation
    if not cvedb_orig.check_cve_entries():
        with ErrorHandler(mode=error_mode, logger=LOGGER):
            raise CVEDataMissing("No data in CVE Database")

    # Input validation
    # At least one of a directory to scan or an input file is required
    if not args["directory"] and not args["input_file"]:
        parser.print_usage()
        with ErrorHandler(logger=LOGGER, mode=ErrorMode.NoTrace):
            raise InsufficientArgs(
                "Please specify a directory to scan or an input file required")

    if args["directory"] and not os.path.exists(args["directory"]):
        parser.print_usage()
        with ErrorHandler(logger=LOGGER, mode=ErrorMode.NoTrace):
            raise FileNotFoundError("Directory/File doesn't exist")

    # Checkers related settings
    # An explicit run list replaces the skip list with every registered
    # checker entry point whose name is not in that run list.
    skips = args["skips"]
    if args["runs"]:
        runs = args["runs"]
        skips = list(
            map(
                lambda checker: checker.name,
                filter(
                    lambda checker: checker.name not in runs,
                    pkg_resources.iter_entry_points("cve_bin_tool.checker"),
                ),
            ))

    # CSVScanner related settings
    score = 0
    if args["severity"]:
        # Set minimum CVSS score based on severity
        cvss_score = {"low": 0, "medium": 4, "high": 7, "critical": 9}
        score = cvss_score[args["severity"]]
    # A positive --cvss value takes precedence over the severity-derived score
    if int(args["cvss"]) > 0:
        score = int(args["cvss"])

    with CVEScanner(score=score) as cve_scanner:
        triage_data: TriageData
        total_files: int = 0
        parsed_data: Dict[ProductInfo, TriageData] = {}

        if args["input_file"]:
            input_engine = InputEngine(args["input_file"],
                                       logger=LOGGER,
                                       error_mode=error_mode)
            parsed_data = input_engine.parse_input()
            # Without a directory, look up CVEs directly for every product
            # listed in the input file.
            if not args["directory"]:
                for product_info, triage_data in parsed_data.items():
                    LOGGER.warning(f"{product_info}, {triage_data}")
                    cve_scanner.get_cves(product_info, triage_data)
        if args["directory"]:
            version_scanner = VersionScanner(
                should_extract=args["extract"],
                exclude_folders=args["exclude"],
                error_mode=error_mode,
            )
            version_scanner.remove_skiplist(skips)
            version_scanner.print_checkers()
            for scan_info in version_scanner.recursive_scan(args["directory"]):
                if scan_info:
                    product_info, path = scan_info
                    LOGGER.debug(f"{product_info}: {path}")
                    # Products not present in the input file get an empty
                    # "default" triage entry.
                    triage_data = parsed_data.get(product_info,
                                                  {"default": {}})
                    # Ignore paths from triage_data if we are scanning directory
                    triage_data["paths"] = {path}
                    cve_scanner.get_cves(product_info, triage_data)
            total_files = version_scanner.total_scanned_files

        LOGGER.info("")
        LOGGER.info("Overall CVE summary: ")
        if args["input_file"]:
            LOGGER.info(
                f"There are {cve_scanner.products_with_cve} products with known CVEs detected"
            )
        else:
            LOGGER.info(
                f"There are {cve_scanner.products_with_cve} files with known CVEs detected"
            )
        # Build a report when CVEs were found; html and pdf reports are
        # built even when nothing was found.
        if cve_scanner.products_with_cve > 0 or (args["format"] == "html"
                                                 or args["format"] == "pdf"):
            affected_string = ", ".join(
                map(
                    lambda product_version: "".join(str(product_version)),
                    cve_scanner.affected(),
                ))
            LOGGER.info(f"Known CVEs in {affected_string}:")

            # Creates a Object for OutputEngine
            output = OutputEngine(
                all_cve_data=cve_scanner.all_cve_data,
                scanned_dir=args["directory"],
                filename=args["output_file"],
                themes_dir=args["html_theme"],
                products_with_cve=cve_scanner.products_with_cve,
                products_without_cve=cve_scanner.products_without_cve,
                total_files=total_files,
            )

            # --quiet suppresses the report itself, not only the log output
            if not args["quiet"]:
                output.output_file(args["format"])

        # Use the number of products with known cves as error code
        # as requested by folk planning to automate use of this script.
        # If no cves found, then the program exits cleanly.
        return cve_scanner.products_with_cve
    def check_file(self):
        """
        Validate the package list file named by self.input_file.

        The file must exist, be non-empty and end in ".txt"; violations raise
        FileNotFoundError, EmptyTxtError or InvalidListError inside an
        ErrorHandler.  A file ending in "requirements.txt" needs no further
        checks.  Any other list is checked against the package manager of the
        running distribution and packages that cannot be found are reported
        through LOGGER.warning.
        """
        input_file = self.input_file
        error_mode = self.error_mode

        if not isfile(input_file):
            with ErrorHandler(mode=error_mode):
                raise FileNotFoundError(input_file)

        if getsize(input_file) == 0:
            with ErrorHandler(mode=error_mode):
                raise EmptyTxtError(input_file)

        if not input_file.endswith(".txt"):
            with ErrorHandler(mode=error_mode):
                raise InvalidListError(
                    "Invalid Package list file format (should be .txt)")

        # pip requirement files are not checked against a package manager
        if input_file.endswith("requirements.txt"):
            return

        def query(*command):
            # feed every line of the list to ``command`` through xargs
            return run(["xargs", "-a", input_file, *command],
                       capture_output=True)

        distro_id = distro.id()

        if distro_id not in SUPPORTED_DISTROS:
            LOGGER.warning(
                f"Package list support only available for {','.join(SUPPORTED_DISTROS)}!"
            )
            with ErrorHandler(mode=error_mode):
                raise InvalidListError(
                    f"{distro.id().capitalize()} is not supported")

        elif distro_id in DEB_DISTROS:
            # Simulate installation on Debian based system using apt-get to check if the file is valid
            output = query("apt-get", "install", "-s")

            if output.returncode != 0:
                invalid_packages = re.findall(
                    r"E: Unable to locate package (.+)",
                    output.stderr.decode("utf-8"),
                )
                LOGGER.warning(
                    f"Invalid Package found: {','.join(invalid_packages)}")

        elif distro_id in RPM_DISTROS:
            output = query("rpm", "-qi")

            not_installed_packages = re.findall(
                r"package (.+) is not installed",
                output.stdout.decode("utf-8"))
            if not_installed_packages:
                LOGGER.warning(
                    f"The packages {','.join(not_installed_packages)} seems to be not installed.\nIt is either an invalid package or not installed.\nUse `sudo yum install $(cat package-list)` to install all packages"
                )

        elif distro_id in PACMAN_DISTROS:
            output = query("pacman", "-Qk")

            not_installed_packages = re.findall(
                r"error: package '(.+)' was not found",
                output.stderr.decode("utf-8"),
            )
            if not_installed_packages:
                LOGGER.warning(
                    f"The packages {','.join(not_installed_packages)} seems to be not installed.\nIt is either an invalid package or not installed.\nUse `sudo pacman -S $(cat package-list)` to install all packages"
                )

        else:
            # TODO: Replace below error handling with a proper pip install dry run
            # See: https://github.com/pypa/pip/issues/53
            with ErrorHandler(mode=error_mode):
                raise InvalidListError("Invalid Package list")
Exemple #13
0
def main(argv=None):
    """
    Command-line entry point of the checker helper script.

    Parses the arguments, sets the log level, then runs a HelperScript on the
    first file name given and prints its findings.

    argv: argument list including the program name; defaults to sys.argv.
    """
    argv = argv or sys.argv

    parser = argparse.ArgumentParser(
        prog="helper-script",
        description=textwrap.dedent("""
                Helps contributors who want to write a new cve-bin-tool checker find common filenames,
                version strings, and other necessary data for building a binary checker
                """),
    )

    # (flags, options) pairs, registered in the order shown in --help
    argument_table = [
        # scan directory args
        (
            ["filenames"],
            {"help": "files to scan", "nargs": "+", "default": []},
        ),
        # product-name args
        (
            ["-p", "--product"],
            {
                "help": "provide product-name that would be searched",
                "dest": "product_name",
                "action": "store",
                "default": None,
            },
        ),
        # version-name args
        (
            ["-v", "--version"],
            {
                "help": "provide version that would be searched",
                "dest": "version_number",
                "action": "store",
                "default": None,
            },
        ),
        # log level args
        (
            ["-l", "--log"],
            {
                "help": "log level (default: warning)",
                "dest": "log_level",
                "action": "store",
                "choices": ["debug", "info", "warning", "error", "critical"],
                "default": "warning",
            },
        ),
        # contains-patterns string length args
        (
            ["--string-length"],
            {
                "help": "changes the output string-length for CONTAINS_PATTERNS (default: %(default)s)",
                "type": int,
                "action": "store",
                "default": 40,
            },
        ),
    ]
    for flags, options in argument_table:
        parser.add_argument(*flags, **options)

    with ErrorHandler(mode=ErrorMode.NoTrace):
        parsed = vars(parser.parse_args(argv[1:]))
        # falsy values are left out so the parser defaults show through
        supplied = {name: value for name, value in parsed.items() if value}
        fallback = {name: parser.get_default(name) for name in parsed}

    args = ChainMap(supplied, fallback)

    LOGGER.setLevel(args["log_level"].upper())

    LOGGER.debug(f"Given filenames: {args['filenames']}")
    LOGGER.info(f"Scanning only the first filename: '{args['filenames'][0]}'")
    first_file = args["filenames"][0]
    helper = HelperScript(
        first_file,
        product_name=args["product_name"],
        version_number=args["version_number"],
        string_length=args["string_length"],
    )

    # Parsing, Extracting and Searching for version-strings
    helper.extract_and_parse_file(first_file)

    # output on console
    helper.output()