Esempio n. 1
0
    def __init__(self,
                 filename,
                 product_name=None,
                 version_number=None,
                 string_length=40):
        """Set up the helper state for one package file.

        The product name and version number are first parsed from
        ``filename``; a truthy ``product_name`` / ``version_number`` argument
        takes precedence over the parsed value.

        Args:
            filename: path of the package file to inspect.
            product_name: optional override for the parsed product name.
            version_number: optional override for the parsed version number.
            string_length: stored on the instance as ``string_length``.
        """
        self.filename = filename
        self.string_length = string_length

        # parse_filename() relies on self.filename and self.extractor being set
        self.extractor = Extractor()
        parsed_product, parsed_version = self.parse_filename(filename)
        self.product_name = product_name if product_name else parsed_product
        self.version_number = (version_number
                               if version_number else parsed_version)

        # database handle and location
        self.connection = None
        self.dbpath = os.path.join(DISK_LOCATION_DEFAULT, DBNAME)

        # directory walker used during extraction
        self.walker = DirWalk().walk

        # result accumulators (would be used for output in future)
        self.contains_patterns = []
        self.filename_pattern = []
        self.version_pattern = []

        # needs product_name, connection and dbpath to be in place already
        self.vendor_product = self.find_vendor_product()

        # scanner used to inspect files and their version strings
        self.version_scanner = VersionScanner()
Esempio n. 2
0
 def test_exclude(self, caplog):
     """Test that the exclude paths are not scanned.

     Runs the CLI over this test directory with its ``assets/``
     sub-directory passed to ``-e`` and then delegates the log inspection
     to ``check_exclude_log``.
     """
     test_path = os.path.abspath(os.path.dirname(__file__))
     exclude_path = os.path.join(test_path, "assets/")
     checkers = list(VersionScanner().checkers.keys())
     with caplog.at_level(logging.INFO):
         # exclude_path is a single string: pass it unchanged.  Calling
         # ",".join() on a str iterates its characters and would hand the
         # CLI a comma-separated list of one-character fragments.
         main(["cve-bin-tool", test_path, "-e", exclude_path])
     self.check_exclude_log(caplog, exclude_path, checkers)
Esempio n. 3
0
 def setup_class(cls):
     """Create the class-wide database handle, scanner and temp directories.

     The CVE list is only refreshed when the ``UPDATE_DB`` environment
     variable equals ``"1"``; otherwise a notice is printed and the refresh
     is skipped.
     """
     cls.cvedb = CVEDB()
     refresh_requested = os.getenv("UPDATE_DB") == "1"
     if not refresh_requested:
         print("Skip NVD database updates.")
     else:
         cls.cvedb.get_cvelist_if_stale()

     # scanner instance with extraction switched on
     cls.scanner = VersionScanner(should_extract=True)

     # scratch directories: one for mapping tests, one for tests that
     # require downloads
     make_temp_dir = tempfile.mkdtemp
     cls.mapping_test_dir = make_temp_dir(prefix="mapping-test-")
     cls.package_test_dir = make_temp_dir(prefix="package_test-")
Esempio n. 4
0
def main(argv=None):
    """Scan a binary file for certain open source libraries that may have CVEs

    Parses the command line, optionally layers a config file underneath it,
    updates/validates the CVE database according to the ``update`` setting,
    and then looks up CVEs for products read from an input file and/or found
    by scanning a directory.

    Args:
        argv: full argument vector with the program name at index 0; falls
            back to ``sys.argv`` when omitted or empty.

    Returns:
        ``cve_scanner.products_with_cve``, which callers use as the exit
        code.
    """
    argv = argv or sys.argv

    # Reset logger level to info
    LOGGER.setLevel(logging.INFO)

    parser = argparse.ArgumentParser(
        prog="cve-bin-tool",
        description=textwrap.dedent("""
            The CVE Binary Tool scans for a number of common, vulnerable open source
            components (openssl, libpng, libxml2, expat and a few others) to let you know
            if a given directory or binary file includes common libraries with known
            vulnerabilities.
            """),
        epilog=textwrap.fill(
            f'Available checkers: {", ".join(VersionScanner.available_checkers())}'
        ) + "\n\nPlease disclose issues responsibly!",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    input_group = parser.add_argument_group("Input")
    input_group.add_argument("directory",
                             help="directory to scan",
                             nargs="?",
                             default=None)
    input_group.add_argument(
        "-e",
        "--exclude",
        action=StringToListAction,
        help="Comma separated Exclude directory path",
        default=None,
    )

    input_group.add_argument(
        "-i",
        "--input-file",
        action="store",
        default="",
        help="provide input filename",
    )
    input_group.add_argument("-C",
                             "--config",
                             action="store",
                             default="",
                             help="provide config file")

    output_group = parser.add_argument_group("Output")
    output_group.add_argument("-q",
                              "--quiet",
                              action="store_true",
                              help="suppress output")
    output_group.add_argument(
        "-l",
        "--log",
        help="log level (default: info)",
        dest="log_level",
        action="store",
        choices=["debug", "info", "warning", "error", "critical"],
    )
    output_group.add_argument(
        "-o",
        "--output-file",
        action="store",
        help="provide output filename (default: output to stdout)",
    )
    output_group.add_argument(
        "--html-theme",
        action="store",
        help="provide custom theme directory for HTML Report",
    )
    output_group.add_argument(
        "-f",
        "--format",
        action="store",
        choices=["csv", "json", "console", "html", "pdf"],
        help="update output format (default: console)",
    )
    output_group.add_argument(
        "-c",
        "--cvss",
        action="store",
        help=
        "minimum CVSS score (as integer in range 0 to 10) to report (default: 0)",
    )
    output_group.add_argument(
        "-S",
        "--severity",
        action="store",
        choices=["low", "medium", "high", "critical"],
        help="minimum CVE severity to report (default: low)",
    )
    parser.add_argument("-V", "--version", action="version", version=VERSION)
    parser.add_argument(
        "-u",
        "--update",
        action="store",
        choices=["now", "daily", "never", "latest"],
        help="update schedule for NVD database (default: daily)",
    )
    parser.add_argument(
        "-x",
        "--extract",
        action="store_true",
        help="autoextract compressed files",
    )
    parser.add_argument(
        "--disable-version-check",
        action="store_true",
        help="skips checking for a new version",
    )

    checker_group = parser.add_argument_group("Checkers")
    checker_group.add_argument(
        "-s",
        "--skips",
        dest="skips",
        action=StringToListAction,
        type=str,
        help="comma-separated list of checkers to disable",
    )
    checker_group.add_argument(
        "-r",
        "--runs",
        dest="runs",
        action=StringToListAction,
        type=str,
        help="comma-separated list of checkers to enable",
    )
    # Lowest-priority values, consulted only when neither the command line
    # nor the config file supplies a setting (see the ChainMap below).
    defaults = {
        "directory": "",
        "exclude": [],
        "input_file": "",
        "log_level": "info",
        "format": "console",
        "cvss": 0,
        "severity": "low",
        "update": "daily",
        "extract": True,
        "disable_version_check": False,
        "skips": "",
        "runs": "",
        "quiet": False,
        "output_file": "",
        "html_theme": "",
    }

    with ErrorHandler(mode=ErrorMode.NoTrace):
        raw_args = parser.parse_args(argv[1:])
        # Keep only truthy values so that options left unset on the command
        # line fall through to the config file / defaults.
        args = {key: value for key, value in vars(raw_args).items() if value}

    configs = {}
    if args.get("config"):
        conf = ConfigParser(args["config"])
        configs = conf.parse_config()

    # Lookup order: command line first, then config file, then defaults.
    args = ChainMap(args, configs, defaults)

    # logging and error related settings
    if args["log_level"]:
        LOGGER.setLevel(args["log_level"].upper())

    # --quiet wins over any requested log level
    if args["quiet"]:
        LOGGER.setLevel(logging.CRITICAL)

    # Traceback verbosity follows logger verbosity: level 1-10 (DEBUG or
    # more verbose) gets full traces, 50+ (CRITICAL) gets none, anything
    # else gets truncated traces.
    if 0 < LOGGER.level <= 10:
        error_mode = ErrorMode.FullTrace
    elif LOGGER.level >= 50:
        error_mode = ErrorMode.NoTrace
    else:
        error_mode = ErrorMode.TruncTrace

    if platform.system() != "Linux":
        warning_nolinux = """
                          **********************************************
                          Warning: this utility was developed for Linux.
                          You may need to install additional utilities
                          to use it on other operating systems.
                          **********************************************
                          """
        LOGGER.warning(warning_nolinux)

    # Database update related settings
    # Connect to the database
    cvedb_orig = CVEDB(version_check=not args["disable_version_check"],
                       error_mode=error_mode)

    # if OLD_CACHE_DIR (from cvedb.py) exists, print warning
    if os.path.exists(OLD_CACHE_DIR):
        LOGGER.warning(
            f"Obsolete cache dir {OLD_CACHE_DIR} is no longer needed and can be removed."
        )

    # Clear data if -u now is set
    if args["update"] == "now":
        cvedb_orig.clear_cached_data()

    if args["update"] == "latest":
        cvedb_orig.refresh_cache_and_update_db()

    # update db if needed
    if args["update"] != "never":
        cvedb_orig.get_cvelist_if_stale()
    else:
        LOGGER.warning("Not verifying CVE DB cache")
        # With updates disabled, the cache must already hold NVD data
        if not cvedb_orig.nvd_years():
            with ErrorHandler(mode=error_mode, logger=LOGGER):
                raise EmptyCache(cvedb_orig.cachedir)

    # CVE Database validation
    if not cvedb_orig.check_cve_entries():
        with ErrorHandler(mode=error_mode, logger=LOGGER):
            raise CVEDataMissing("No data in CVE Database")

    # Input validation
    if not args["directory"] and not args["input_file"]:
        parser.print_usage()
        with ErrorHandler(logger=LOGGER, mode=ErrorMode.NoTrace):
            raise InsufficientArgs(
                "Please specify a directory to scan or an input file required")

    if args["directory"] and not os.path.exists(args["directory"]):
        parser.print_usage()
        with ErrorHandler(logger=LOGGER, mode=ErrorMode.NoTrace):
            raise FileNotFoundError("Directory/File doesn't exist")

    # Checkers related settings
    skips = args["skips"]
    if args["runs"]:
        # An explicit run list replaces the skip list: skip every registered
        # checker entry point whose name is not in `runs`.
        runs = args["runs"]
        skips = list(
            map(
                lambda checker: checker.name,
                filter(
                    lambda checker: checker.name not in runs,
                    pkg_resources.iter_entry_points("cve_bin_tool.checker"),
                ),
            ))

    # CVEScanner related settings
    score = 0
    if args["severity"]:
        # Set minimum CVSS score based on severity
        cvss_score = {"low": 0, "medium": 4, "high": 7, "critical": 9}
        score = cvss_score[args["severity"]]
    # A positive --cvss value overrides the severity-derived score
    if int(args["cvss"]) > 0:
        score = int(args["cvss"])

    with CVEScanner(score=score) as cve_scanner:
        triage_data: TriageData
        total_files: int = 0
        parsed_data: Dict[ProductInfo, TriageData] = {}

        if args["input_file"]:
            input_engine = InputEngine(args["input_file"],
                                       logger=LOGGER,
                                       error_mode=error_mode)
            parsed_data = input_engine.parse_input()
            # Without a directory to scan, the input file is the only source
            # of products; otherwise it only supplies triage data below.
            if not args["directory"]:
                for product_info, triage_data in parsed_data.items():
                    LOGGER.warning(f"{product_info}, {triage_data}")
                    cve_scanner.get_cves(product_info, triage_data)
        if args["directory"]:
            version_scanner = VersionScanner(
                should_extract=args["extract"],
                exclude_folders=args["exclude"],
                error_mode=error_mode,
            )
            version_scanner.remove_skiplist(skips)
            version_scanner.print_checkers()
            for scan_info in version_scanner.recursive_scan(args["directory"]):
                if scan_info:
                    product_info, path = scan_info
                    LOGGER.debug(f"{product_info}: {path}")
                    # Reuse triage data from the input file when the product
                    # appears there, else start from an empty default entry
                    triage_data = parsed_data.get(product_info,
                                                  {"default": {}})
                    # Ignore paths from triage_data if we are scanning directory
                    triage_data["paths"] = {path}
                    cve_scanner.get_cves(product_info, triage_data)
            total_files = version_scanner.total_scanned_files

        LOGGER.info("")
        LOGGER.info("Overall CVE summary: ")
        if args["input_file"]:
            LOGGER.info(
                f"There are {cve_scanner.products_with_cve} products with known CVEs detected"
            )
        else:
            LOGGER.info(
                f"There are {cve_scanner.products_with_cve} files with known CVEs detected"
            )
        # html and pdf formats reach the output step even when no CVE was
        # found
        if cve_scanner.products_with_cve > 0 or (args["format"] == "html"
                                                 or args["format"] == "pdf"):
            affected_string = ", ".join(
                map(
                    lambda product_version: "".join(str(product_version)),
                    cve_scanner.affected(),
                ))
            LOGGER.info(f"Known CVEs in {affected_string}:")

            # Creates a Object for OutputEngine
            output = OutputEngine(
                all_cve_data=cve_scanner.all_cve_data,
                scanned_dir=args["directory"],
                filename=args["output_file"],
                themes_dir=args["html_theme"],
                products_with_cve=cve_scanner.products_with_cve,
                products_without_cve=cve_scanner.products_without_cve,
                total_files=total_files,
            )

            if not args["quiet"]:
                output.output_file(args["format"])

        # Use the number of products with known cves as error code
        # as requested by folk planning to automate use of this script.
        # If no cves found, then the program exits cleanly.
        return cve_scanner.products_with_cve
Esempio n. 5
0
class HelperScript:
    """Helps contributors who want to write a new cve-bin-tool checker find common filenames, version strings, and other necessary data for building a binary checker"""

    CONSOLE = Console()
    LOGGER = LOGGER.getChild("HelperScript")

    def __init__(self,
                 filename,
                 product_name=None,
                 version_number=None,
                 string_length=40):
        """Parse ``filename`` and look up its vendor/product pairs.

        Args:
            filename: path of the package file to inspect.
            product_name: when truthy, overrides the product name parsed
                from ``filename``.
            version_number: when truthy, overrides the version number parsed
                from ``filename``.
            string_length: a matched string must be longer than this to be
                recorded in ``contains_patterns``.
        """
        self.filename = filename
        self.extractor = Extractor()
        self.product_name, self.version_number = self.parse_filename(filename)
        if product_name:
            self.product_name = product_name
        if version_number:
            self.version_number = version_number
        self.string_length = string_length

        # for setting the database
        self.connection = None
        self.dbpath = os.path.join(DISK_LOCATION_DEFAULT, DBNAME)

        # for extraction
        self.walker = DirWalk().walk

        # for output (would use in future)
        self.contains_patterns = []
        self.filename_pattern = []
        self.version_pattern = []
        self.vendor_product = self.find_vendor_product()

        # for scanning files versions
        self.version_scanner = VersionScanner()

    def extract_and_parse_file(self, filename):
        """extracts and parses the file for common patterns, version strings and common filename patterns

        Walks every file extracted from ``filename``; for each file the
        version scanner reports as executable, collects the strings that
        mention the product name and, among them, the version strings.
        Results accumulate in ``version_pattern``, ``filename_pattern`` and
        ``contains_patterns``.

        Returns:
            ``self.contains_patterns`` when it is non-empty, otherwise the
            list of all matched strings from files in which a version string
            was found. ``None`` when the extractor cannot handle
            ``filename``.
        """

        with self.extractor as ectx:
            if not ectx.can_extract(filename):
                return None

            # product_name is literal text, not a regex: names such as
            # "libstdc++" would otherwise be invalid or over-matching patterns
            product_pattern = re.escape(self.product_name)

            binary_string_list = []
            for filepath in self.walker([ectx.extract(filename)]):
                clean_path = self.version_scanner.clean_file_path(filepath)
                LOGGER.debug(f"checking whether {clean_path} is binary")

                # see if the file is ELF binary file and parse for strings
                is_exec = self.version_scanner.is_executable(filepath)[0]
                if not is_exec:
                    continue

                LOGGER.debug(f"{clean_path} <--- this is an ELF binary")
                file_content = self.version_scanner.parse_strings(filepath)
                matches = self.search_pattern(file_content, product_pattern)

                # searching for version strings in the found matches
                version_string = self.search_version_string(matches)
                self.version_pattern += version_string

                # if version string is found in file, append it to filename_pattern
                if version_string:
                    self.filename_pattern.append(os.path.basename(filepath))
                    LOGGER.info(
                        f"matches for {self.product_name} found in {clean_path}"
                    )

                    binary_string_list += matches

                    # keep only long strings that contain neither "/" nor "!"
                    self.contains_patterns.extend(
                        match for match in matches
                        if "/" not in match and "!" not in match
                        and len(match) > self.string_length)

                LOGGER.debug(f"{self.filename_pattern}")

            # to resolve case when there are no strings common with product_name in them
            if self.contains_patterns:
                return self.contains_patterns
            return binary_string_list

    def search_pattern(self, file_content, pattern):
        """find strings for CONTAINS_PATTERNS with product_name in them

        Args:
            file_content: newline-separated strings from a binary.
            pattern: regular expression, matched case-insensitively against
                each line.

        Returns:
            The stripped lines of ``file_content`` that match ``pattern``.
        """

        matcher = re.compile(pattern, re.IGNORECASE)
        matches = [
            line.strip() for line in file_content.split("\n")
            if matcher.search(line)
        ]
        LOGGER.debug(f"found matches = {matches}"
                     )  # TODO: regex highlight in these matched strings?
        return matches

    def search_version_string(self, matched_list):
        """finds version strings from matched list

        A version string is an entry in which the product name is followed,
        anywhere later on the same line, by the version number (compared
        case-insensitively). Entries ending in ``.debug`` are dropped.
        """

        # TODO: add multiline string finding

        # Both parts are literal text: without escaping, the dots of a
        # version such as "1.2.3" would match any character.
        pattern1 = re.compile(
            rf"{re.escape(self.product_name)}(.*){re.escape(self.version_number)}",
            re.IGNORECASE,
        )
        # ^ this does not work for debian packages

        # pattern2 = rf"{self.product_name}(.*)([0-9]+[.-][0-9]+([.-][0-9]+)?)"
        # this matches patterns like:
        # product1.2.3
        # product 1.2.3
        # product-1.2.3
        # product.1.2.3
        # product version 1.2.3
        # product v1.2.3(1)

        version_strings = [
            entry for entry in matched_list
            if pattern1.search(entry) and not entry.endswith(
                ".debug")  # removes .debug, so, this does not gets printed
        ]
        LOGGER.debug(f"found version-string matches = {version_strings}"
                     )  # TODO: regex highlight in these matched strings?
        return version_strings

    def parse_filename(self, filename):
        """
        returns package_name/product_name from package_filename of types .rpm, .deb, etc.
        Example: package_filename = openssh-client_8.4p1-5ubuntu1_amd64.deb
            here, package_name = openssh-client

        Returns:
            A ``(product_name, version_number)`` tuple.

        Raises:
            UnknownArchiveType: raised inside an ``ErrorHandler`` when the
                extractor cannot handle the file.
            IndexError: when the file name lacks the separators its archive
                type is expected to contain.
        """

        # resolving directory names
        filename = os.path.basename(filename)

        # if extractable, then parsing for different types of files accordingly
        if self.extractor.can_extract(filename):
            if filename.endswith(".tar.xz"):
                # example: libarchive-3.5.1-1-aarch64.pkg.tar.xz
                parts = filename.rsplit("-", 3)
            elif filename.endswith((".deb", ".ipk")):
                # example: varnish_6.4.0-3_amd64.deb
                parts = filename.rsplit("_")
            else:
                parts = filename.rsplit("-", 2)
            product_name = parts[0]
            version_number = parts[1]

            LOGGER.debug(
                f"Parsing file '{self.filename}': Results: product_name='{product_name}', version_number='{version_number}'"
            )
            return product_name, version_number
        else:
            # raise error for unknown archive types
            with ErrorHandler(mode=ErrorMode.NoTrace, logger=LOGGER):
                raise UnknownArchiveType(filename)

    def find_vendor_product(self):
        """find vendor-product pairs from database

        Queries ``cve_range`` for the distinct ``(vendor, product)`` rows
        whose product equals ``self.product_name``. When nothing matches and
        the name contains digits, the digits are stripped from
        ``self.product_name`` (the attribute is updated) and the lookup is
        retried once.

        Returns:
            A list of ``(vendor, product)`` tuples; empty when no match is
            found.
        """

        LOGGER.debug(
            f"checking for product_name='{self.product_name}' and version_name='{self.version_number}' in the database"
        )

        # finding out all distinct (vendor, product) pairs with the help of product_name
        query = """
            SELECT distinct vendor, product FROM cve_range
            WHERE product=(:product);
        """

        CVEDB.db_open(self)
        try:
            cursor = self.connection.cursor()
            cursor.execute(query, {"product": self.product_name})
            data = cursor.fetchall()
        finally:
            # close on every path, including errors and the early returns
            # below, so the database handle is never left open
            CVEDB.db_close(self)

        # checking if (vendor, product) was found in the database
        if data:
            # warning the user to select the vendor-product pairs manually if multiple pairs are found
            if len(data) != 1:
                LOGGER.warning(
                    textwrap.dedent(f"""
                            ===============================================================
                            Multiple ("vendor", "product") pairs found for "{self.product_name}"
                            Please manually select the appropriate pair.
                            ===============================================================
                        """))
            return data  # [('vendor', 'product')]

        if not self.product_name:
            return []

        # removing numeric characters from the product_name
        if any(char.isdigit() for char in self.product_name):
            LOGGER.debug(
                f"removing digits from product_name={self.product_name}")
            self.product_name = "".join(
                char for char in self.product_name if not char.isdigit())
            return self.find_vendor_product()

        # raise error and ask for product_name
        LOGGER.warning(
            textwrap.dedent(f"""
                                =================================================================
                                No match was found for "{self.product_name}" in database.
                                Please check your file or try specifying the "product_name" also.
                                =================================================================
                            """))
        return []

    def output(self):
        """display beautiful output for Helper-Script

        Prints a checker class skeleton for the product: the header,
        ``CONTAINS_PATTERNS``, ``FILENAME_PATTERNS``, ``VERSION_PATTERNS``
        and ``VENDOR_PRODUCT``, filled from the data collected on this
        instance.
        """

        self.CONSOLE.rule(
            f"[bold dark_magenta]{self.product_name.capitalize()}Checker")

        rprint(
            textwrap.dedent(f"""
                [bright_black]# Copyright (C) 2021 Intel Corporation
                # SPDX-License-Identifier: GPL-3.0-or-later[/]


                [yellow]\"\"\"
                CVE checker for {self.product_name}:

                <provide reference links here>
                \"\"\"[/]
                [magenta]from[/] cve_bin_tool.checkers [magenta]import[/] Checker


                [red]class[/] [blue]{(self.product_name).capitalize()}Checker[/](Checker):"""
                            ))

        # output: long human readable strings
        # Using filenames (containing patterns like '.so' etc.) in the binaries
        # as patterns isn't ideal: they might depend on who packages the file
        # (it might work on fedora but not on ubuntu), so such strings are
        # flagged as not recommended.
        print("\tCONTAINS_PATTERNS = [")
        for common_strings in sorted(self.contains_patterns):
            if ".debug" in common_strings or ".so" in common_strings:
                rprint(
                    f'\t\t[red]r"{common_strings}"[/] <--- not recommended to use this form of strings'
                )
            else:
                rprint(f'\t\t[green]r"{common_strings}"[/],')
        print("\t]")

        # output: filenames, that we search for binary strings
        print("\tFILENAME_PATTERNS = [")
        for filename in self.filename_pattern:
            if self.product_name == filename:
                rprint(
                    f'\t\t[cyan]r"{filename}"[/], <--- this is a really common filename pattern'
                )
            elif self.product_name in filename:
                if ".so" in filename:
                    rprint(f'\t\t[green]r"{filename}"[/],')
                else:
                    rprint(
                        f'\t\t[bright_green]r"{filename}"[/], <--- you could just use "{self.product_name}" to match this file'
                    )  # to single-handedly match filenames of type varnishd, varnishlog, varnishtop, etc.
            else:
                rprint(f'\t\t[green]r"{filename}"[/],')
        print("\t]")

        # output: version-strings
        print("\tVERSION_PATTERNS = [")
        for version_string in self.version_pattern:
            rprint(f'\t\t[green]r"{version_string}"[/],')
        print("\t]")

        # output: vendor-product pair
        print("\tVENDOR_PRODUCT = ", end="")
        rprint(self.vendor_product)

        self.CONSOLE.rule()