Ejemplo n.º 1
0
    def download_file(url, download_directory):
        """Download a remote file into the given directory

        The local file name is the last non-empty component of the URL path. When the URL
        has no usable path component, the network location (host) is used instead.

        Args:
            url: (string) address of the remote file
            download_directory: (string) directory in which the downloaded file is saved

        Returns:
            (string) the absolute path of the file that was just downloaded

        Raises:
            DownloadError: if retrieving the URL or writing the local file fails
        """
        Output.print_information("Downloading " + url + " ...")

        parsed_url = urlparse(url)
        # ignore trailing slashes so that ".../archive/" is saved as "archive" rather than
        # producing an empty file name; fall back to the host when nothing is left
        file_name = parsed_url.path.rstrip("/").split("/")[-1] or parsed_url.netloc
        download_path = abspath(join(download_directory, file_name))

        try:
            # fetch the complete response before creating the local file, so that a failed
            # download does not leave an empty file behind; the with-block closes the
            # connection even when read() fails
            with urlopen(url) as response:
                content = response.read()

            with open(download_path, 'wb') as file_object:
                file_object.write(content)
            return download_path

        except HTTPError as expn:
            raise DownloadError("HTTP error code " + str(expn.code) + " while retrieving " \
             + url + "\n" + str(expn.reason))
        except URLError as expn:
            raise DownloadError("HTTP URL error while retrieving " + url + "\n" + str(expn.reason))
        except Exception as expn:
            raise DownloadError("Unable to retrieve " + url + "\n" + str(expn))
Ejemplo n.º 2
0
    def extract_by_library(library, archive_path, display_path, output_directory):
        """Decompress a single-file archive into the output directory using the given library

        The decompressed file is named after the archive with its last extension removed.

        Args:
            library: (module) compression module; its open(path, mode) must return a readable
                context manager
            archive_path: (string) physical path of the archive on disk
            display_path: (string) file path that should be displayed to the user
            output_directory: (string) directory that receives the decompressed file

        Returns:
            None

        Raises:
            ExtractError: if reading the archive or writing the decompressed file fails
        """
        library_name = library.__name__
        Output.print_information(
            "".join(["Extracting ", library_name, " archive ", display_path, " ..."]))

        try:
            with library.open(archive_path, "rb") as archive_file:
                payload = archive_file.read()

                # the output name is the archive name without its last extension
                output_name = basename(archive_path)
                if "." in output_name:
                    output_name = output_name.rsplit(".", 1)[0]

                output_path = abspath(join(output_directory, output_name))
                with open(output_path, "wb") as decomp_file:
                    decomp_file.write(payload)
        except Exception as expn:
            failure = "Failed to extract " + library_name + " archive " + display_path \
                + "\n" + str(expn)
            raise ExtractError(failure)
Ejemplo n.º 3
0
    def skip_package(self, package_name, package_root):
        """Decide whether a package can be skipped because its crypto file already exists

        Args:
            package_name: (string) package name
            package_root: (string) the directory where the package is located. If the package
                is not a local one, this is None

        Returns:
            (bool) True if skipping is enabled and a crypto file for this package was found
        """
        if not self.skip_existing:
            return False

        # the crypto file lives next to the package when output_in_package_directory is set
        # and the package has a root; otherwise it lives in the common output directory
        if package_root and self.output_in_package_directory:
            crypto_directory = package_root
        else:
            crypto_directory = self.output_directory

        crypto_file_path = join(crypto_directory, package_name + ".crypto")
        if not isfile(crypto_file_path):
            return False

        skip_message = "".join([
            "Found a crypto file for package ", package_name, " at ", crypto_file_path,
            ". Will skip scanning this package."])
        Output.print_information(skip_message)
        Logger.log(skip_message)
        return True
Ejemplo n.º 4
0
    def cleanup_tmp_folder(self):
        """Remove the temporary directories tracked by this instance

        Every directory in self.tmp_directories that was removed, or that no longer exists,
        is dropped from both self.tmp_directories and FileLister.all_temp_dirs. Directories
        that could not be removed stay tracked and a warning is printed for each of them.

        Args:
            None

        Returns:
            None
        """
        lose = set()
        # do not modify tmp_directories during the loop
        for tmp_dir in self.tmp_directories:
            if exists(tmp_dir):
                try:
                    shutil.rmtree(tmp_dir)
                except Exception:
                    # only ordinary errors are retried; KeyboardInterrupt and SystemExit
                    # must propagate instead of being swallowed here.
                    # directories that cannot be searched cause problems, so fix the
                    # permissions of the tree and try once more
                    try:
                        FileLister.set_tree_perms(tmp_dir)
                        shutil.rmtree(tmp_dir)
                    except Exception as e:
                        Output.print_warning(
                            "Temp directory %s was not removed (%s)" %
                            (tmp_dir, str(e)))
                        # keep tracking this directory
                        continue
            lose.add(tmp_dir)

        FileLister.all_temp_dirs -= lose
        self.tmp_directories -= lose
        Output.print_information(
            "Temp dir count is %s %s" %
            (len(self.tmp_directories), len(FileLister.all_temp_dirs)))
Ejemplo n.º 5
0
    def list_file(self, file_path, tmp_root_path="", current_path=""):
        """List a single file as package

        Archives are extracted into a new temporary directory and listed through
        list_directory; any other file becomes a package containing just itself.

        Args:
            file_path: (string) file path
            tmp_root_path: (string) if file is in a tmp directory, this is the address of that
                directory, otherwise empty.
            current_path: (string) current address within the temporary directory. If we are not in
                a tmp directory, this is also empty. This is used to compute the display path.

        Returns:
            (list) for an archive, the result of list_directory on its extracted content; for
                any other file, a list containing one file-list for this file. The list is
                empty if the package is skipped or the file has a .crypto extension.
        """
        archive_type = FileLister.archive_type(file_path)

        package_name = basename(file_path)
        package_root = abspath(dirname(file_path))

        # files located in a temporary directory have no package root
        if tmp_root_path:
            package_root = None

        if self.skip_package(package_name, package_root):
            return []

        # if this is itself a crypto file, don't list it as a package. Test the actual
        # extension: comparing the text after the last dot would also match a file whose
        # whole relative path is "crypto" and has no extension at all
        if file_path.endswith(".crypto"):
            Output.print_information("\nThe file " + file_path + " has a .crypto extention. " \
                + "This is reserved for the output of this program. Will not list this file " \
                + "as a package.")
            return []

        if tmp_root_path:
            display_path = join(current_path, relpath(file_path, tmp_root_path))
        elif archive_type:
            display_path = abspath(file_path)
        else:
            # plain files outside a temporary directory are displayed with the path as given
            display_path = file_path

        if archive_type:
            tmp_dir = self.create_tmp_directory(package_name)
            FileLister.extract_archive(archive_type, file_path, display_path, tmp_dir)

            return self.list_directory(tmp_dir, package_name, tmp_root_path=tmp_dir, \
                current_path=display_path, _package_root=package_root)

        return [{
            "package_name": package_name,
            "package_root": package_root,
            "file_list": [{"display_path": display_path, "physical_path": file_path}]
        }]
Ejemplo n.º 6
0
    def write_crypto_file(self, json_data, output_directory, package_name):
        """Writes the crypto data to a .crypto file in the output_directory

        The data is first written to a temporary "<name>.crypto.partial" file, which is moved
        to "<name>.crypto" only after writing succeeded. When self.output_existing is
        "rename" and "<name>.crypto" already exists, the first free name of the form
        "<name>.<number>.crypto" is used instead; otherwise an existing file is replaced.

            Args:
                json_data: (dict) data to serialize as JSON
                output_directory: (string) directory that receives the crypto file
                package_name: (string) base name of the crypto file

            Returns:
                None

            Raises:
                FileWriteException: if the partial file cannot be written
        """
        output_file = os.path.join(output_directory, package_name)

        if self.output_existing == "rename":
            duplicate_number = 1
            duplicate_file = output_file
            while os.path.exists(duplicate_file + ".crypto"):
                duplicate_file = output_file + "." + str(duplicate_number)
                duplicate_number += 1
            crypto_file_path = duplicate_file + ".crypto"
        else:
            crypto_file_path = output_file + ".crypto"

        Output.print_information("\nWriting output in " + crypto_file_path +
                                 " ...\n")

        partial_file_path = crypto_file_path + ".partial"

        # serialize before opening the file, so that a serialization failure does not leave
        # an empty .partial file behind
        if self.pretty:
            json_string = json.dumps(json_data, sort_keys=True, indent=2)
        else:
            json_string = json.dumps(json_data)

        try:
            with open(partial_file_path, 'w') as file_object:
                file_object.write(json_string)

        except (OSError, IOError) as e:
            raise FileWriteException("Failed to write result in the crypto file " \
                + partial_file_path + "\n" + str(e))

        # move the file from .crypto.partial to .crypto at the very last step to ensure
        # writing completely succeeded. os.replace overwrites an existing .crypto file in one
        # step, so there is no moment at which the previous result is already deleted while
        # the new one is not yet in place
        os.replace(partial_file_path, crypto_file_path)
Ejemplo n.º 7
0
    def extract_tar(tar_file_path, display_path, output_directory):
        """Extract a tar archive

        When the running Python provides tarfile extraction filters, every member is passed
        through tarfile.data_filter and members rejected by it (for example paths or links
        that point outside of the output directory) are skipped instead of being extracted.

        Args:
            tar_file_path: (string) physical path of file on the hardware
            display_path: (string) file path that should be displayed to the user
            output_directory: (string) directory that receives the extracted members

        Returns:
            None

        Raises:
            ExtractError: if the archive cannot be opened or extracted
        """
        def skip_unsafe_members(member, destination):
            """Apply tarfile.data_filter; returning None makes tarfile skip the member"""
            try:
                return tarfile.data_filter(member, destination)
            except tarfile.FilterError:
                return None

        Output.print_information("Extracting tar archive " + display_path + " ...")
        try:
            with tarfile.open(tar_file_path) as tar_file:
                if hasattr(tarfile, "data_filter"):
                    tar_file.extractall(output_directory, filter=skip_unsafe_members)
                else:
                    # NOTE(review): this Python has no extraction filters, so members are
                    # extracted unchecked; that is unsafe for archives from untrusted sources
                    tar_file.extractall(output_directory)
        except Exception as expn:
            raise ExtractError("Failed to extract tar archive " + display_path + "\n" + str(expn))
Ejemplo n.º 8
0
    def extract_zip(zip_file_path, display_path, output_directory):
        """Unpack all members of a zip archive into the output directory

        Args:
            zip_file_path: (string) physical path of the archive on disk
            display_path: (string) file path that should be displayed to the user
            output_directory: (string) directory that receives the extracted members

        Returns:
            None

        Raises:
            ExtractError: if the archive cannot be opened or extracted
        """
        Output.print_information("".join(["Extracting zip archive ", display_path, " ..."]))
        try:
            with zipfile.ZipFile(zip_file_path) as archive:
                archive.extractall(output_directory)
        except Exception as expn:
            failure = "".join(["Failed to extract zip archive ", display_path, "\n", str(expn)])
            raise ExtractError(failure)
Ejemplo n.º 9
0
    def extract_rpm_archive(archive_path, display_path, output_directory):
        """Unpack an RPM archive into the output directory by delegating to extract_rpm

        Args:
            archive_path: (string) physical path of the archive on disk
            display_path: (string) file path that should be displayed to the user
            output_directory: (string) directory passed to extract_rpm as the destination

        Returns:
            None

        Raises:
            ExtractError: if extract_rpm raises any exception
        """
        Output.print_information("".join(["Extracting RPM archive ", display_path, " ..."]))
        try:
            extract_rpm(archive_path, output_directory)
        except Exception as expn:
            failure = "".join(["Failed to extract RPM archive ", display_path, "\n\n", str(expn)])
            raise ExtractError(failure)
Ejemplo n.º 10
0
    def __init__(self, options, skip_output=False):
        """Validate the scan options and set up the file lister and the scanning methods

        Args:
            options: (dict) must contain the keys "output", "quick",
                "output_in_package_directory", "output_existing", "pretty", "log",
                "source_files_only", "ignore_evidence_types", "verbose", "suppress_warnings",
                "stop_after", "packages" and "methods"
            skip_output: (bool) whether we should skip writing or printing out the output (used only
                for unit testing)

        Returns:
            None

        Raises:
            InvalidOptionsException: if a required option is missing or an option has an
                invalid value
        """
        try:
            for option in ["output", "quick", "output_in_package_directory", "output_existing", \
                "pretty", "log", "source_files_only"]:
                setattr(self, option, options[option])
            self.output_directory = self.output
            Method.ignore_evidence_types = options["ignore_evidence_types"]
            Output.verbose = options["verbose"]
            Output.suppress_warnings = options["suppress_warnings"]
            stop_after = options["stop_after"]
            packages = options["packages"]
            methods = options["methods"]
        except KeyError as expn:
            raise InvalidOptionsException("Missing required option: \n" +
                                          str(expn))

        self.packages = packages
        self.quick_scan_result = {}
        self.full_scan_result = {}
        self.skip_output = skip_output
        self.current_package = None
        self.stop_after = None

        # scan() reads these flags unconditionally, so they need a value even when the
        # corresponding method class is not registered in MethodFactory
        self.method_keyword_active = False
        self.method_api_active = False

        if stop_after:
            try:
                self.stop_after = int(stop_after)
            except (TypeError, ValueError, OverflowError):
                # str() keeps the message buildable when stop_after is not a string
                raise InvalidOptionsException("Invalid stop_after value: '" +
                                              str(stop_after) + "'.")
            if self.stop_after < 1:
                raise InvalidOptionsException(
                    "stop_after should be a positive integer.")

        if not os.path.isdir(self.output_directory):
            raise InvalidOptionsException("The specified output directory doesn't exist: " \
                + self.output_directory)

        if not packages:
            # nothing to scan: leave the instance without file lister and methods
            Output.print_information("\nNo package specified. Nothing to scan.\n", \
                ignore_verbose=True)
            return

        if self.output_existing not in ["rename", "overwrite", "skip"]:
            raise InvalidOptionsException("output_existing had invalid value '" \
                + self.output_existing + "'. Its value must be one of three choices: " \
                + "'rename', 'overwrite', and 'skip'.")

        self.file_lister = FileLister(packages, (self.output_existing == "skip"), \
            self.output_directory, self.output_in_package_directory)

        method_classes = {
            mc.method_id: mc
            for mc in MethodFactory.method_classes
        }

        method_instances = {}

        for method in method_classes:

            # instantiate method
            method_instances[method] = method_classes[method]()

            # read options
            method_options = None
            if hasattr(method_instances[method], "options"):
                method_options = method_instances[method].options

            # remember the settings of the known methods; scan() writes them to the log
            if method == "keyword":
                self.method_keyword_active = (method in methods)
                self.method_keyword_ignore_case = method_options["ignore_case"]
                self.method_keyword_kwlist_version = method_options[
                    "keyword_list_version"]

            elif method == "api":
                self.method_api_active = (method in methods)
                self.method_api_kwlist_version = method_options[
                    "keyword_list_version"]

        # active methods are the ones we use to scan the code

        self.active_methods = {}
        for method in methods:
            if method not in method_classes:
                raise InvalidOptionsException("Invalid method " + method)

            self.active_methods[method] = method_instances[method]
Ejemplo n.º 11
0
    def scan(self):
        """Main function to initiate the scanning job

        Scans every file of every package with the active methods. A full scan collects the
        matches per package and, unless skip_output is set, writes them to a crypto file. A
        quick scan only records whether each package has at least one match and, unless
        skip_output is set, prints that summary and writes it to quick-scan-result.txt in
        the output directory.

        Args:
            None

        Returns:
            (dict) for a quick scan, a mapping of package name to a bool telling if a match
                was found; otherwise a mapping of package name to its crypto data. None if
                there are no packages to scan.

        Raises:
            FailedFileRead: if the content of a file could not be read
        """
        if not self.packages:
            return

        Logger.log("Crypto Detector " + CryptoDetector.VERSION +
                   " (c) Wind River Systems")
        Logger.log(("{} "*6).format(platform.system(), platform.node(), platform.release(), \
            platform.version(), platform.machine(), platform.processor()))
        Logger.log("Python " + str(platform.python_version()))
        Logger.log("")
        Logger.log("")
        Logger.log(
            "Starting a new crypto scanning job with the following options:")

        Logger.log("source_files_only: " + str(self.source_files_only))
        Logger.log("stop_after: " + str(self.stop_after))
        Logger.log("output_existing: " + str(self.output_existing))
        Logger.log("ignore_evidence_types: " +
                   str(Method.ignore_evidence_types))

        if self.method_keyword_active:
            Logger.log("keyword method is active keyword list version " \
                + str(self.method_keyword_kwlist_version) + " and ignore_case set to " \
                + str(self.method_keyword_ignore_case))

        if self.method_api_active:
            Logger.log("API method is active with api list version " \
                + str(self.method_api_kwlist_version))

        package_count = 0
        total_execution_time = 0
        total_text_bytes = 0
        total_binary_bytes = 0
        total_file_count = 0
        total_lines_of_text = 0

        for package_path in self.packages:
            package_filelist = self.file_lister.get_package_filelist(
                package_path)

            for package in package_filelist:
                package_name = package["package_name"]
                package_root = package["package_root"]
                file_list = package["file_list"]
                sha1_list = []
                package_count += 1
                match_count = 0
                checksums = {}
                crypto_output = CryptoOutput()

                # per-package counters, reported in the statistics below
                self.current_package = package_name
                self.package_text_bytes = 0
                self.package_binary_bytes = 0
                self.package_lines_of_text = 0

                crypto_output.set_package_name(package_name)

                Output.print_information("Scanning package " + package_name +
                                         "\n")

                start_time = time.time()
                stats = {}

                if self.quick and package_name not in self.quick_scan_result:
                    self.quick_scan_result[package_name] = False

                for file_path in file_list:
                    content, language = self.read_file(
                        file_path["physical_path"])

                    if content is None:
                        raise FailedFileRead("Failed to open the file '" + file_path["display_path"] \
                            + "' to read its contents. Please run the scan with --log and open the log" \
                            + " file for details of this error.")

                    # hashing needs bytes, so text content is encoded first
                    if isinstance(content, str):
                        encoded_content = codecs.encode(content, "utf-8")
                    else:
                        encoded_content = content

                    hexdigest = hashlib.sha1(encoded_content).hexdigest()
                    sha1_list.append(hexdigest)

                    found_matches = False

                    for method_id in self.active_methods:
                        method = self.active_methods[method_id]

                        if not method.supports_scanning_file(language):
                            continue

                        if self.source_files_only and not language.is_source_code:
                            continue

                        Output.print_information("[" + method.method_id \
                            + "] Scanning file " + file_path["display_path"])

                        if self.quick:
                            # one match is enough in a quick scan; skip the other methods
                            if method.quick_search(content, language):
                                found_matches = True
                                break
                        else:
                            result = method.search(content, language)

                            if not result:
                                continue

                            found_matches = True

                            if file_path["display_path"] not in checksums:
                                checksums[
                                    file_path["display_path"]] = hexdigest

                            for match in result:
                                match["detection_method"] = method_id
                                match = self.validate_match_fields(
                                    method_id, match)
                                crypto_output.add_hit(
                                    file_path=file_path["display_path"],
                                    file_sha1=checksums[
                                        file_path["display_path"]],
                                    file_language=language,
                                    hit=match)
                                match_count += 1

                    if self.quick:
                        if found_matches:
                            # one match is enough; skip the remaining files of this package
                            self.quick_scan_result[package_name] = True
                            break
                    else:
                        self.full_scan_result[
                            package_name] = crypto_output.get_crypto_data()

                    # NOTE: self.stop_after is decremented in place, so the countdown is not
                    # reset for the next package or for a later call of scan()
                    if self.stop_after and found_matches:
                        if self.stop_after == 1:
                            break
                        else:
                            self.stop_after -= 1

                crypto_output.set_verif_code(sha1_list)

                stats["execution_time"] = time.time() - start_time
                stats["file_count"] = len(file_list)
                stats["package_text_bytes"] = self.package_text_bytes
                stats["package_binary_bytes"] = self.package_binary_bytes
                stats["package_lines_of_text"] = self.package_lines_of_text

                if package_root is not None and self.output_in_package_directory:
                    output_directory = package_root
                else:
                    output_directory = self.output_directory

                # write the output to a file

                if not self.skip_output and not self.quick:
                    self.write_crypto_file(crypto_output.get_crypto_data(),
                                           output_directory, package_name)

                number_of_matches = "Did not find any matches"
                if match_count == 1:
                    number_of_matches = "Found only one match"
                elif match_count > 1:
                    number_of_matches = "Found " + str(
                        match_count) + " matches"

                Logger.log("")
                Logger.log("Finished scanning package " + package_name + " in " \
                    + str(round(stats["execution_time"], 2)) + " seconds.")
                Logger.log("There were " + str(stats["file_count"]) + " files consisting of " \
                    + str(stats["package_lines_of_text"]) + " lines of text in " \
                    + CryptoDetector.human_readable_filesize(stats["package_text_bytes"]) \
                    + " of text data and " \
                    + CryptoDetector.human_readable_filesize(stats["package_binary_bytes"]) \
                    + " of binary data.")
                Logger.log(number_of_matches + " in " + package_name)

                total_execution_time += stats["execution_time"]
                total_file_count += stats["file_count"]
                total_text_bytes += stats["package_text_bytes"]
                total_binary_bytes += stats["package_binary_bytes"]
                total_lines_of_text += stats["package_lines_of_text"]

                Output.print_information("\nCleaning up temporary files ...")
                self.file_lister.cleanup_tmp_folder()

        # write quick scan output to stdout and some output file

        if self.quick and not self.skip_output:
            output_message = ""
            if not any(self.quick_scan_result.values()):
                output_message += "\n\nDid not find any matches in any of the packages.\n\n"
            else:
                output_message += "\n\nFound matches in the following packages:\n\n"
                for package in self.quick_scan_result:
                    if self.quick_scan_result[package]:
                        output_message += "    " + package + "\n"
                output_message += "\n\n"

            # report the result in both cases, including when nothing was found
            print(output_message)

            # write quick search output to a file
            quick_output_filename = os.path.join(self.output_directory, \
               "quick-scan-result.txt")

            with open(quick_output_filename, "w") as output_file:
                output_file.write(output_message)

        # print stats

        Output.print_information("\nTook " \
            + str(round(total_execution_time, 2)) + " seconds to scan " \
            + str(total_file_count) + " files in " \
            + str(package_count) + " package" \
            + ("s" if package_count > 1 else "") + ", processing " \
            + str(total_lines_of_text) \
            + " lines of text in " \
            + CryptoDetector.human_readable_filesize(total_text_bytes) \
            + " of text data, in addition to scanning " \
            + CryptoDetector.human_readable_filesize(total_binary_bytes) \
            + " of binary data.")

        if self.log:
            Logger.write_log_files(self.output_directory)

        if self.quick:
            return self.quick_scan_result
        else:
            return self.full_scan_result
Ejemplo n.º 12
0
    def read_config_file(self, path):
        """Load a configuration file and merge its values into self.options

        When no path is given, cryptodetector.conf is looked up in the current working
        directory first and in the home directory second. If neither exists, self.options is
        left untouched.

        Args:
            path: (string) path of the config file; a false value triggers the lookup above

        Returns:
            None

        Raises:
            InvalidConfigException: if the given file does not exist, a required section is
                missing, output_existing has an invalid value, or no method is listed
        """
        config = configparser.ConfigParser(allow_no_value=True,
                                           delimiters=('=',))
        # keep option names case sensitive
        config.optionxform = str

        if path:
            path_conf = os.path.abspath(path)
            Output.print_information("Reading configuration file " + path_conf,
                                     True)
            if not os.path.isfile(path_conf):
                raise InvalidConfigException(
                    "The specified config file doesn't exist.")
            config.read(path_conf)

        else:
            home_directory = os.path.expanduser("~")
            cwd_conf = os.path.abspath(
                os.path.join(os.getcwd(), "cryptodetector.conf"))
            home_conf = os.path.abspath(
                os.path.join(home_directory, "cryptodetector.conf"))

            # the current working directory takes precedence over the home folder
            for candidate in (cwd_conf, home_conf):
                if os.path.isfile(candidate):
                    Output.print_information(
                        "Reading configuration file " + candidate, True)
                    self.options["config_file"] = candidate
                    config.read(candidate)
                    break
            else:
                Output.print_information("Didn't find any configuration file. Expect all " \
                    + "parameters from the command line.", True)
                return

        for required_section in ("settings", "methods"):
            if not config.has_section(required_section):
                raise InvalidConfigException("Invalid configuration file. [" \
                    + required_section + "] section " + "is required.")

        # the type of the current value decides how an option is read from the file
        for name in self.options:
            current_value = self.options[name]

            if isinstance(current_value, list):
                new_value = Options.read_array_option(config, name)
            elif isinstance(current_value, bool):
                new_value = Options.read_boolean_option(config, "settings", name)
            else:
                new_value = Options.read_string_option(config, "settings", name)

            if new_value is not None:
                self.options[name] = new_value

        # these two sections list their items as bare option names
        for list_section in ("methods", "packages"):
            if config.has_section(list_section):
                self.options[list_section] = [
                    pair[0] for pair in config.items(list_section)
                ]

        if self.options["output_existing"] not in ("rename", "overwrite", "skip"):
            raise InvalidConfigException("Invalid config file. In section [settings] " \
                + "output_existing had invalid value '" + self.options["output_existing"] \
                + "'. Its value must be one of three choices: " \
                + "'rename', 'overwrite', and 'skip'.")

        if not self.options["methods"]:
            raise InvalidConfigException("Invalid configuration file. There should be one " \
                + "or more items under the [methods] section.")

        methods = self.options["methods"]
        Options.validate_methods(methods)

        for method in methods:
            method_class = Options.available_methods()[method]

            if not hasattr(method_class, "options"):
                continue

            method_section = "method:" + method

            for name in method_class.options:
                current_value = method_class.options[name]

                if isinstance(current_value, list):
                    new_value = Options.read_array_option(
                        config, method_section + ":" + name)
                elif isinstance(current_value, bool):
                    new_value = Options.read_boolean_option(
                        config, method_section, name)
                else:
                    new_value = Options.read_string_option(
                        config, method_section, name)

                if new_value is None:
                    continue

                # store the value on the method class and mirror it in self.options
                method_class.options[name] = new_value
                self.options[method + "_" + name] = new_value