Exemple #1
0
    def read_binary_file(self, path):
        """Load the raw contents of a file opened in binary mode.

        Failures are reported through Output.print_error instead of being
        raised to the caller.

        Args:
            path: (string) file path

        Returns:
            (bytes) everything read from the file
            None if the file could not be read
        """
        raw_bytes = None

        try:
            with open(path, 'rb') as binary_file:
                raw_bytes = binary_file.read()

        # IOError is an alias of OSError in Python 3, so this single clause
        # covers every operating-system level failure.
        except OSError as error:
            Output.print_error("Critical error while reading file " + path +
                               "\n" + str(error))

        # Anything else is reported with a different message; either way the
        # caller receives whatever was read so far (normally None).
        except Exception as error:
            Output.print_error("Couldn't open binary file " + path + "\n" +
                               str(error))

        return raw_bytes
Exemple #2
0
    def read_text_file(self, path):
        """Decode a text file by trying a fixed list of encodings in order.

        The first encoding that decodes the whole file without a ValueError
        (which includes UnicodeDecodeError) wins. Other failures are reported
        through Output.print_error and stop the search.

        Args:
            path: (string) file path

        Returns:
            (string) the decoded content of the file
            None if no encoding worked or the file could not be read
        """
        # NOTE(review): "latin-1" (and its alias "iso-8859-1") can decode any
        # byte sequence, so the encodings listed after it are presumably never
        # reached -- confirm whether this ordering is intended.
        candidate_encodings = (
            "utf-8", "latin-1", "iso-8859-1", "utf-16", "utf-32", "cp500"
        )
        decoded_text = None

        for candidate in candidate_encodings:
            try:
                with open(path, 'r', encoding=candidate) as text_file:
                    decoded_text = text_file.read()

            except ValueError:
                # Wrong encoding for this file; move on to the next candidate.
                continue

            except OSError as error:
                Output.print_error("Critical error while reading file " +
                                   path + "\n" + str(error))

            except Exception as error:
                Output.print_error("Exception while opening file " + path +
                                   "\n" + str(error))

            # Reached on success and after any non-decoding failure: in both
            # cases there is no point in trying further encodings.
            break

        return decoded_text
Exemple #3
0
    def read_file(self, path):
        """Reads a file at the given path to return its content and language

        The language is guessed from the file extension. Files of unknown type
        are first read as text and fall back to binary when they cannot be
        decoded or contain non-text characters. Files of a known text language
        fall back to binary when decoding fails. Everything else is read as
        binary. The running totals self.package_binary_bytes,
        self.package_text_bytes and self.package_lines_of_text are updated
        whenever content was read.

        Args:
            path: (string) file path

        Returns:
            tuple (file content, language) file content is either a str or bytes array depending
                on whether or not it is binary. The content is None when the file could not be
                read at all.
        """
        content = None

        # splitext returns the extension with its leading dot (".py"); keep
        # only the part after the last dot, lower-cased.
        _, file_extension = os.path.splitext(path)
        file_extension = file_extension.split(".")[-1].lower()

        language = Language.guess_language(file_extension)

        if language == Language.Unknown:
            # The extension told us nothing: try plain text first, and only
            # accept it when the decoded content really looks like text.
            content = self.read_text_file(path)

            if content is not None and not self.has_nontext_characters(content):
                language = Language.PlainText
            else:
                content = self.read_binary_file(path)
                language = Language.Binary

        elif language.is_text:
            content = self.read_text_file(path)

            if content is None:
                # The leading space before "using" keeps the path from running
                # into the rest of the sentence.
                Output.print_error("Couldn't decode the text file " + path +
                                   " using any of Unicode, Latin, ISO-8859, or EBCDIC encodings." +
                                   " Will treat as binary.")
                content = self.read_binary_file(path)
                language = Language.Binary

        else:
            content = self.read_binary_file(path)
            language = Language.Binary

        if content is not None:
            if language == Language.Binary:
                self.package_binary_bytes += len(content)
            else:
                # content is a str here, so len() counts decoded characters.
                self.package_text_bytes += len(content)
                self.package_lines_of_text += len(content.split("\n"))

        return content, language
    def get_directory_filelist(self, path, tmp_root_path, current_path):
        """Walk a directory tree and collect its files, unpacking archives on the way.

        Symbolic links are skipped with a warning. Every archive found is
        extracted into a temporary directory, which is then listed recursively
        in place of the archive itself. Archives that fail to extract are
        reported and skipped.

        Args:
            path: (string) path of the directory
            tmp_root_path: (string) if the directory is inside of a tmp directory, this is the
                address of that directory, otherwise null.
            current_path: (string) current address within the temporary directory. If we are not in
                a tmp directory, this is also null. This is used to compute the display path.

        Returns:
            (list) one dict per file with the keys "display_path" (the path shown to the user)
            and "physical_path" (where the file can actually be accessed).
        """
        def shown_path(physical):
            # Inside an extracted archive the user-facing path is rebuilt from
            # the archive's own display path; otherwise it is the real path.
            if tmp_root_path:
                return join(current_path, relpath(physical, tmp_root_path))
            return physical

        collected = []

        for folder, _, names in walk(path, followlinks=False):
            for name in names:
                physical = abspath(join(folder, name))

                if islink(physical):
                    Output.print_warning("Skipping symbolic link: " + physical)
                    continue

                kind = FileLister.archive_type(physical)

                if not kind:
                    # Ordinary file: record it and move on.
                    collected.append({
                        "display_path": shown_path(physical),
                        "physical_path": physical
                    })
                    continue

                extraction_dir = self.create_tmp_directory(physical)
                archive_display = shown_path(physical)

                try:
                    FileLister.extract_archive(kind, physical, archive_display, extraction_dir)
                except ExtractError as error:
                    Output.print_error(str(error))
                    continue

                # The extracted tree becomes its own tmp root, displayed
                # underneath the archive's display path.
                nested_files = self.get_directory_filelist(
                    extraction_dir, tmp_root_path=extraction_dir, current_path=archive_display)
                collected.extend(nested_files)

        return collected
Exemple #5
0
import traceback
from cryptodetector import CryptoDetector, Output, Options, Logger, FileLister
from cryptodetector.exceptions import CryptoDetectorError

if __name__ == '__main__':

    # Stays None unless the "log" option is set; the failure handlers below
    # only write log files when a directory was recorded here.
    log_output_directory = None

    try:
        options = Options(CryptoDetector.VERSION).read_all_options()
        if "log" in options and options["log"]:
            log_output_directory = options["output"]

        CryptoDetector(options).scan()

        print("done")

    except CryptoDetectorError as known_error:
        # Errors raised by the tool itself: report the message only.
        Output.print_error(str(known_error))
        if log_output_directory:
            Logger.write_log_files(log_output_directory)
        FileLister.cleanup_all_tmp_files()

    except KeyboardInterrupt:
        # Clean up temporary files, then let the interrupt propagate.
        FileLister.cleanup_all_tmp_files()
        raise

    except Exception:
        # Anything unexpected: report the full traceback.
        Output.print_error("Unhandled exception.\n\n" +
                           traceback.format_exc())
        if log_output_directory:
            Logger.write_log_files(log_output_directory)
        FileLister.cleanup_all_tmp_files()