def read_binary_file(self, path):
    """Load the raw contents of a file opened in binary mode.

    Args:
        path: (string) file path

    Returns:
        (bytes) everything read from the file
        None if the file could not be opened or read; an error message is
        printed in that case.
    """
    data = None

    try:
        with open(path, 'rb') as binary_file:
            data = binary_file.read()
    except (OSError, IOError) as error:
        # Failures reported by the operating system (missing file,
        # permissions, ...).
        Output.print_error("Critical error while reading file " + path + "\n" + str(error))
    except Exception as error:
        # Anything else is reported with a different message; either way
        # the caller receives whatever was read so far (normally None).
        Output.print_error("Couldn't open binary file " + path + "\n" + str(error))

    return data
def read_text_file(self, path):
    """Try multiple different text encodings to read a text file

    If the file starts with a UTF-16 or UTF-32 byte order mark, that
    encoding is tried first. Otherwise (or if that attempt fails) the
    encodings in ``text_encodings`` are tried in order and the first one
    that decodes the whole file wins.

    Args:
        path: (string) file path

    Returns:
        (string) the decoded content of the file
        None if it failed to read the file
    """
    import codecs

    text_encodings = [
        "utf-8",
        "latin-1",
        "iso-8859-1",
        "utf-16",
        "utf-32",
        "cp500"
    ]

    # UTF-32 is checked before UTF-16 because the little-endian UTF-32 BOM
    # begins with the same two bytes as the little-endian UTF-16 BOM.
    bom_encodings = (
        (codecs.BOM_UTF32_LE, "utf-32"),
        (codecs.BOM_UTF32_BE, "utf-32"),
        (codecs.BOM_UTF16_LE, "utf-16"),
        (codecs.BOM_UTF16_BE, "utf-16"),
    )

    content = None

    # latin-1 maps every possible byte to a character, so it never fails to
    # decode; without looking at the BOM first, UTF-16/UTF-32 files would
    # always be returned as latin-1 garbage and the later entries of
    # text_encodings would never be reached.
    try:
        with open(path, 'rb') as raw_file:
            file_head = raw_file.read(4)
    except (OSError, IOError) as expn:
        Output.print_error("Critical error while reading file " + path + "\n" + str(expn))
        return content
    except Exception as expn:
        Output.print_error("Exception while opening file " + path + "\n" + str(expn))
        return content

    for bom, bom_encoding in bom_encodings:
        if file_head.startswith(bom):
            text_encodings.insert(0, bom_encoding)
            break

    for encoding in text_encodings:
        try:
            with open(path, 'r', encoding=encoding) as content_file:
                content = content_file.read()
            break
        except ValueError:
            # Decoding errors (UnicodeDecodeError is a ValueError): this
            # encoding does not fit, move on to the next candidate.
            continue
        except (OSError, IOError) as expn:
            Output.print_error("Critical error while reading file " + path + "\n" + str(expn))
            return content
        except Exception as expn:
            Output.print_error("Exception while opening file " + path + "\n" + str(expn))
            return content

    return content
def read_file(self, path):
    """Reads a file at the given path to return its content and language

    The language is guessed from the file extension. Files with an unknown
    extension are first read as text and fall back to binary when that
    fails or when the text contains non-text characters. The package-wide
    byte and line counters on this object are updated for every file that
    was read successfully.

    Args:
        path: (string) file path

    Returns:
        tuple (file content, language)
        file content is either a str or bytes array depending on whether
        or not it is binary. It is None if the file could not be read.
    """
    content = None

    file_extension = os.path.splitext(path)[1]
    file_extension = file_extension.split(".")[-1].lower()
    language = Language.guess_language(file_extension)

    if language == Language.Unknown:
        # if we couldn't guess the type of the file from its extension, try
        # to open it as plain text, and if that failed, treat it as binary,
        # but if that succeeded, check the characters in the file to ensure
        # it is a text file.
        content = self.read_text_file(path)

        if content is None or self.has_nontext_characters(content):
            content = self.read_binary_file(path)
            language = Language.Binary
        else:
            language = Language.PlainText

    elif language.is_text:
        content = self.read_text_file(path)

        if content is None:
            Output.print_error("Couldn't decode the text file " + path
                               + " using any of Unicode, Latin, ISO-8859, or EBCDIC encodings."
                               + " Will treat as binary.")
            content = self.read_binary_file(path)
            language = Language.Binary

    else:
        content = self.read_binary_file(path)
        language = Language.Binary

    # Keep the package statistics up to date; unreadable files are not counted.
    if content is not None:
        if language == Language.Binary:
            self.package_binary_bytes += len(content)
        else:
            self.package_text_bytes += len(content)
            self.package_lines_of_text += len(content.split("\n"))

    return content, language
def get_directory_filelist(self, path, tmp_root_path, current_path):
    """Walk a directory tree and collect its files, unpacking archives on the way.

    Every archive found is extracted into a fresh temporary directory,
    which is then listed recursively in place of the archive itself.
    Symbolic links are skipped with a warning, and archives that fail to
    extract are reported and skipped.

    Args:
        path: (string) path of the directory
        tmp_root_path: (string) if the directory is inside of a tmp
            directory, this is the address of that directory, otherwise
            null.
        current_path: (string) current address within the temporary
            directory. If we are not in a tmp directory, this is also
            null. This is used to compute the display path.

    Returns:
        (list) a list of files, where each file is a dict with two keys
        "display_path" and "physical_path". "display_path" is the path
        that's shown to the user and "physical_path" is where file can be
        accessed.
    """
    collected = []

    for parent_dir, _, names in walk(path, followlinks=False):
        for name in names:
            physical_path = abspath(join(parent_dir, name))

            if islink(physical_path):
                Output.print_warning("Skipping symbolic link: " + physical_path)
                continue

            kind = FileLister.archive_type(physical_path)

            if not kind:
                # Regular file: inside a tmp directory it is shown relative
                # to the archive it came from, otherwise by its real path.
                if tmp_root_path:
                    shown_path = join(current_path, relpath(physical_path, tmp_root_path))
                else:
                    shown_path = physical_path
                collected.append({
                    "display_path": shown_path,
                    "physical_path": physical_path
                })
                continue

            # Archive: extract it and list the extracted tree instead.
            extraction_dir = self.create_tmp_directory(physical_path)

            if tmp_root_path:
                shown_path = join(current_path, relpath(physical_path, tmp_root_path))
            else:
                shown_path = physical_path

            try:
                FileLister.extract_archive(kind, physical_path, shown_path, extraction_dir)
            except ExtractError as expn:
                Output.print_error(str(expn))
                continue

            nested_files = self.get_directory_filelist(
                extraction_dir, tmp_root_path=extraction_dir, current_path=shown_path)
            collected.extend(nested_files)

    return collected
import traceback

from cryptodetector import CryptoDetector, Output, Options, Logger, FileLister
from cryptodetector.exceptions import CryptoDetectorError

# Command-line entry point: read the options, run the scan, and on failure
# report the error, write the log files (when logging was requested) and
# remove the temporary files created so far.
if __name__ == '__main__':
    try:
        # Stays None unless the "log" option is present and truthy.
        log_output_directory = None

        options = Options(CryptoDetector.VERSION).read_all_options()
        if "log" in options and options["log"]:
            log_output_directory = options["output"]

        CryptoDetector(options).scan()
        print("done")

    except CryptoDetectorError as expn:
        # Errors raised by the package itself: the message is enough.
        Output.print_error(str(expn))
        if log_output_directory:
            Logger.write_log_files(log_output_directory)
        FileLister.cleanup_all_tmp_files()

    except KeyboardInterrupt:
        # Interrupted by the user: clean up, then let the interrupt propagate.
        FileLister.cleanup_all_tmp_files()
        raise

    except Exception:
        # Anything unexpected is reported with its full traceback.
        Output.print_error("Unhandled exception.\n\n" + traceback.format_exc())
        if log_output_directory:
            Logger.write_log_files(log_output_directory)
        FileLister.cleanup_all_tmp_files()