Ejemplo n.º 1
0
    def test_calc_hash_for_three_small_files(self):
        """Verify sha1 and md5 hashes of file1..file3 in the data folder
        against the precomputed ``.sha1`` / ``.md5`` reference files.

        The same FileHashCalc instance is reused across files and
        algorithms; only ``file_name`` and ``hash_str`` are re-assigned.
        """
        calc = hash_calc.FileHashCalc()
        calc.suppress_console_reporting_output = True

        for i in range(1, 4):
            calc.file_name = f'{self.data_path}/file{i}.txt'

            # One pass per supported algorithm instead of the original
            # copy-pasted sha1/md5 branches.
            for algo in ("sha1", "md5"):
                with open(f'{self.data_path}/file{i}.txt.{algo}',
                          mode='r') as expected_file:
                    expected = expected_file.read()
                calc.hash_str = algo
                calc_res = calc.run()
                self.assertEqual(calc_res,
                                 hash_calc.FileHashCalc.ReturnCode.OK)
                self.assertEqual(expected, calc.result)
Ejemplo n.º 2
0
    def test_calc_hash_for_one_small_file(self):
        """Verify sha1 and md5 hashes of file1.txt in the data folder
        against the precomputed ``.sha1`` / ``.md5`` reference files.
        """
        # Ref: https://matthew-brett.github.io/teaching/string_formatting.html#option-2-f-strings-in-python-3-6
        calc = hash_calc.FileHashCalc()
        calc.file_name = f'{self.data_path}/file1.txt'
        calc.suppress_console_reporting_output = True

        # One pass per supported algorithm instead of the original
        # copy-pasted sha1/md5 branches.
        for algo in ("sha1", "md5"):
            with open(f'{self.data_path}/file1.txt.{algo}',
                      mode='r') as expected_file:
                expected = expected_file.read()
            calc.hash_str = algo
            calc_res = calc.run()
            self.assertEqual(calc_res, hash_calc.FileHashCalc.ReturnCode.OK)
            self.assertEqual(expected, calc.result)
Ejemplo n.º 3
0
    def _configure_parser(self):
        """
        Build ``self._parser`` (argparse.ArgumentParser) with all command
        line options of the tool.

        The parser description embeds the list of application exit codes
        (from the ``ExitCode`` enum) with their optional descriptions from
        ``exit_code_descriptions``.

        Raises:
            Exception: if ``self._input_args`` was not set beforehand.

        Ref: "Argparse Tutorial" https://docs.python.org/3/howto/argparse.html
        Ref: "15.4.3. The add_argument() method" https://docs.python.org/2/library/argparse.html#the-add-argument-method
        """

        if self._input_args is None:
            raise Exception("Arguments are not specified")

        # Ref: https://developer.rhino3d.com/guides/rhinopython/python-statements/
        description = "This is a command line tool to calculate hashes for one or many files at once with many convenient features: support of show progress,\n" \
            "folders and file masks for multiple files, skip calculation of handled files etc...\n\n"

        # Append one line per exit code: "<code> - <name>[: <description>]".
        description += "Application exit codes:\n"
        for ec in ExitCode:
            description += f"{ec:2} - {ec.name}"
            code_desc = exit_code_descriptions.get(ec)
            if code_desc is not None:
                description += f": {code_desc}"
            description += "\n"

        # Instantiated only to read the retry-related default values used in
        # the help strings / defaults below.
        calc = hash_calc.FileHashCalc()

        autosave_timeout_default = 300

        # RawDescriptionHelpFormatter keeps the manual newlines in the
        # description above instead of re-wrapping it.
        # Ref: https://www.programcreek.com/python/example/6706/argparse.RawDescriptionHelpFormatter
        self._parser = argparse.ArgumentParser(
            description=description,
            formatter_class=argparse.RawDescriptionHelpFormatter)
        self._parser.add_argument(
            '--input-file',
            '-i',
            action="append",
            help="Specify input files. Key can be specified multiple times")
        self._parser.add_argument(
            '--input-folder',
            action="append",
            help=
            "Specify input folders. All files in folder are handled recursively. Key can be specified multiple times"
        )
        self._parser.add_argument(
            '--input-folder-file-mask-include',
            help=
            "Specify file mask to include for input folder. All files in the folder considered if not specified. Separate multiple masks with semicolon (;)"
        )
        self._parser.add_argument(
            '--input-folder-file-mask-exclude',
            help=
            "Specify file mask to exclude for input folder. It is applied after --input-folder-file-mask-include. Separate multiple masks with semicolon (;)"
        )
        self._parser.add_argument(
            '--hash-file-name-output-postfix',
            action='append',
            help=
            "Specify postfix, which will be appended to the end of output file names. This is to specify for different contextes, "
            "e.g. if file name ends with \".md5\", then it ends with \"md5.<value>\""
        )
        self._parser.add_argument(
            '--hash-algo',
            help=
            f"Specify hash algo (default: {hash_calc.FileHashCalc.hash_algo_default_str})",
            default=hash_calc.FileHashCalc.hash_algo_default_str,
            choices=hash_calc.FileHashCalc.hash_algos)
        self._parser.add_argument(
            '--suppress-console-reporting-output',
            '-s',
            help="Suppress console output with progress reporting",
            action="store_true")
        self._parser.add_argument(
            '--pause-after-file',
            '-p',
            help=
            "Specify pause after every file handled, in seconds. Note, if file is skipped, then no pause applied",
            type=int)
        self._parser.add_argument(
            '--retry-count-on-data-read-error',
            help=
            f"Specify count of retries on data read error (default: {calc.retry_count_on_data_read_error})",
            default=calc.retry_count_on_data_read_error,
            type=int)
        self._parser.add_argument(
            '--retry-pause-on-data-read-error',
            help=
            f"Specify pause before retrying on data read error, in seconds (default: {calc.retry_pause_on_data_read_error})",
            default=calc.retry_pause_on_data_read_error,
            type=int)
        self._parser.add_argument(
            '--force-calc-hash',
            help=
            "If specified than hash calculated always. If not, then hash is not calculated if file with hash already exist",
            action="store_true")
        self._parser.add_argument(
            '--add-output-file-name-timestamp',
            action="store_true",
            help=
            "Add timestamp to the output file names. Note, that the time on program run taken. So it may differ from the file creation time, "
            "but it is equal for all files in one run")
        self._parser.add_argument(
            '--suppress-output-file-comments',
            help=
            "Don't add comments to output files. E.g. timestamp when hash generated",
            action="store_true")
        self._parser.add_argument(
            '--use-absolute-file-names',
            help=
            "Use absolute file names in output. If argument is not specified, relative file names used",
            action="store_true")
        self._parser.add_argument(
            '--single-hash-file-name-base',
            help=
            "If specified then all hashes are stored in one file specified as a value for this argument. Final file name include postfix",
            action="append")
        self._parser.add_argument(
            '--single-hash-file-name-base-json',
            help=
            "This is the same key as --single-hash-file-name-base. But postfix json is added. Result data stored in JSON",
            action="append")
        self._parser.add_argument(
            '--suppress-hash-file-name-postfix',
            help=
            "Suppress adding postfix in the hash file name for hash algo name",
            action="store_true")
        self._parser.add_argument(
            '--preserve-unused-hash-records',
            action="store_true",
            help=
            "This key works with --single-hash-file-name-base. By default if file with hashes already exists then records for files which not handled are deleted to avoid records for non-existing files. "
            "If this key specified, then such records preserved in hash file")
        self._parser.add_argument(
            '--norm-case-file-names',
            action="store_true",
            help=
            "Use normalized case of file names on output. This is more robust, but file names may differ which may look inconvenient. It is also platform dependent. "
            "Refer for details to https://docs.python.org/3/library/os.path.html#os.path.normcase"
        )
        self._parser.add_argument(
            '--sort-by-hash-value',
            action="store_true",
            help=
            "Specify to store hash records sorted by hash values in case when multiple hashes are stored in one file. By default without this option hash records are sorted by file name"
        )
        self._parser.add_argument(
            '--autosave-timeout',
            default=autosave_timeout_default,
            type=int,
            help=
            f"Save accumulated hashes after interval specified as argument, in seconds (default: {autosave_timeout_default}). "
            "Specify 0 to save hash info after handling every file, this may result in large overhead when many files on input. "
            "Specify -1 to disable autosave, this may result the accumulated hash data missed if execution interrupts unexpectedly. "
            "This is essential when multiple hashes stored in one file.")
        self._parser.add_argument(
            '--user-comment',
            '-u',
            action="append",
            help="Specify comment which will be added to output hash file")
Ejemplo n.º 4
0
    def _handle_input_file(self,
                           hash_storage: hash_storages.HashStorageAbstract,
                           input_file_name):
        """
        Handle single input file input_file_name: calculate its hash (unless
        a hash is already stored and --force-calc-hash was not given) and
        store the result in hash_storage.

        Returns an ExitCode value; callers rely on the distinct codes for
        the skipped / interrupted / data-read-error outcomes.

        Raises:
            TypeError: if hash_storage is not a HashStorageAbstract.
            Exception: on an unexpected FileHashCalc return code.
        """
        if not isinstance(hash_storage, hash_storages.HashStorageAbstract):
            raise TypeError(
                f"HashStorageAbstract expected, {type(hash_storage)} found")

        start_date_time = datetime.now()
        self._info("Handle file start time: " +
                   util.get_datetime_str(start_date_time) + " (" +
                   input_file_name + ")")

        # Skip the calculation when a hash is already stored, unless forced.
        # Ref: https://stackoverflow.com/questions/82831/how-do-i-check-whether-a-file-exists-without-exceptions
        if not self._cmd_line_args.force_calc_hash and hash_storage.has_hash(
                input_file_name):
            self._info("Hash for file '" + input_file_name +
                       "' exists ... calculation of hash skipped.")
            return ExitCode.OK_SKIPPED_ALREADY_CALCULATED
        self._info("Calculate hash for file '" + input_file_name + "'...")

        calc = hash_calc.FileHashCalc()
        calc.file_name = input_file_name
        calc.hash_str = self._cmd_line_args.hash_algo
        calc.suppress_console_reporting_output = self._cmd_line_args.suppress_console_reporting_output
        calc.retry_count_on_data_read_error = self._cmd_line_args.retry_count_on_data_read_error
        calc.retry_pause_on_data_read_error = self._cmd_line_args.retry_pause_on_data_read_error

        calc_res = calc.run()
        if calc_res != hash_calc.FileHashCalc.ReturnCode.OK:
            # Map calculator return codes onto application exit codes;
            # any other code is a programming error.
            if calc_res == hash_calc.FileHashCalc.ReturnCode.PROGRAM_INTERRUPTED_BY_USER:
                return ExitCode.PROGRAM_INTERRUPTED_BY_USER
            if calc_res == hash_calc.FileHashCalc.ReturnCode.DATA_READ_ERROR:
                return ExitCode.DATA_READ_ERROR
            raise Exception(f"Error on calculation of the hash: {calc_res}")
        hash_value = calc.result

        hash_storage.set_hash(input_file_name, hash_value)

        output_file_name = hash_storage.get_hash_file_name(input_file_name)
        self._info("HASH:", hash_value,
                   "(storage in file '" + output_file_name + "')")

        end_date_time = datetime.now()
        self._info("Handle file end time: " +
                   util.get_datetime_str(end_date_time) + " (" +
                   input_file_name + ")")
        # Fix: compute the average speed from the precise elapsed time.
        # Previously the elapsed time was truncated to whole seconds before
        # dividing, which reported 0 speed for any file hashed in under a
        # second and skewed the speed for short runs. The integer value is
        # still used for the human-readable duration display.
        elapsed = (end_date_time - start_date_time).total_seconds()
        seconds = int(elapsed)

        file_size = os.path.getsize(input_file_name)
        speed = file_size / elapsed if elapsed > 0 else 0
        self._info(
            f"Elapsed time for file: {util.format_seconds(seconds)} (Average speed: {util.convert_size_to_display(speed)}/sec)"
        )

        if self._cmd_line_args.pause_after_file is not None:
            if not util.pause(self._cmd_line_args.pause_after_file):
                # util.pause returns falsy when the user interrupts the wait.
                return ExitCode.PROGRAM_INTERRUPTED_BY_USER

        return ExitCode.OK