def test_simple_force_calc_hash(self):
    """Check that an existing hash file is only overwritten with --force-calc-hash.

    A data file and a hash file holding a deliberately wrong value are
    created. The first run (no force flag) is expected to leave the hash
    file as is; the second run (with the force flag) is expected to store
    the real SHA-1 digest.
    """
    data_file_name = f"{self.work_path}/file1.txt"
    hash_file_name = f"{data_file_name}.sha1"
    stale_hash = "<wrong_hash_value>"
    expected_hash = "e8b0faa145c4590e3e424403e758f6d4b5347c45"

    with open(data_file_name, "w") as data_file:
        data_file.write("qwerty1234567890")
    with open(hash_file_name, "w") as hash_file:
        hash_file.write(stale_hash)

    # Run 1: no force flag, the stale hash file content must be unchanged.
    adapter = cmd_line.CommandLineAdapter()
    exit_code = adapter.run_cmd_line(
        f'--input-file {data_file_name} --suppress-console-reporting-output')
    self.assertEqual(exit_code, 0)
    with open(hash_file_name, "r") as hash_file:
        stored_hash = hash_file.read()
    self.assertEqual(stale_hash, stored_hash)

    # Run 2: force flag given, the first 40 characters must be the real digest.
    adapter = cmd_line.CommandLineAdapter()
    exit_code = adapter.run_cmd_line(
        f'--input-file {data_file_name} --force-calc-hash --suppress-console-reporting-output --suppress-output-file-comments')
    self.assertEqual(exit_code, 0)
    with open(hash_file_name, "r") as hash_file:
        stored_hash = hash_file.read()[:40]
    self.assertEqual(expected_hash, stored_hash)
def test_cli_unused_hash_records(self):
    """Check handling of hash-storage records for files not in the current run.

    Three runs share one hash storage file:
      1. the whole work folder (files 1-3) -> storage must equal ``hash_storage_123``;
      2. files 3 and 2 with ``--preserve-unused-hash-records`` -> storage must
         still equal ``hash_storage_123``;
      3. files 4 and 2 without that flag -> storage must equal ``hash_storage_24``.
    """
    for file_no in (1, 2, 3):
        shutil.copyfile(f"{self.data_path}/file{file_no}.txt", f"{self.work_path}/file{file_no}.txt")

    work_hash_storage_file = f"{self.work_path}/hash_storage.sha1"
    expected_storage_123 = f"{self.data_path}/hash_storage_123.sha1"
    expected_storage_24 = f"{self.data_path}/hash_storage_24.sha1"

    # Options shared by all three invocations.
    storage_opts = (f"--single-hash-file-name-base {work_hash_storage_file} --suppress-hash-file-name-postfix "
                    "--suppress-console-reporting-output --suppress-output-file-comments")
    mismatch_msg = f"Incorrect output hash file (file '{work_hash_storage_file}')"

    # Run 1: hash everything in the work folder.
    adapter = cmd_line.CommandLineAdapter()
    exit_code = adapter.run_cmd_line(f"--input-folder {self.work_path} {storage_opts}")
    self.assertEqual(exit_code, cmd_line.ExitCode.OK)
    self.assertTrue(filecmp.cmp(work_hash_storage_file, expected_storage_123, shallow=False), mismatch_msg)

    # Run 2: only two of the files, but unused records are preserved (essence of the test).
    adapter = cmd_line.CommandLineAdapter()
    exit_code = adapter.run_cmd_line(
        f"--input-file {self.work_path}/file3.txt --input-file {self.work_path}/file2.txt "
        f"--preserve-unused-hash-records {storage_opts}")
    self.assertEqual(exit_code, cmd_line.ExitCode.OK)
    self.assertTrue(filecmp.cmp(work_hash_storage_file, expected_storage_123, shallow=False), mismatch_msg)

    # Run 3: a new file plus file2, this time without preserving unused records.
    shutil.copyfile(f"{self.data_path}/file4.txt", f"{self.work_path}/file4.txt")
    adapter = cmd_line.CommandLineAdapter()
    exit_code = adapter.run_cmd_line(
        f"--input-file {self.work_path}/file4.txt --input-file {self.work_path}/file2.txt {storage_opts}")
    self.assertEqual(exit_code, cmd_line.ExitCode.OK)
    self.assertTrue(filecmp.cmp(work_hash_storage_file, expected_storage_24, shallow=False), mismatch_msg)
def test_non_existent_file_and_folder_error(self):
    """Check that a missing input folder or input file yields DATA_READ_ERROR.

    Paths are built with ``os.path.join`` so that the existing file really is
    found on every platform and only the fake entries trigger the error.
    On Windows the resulting paths are the same as with a hard-coded
    backslash separator.
    """
    shutil.copyfile(f'{self.data_path}/file1.txt', f'{self.work_path}/file1.txt')

    missing_folder = os.path.join(self.work_path, "fake_folder")
    existing_file = os.path.join(self.work_path, "file1.txt")
    missing_file = os.path.join(self.work_path, "fake_file.txt")

    # Non-existent input folder.
    cmd_line_adapter = cmd_line.CommandLineAdapter()
    exit_code = cmd_line_adapter.run_cmd_line(
        f'--input-folder {missing_folder} --suppress-console-reporting-output')
    # assertEqual (instead of assertTrue(a == b)) reports the actual exit code on failure.
    self.assertEqual(exit_code, cmd_line.ExitCode.DATA_READ_ERROR, "Report on non-existent folder expected")

    # One existing and one non-existent input file.
    cmd_line_adapter = cmd_line.CommandLineAdapter()
    exit_code = cmd_line_adapter.run_cmd_line(
        f'--input-file {existing_file} --input-file {missing_file} --suppress-console-reporting-output')
    self.assertEqual(exit_code, cmd_line.ExitCode.DATA_READ_ERROR, "Report on non-existent file expected")
def test_calc_hash_for_one_small_file_sha1(self):
    """Hash one small file with the default settings and compare with the reference SHA-1.

    The scenario is executed twice: with the input folder given as is and
    with a trailing backslash appended to it. The input data file must be
    left unmodified in both cases.
    """
    source_file = f'{self.data_path}/file1.txt'

    # Without and with input folder trailing slash
    for trailing in ("", "\\"):
        tests.util_test.clean_work_dir()
        data_file_name = f'{self.work_path}/file1.txt'
        shutil.copyfile(source_file, data_file_name)

        adapter = cmd_line.CommandLineAdapter()
        exit_code = adapter.run_cmd_line(
            f'--input-folder {self.work_path}{trailing} --suppress-output-file-comments --suppress-console-reporting-output')
        self.assertEqual(exit_code, cmd_line.ExitCode.OK)

        with open(f'{self.data_path}/file1.txt.sha1', mode='r') as expected_file:
            sha1_expected = expected_file.read()
        with open(f'{self.work_path}/file1.txt.sha1', mode='r') as actual_file:
            # Only the first 40 characters (length of a SHA-1 hex digest) are compared.
            sha1_actual = actual_file.read()[:40]
        self.assertEqual(
            sha1_expected, sha1_actual,
            f'Wrong sha1-hash for file "file1.txt". Expected: "{sha1_expected}", actual: "{sha1_actual}"')

        # Ref: https://docs.python.org/3.7/library/filecmp.html
        data_file_intact = filecmp.cmp(source_file, data_file_name, shallow=False)
        self.assertTrue(data_file_intact, f"Input data file is corrupted! ({data_file_name})")
def test_calc_hash_with_user_comments(self):
    """Check that every ``--user-comment`` value appears in the hash file comments.

    The hash file is scanned line by line; a user comment counts as stored
    when it occurs in any line starting with ``#``.
    """
    shutil.copyfile(f'{self.data_path}/file1.txt', f'{self.work_path}/file1.txt')

    # We check that the following strings are in the output file comments
    user_comments = {"aaa-bbb-ccc", "Это кириллица", "English phrase"}

    # Values containing a space are wrapped in double quotes
    # (presumably so the command-line parser keeps each one as a single argument).
    user_comments_args = "".join(
        f' --user-comment "{uc}"' if " " in uc else f" --user-comment {uc}"
        for uc in user_comments
    )

    cmd_line_adapter = cmd_line.CommandLineAdapter()
    exit_code = cmd_line_adapter.run_cmd_line(
        f'--input-folder {self.work_path} --suppress-console-reporting-output {user_comments_args}')
    self.assertEqual(exit_code, cmd_line.ExitCode.OK)

    # Work on a copy so the set is never mutated while being iterated, and
    # drop *all* comments found in a line (several may share one line).
    missing_comments = set(user_comments)
    with open(f'{self.work_path}/file1.txt.sha1') as hash_file:
        for line in hash_file:
            if line.startswith("#"):
                missing_comments -= {uc for uc in missing_comments if uc in line}

    self.assertFalse(
        missing_comments,
        f"Some user comments are not stored in output hash file: {', '.join(sorted(missing_comments))}")
def test_calc_hash_with_comments_in_output_file(self):
    """Check the stored hash both with and without comments in the output file.

    Part 1 suppresses comments and uses a custom output postfix, so the hash
    is expected on the first line of ``file1.txt.sha1.test_hash``.
    Part 2 uses default settings, so the output must start with at least one
    ``#`` comment line followed by the hash line.
    """
    shutil.copyfile(f'{self.data_path}/file1.txt', f'{self.work_path}/file1.txt')
    expected_path = f'{self.data_path}/file1.txt.sha1'

    # --- Part 1: check hash in output file without comments ---
    adapter = cmd_line.CommandLineAdapter()
    exit_code = adapter.run_cmd_line(
        f'--input-folder {self.work_path} --suppress-console-reporting-output --suppress-output-file-comments --hash-file-name-output-postfix test_hash')
    self.assertEqual(exit_code, cmd_line.ExitCode.OK)

    # Note, the postfix is part of the file name, so it is checked as well
    actual_path = f'{self.work_path}/file1.txt.sha1.test_hash'
    with open(expected_path, mode='r') as expected_file:
        hash_expected = expected_file.read()
    with open(actual_path, mode='r') as actual_file:
        hash_actual = actual_file.readline()[:40]
    self.assertEqual(
        hash_expected, hash_actual,
        f'Wrong hash for input file "file1.txt". Expected: "{hash_expected}", actual: "{hash_actual}"')
    os.remove(actual_path)

    # --- Part 2: check hash in output file with comments ---
    adapter = cmd_line.CommandLineAdapter()
    exit_code = adapter.run_cmd_line(f'--input-folder {self.work_path} --suppress-console-reporting-output')
    self.assertEqual(exit_code, cmd_line.ExitCode.OK)

    actual_path = f'{self.work_path}/file1.txt.sha1'
    with open(expected_path, mode='r') as expected_file:
        hash_expected = expected_file.read()
    with open(actual_path, mode='r') as actual_file:
        # Skip the leading comment lines, counting them on the way.
        comment_lines = 0
        hash_line = actual_file.readline()
        while hash_line.startswith("#"):
            comment_lines += 1
            hash_line = actual_file.readline()
    self.assertTrue(comment_lines > 0, "Output file should have comment")

    hash_actual = hash_line[:40]
    self.assertEqual(
        hash_expected, hash_actual,
        f'Wrong hash for input file "file1.txt". Expected: "{hash_expected}", actual: "{hash_actual}"')
def test_error_report_on_equal_data_and_hash_file_names(self):
    """Expect APP_USAGE_ERROR when the hash file name would equal the data file name.

    Suppressing the hash-file-name postfix is what makes the two names
    coincide; the input data file must stay intact afterwards.
    """
    reference_file = f'{self.data_path}/file1.txt'
    input_file = f'{self.work_path}/file1.txt'
    shutil.copyfile(reference_file, input_file)

    adapter = cmd_line.CommandLineAdapter()
    exit_code = adapter.run_cmd_line(
        f'--input-file {input_file} --suppress-console-reporting-output --suppress-hash-file-name-postfix')
    self.assertEqual(exit_code, cmd_line.ExitCode.APP_USAGE_ERROR)

    # Ref: https://docs.python.org/3.7/library/filecmp.html
    input_intact = filecmp.cmp(reference_file, input_file, shallow=False)
    self.assertTrue(input_intact, f"Input data file is corrupted! ({input_file})")
def test_sort_non_ascii_file_names(self):
    """Hash a folder of files with non-ASCII names into a single hash storage.

    The produced storage file is compared byte by byte with the reference
    storage kept in the ``cyrillic_files`` data folder.
    """
    work_hash_file = f"{self.work_path}/hash_storage.sha1"
    data_hash_file = f"{self.data_path}/cyrillic_files/hash_storage.sha1"

    source_dir = os.path.join(self.data_path, "cyrillic_files")
    file_names = (
        "а кириллическое.txt",
        "В прописное.txt",
        "в строчное.txt",
        "имя_без_пробелов.txt",
        "українська мова.txt",
        "Л_вначале.txt",
    )
    for file_name in file_names:
        shutil.copyfile(os.path.join(source_dir, file_name), os.path.join(self.work_path, file_name))

    options = [
        f"--input-folder {self.work_path}",
        f"--single-hash-file-name-base {work_hash_file}",
        "--suppress-hash-file-name-postfix",
        "--suppress-console-reporting-output",
        "--suppress-output-file-comments",
    ]
    adapter = cmd_line.CommandLineAdapter()
    exit_code = adapter.run_cmd_line(" ".join(options))
    self.assertEqual(exit_code, cmd_line.ExitCode.OK)

    storages_match = filecmp.cmp(work_hash_file, data_hash_file, shallow=False)
    self.assertTrue(storages_match, f"Incorrect output hash file (file '{work_hash_file}')")
def test_calc_hash_for_one_small_file_md5(self):
    """Hash one small file with ``--hash-algo md5`` and compare with the reference MD5."""
    shutil.copyfile(f'{self.data_path}/file1.txt', f'{self.work_path}/file1.txt')

    adapter = cmd_line.CommandLineAdapter()
    exit_code = adapter.run_cmd_line(
        f'--input-folder {self.work_path} --hash-algo md5 --suppress-console-reporting-output --suppress-output-file-comments')
    self.assertEqual(exit_code, cmd_line.ExitCode.OK)

    with open(f'{self.data_path}/file1.txt.md5', mode='r') as expected_file:
        md5_expected = expected_file.read()
    with open(f'{self.work_path}/file1.txt.md5', mode='r') as actual_file:
        # Only the first 32 characters (length of an MD5 hex digest) are compared.
        md5_actual = actual_file.read()[:32]

    self.assertEqual(
        md5_expected, md5_actual,
        f'Wrong md5-hash for file "file1.txt". Expected: "{md5_expected}", actual: "{md5_actual}"')
def test_cli_wrong_input_hash_file(self):
    """Expect APP_USAGE_ERROR for each of the three wrongly formatted hash storages.

    The invalid storage file must be left byte-identical to its source.
    """
    for format_no in range(1, 4):
        tests.util_test.clean_work_dir()

        hash_storage_file = f"dummy_hash_storage_4_wrong_format_{format_no}.sha1"
        data_hash_storage_file = os.path.join(self.data_path, "hash_storages", hash_storage_file)
        work_hash_storage_file = os.path.join(self.work_path, hash_storage_file)
        shutil.copyfile(data_hash_storage_file, work_hash_storage_file)

        options = [
            f"--input-folder {self.work_path}",
            "--input-folder-file-mask-exclude *",
            "--suppress-console-reporting-output",
            f"--single-hash-file-name-base {work_hash_storage_file}",
            "--suppress-hash-file-name-postfix",
            "--use-absolute-file-names",
        ]

        # Run command which does not handle any files
        adapter = cmd_line.CommandLineAdapter()
        exit_code = adapter.run_cmd_line(" ".join(options))
        self.assertEqual(exit_code, cmd_line.ExitCode.APP_USAGE_ERROR)

        storage_untouched = filecmp.cmp(work_hash_storage_file, data_hash_storage_file, shallow=False)
        self.assertTrue(storage_untouched,
                        f"Invalid input hash file is corrupted (file '{work_hash_storage_file}')")
def test_cli_simple_hash_storages_abs(self):
    """Load and re-save a hash storage with absolute file names.

    No input file is processed (everything is excluded by the ``*`` mask);
    the re-saved storage must equal the reference ``<storage>.save`` file.
    """
    hash_storage_file = "dummy_hash_storage_1_general_abs.sha1"
    tests.util_test.clean_work_dir()

    data_hash_storage_file = os.path.join(self.data_path, "hash_storages", hash_storage_file)
    work_hash_storage_file = os.path.join(self.work_path, hash_storage_file)
    shutil.copyfile(data_hash_storage_file, work_hash_storage_file)

    options = [
        f"--input-folder {self.work_path}",
        "--input-folder-file-mask-exclude *",
        "--suppress-output-file-comments",
        "--suppress-console-reporting-output",
        f"--single-hash-file-name-base {work_hash_storage_file}",
        "--suppress-hash-file-name-postfix",
        "--use-absolute-file-names",
        "--preserve-unused-hash-records",
    ]

    # Run command which does not handle any files
    adapter = cmd_line.CommandLineAdapter()
    exit_code = adapter.run_cmd_line(" ".join(options))
    self.assertEqual(exit_code, cmd_line.ExitCode.OK)

    expected_saved_storage = data_hash_storage_file + ".save"
    self.assertTrue(filecmp.cmp(work_hash_storage_file, expected_saved_storage, shallow=False),
                    f"Wrong data on output (file '{data_hash_storage_file}')")
def test_calc_hash_for_three_small_files_sha1(self):
    """Hash three small files in one run and verify each SHA-1 and each input file.

    For every file two sub-tests are reported: one for the hash value and
    one confirming that the input data file was not modified.
    """
    for i in range(1, 4):
        shutil.copyfile(f'{self.data_path}/file{i}.txt', f'{self.work_path}/file{i}.txt')

    cmd_line_adapter = cmd_line.CommandLineAdapter()
    exit_code = cmd_line_adapter.run_cmd_line(
        f'--input-folder {self.work_path} --suppress-console-reporting-output --suppress-output-file-comments')
    self.assertEqual(exit_code, cmd_line.ExitCode.OK)

    for i in range(1, 4):
        with open(f'{self.data_path}/file{i}.txt.sha1', mode='r') as sha1_expected_file:
            sha1_expected = sha1_expected_file.read()
        with open(f'{self.work_path}/file{i}.txt.sha1', mode='r') as sha1_actual_file:
            sha1_actual = sha1_actual_file.read()
        # Only the first 40 characters (length of a SHA-1 hex digest) are compared.
        sha1_actual = sha1_actual[:40]

        with self.subTest(i=i):
            self.assertEqual(
                sha1_expected, sha1_actual,
                f'Wrong sha1-hash for file "file{i}.txt". Expected: "{sha1_expected}", actual: "{sha1_actual}"')

        with self.subTest(i=i):
            # Ref: https://docs.python.org/3.7/library/filecmp.html
            work_file_name = f'{self.work_path}/file{i}.txt'
            # The message now interpolates the real path; a misplaced quote
            # previously made it print the placeholder text literally.
            self.assertTrue(filecmp.cmp(f'{self.data_path}/file{i}.txt', work_file_name, shallow=False),
                            f"Input data file is corrupted! ({work_file_name})")
def test_cli_error_on_equal_data_and_hash_file_names(self):
    """Expect APP_USAGE_ERROR when the input file is itself the hash file.

    Checked in two modes: per-file hash files (postfix suppressed) and a
    single hash storage whose name equals the input file name. The input
    file must be left byte-identical in both modes.
    """
    hash_storage_file = "dummy_hash_storage_1_general_abs.sha1"

    for single_hash_file in (False, True):
        tests.util_test.clean_work_dir()

        data_hash_storage_file = os.path.join(self.data_path, "hash_storages", hash_storage_file)
        work_hash_storage_file = os.path.join(self.work_path, hash_storage_file)
        shutil.copyfile(data_hash_storage_file, work_hash_storage_file)

        options = [
            f"--input-file {work_hash_storage_file}",
            "--suppress-hash-file-name-postfix",
            "--suppress-console-reporting-output",
        ]
        if single_hash_file:
            options.append(f"--single-hash-file-name-base {work_hash_storage_file}")

        adapter = cmd_line.CommandLineAdapter()
        exit_code = adapter.run_cmd_line(" ".join(options))
        self.assertEqual(exit_code, cmd_line.ExitCode.APP_USAGE_ERROR)

        input_untouched = filecmp.cmp(work_hash_storage_file, data_hash_storage_file, shallow=False)
        self.assertTrue(input_untouched, f"Input hash file is corrupted (file '{work_hash_storage_file}')")
def _test_sandbox(self):
    """Scratch test: hash one file and print the top 10 memory allocation sites.

    The leading underscore keeps the test runner from collecting it.
    Tracing is switched off again on exit (unless it was already active
    before the call), so later code does not run with tracemalloc enabled.
    """
    # Ref: https://docs.python.org/3/library/tracemalloc.html
    was_tracing = tracemalloc.is_tracing()
    tracemalloc.start()
    try:
        print("test_dummy run")

        shutil.copyfile(f'{self.data_path}/file1.txt', f'{self.work_path}/file1.txt')

        cmd_line_adapter = cmd_line.CommandLineAdapter()
        cmd_line_adapter.run_cmd_line(f'--input-folder {self.work_path} --suppress-console-reporting-output')

        with open(f'{self.data_path}/file1.txt.sha1', mode='r') as sha1_expected_file:
            sha1_expected = sha1_expected_file.read()
        with open(f'{self.work_path}/file1.txt.sha1', mode='r') as sha1_actual_file:
            sha1_actual = sha1_actual_file.read()
        sha1_actual = sha1_actual[:40]
        self.assertEqual(
            sha1_expected, sha1_actual,
            f'Wrong sha1-hash for file "file1.txt". Expected: "{sha1_expected}", actual: "{sha1_actual}"')

        snapshot = tracemalloc.take_snapshot()
        top_stats = snapshot.statistics('lineno')

        print("[ Top 10 ]")
        for stat in top_stats[:10]:
            print(stat)
    finally:
        # Do not leave tracing enabled if this method was the one to start it.
        if not was_tracing:
            tracemalloc.stop()
def test_specify_non_existent_file(self):
    """Expect DATA_READ_ERROR when the only input file does not exist."""
    missing_file = f'{self.work_path}/nofile.txt'
    adapter = cmd_line.CommandLineAdapter()
    exit_code = adapter.run_cmd_line(
        f'--input-file {missing_file} --suppress-console-reporting-output --retry-pause-on-data-read-error 0')
    self.assertEqual(exit_code, cmd_line.ExitCode.DATA_READ_ERROR)
import sys

import cmd_line

# breakpoint()

cmd_line_args = None

# Script entry point: forward the command-line arguments (without the program
# name) to the adapter and use its result as the process exit status.
if __name__ == '__main__':
    adapter = cmd_line.CommandLineAdapter()
    sys.exit(int(adapter.run(sys.argv[1:])))