コード例 #1
0
    def test_find_duplicates(self):
        # Directory scan only: the expected mapping has file2 as the key and
        # file1 listed under it.
        from_directory = dict(find_duplicates([], [self.directory]))
        self.assertDictEqual(from_directory, {self.file2: [self.file1]})

        # Explicit file list only: file1 is the key, file2 is listed under it.
        explicit_files = [self.file1, self.file2, self.file3]
        from_files = dict(find_duplicates(explicit_files, []))
        self.assertDictEqual(from_files, {self.file1: [self.file2]})

        # A file that does not exist must surface as FileNotFoundError.
        with self.assertRaises(FileNotFoundError):
            find_duplicates([FileObject('spam'), FileObject('eggs')], [])

        # Symlink handling: symlink1 is passed as a file and symdirectory is
        # scanned as a directory in the same call.
        symlink_result = dict(
            find_duplicates([self.symlink1], [self.symdirectory]))
        listed_under_symlink1 = symlink_result[self.symlink1]

        # symlink1 itself shows up in the list kept under symlink1.
        self.assertIn(self.symlink1, listed_under_symlink1)

        # symlink2 is listed as well, because both links point to one file.
        self.assertIn(self.symlink2, listed_under_symlink1)

        # The regular file the links point to must not be listed as a
        # duplicate of a symbolic link.
        self.assertNotIn(self.symfile, listed_under_symlink1)
コード例 #2
0
 def test_hash(self):
     # A FileObject built separately for this path must hash like fsobject1
     # (presumably fsobject1 wraps the same path -- confirm against setUp).
     twin_path = 'tests/hashchecker_test_files/file_with_some_text.txt'
     first_hash = hash(self.fsobject1)
     twin_hash = hash(FileObject(twin_path))
     self.assertEqual(first_hash, twin_hash)

     # Two different fixtures must not share a hash.
     self.assertNotEqual(hash(self.fsobject1), hash(self.fsobject2))
コード例 #3
0
ファイル: hashchecker.py プロジェクト: advaithhl/Hashchecker
def get_files_dirs(arg_list):
    """
    Split path arguments into file objects and directory objects.

    Every argument that is an existing regular file is wrapped in a
    `FileObject`, every argument that is an existing directory is wrapped in
    a `DirectoryObject`. For anything else a notice is printed and the
    argument is dropped.

    Params
    ------

    `arg_list`:     An iterable of paths

    Returns
    -------

    A tuple `(files, dirs)` of two lists, each in argument order.
    """
    found_files, found_dirs = [], []
    for path in arg_list:
        if os_path.isfile(path):
            found_files.append(FileObject(path))
            continue
        if os_path.isdir(path):
            found_dirs.append(DirectoryObject(path))
            continue
        # Neither an existing file nor an existing directory.
        print(f"- I did not find a file named '{path}'")
    return found_files, found_dirs
コード例 #4
0
ファイル: hashchecker.py プロジェクト: advaithhl/Hashchecker
def cli_verify(arg_list, plaintext):
    """
    Interactively verify files against checksums typed in by the user.

    For every path in `arg_list` the user is prompted for the expected
    checksum. The prompt is repeated until the entry has the length of one of
    the accepted digests. All accepted files are then handed to `verify`
    together with the entered checksums, and the outcome is printed.

    Paths for which no file exists are reported and skipped. An
    `IsADirectoryError` raised while creating or probing the file object is
    printed and the path is skipped.

    Params
    ------

    `arg_list`:     An iterable of paths of the files to verify
    `plaintext`:    If truthy, print one `path: status` line per file,
                    otherwise print a two column table

    Raises
    ------

    `SystemExit`:   With status 1 when none of the paths is a usable file
    """
    # Lengths of hexadecimal MD5, SHA1, SHA256 and SHA512 digests.
    valid_lengths = (32, 40, 64, 128)

    print('+ I will automatically identify these checksums:', CHECKSUMS)
    print('+ Please enter any of the above checksum for each file\n')

    file_objects = []
    correct_checksums = []
    for arg in arg_list:
        # Only object creation and the existence probe are guarded, so the
        # two lists above can never get out of step with each other.
        try:
            file_object = FileObject(arg)
            found = file_object.exists
        except IsADirectoryError as iade:
            print(iade)
            continue
        if not found:
            print(f"- I did not find a file named '{arg}'")
            continue

        file_objects.append(file_object)
        # Strip surrounding whitespace so a pasted checksum with a trailing
        # blank is not rejected by the length check.
        checksum = input(f'{arg}: ').strip()
        while len(checksum) not in valid_lengths:
            print(
                "? Sorry, this does not look like a valid checksum "
                "produced by any of the supported algorithms:", CHECKSUMS)
            print(
                "? Please verify whether you have entered the actual "
                f"checksum of '{file_object.name}'.\n")
            checksum = input(f'{arg}: ').strip()
        correct_checksums.append(checksum)

    if not file_objects:
        # `exit()` is injected by the `site` module and is not guaranteed to
        # exist; raising SystemExit directly always works.
        raise SystemExit(1)

    print('\n+ Verifying... ', end='', flush=True)
    result = verify(file_objects, correct_checksums)
    print('Done!')

    def y_or_n(x):
        # Coloured label for a verification outcome.
        if x:
            return fgc.GREEN + 'Valid' + fgc.RESET
        return fgc.RED + 'Corrupt' + fgc.RESET

    if plaintext:
        for file_object, status in result.items():
            print(file_object.fspath + ': ' + y_or_n(status))
    else:
        headers = ['Filename', 'Status']
        table = {
            pretty_table_left(file_object.name):
                pretty_table_right(y_or_n(status))
            for file_object, status in result.items()
        }
        pretty_print(table, headers)
コード例 #5
0
ファイル: core_actions.py プロジェクト: advaithhl/Hashchecker
def find_duplicates(file_objects, directory_objects):
    """
    Find duplicate files.

    Two files count as duplicates of each other when they have the same
    cryptographic hash (SHA1). To avoid hashing every file, hashes are only
    computed for a pair of files whose sizes are the same, which brings
    redundant checksum calculations down to nearly none.

    File sizes are compared with a modified BST. Refer `core.utils.BST`.
    A temporary file is used to create the first entry of that tree.

    The `FileObject`s from `file_objects` are examined first, followed by
    every file found in the directories of `directory_objects`. Directories
    are searched recursively and hidden files are included.

    A pair is skipped when exactly one of the two files is a symbolic link.

    Params
    ------

    `file_objects`:         A list of `FileObject`s
    `directory_objects`:    A list of `DirectoryObject`s

    Returns
    -------

    A `defaultdict(list)` which maps a file to the list of files that were
    found to be duplicates of it.
    """
    def candidates():
        # Explicitly given files first, then the contents of the directories.
        yield from file_objects
        for directory_object in directory_objects:
            yield from directory_object.file_objects(show_hidden=True,
                                                     recursive=True)

    duplicates = defaultdict(list)
    with NamedTemporaryFile(delete=True) as placeholder:
        size_tree = BST(FileObject(placeholder.name))
        for candidate in candidates():
            # A truthy result is indexed as a pair below; presumably it holds
            # the already stored file and the new one -- confirm in BST.
            match = size_tree.insert(candidate)
            if not match:
                continue
            earlier, later = match[0], match[1]
            if earlier.islink ^ later.islink:
                # Exactly one of the two is a symbolic link.
                continue
            if earlier.sha1() == later.sha1():
                duplicates[earlier].append(later)

    return duplicates
コード例 #6
0
 def setUp(self):
     # Each row is (attribute name, wrapper class, path). The rows are
     # processed top to bottom, one fixture attribute per row.
     fixtures = (
         ('file1', FileObject,
          'tests/hashchecker_test_files/actions_test/file1'),
         ('file2', FileObject,
          'tests/hashchecker_test_files/actions_test/file2'),
         ('file3', FileObject,
          'tests/hashchecker_test_files/actions_test/.file3'),
         ('directory', DirectoryObject,
          'tests/hashchecker_test_files/actions_test'),
         ('symdirectory', DirectoryObject,
          'tests/hashchecker_test_files/symlinks'),
         ('symlink1', FileObject,
          'tests/hashchecker_test_files/symlinks/sym_link1'),
         ('symlink2', FileObject,
          'tests/hashchecker_test_files/symlinks/sym_link2'),
         ('symfile', FileObject,
          'tests/hashchecker_test_files/symlinks/file.txt'),
     )
     for attribute, wrapper, path in fixtures:
         setattr(self, attribute, wrapper(path))
コード例 #7
0
 def setUp(self):
     # Two fixtures: one for a text file from the test data directory and
     # one for the path 'spam' (presumably absent, going by the attribute
     # name).
     text_file_path = 'tests/hashchecker_test_files/file_with_some_text.txt'
     absent_path = 'spam'
     self.fobject = FileObject(text_file_path)
     self.non_existent_fobject = FileObject(absent_path)
コード例 #8
0
class TestFileObject(unittest.TestCase):
    # Tests for the size attribute and the digest methods of FileObject.

    def setUp(self):
        text_file_path = 'tests/hashchecker_test_files/file_with_some_text.txt'
        self.fobject = FileObject(text_file_path)
        self.non_existent_fobject = FileObject('spam')

    def _check_digest(self, method_name, expected_digest):
        """
        Check one digest method on both fixtures.

        The method named `method_name` must return `expected_digest` for
        `self.fobject` and must raise `FileNotFoundError` for
        `self.non_existent_fobject`.
        """
        actual_digest = getattr(self.fobject, method_name)()
        self.assertEqual(actual_digest, expected_digest)
        with self.assertRaises(FileNotFoundError):
            getattr(self.non_existent_fobject, method_name)()

    def test_size(self):
        expected_size = 46
        self.assertEqual(self.fobject.size, expected_size)
        with self.assertRaises(FileNotFoundError):
            # Reading the attribute is what is expected to raise.
            self.non_existent_fobject.size

    def test_md5(self):
        self._check_digest('md5', 'ce90a5f32052ebbcd3b20b315556e154')

    def test_sha1(self):
        self._check_digest('sha1', 'bae5ed658ab3546aee12f23f36392f35dba1ebdd')

    def test_sha256(self):
        self._check_digest(
            'sha256',
            '40d5c6f7fe5672fb52269f651c2b985867dfcfa4a5c5258e3d4'
            'f8736d5095037')

    def test_sha512(self):
        self._check_digest(
            'sha512',
            '1bb6557a6b5bbd39241a6417b0300a78e5e1cf9fe9abb7dd63e36be5df3c2a1ac242'
            '8134ede695e2238e6afcee3b405845b2e543991a3dc29d3dc1793b4cfa77')