def test_not_a_directory(self):
    """Check that roots which are not directories raise `ValueError`.

    Two invalid roots are tried: a path that does not exist and a path
    that points to a regular file.
    """
    self.mkdirs('root')
    self.mkfile('root/f1')

    invalid_roots = (
        'wrong_root',  # does not exist
        'root/f1',  # exists, but is a file
    )
    for relpath in invalid_roots:
        with pytest.raises(ValueError):
            get_included_paths(self.path_to(relpath))
def test_raise_on_infinite_recursion(self):
    """Check that a symlink back to an ancestor raises `RecursionError`.

    The raised error must expose the resolved path of the cycle
    (`real_path`), the two paths through which it was reached
    (`first_path`, `second_path`), and a message starting with
    'Symlink recursion:'.
    """
    self.mkdirs('root/d1')
    # root/d1/link_back -> root, i.e. a cycle when links are followed
    self.symlink('root', 'root/d1/link_back')

    with pytest.raises(RecursionError) as exc_info:
        get_included_paths(self.path_to('root'), follow_links=True)

    error = exc_info.value
    expected_real_path = os.path.realpath(self.path_to('root'))
    assert error.real_path == expected_real_path
    assert error.first_path == self.path_to('root/')
    assert error.second_path == self.path_to('root/d1/link_back')
    assert str(error).startswith('Symlink recursion:')
def test_symlinked_file(self):
    """Check that a symlinked file is listed regardless of `follow_links`."""
    self.mkdirs('root')
    self.mkfile('root/f1')
    self.mkfile('linked_file')
    self.symlink('linked_file', 'root/f2')

    # NOTE: `follow_links` has no effect when only the file itself is linked
    # (as is the case here); linked _files_ are always included.
    for follow_links in (False, True):
        included = get_included_paths(self.path_to('root'),
                                      follow_links=follow_links)
        assert included == ['f1', 'f2']
def test_basic(self):
    """Check the listing of a small nested tree, given as relative paths.

    The expected result is the list of file paths relative to the root,
    in the order asserted below.
    """
    self.mkdirs('root/d1/d11')
    self.mkdirs('root/d2')
    for relpath in ('root/f1', 'root/d1/f1', 'root/d1/d11/f1', 'root/d2/f1'):
        self.mkfile(relpath)

    expected = ['d1/d11/f1', 'd1/f1', 'd2/f1', 'f1']

    # whether the root ends with '/' or not should not matter
    for root in ('root', 'root/'):
        included = get_included_paths(self.path_to(root))
        assert included == expected
def test_exclude_hidden_dirs_and_files(self):
    """Check that "dot" files and directories can be filtered via `match`.

    Without patterns everything is listed; with the negated patterns
    '!.*/' and '!.*' hidden directories and hidden files are left out.
    """
    self.mkdirs('root/d1')
    self.mkdirs('root/.d2')
    fixture_files = (
        'root/f1',
        'root/.f2',
        'root/d1/f1',
        'root/d1/.f2',
        'root/.d2/f1',
    )
    for relpath in fixture_files:
        self.mkfile(relpath)

    # no ignore
    everything = get_included_paths(self.path_to('root'))
    assert everything == ['.d2/f1', '.f2', 'd1/.f2', 'd1/f1', 'f1']

    # using ignore
    visible_only = get_included_paths(self.path_to('root'),
                                      match=['*', '!.*/', '!.*'])
    assert visible_only == ['d1/f1', 'f1']
def test_symlinked_dir(self):
    """Check that a symlinked directory is traversed only if links are followed."""
    self.mkdirs('root')
    self.mkfile('root/f1')
    self.mkdirs('linked_dir')
    self.mkfile('linked_dir/f1')
    self.mkfile('linked_dir/f2')
    # root/d1 -> linked_dir
    self.symlink('linked_dir', 'root/d1')

    not_followed = get_included_paths(self.path_to('root'),
                                      follow_links=False)
    assert not_followed == ['f1']

    with_linked_dir = ['d1/f1', 'd1/f2', 'f1']
    followed = get_included_paths(self.path_to('root'), follow_links=True)
    assert followed == with_linked_dir

    # default is `follow_links=True`
    by_default = get_included_paths(self.path_to('root'))
    assert by_default == with_linked_dir
def test_empty_dir_not_included_due_to_not_match(self):
    """Check how `match` patterns apply to empty directories.

    With `include_empty=True` an empty directory is listed as '<dir>/.',
    but only if its path passes the match patterns.
    """
    self.mkdirs('root/d1')
    self.mkdirs('root/.d2')

    # NOTE that empty dirs are matched as if they were files (leafs!), which
    # is why the directory patterns ('!.*/', '!d1/') do not exclude them.
    # TODO better option?
    cases = (
        (['*', '!.*'], ['d1/.']),
        (['*', '!.*/'], ['.d2/.', 'd1/.']),
        (['*', '!d1/'], ['.d2/.', 'd1/.']),
    )
    for patterns, expected in cases:
        included = get_included_paths(self.path_to('root'),
                                      match=patterns,
                                      include_empty=True)
        assert included == expected
def test_empty_dirs_include_vs_exclude(self):
    """Check the effect of `include_empty` on directories without files.

    `d2` and `d4/d41` contain no files; they are listed (as '<dir>/.')
    only when `include_empty=True`.
    """
    for relpath in ('root/d1', 'root/d2', 'root/d3/d31', 'root/d4/d41'):
        self.mkdirs(relpath)
    self.mkfile('root/d1/f')
    self.mkfile('root/d3/d31/f')

    files_only = ['d1/f', 'd3/d31/f']

    excluded = get_included_paths(self.path_to('root'), include_empty=False)
    assert excluded == files_only

    # `include_empty=False` is default
    by_default = get_included_paths(self.path_to('root'))
    assert by_default == files_only

    included = get_included_paths(self.path_to('root'), include_empty=True)
    assert included == ['d1/f', 'd2/.', 'd3/d31/f', 'd4/d41/.']
def test_empty_dirs_because_of_filter_include_vs_exclude(self):
    """Check `include_empty` for directories emptied by the match filter.

    `d2` only holds the hidden file '.f', which the '!.*' pattern filters
    out; the directory is then listed as 'd2/.' only when
    `include_empty=True`.
    """
    self.mkdirs('root/d1')
    self.mkdirs('root/d2')
    self.mkfile('root/d1/f')
    self.mkfile('root/d2/.f')

    no_hidden = ['*', '!.*']

    excluded = get_included_paths(self.path_to('root'),
                                  match=list(no_hidden),
                                  include_empty=False)
    assert excluded == ['d1/f']

    # `include_empty=False` is default
    by_default = get_included_paths(self.path_to('root'),
                                    match=list(no_hidden))
    assert by_default == ['d1/f']

    included = get_included_paths(self.path_to('root'),
                                  match=list(no_hidden),
                                  include_empty=True)
    assert included == ['d1/f', 'd2/.']
def test_exclude_extensions(self):
    """Check that files can be excluded by extension through `match`.

    Only names that end with '.skip1' or '.skip2' are dropped; names that
    merely contain those strings (e.g. 'fskip1', 'f.skip1.txt') remain.
    """
    self.mkdirs('root/d1')
    fixture_files = (
        'root/f',
        'root/f.txt',
        'root/f.skip1',
        'root/fskip1',
        'root/f.skip2',
        'root/f.skip1.txt',
        'root/f.skip1.skip2',
        'root/f.skip1skip2',
        'root/d1/f.txt',
        'root/d1/f.skip1',
    )
    for relpath in fixture_files:
        self.mkfile(relpath)

    included = get_included_paths(self.path_to('root'),
                                  match=['*', '!*.skip1', '!*.skip2'])

    expected = [
        'd1/f.txt',
        'f',
        'f.skip1.txt',
        'f.skip1skip2',
        'f.txt',
        'fskip1',
    ]
    assert included == expected
def _build_arg_parser():
    """Create the argument parser for the `dirhash` command line interface."""
    parser = argparse.ArgumentParser(
        description='Determine the hash for directory.')
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version='dirhash {}'.format(dirhash.__version__))
    parser.add_argument('directory', help='Directory to hash.')
    parser.add_argument(
        '-a',
        '--algorithm',
        choices=dirhash.algorithms_available,
        default='md5',
        help=(
            'Hashing algorithm to use. Always available: {}. Additionally available '
            'on current platform: {}. Note that the same algorithm may appear '
            'multiple times in this set under different names (thanks to '
            'OpenSSL) [https://docs.python.org/2/library/hashlib.html]'.format(
                sorted(dirhash.algorithms_guaranteed),
                sorted(dirhash.algorithms_available -
                       dirhash.algorithms_guaranteed))),
        # empty metavar keeps the (long) list of choices out of the usage line
        metavar='')
    parser.add_argument(
        '-m',
        '--match',
        type=str,
        default='*',
        help='String of match-patterns, separated by blank space.')
    parser.add_argument(
        '-i',
        '--ignore',
        type=str,
        default=None,
        help='String of ignore-patterns, separated by blank space.',
    )
    parser.add_argument(
        '-d',
        '--ignore-hidden',
        action='store_true',
        default=False,
        help='Ignore hidden ("dot") files and directories (short for '
        '`-ignore ".*, "`).')
    parser.add_argument('-x',
                        '--ignore-extensions',
                        nargs='+',
                        help='List of file extensions to ignore.',
                        metavar='')

    # hashing only content and hashing only paths exclude each other
    target_group = parser.add_mutually_exclusive_group(required=False)
    target_group.add_argument(
        '-c',
        '--content-only',
        action='store_true',
        default=False,
        help='Hash only the content of files, not the name and location of files '
        'within the directory. NOTE (!) the hash will be different if the '
        '(alpha numerical) order of file paths changes.')
    target_group.add_argument(
        '-p',
        '--paths-only',
        action='store_true',
        default=False,
        help='Hash only the file paths, i.e. the name and location of files '
        'within the directory.')

    parser.add_argument(
        '--no-follow-links',
        dest='follow_links',
        action='store_false',
        help='Do not follow symbolic links to other *directories*. NOTE: directly '
        'linked files are always included.')
    parser.set_defaults(follow_links=True)
    parser.add_argument(
        '--include-empty',
        action='store_true',
        default=False,
        help='Include empty directories (containing no files that meet the matching '
        'criteria). Note that the path to the directory itself must still meet '
        'the matching criteria (matched as if it was a file).')
    parser.add_argument(
        '-s',
        '--chunk-size',
        default=2**20,
        type=int,
        help='The chunk size (in bytes) for reading of files.')
    parser.add_argument(
        '-w',
        '--workers',
        type=int,
        default=1,
        help='Number of workers (parallel processes) to use.')
    parser.add_argument(
        '-l',
        '--list',
        action='store_true',
        default=False,
        help='List the file paths that will be taken into account, followed by the '
        'hash of directory structure')
    return parser


def main():
    """Run the `dirhash` command line interface.

    Parses the command line, then either prints the included paths (one
    per line, when `--list` is given) or prints the directory hash.

    Any exception raised while preprocessing the arguments, listing or
    hashing is written to stderr as 'dirhash: <message>' and the process
    exits with status 1.
    """
    args = _build_arg_parser().parse_args()
    try:
        kwargs = preprocess_kwargs(vars(args))
        if kwargs.pop('list'):
            # kwargs below have no effect when listing
            hashing_only_options = (
                'chunk_size',
                'content_only',
                'paths_only',
                'algorithm',
                'workers',
            )
            for option in hashing_only_options:
                kwargs.pop(option)
            for leafpath in dirhash.get_included_paths(**kwargs):
                print(leafpath)
        else:
            print(dirhash.dirhash(**kwargs))
    except Exception as e:
        # top-level CLI boundary: report the error and signal failure
        sys.stderr.write('dirhash: {}\n'.format(e))
        sys.exit(1)