예제 #1
0
 def test_not_a_directory(self):
     """A root that is not a directory is rejected with ``ValueError``."""
     self.mkdirs('root')
     self.mkfile('root/f1')
     # 'wrong_root' is never created above; 'root/f1' is created with
     # `mkfile`, i.e. it is not a directory either.
     for relpath in ('wrong_root', 'root/f1'):
         with pytest.raises(ValueError):
             get_included_paths(self.path_to(relpath))
예제 #2
0
 def test_raise_on_infinite_recursion(self):
     """Following a symlink cycle raises ``RecursionError`` with path details."""
     self.mkdirs('root/d1')
     self.symlink('root', 'root/d1/link_back')

     with pytest.raises(RecursionError) as raised:
         get_included_paths(self.path_to('root'), follow_links=True)

     error = raised.value
     # The error reports the resolved directory plus the two paths through
     # which it was reached.
     assert error.real_path == os.path.realpath(self.path_to('root'))
     assert error.first_path == self.path_to('root/')
     assert error.second_path == self.path_to('root/d1/link_back')
     assert str(error).startswith('Symlink recursion:')
예제 #3
0
    def test_symlinked_file(self):
        """A symlinked file is listed regardless of `follow_links`."""
        self.mkdirs('root')
        for relpath in ('root/f1', 'linked_file'):
            self.mkfile(relpath)
        self.symlink('linked_file', 'root/f2')

        # NOTE: `follow_links` has no effect if only the file is linked (as is
        # the case here); linked _files_ are always included.
        for follow in (False, True):
            listed = get_included_paths(self.path_to('root'),
                                        follow_links=follow)
            assert listed == ['f1', 'f2']
예제 #4
0
    def test_basic(self):
        """Every file below root is returned as a path relative to root."""
        for dirpath in ('root/d1/d11', 'root/d2'):
            self.mkdirs(dirpath)

        nested_files = (
            'root/f1',
            'root/d1/f1',
            'root/d1/d11/f1',
            'root/d2/f1',
        )
        for filepath in nested_files:
            self.mkfile(filepath)

        expected = ['d1/d11/f1', 'd1/f1', 'd2/f1', 'f1']
        # Whether the root ends with '/' or not should not matter.
        for root in ('root', 'root/'):
            listed = get_included_paths(self.path_to(root))
            assert listed == expected
예제 #5
0
    def test_exclude_hidden_dirs_and_files(self):
        """Dot-files and dot-directories are listed unless patterns exclude them."""
        for dirpath in ('root/d1', 'root/.d2'):
            self.mkdirs(dirpath)

        fixture_files = (
            'root/f1',
            'root/.f2',
            'root/d1/f1',
            'root/d1/.f2',
            'root/.d2/f1',
        )
        for filepath in fixture_files:
            self.mkfile(filepath)

        # No patterns given: hidden entries are listed like everything else.
        unfiltered = get_included_paths(self.path_to('root'))
        assert unfiltered == ['.d2/f1', '.f2', 'd1/.f2', 'd1/f1', 'f1']

        # With the negated patterns, nothing hidden remains in the result.
        no_hidden = ['*', '!.*/', '!.*']
        visible = get_included_paths(self.path_to('root'), match=no_hidden)
        assert visible == ['d1/f1', 'f1']
예제 #6
0
    def test_symlinked_dir(self):
        """A symlinked directory is traversed only when links are followed."""
        self.mkdirs('root')
        self.mkfile('root/f1')
        self.mkdirs('linked_dir')
        for filepath in ('linked_dir/f1', 'linked_dir/f2'):
            self.mkfile(filepath)
        self.symlink('linked_dir', 'root/d1')

        through_link = ['d1/f1', 'd1/f2', 'f1']
        cases = (
            ({'follow_links': False}, ['f1']),
            ({'follow_links': True}, through_link),
            ({}, through_link),  # default is `follow_links=True`
        )
        for options, expected in cases:
            listed = get_included_paths(self.path_to('root'), **options)
            assert listed == expected
예제 #7
0
    def test_empty_dir_not_included_due_to_not_match(self):
        """An empty directory is listed only if its own path passes the match."""
        for dirpath in ('root/d1', 'root/.d2'):
            self.mkdirs(dirpath)

        # NOTE: empty dirs are matched as if they were files (leaves!), so the
        # directory-style patterns ending in '/' below exclude neither of them.
        # TODO better option?
        cases = (
            (['*', '!.*'], ['d1/.']),
            (['*', '!.*/'], ['.d2/.', 'd1/.']),
            (['*', '!d1/'], ['.d2/.', 'd1/.']),
        )
        for patterns, expected in cases:
            listed = get_included_paths(self.path_to('root'),
                                        match=patterns,
                                        include_empty=True)
            assert listed == expected
예제 #8
0
    def test_empty_dirs_include_vs_exclude(self):
        """Empty directories appear as ``<dir>/.`` only with `include_empty=True`."""
        for dirpath in ('root/d1', 'root/d2', 'root/d3/d31', 'root/d4/d41'):
            self.mkdirs(dirpath)
        for filepath in ('root/d1/f', 'root/d3/d31/f'):
            self.mkfile(filepath)

        files_only = ['d1/f', 'd3/d31/f']
        cases = (
            ({'include_empty': False}, files_only),
            ({}, files_only),  # `include_empty=False` is default
            ({'include_empty': True},
             ['d1/f', 'd2/.', 'd3/d31/f', 'd4/d41/.']),
        )
        for options, expected in cases:
            listed = get_included_paths(self.path_to('root'), **options)
            assert listed == expected
예제 #9
0
    def test_empty_dirs_because_of_filter_include_vs_exclude(self):
        """A directory left empty by filtering obeys `include_empty` as well."""
        for dirpath in ('root/d1', 'root/d2'):
            self.mkdirs(dirpath)
        for filepath in ('root/d1/f', 'root/d2/.f'):
            self.mkfile(filepath)

        # 'd2' holds only '.f', which the '!.*' pattern filters out.
        cases = (
            ({'include_empty': False}, ['d1/f']),
            ({}, ['d1/f']),  # `include_empty=False` is default
            ({'include_empty': True}, ['d1/f', 'd2/.']),
        )
        for options, expected in cases:
            listed = get_included_paths(self.path_to('root'),
                                        match=['*', '!.*'],
                                        **options)
            assert listed == expected
예제 #10
0
    def test_exclude_extensions(self):
        """Negated ``*.ext`` patterns drop only names ending in that extension."""
        self.mkdirs('root/d1')

        fixture_files = (
            'root/f',
            'root/f.txt',
            'root/f.skip1',
            'root/fskip1',
            'root/f.skip2',
            'root/f.skip1.txt',
            'root/f.skip1.skip2',
            'root/f.skip1skip2',
            'root/d1/f.txt',
            'root/d1/f.skip1',
        )
        for filepath in fixture_files:
            self.mkfile(filepath)

        skip_patterns = ['*', '!*.skip1', '!*.skip2']
        kept = get_included_paths(self.path_to('root'), match=skip_patterns)
        # Near misses such as 'fskip1', 'f.skip1.txt' and 'f.skip1skip2' stay.
        assert kept == [
            'd1/f.txt', 'f', 'f.skip1.txt', 'f.skip1skip2', 'f.txt', 'fskip1'
        ]
예제 #11
0
def main(argv=None):
    """Command-line entry point: print a directory hash or its included paths.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, so existing
            ``main()`` callers are unaffected.

    The parsed arguments are passed through ``preprocess_kwargs``. With
    ``--list`` each path returned by ``dirhash.get_included_paths`` is
    printed on its own line; otherwise the result of ``dirhash.dirhash`` is
    printed. Any ``Exception`` raised while doing so is reported on stderr
    as ``dirhash: <message>`` and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description='Determine the hash for directory.')
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version='dirhash {}'.format(dirhash.__version__))
    parser.add_argument('directory', help='Directory to hash.')
    parser.add_argument(
        '-a',
        '--algorithm',
        choices=dirhash.algorithms_available,
        default='md5',
        help=
        ('Hashing algorithm to use. Always available: {}. Additionally available '
         'on current platform: {}. Note that the same algorithm may appear '
         'multiple times in this set under different names (thanks to '
         'OpenSSL) [https://docs.python.org/2/library/hashlib.html]'.format(
             sorted(dirhash.algorithms_guaranteed),
             sorted(dirhash.algorithms_available -
                    dirhash.algorithms_guaranteed))),
        # Empty metavar keeps the (long) list of choices out of the usage line.
        metavar='')
    parser.add_argument(
        '-m',
        '--match',
        type=str,
        default='*',
        help='String of match-patterns, separated by blank space.')
    parser.add_argument(
        '-i',
        '--ignore',
        type=str,
        default=None,
        help='String of ignore-patterns, separated by blank space.',
    )
    parser.add_argument(
        '-d',
        '--ignore-hidden',
        action='store_true',
        default=False,
        help='Ignore hidden ("dot") files and directories (short for '
        '`-ignore ".*, "`).')
    parser.add_argument('-x',
                        '--ignore-extensions',
                        nargs='+',
                        help='List of file extensions to ignore.',
                        metavar='')

    # Hashing only the content and hashing only the paths cannot be combined.
    target_group = parser.add_mutually_exclusive_group(required=False)
    target_group.add_argument(
        '-c',
        '--content-only',
        action='store_true',
        default=False,
        help=
        'Hash only the content of files, not the name and location of files '
        'within the directory. NOTE (!) the hash will be different if the '
        '(alpha numerical) order of file paths changes.')
    target_group.add_argument(
        '-p',
        '--paths-only',
        action='store_true',
        default=False,
        help='Hash only the file paths, i.e. the name and location of files '
        'within the directory.')

    # Links are followed unless `--no-follow-links` is given.
    parser.add_argument(
        '--no-follow-links',
        dest='follow_links',
        action='store_false',
        help=
        'Do not follow symbolic links to other *directories*. NOTE: directly '
        'linked files are always included.')
    parser.set_defaults(follow_links=True)
    parser.add_argument(
        '--include-empty',
        action='store_true',
        default=False,
        help=
        'Include empty directories (containing no files that meet the matching '
        'criteria). Note that the path to the directory itself must still meet '
        'the matching criteria (matched as if it was a file).')
    parser.add_argument('-s',
                        '--chunk-size',
                        default=2**20,
                        type=int,
                        help='The chunk size (in bytes) for reading of files.')
    parser.add_argument('-w',
                        '--workers',
                        type=int,
                        default=1,
                        help='Number of workers (parallel processes) to use.')
    parser.add_argument(
        '-l',
        '--list',
        action='store_true',
        default=False,
        help=
        'List the file paths that will be taken into account, followed by the '
        'hash of directory structure')

    # `argv=None` makes argparse fall back to `sys.argv[1:]`.
    args = parser.parse_args(argv)

    try:
        kwargs = preprocess_kwargs(vars(args))
        if kwargs.pop('list'):
            # kwargs below have no effect when listing
            for key in ('chunk_size', 'content_only', 'paths_only',
                        'algorithm', 'workers'):
                kwargs.pop(key)
            for leafpath in dirhash.get_included_paths(**kwargs):
                print(leafpath)
        else:
            print(dirhash.dirhash(**kwargs))
    except Exception as e:
        # Top-level boundary: report a short message instead of a traceback.
        sys.stderr.write('dirhash: {}\n'.format(e))
        sys.exit(1)