def compute_digests(rootdir, paths, num_threads=None):
    """Return a dict mapping every path in ``paths`` to its ``Digest``.

    The native file names are written, one per line, to a temporary file
    which is then handed to the external ``hashdeep`` program.  The XML that
    ``hashdeep`` prints is parsed and the SHA1 and SHA256 digests of every
    ``fileobject`` element are collected.

    Args:
        rootdir: Directory the entries of ``paths`` are relative to.  It is
            normalized with ``os.path.normpath`` before use.
        paths: Sized collection of paths to hash (``len()`` is taken of it
            and it is iterated more than once).
        num_threads: If not ``None``, passed to ``hashdeep`` via ``-j``.

    Returns:
        A dict ``{path: Digest(sha1, sha256)}``.

    Raises:
        ValueError: If an unexpected hash type is reported, if a file object
            lacks its name or one of the digests, or if the set of names
            returned by ``hashdeep`` differs from ``paths``.
        subprocess.CalledProcessError: If ``hashdeep`` exits with a non-zero
            status.
    """
    rootdir = os.path.normpath(rootdir)

    # First, we write the temporary file with filenames in the format
    # expected by hashdeep.
    fd, tempfilename = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'w') as tmpfile:
            for p in paths:
                tmpfile.write(utils.build_native_path(rootdir, p))
                tmpfile.write('\n')

        # Run hashdeep -c sha1,sha256 -f tempfilename -l -d (-j num_threads)
        cmd = ['hashdeep', '-c', 'sha1,sha256', '-f', tempfilename, '-l', '-d']
        if num_threads is not None:
            cmd.extend(['-j', str(num_threads)])
        output = subprocess.check_output(cmd)
    finally:
        # mkstemp() leaves deletion to the caller; do it even when writing
        # the list or running hashdeep fails so the file is never leaked.
        os.remove(tempfilename)

    # Parse output
    root = ET.fromstring(output)
    res = {}
    for fileobj in root:
        if fileobj.tag != 'fileobject':
            continue
        name = None
        sha1 = None
        sha256 = None
        for child in fileobj:
            if child.tag == 'hashdigest':
                hash_type = child.attrib['type']
                if hash_type == 'SHA1':
                    sha1 = child.text
                elif hash_type == 'SHA256':
                    sha256 = child.text
                else:
                    raise ValueError(
                        'Unexpected hash type "{}".'.format(hash_type))
            elif child.tag == 'filename':
                name = utils.get_path_from_native_path(rootdir, child.text)
        if not name or not sha1 or not sha256:
            raise ValueError('Could not extract all required information from '
                             'digest.')
        res[name] = Digest(sha1, sha256)

    # Every requested path must have been hashed, and nothing else.
    if len(res) != len(paths) or set(res) != set(paths):
        raise ValueError('List of filenames returned by hashdeep does not '
                         'match the input list.')
    return res
def assemble_paths(rootdir, patterns):
    """Walk ``rootdir`` and classify every entry selected by ``patterns``.

    Each entry's path (relative to ``rootdir``, as produced by
    ``utils.get_path_from_native_path``) is passed to ``pattern_decision``.
    Included entries are sorted into symlinks, regular files and directories;
    included entries of any other type are recorded as ignored.  Mount points
    are not descended into.

    Args:
        rootdir: Directory to traverse; normalized with ``os.path.normpath``.
        patterns: Passed unchanged to ``pattern_decision``.

    Returns:
        ``MatchingResult(filenames, symlinks, directories, errors, ignored)``
        where ``errors`` holds the errors reported by ``os.walk``.

    Raises:
        ValueError: If ``pattern_decision`` returns something other than
            ``INCLUDE`` or ``EXCLUDE``, or if the root is included but is not
            a directory.
    """
    filenames = []
    symlinks = []
    directories = []
    errors = []
    ignored = []

    def listdir_onerror(error):
        # os.walk reports listing failures here instead of raising.
        errors.append(error)

    rootdir = os.path.normpath(rootdir)

    # Handle root separately because the os.walk code below is not going to
    # process it.
    decision = pattern_decision(os.sep, patterns)
    if decision == INCLUDE:
        # If we want to include the directory entry, we have to find out
        # its type.
        if os.path.isdir(rootdir):
            directories.append(os.sep)
        else:
            raise ValueError(
                'The root is not a directory, which should not happen.')
    elif decision != EXCLUDE:
        raise ValueError('Unknown file decision {}.'.format(decision))

    # Now recursively traverse the file system
    for root, dirs, files in os.walk(rootdir, topdown=True,
                                     onerror=listdir_onerror,
                                     followlinks=False):
        for f in itertools.chain(files, dirs):
            native_path = os.path.join(root, f)
            path = utils.get_path_from_native_path(rootdir, native_path)
            decision = pattern_decision(path, patterns)
            if decision == INCLUDE:
                # If we want to include the directory entry, we have to find
                # out its type.  The symlink test comes first because
                # isfile/isdir follow links.
                if os.path.islink(native_path):
                    symlinks.append(path)
                elif os.path.isfile(native_path):
                    filenames.append(path)
                elif os.path.isdir(native_path):
                    directories.append(path)
                else:
                    ignored.append(path)
            elif decision != EXCLUDE:
                raise ValueError('Unknown file decision {}.'.format(decision))

        # Also, we remove all mount points from dirs so that os.walk does not
        # recurse into a different file system.  The names in ``dirs`` are
        # relative to the directory currently being walked (``root``), not to
        # ``rootdir``, so the check must be made against ``root``.
        dirs[:] = [d for d in dirs
                   if not os.path.ismount(os.path.join(root, d))]

    return MatchingResult(filenames, symlinks, directories, errors, ignored)
def scan_backup(rootdir):
    """Walk ``rootdir`` and classify every entry found below it.

    Entries are sorted into symlinks, regular files and directories, using
    paths relative to ``rootdir`` as produced by
    ``utils.get_path_from_native_path``.  The path ``/.go_backup`` and
    entries of any other type are recorded as ignored.  Mount points are not
    descended into.

    Args:
        rootdir: Directory to traverse; normalized with ``os.path.normpath``.

    Returns:
        ``ScanResult(files, symlinks, directories, errors, ignored)`` where
        ``errors`` holds the errors reported by ``os.walk``.

    Raises:
        ValueError: If ``rootdir`` is not a directory.
    """
    files = []
    symlinks = []
    directories = []
    errors = []
    ignored = []

    def listdir_onerror(error):
        # os.walk reports listing failures here instead of raising.
        errors.append(error)

    rootdir = os.path.normpath(rootdir)

    # The root itself is never yielded as an entry by os.walk, so record it
    # up front.
    if os.path.isdir(rootdir):
        directories.append(os.sep)
    else:
        raise ValueError(
            'The root is not a directory, which should not happen.')

    # Now recursively traverse the file system
    for root, cur_dirs, cur_files in os.walk(rootdir, topdown=True,
                                             onerror=listdir_onerror,
                                             followlinks=False):
        for f in itertools.chain(cur_files, cur_dirs):
            native_path = os.path.join(root, f)
            path = utils.get_path_from_native_path(rootdir, native_path)
            if path == '/.go_backup':
                # NOTE(review): the entry is only recorded as ignored; it is
                # not removed from cur_dirs, so if it is a directory its
                # contents are still walked -- confirm this is intended.
                ignored.append(path)
            elif os.path.islink(native_path):
                # Checked before isfile/isdir because those follow links.
                symlinks.append(path)
            elif os.path.isfile(native_path):
                files.append(path)
            elif os.path.isdir(native_path):
                directories.append(path)
            else:
                ignored.append(path)

        # Also, we remove all mount points from dirs so that os.walk does not
        # recurse into a different file system.  The names in ``cur_dirs``
        # are relative to the directory currently being walked (``root``),
        # not to ``rootdir``, so the check must be made against ``root``.
        cur_dirs[:] = [d for d in cur_dirs
                       if not os.path.ismount(os.path.join(root, d))]

    return ScanResult(files, symlinks, directories, errors, ignored)
def test_get_path_from_native_path_non_normalized_rootdir():
    """A non-normalized root directory ('//foo') must raise ValueError."""
    rootdir = '//foo'
    native_path = '/foo'
    with pytest.raises(ValueError):
        # The return value is irrelevant; only the exception is checked.
        utils.get_path_from_native_path(rootdir, native_path)
def test_get_path_from_native_path_non_absolute_native_path():
    """A native path that is not absolute ('foo') must raise ValueError."""
    rootdir = '/'
    native_path = 'foo'
    with pytest.raises(ValueError):
        # The return value is irrelevant; only the exception is checked.
        utils.get_path_from_native_path(rootdir, native_path)
def test_get_path_from_native_path_equal_paths():
    """When the native path equals the root directory the result is '/'."""
    same_dir = '/home/x'
    assert utils.get_path_from_native_path(same_dir, same_dir) == '/'
def test_get_path_from_native_path_both_dirs_base():
    """With both the root directory and the native path at '/' the result is '/'."""
    converted = utils.get_path_from_native_path('/', '/')
    assert converted == '/'
def test_get_path_from_native_path_base_rootdir():
    """With '/' as the root directory the native path is returned unchanged."""
    expected = '/foo/bar'
    converted = utils.get_path_from_native_path('/', '/foo/bar')
    assert converted == expected
def test_get_path_from_native_path_simple_2():
    """A path two levels below the root directory keeps both components."""
    expected = '/bar/baz'
    converted = utils.get_path_from_native_path('/foo', '/foo/bar/baz')
    assert converted == expected
def test_get_path_from_native_path_simple_1():
    """A direct child of the root directory maps to '/<name>'."""
    expected = '/foo'
    converted = utils.get_path_from_native_path('/home/x', '/home/x/foo')
    assert converted == expected
def test_get_path_from_native_path_out_of_dir_path_1():
    """A native path outside the root directory must raise ValueError.

    '/foobar' is not located below '/foo/bar' even though the two strings
    start with the same characters.
    """
    rootdir = '/foo/bar'
    native_path = '/foobar'
    with pytest.raises(ValueError):
        # The return value is irrelevant; only the exception is checked.
        utils.get_path_from_native_path(rootdir, native_path)