def get_file_node(rootdir, path, hash_cache, uid_map=None, gid_map=None):
    """Build a FileNode describing the file at ``path`` below ``rootdir``.

    The node is made of the default metadata (see get_default_metadata)
    extended with two extra fields:

    * ``hash`` -- the entry stored in ``hash_cache`` under ``path``
    * ``size`` -- ``st_size`` as reported by ``os.lstat`` on the native path

    ``uid_map`` and ``gid_map`` are forwarded unchanged to
    get_default_metadata.
    """
    node_fields = get_default_metadata(
        rootdir, path, uid_map=uid_map, gid_map=gid_map)

    # lstat is performed before the cache lookup, as both may raise.
    file_size = os.lstat(utils.build_native_path(rootdir, path)).st_size
    node_fields.update(hash=hash_cache[path], size=file_size)

    return FileNode(**node_fields)
def compute_digests(rootdir, paths, num_threads=None):
    """Return a dict mapping every path in ``paths`` to its Digest.

    The digests (SHA1 and SHA256) are computed by the external ``hashdeep``
    program, whose XML output is parsed here.

    Args:
        rootdir: Root directory; it is normalized with ``os.path.normpath``
            before the native paths are built.
        paths: Sized collection of paths (relative to ``rootdir``, in the
            form accepted by ``utils.build_native_path``).
        num_threads: If not None, passed to hashdeep via ``-j``.

    Returns:
        dict of ``path -> Digest(sha1, sha256)``.

    Raises:
        subprocess.CalledProcessError: if hashdeep exits with a non-zero
            status.
        ValueError: if the output contains an unexpected hash type, if a
            file entry lacks its name or one of the digests, or if the set
            of files reported by hashdeep differs from ``paths``.
    """
    rootdir = os.path.normpath(rootdir)

    # First, we write the temporary file with filenames in the format
    # expected by hashdeep (one native path per line).
    fd, tempfilename = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'w') as tmpfile:
            for p in paths:
                tmpfile.write(utils.build_native_path(rootdir, p))
                tmpfile.write('\n')

        # Run hashdeep -c sha1,sha256 -f tempfilename -l -d (-j num_threads)
        cmd = ['hashdeep', '-c', 'sha1,sha256', '-f', tempfilename, '-l',
               '-d']
        if num_threads is not None:
            cmd.extend(['-j', str(num_threads)])
        output = subprocess.check_output(cmd)
    finally:
        # mkstemp leaves deletion to the caller: remove the file even when
        # building the list or running hashdeep fails, so it is not leaked.
        os.remove(tempfilename)

    # Parse output
    root = ET.fromstring(output)
    res = {}
    for fileobj in root:
        if fileobj.tag != 'fileobject':
            continue

        name = None
        sha1 = None
        sha256 = None
        for child in fileobj:
            if child.tag == 'hashdigest':
                if child.attrib['type'] == 'SHA1':
                    sha1 = child.text
                elif child.attrib['type'] == 'SHA256':
                    sha256 = child.text
                else:
                    raise ValueError('Unexpected hash type "{}".'.format(
                        child.attrib['type']))
            if child.tag == 'filename':
                # Map the native path reported by hashdeep back to the
                # form used in ``paths``.
                name = utils.get_path_from_native_path(rootdir, child.text)

        if not name or not sha1 or not sha256:
            raise ValueError('Could not extract all required information from '
                             'digest.')
        res[name] = Digest(sha1, sha256)

    # Every requested path must be reported exactly once, and nothing else.
    keys = res.keys()
    if len(keys) != len(paths) or set(keys) != set(paths):
        raise ValueError('List of filenames returned by hashdeep does not '
                         'match the input list.')
    return res
def get_default_metadata(rootdir, path, uid_map=None, gid_map=None):
    """Return a dictionary with the default metadata of ``path``.

    The values are taken from an ``os.lstat`` call on the native path built
    from ``rootdir`` and ``path``. The returned keys are 'name', 'mtime',
    'user', 'group' and 'permissions'.

    ``uid_map`` / ``gid_map`` translate numeric ids into names; when they
    are None they are obtained from ``utils.get_uid_name_map()`` and
    ``utils.get_gid_name_map()``. Ids missing from a map are reported as
    their decimal string.
    """
    uid_map = utils.get_uid_name_map() if uid_map is None else uid_map
    gid_map = utils.get_gid_name_map() if gid_map is None else gid_map

    info = os.lstat(utils.build_native_path(rootdir, path))
    owner_id = info.st_uid
    group_id = info.st_gid

    # TODO: fields we are currently not using: st_ino, st_dev, st_nlink
    #       - is this intentional?
    return {
        'name': os.path.basename(path),
        # TODO: use strings instead of UNIX epoch seconds
        'mtime': info.st_mtime,
        'user': uid_map.get(owner_id, str(owner_id)),
        'group': gid_map.get(group_id, str(group_id)),
        # NOTE(review): st_mode also carries the file-type bits, so this is
        # usually longer than four octal digits (e.g. '100644') -- confirm
        # whether masking down to the permission bits is wanted.
        'permissions': '%04o' % info.st_mode,
    }
def get_symlink_node(rootdir, path, uid_map=None, gid_map=None):
    """Build a SymlinkNode describing the symlink at ``path`` below ``rootdir``.

    The node is made of the default metadata (see get_default_metadata)
    plus ``link_target``, the result of ``os.readlink`` on the native path.
    ``uid_map`` and ``gid_map`` are forwarded unchanged to
    get_default_metadata.
    """
    node_fields = get_default_metadata(
        rootdir, path, uid_map=uid_map, gid_map=gid_map)

    link_location = utils.build_native_path(rootdir, path)
    node_fields['link_target'] = os.readlink(link_location)

    return SymlinkNode(**node_fields)
def test_build_native_path_simple_1():
    """'/foo' under the root '/home/x' yields '/home/x/foo'."""
    assert utils.build_native_path('/home/x', '/foo') == '/home/x/foo'
def test_build_native_path_non_normalized_path():
    """A path that is not normalized ('/foo/../bar') raises ValueError."""
    with pytest.raises(ValueError):
        utils.build_native_path('/home/x', '/foo/../bar')
def test_build_native_path_non_absolute_rootdir():
    """A root directory that is not absolute ('root') raises ValueError."""
    with pytest.raises(ValueError):
        utils.build_native_path('root', '/')
def test_build_native_path_simple_base_rootdir_base_path():
    """The path '/' under the root '/' yields '/'."""
    assert utils.build_native_path('/', '/') == '/'