def walk_data_tree(current_path, toplevel=False, pgfid_path='00/00/00000000-0000-0000-0000-000000000001'):
    # Recursively walk a GlusterFS brick's data tree and cross-check every
    # entry against its expected object in the .glusterfs link tree.
    # Prints a diagnostic (and a suggested shell command) for each missing
    # or inconsistent link-tree object; fixes nothing itself.
    #
    # current_path -- directory currently being scanned
    # toplevel     -- True only for the brick root, so '.glusterfs' is skipped
    # pgfid_path   -- gfid-derived relative path of the parent directory, used
    #                 to build the expected '../../<pgfid>/<name>' symlink target
    #
    # NOTE(review): assumes the process cwd is the brick root, since link-tree
    # paths are resolved via os.getcwd() -- confirm against the caller.
    l = os.listdir(current_path)
    for direntry in l:
        if direntry in ('.', '..') or toplevel and direntry == '.glusterfs':
            continue
        ln = None
        ln_full = None
        link_stat = None
        depath = os.path.join(current_path, direntry)
        data_stat = os.lstat(depath)
        if not S_ISLNK(data_stat.st_mode):
            # Links don't have a gfid of their own.
            try:
                x = xattr.get(depath, 'trusted.gfid')
                ln = format_gfid(x)
                ln_full = os.path.join(os.getcwd(), ln)
                try:
                    link_stat = os.lstat(ln)
                except OSError, e:
                    print "Missing expected link tree object at %s" % ln_full
            except IOError, e:
                print "Missing trusted.gfid extended attribute for %s" % depath
        if S_ISDIR(data_stat.st_mode):
            # This is a directory; its link tree object should be a symbolic
            # link pointing back into the parent's gfid directory.
            if not link_stat:
                print "Missing link for directory for %s, you should probably run " % depath
                print "ln -s '../../%s/%s' '%s'" % (pgfid_path, direntry, ln_full)
            else:
                if not S_ISLNK(link_stat.st_mode) or not os.readlink(
                        ln) == '../../%s/%s' % (pgfid_path, direntry):
                    print "Unexpected: %s does not link to what we expected it to (../../%s/%s)." % (
                        ln_full, pgfid_path, direntry)
            # Recurse with this directory's own gfid path as the new parent.
            walk_data_tree(depath, pgfid_path=ln)
        if S_ISREG(data_stat.st_mode):
            # Regular files must share an inode (hard link) with their
            # link-tree object.
            if not link_stat:
                print "Missing hard link for file %s, you should probably run " % depath
                print "ln '%s' '%s'" % (depath, ln_full)
            else:
                if link_stat.st_dev != data_stat.st_dev or link_stat.st_ino != data_stat.st_ino:
                    print "Unexpected: %s is not what we expected (hard link to %s)" % (
                        ln_full, depath)
def _hasFileConflict(self, pkg1, pkg2, filename): """RpmPackage's pkg1 and pkg2 share filename. Return 1 if the conflict is "real", 0 if it should be ignored. pkg1_fi is RpmFileInfo of filename in pkg1.""" # pkg1_fi = pkg1.getRpmFileInfo(idx1) pkg1_fi = pkg1.getRpmFileInfo(filename) pkg2_fi = pkg2.getRpmFileInfo(filename) # do not check packages with the same NEVR which are # not buildarchtranslate compatible if pkg1.getNEVR() == pkg2.getNEVR() and \ buildarchtranslate[pkg1["arch"]] != \ buildarchtranslate[pkg2["arch"]] and \ pkg1["arch"] != "noarch" and \ pkg2["arch"] != "noarch" and \ pkg1_fi.filecolor != pkg2_fi.filecolor and \ pkg1_fi.filecolor > 0 and pkg2_fi.filecolor > 0: return 0 # check if data is sufficient if not pkg1_fi.mode: raise ValueError, \ "Package '%s': File mode is not set for file '%s'" % \ (pkg1.getNEVRA(), filename) if not pkg2_fi.mode: raise ValueError, \ "Package '%s': File mode is not set for file '%s'" % \ (pkg2.getNEVRA(), filename) # check if user and group are identical if pkg1_fi.uid != pkg2_fi.uid and \ pkg1_fi.gid != pkg2_fi.gid: return 1 # ignore directories if S_ISDIR(pkg1_fi.mode) and S_ISDIR(pkg2_fi.mode): return 0 # ignore links if S_ISLNK(pkg1_fi.mode) and S_ISLNK(pkg2_fi.mode) and \ (pkg1_fi.linkto == pkg2_fi.linkto): return 0 # ignore identical files if pkg1_fi.mode == pkg2_fi.mode and \ pkg1_fi.filesize == pkg2_fi.filesize and \ pkg1_fi.md5sum == pkg2_fi.md5sum: return 0 # ignore ghost files if pkg1_fi.flags & base.RPMFILE_GHOST or \ pkg2_fi.flags & base.RPMFILE_GHOST: return 0 return 1
def findall(self):
    """Find all files under the base and set ``allfiles`` to the
    absolute pathnames of files found.
    """
    from stat import S_ISREG, S_ISDIR, S_ISLNK

    self.allfiles = found = []
    pending = [self.base]
    while pending:
        directory = pending.pop()
        for entry in os.listdir(directory):
            path = os.path.join(directory, entry)
            # One stat call per entry tells us everything we need.
            mode = os.stat(path).st_mode
            if S_ISREG(mode):
                found.append(fsdecode(path))
            elif S_ISDIR(mode) and not S_ISLNK(mode):
                pending.append(path)
def inode_type(self, inode, symtab, addr_space):
    """Classify *inode* by its i_mode and return a short type string.

    Returns one of REG/LNK/CHR/BLK/DIR/SOCK/FIFO, "UNKN" when the mode
    matches nothing, or "PIPE" when the inode's i_fop points at the
    kernel's rdwr_pipe_fops operations table.
    """
    imode = inode.m('i_mode').v()
    kind = "UNKN"
    # The S_IS* predicates are mutually exclusive, so a first-match scan
    # is equivalent to the classic if/elif ladder.
    for predicate, label in ((S_ISREG, "REG"), (S_ISLNK, "LNK"),
                             (S_ISCHR, "CHR"), (S_ISBLK, "BLK"),
                             (S_ISDIR, "DIR"), (S_ISSOCK, "SOCK"),
                             (S_ISFIFO, "FIFO")):
        if predicate(imode):
            kind = label
            break
    # Refine to "PIPE" when the file-ops pointer identifies a pipe.
    pipe_fops = symtab.lookup("rdwr_pipe_fops")
    if pipe_fops:
        if inode.get_member_offset('i_fop') > 0:
            if inode.get_member('i_fop').v() == pipe_fops:
                kind = "PIPE"
    return kind
def check_for_deletion(self, relative_path=None):
    """Traverse the entire remote_path tree and delete any remote
    file/directory that is no longer present in the local folder.
    """
    if not relative_path:
        relative_path = str()  # root of shared directory tree
    remote_dir = path_join(self.remote_path, relative_path)
    local_dir = path_join(self.local_path, relative_path)
    for attrs in self.sftp.listdir_attr(remote_dir):
        remote_entry = path_join(remote_dir, attrs.filename)
        local_entry = path_join(local_dir, attrs.filename)
        entry_lstat = self.sftp.lstat(remote_entry)
        # Treat symlinks through lstat so we never delete a file that
        # lives outside the shared directory via a link.
        if S_ISLNK(entry_lstat.st_mode):
            if self._must_be_deleted(local_entry, entry_lstat):
                self.remote_delete(remote_entry, entry_lstat)
            continue
        if self._must_be_deleted(local_entry, attrs):
            self.remote_delete(remote_entry, attrs)
        elif S_ISDIR(attrs.st_mode):
            # Surviving directory: recurse into it.
            self.check_for_deletion(
                path_join(relative_path, attrs.filename))
def verify_files(current_directory, cur):
    """Recursively verify the CRC32 checksum of every regular file below
    *current_directory*, skipping dotfiles and symlinks.

    *cur* is passed through to verify_file (a database cursor, by the look
    of the call chain -- TODO confirm).
    """
    from os import listdir, stat
    from os.path import join
    from stat import S_ISDIR, S_ISLNK, S_ISREG

    for entry in listdir(current_directory):
        if entry.startswith("."):
            continue
        full = join(current_directory, entry)
        mode = stat(full).st_mode
        if S_ISLNK(mode):
            # Symlinks are neither verified nor followed.
            continue
        if S_ISDIR(mode):
            verify_files(current_directory=full, cur=cur)
        elif S_ISREG(mode):
            verify_file(target_file_name=entry, fullpath=full,
                        checksum="0x" + sum_from_file_CRC32(full), cur=cur)
def findall(dirname=os.curdir):
    """Return a list of all regular files found below *dirname*.

    Names are relative when *dirname* is the current directory, otherwise
    they carry the *dirname* prefix.  A single lstat per entry decides the
    type, so symbolic links are never followed.
    """
    from stat import ST_MODE, S_ISREG, S_ISDIR, S_ISLNK

    found = []
    todo = [dirname]
    while todo:
        current = todo.pop()
        for entry in os.listdir(current):
            # Avoid the dreaded "./" syndrome for the current directory.
            if current == os.curdir:
                path = entry
            else:
                path = os.path.join(current, entry)
            mode = os.lstat(path)[ST_MODE]
            if S_ISREG(mode):
                found.append(path)
            elif S_ISDIR(mode) and not S_ISLNK(mode):
                todo.append(path)
    return found
def symlink(src, dst):
    """Ensure a remote symlink dst -> src, returning a status object.

    Returns Unchanged when the link already points at *src*, Changed when
    it was created or replaced.  Raises RemotePathIsNotALinkError when dst
    exists but is not a symlink.
    """
    if dst.endswith('/'):
        raise NotImplementedError('Creating link inside directory not '
                                  'implemented')
    lst = remote.lstat(dst)
    if not lst:
        # Nothing at dst yet: simply create the link.
        remote.symlink(src, dst)
        return Changed(msg='Created link: {} -> {}'.format(dst, src))
    if not S_ISLNK(lst.st_mode):
        raise RemotePathIsNotALinkError('Already exists and not a link: '
                                        '{}'.format(dst))
    # dst is an existing link; compare its target with the desired one.
    rsrc = remote.readlink(dst)
    if rsrc == src:
        return Unchanged(msg='Unchanged link: {} -> {}'.format(dst, src))
    # We must repoint the link; unfortunately this is often not possible
    # atomically, so unlink and recreate.
    remote.unlink(dst)
    remote.symlink(src, dst)
    return Changed(msg='Changed link: {} -> {} (previously -> {})'.format(
        dst, src, rsrc))
def walk(top, topdown=True, onerror=None, followlinks=False):
    """Remote-filesystem analogue of os.walk.

    Yields (top, dirs, files) tuples; symlinked directories are only
    descended into when *followlinks* is true.  Listing errors are passed
    to *onerror* (if given) and stop the walk of that subtree.
    """
    try:
        entries = remote.listdir(top)
    except OSError as e:
        if onerror:
            onerror(e)
        return
    dirs, files = [], []
    for entry in entries:
        mode = remote.lstat(remote.path.join(top, entry)).st_mode
        (dirs if S_ISDIR(mode) else files).append(entry)
    if topdown:
        yield top, dirs, files
    for entry in dirs:
        full = remote.path.join(top, entry)
        # Re-lstat so a symlinked directory is only entered on request.
        if followlinks or not S_ISLNK(remote.lstat(full).st_mode):
            for result in walk(full, topdown, onerror, followlinks):
                yield result
    if not topdown:
        yield top, dirs, files
def findall(dir=os.curdir):
    """Find all files under 'dir' and return the list of full filenames
    (relative to 'dir').

    Uses os.stat (links are followed); symlinks that point to files are
    therefore listed like ordinary files.
    """
    from stat import ST_MODE, S_ISREG, S_ISDIR, S_ISLNK

    results = []
    remaining = [dir]
    while remaining:
        where = remaining.pop()
        for name in os.listdir(where):
            # Avoid the dreaded "./" syndrome for the current directory.
            if where == os.curdir:
                fullname = name
            else:
                fullname = os.path.join(where, name)
            # A single stat call per entry is all we need.
            mode = os.stat(fullname)[ST_MODE]
            if S_ISREG(mode):
                results.append(fullname)
            elif S_ISDIR(mode) and not S_ISLNK(mode):
                remaining.append(fullname)
    return results
def _expand_remote_dest(local_path, remote_path):
    # Resolve remote_path into a concrete destination file path plus stat.
    #
    # Behaviour, in order:
    #   * remote_path defaults to local_path; both None is an error.
    #   * A symlink at remote_path is normalized to its target (dangling
    #     links raise inside remote.normalize) and re-statted.
    #   * A directory gets basename(local_path) appended (requires a
    #     local_path).
    #   * Anything existing that is not a regular file is rejected.
    #
    # Returns (st, remote_path) where st is the lstat result, or a falsy
    # value when the destination does not exist yet.
    # Raises RuntimeError / RemoteFailureError on invalid combinations.
    if remote_path is None:
        if local_path is None:
            raise RuntimeError('one of local_path, remote_path is required')
        remote_path = local_path
    st = remote.lstat(remote_path)
    if st:
        # file exists, check if it is a link
        if S_ISLNK(st.st_mode):
            # normalize (dangling links will raise an exception)
            remote_path = remote.normalize(remote_path)
            # update stat
            st = remote.lstat(remote_path)
    # dir-expansion, since st is guaranteed not be a link
    if st and S_ISDIR(st.st_mode):
        if local_path is None:
            raise RemoteFailureError(
                'Is a directory: {}'.format(remote_path))
        # if it's a directory, correct path
        remote_path = remote.path.join(remote_path,
                                       remote.path.basename(local_path))
        st = remote.lstat(remote_path)
        log.debug('Expanded remote_path to {!r}'.format(remote_path))
    # ensure st is either non-existant, or a regular file
    if st and not S_ISREG(st.st_mode):
        raise RemoteFailureError(
            'Not a regular file: {!r}'.format(remote_path))
    return st, remote_path
def list_dir(self, dirname: str) -> None:
    """Log an ``ls -l``-style listing of *dirname* via ``self.log``.

    Symlinks are rendered as ``name -> target``.  Modification times
    within the current year use ``Mon DD HH:MM``; older ones use
    ``Mon DD YYYY`` (mirroring ls).  Columns are padded to the widest
    value in each column.
    """
    self.log.info("Contents of %s", dirname)
    cur_year = localtime().tm_year
    results = []
    for filename in listdir(dirname):
        # lstat so symlinks are described, not followed.
        # BUG FIX: the path must be built from the entry's own name;
        # previously a literal placeholder string was statted instead.
        s = lstat(f"{dirname}/{filename}")
        if S_ISLNK(s.st_mode):
            target = readlink(f"{dirname}/{filename}")
            filename = f"{filename} -> {target}"
        mod_time = localtime(s.st_mtime)
        if mod_time.tm_year == cur_year:
            mod_time_str = strftime("%b %d %H:%M", mod_time)
        else:
            mod_time_str = strftime("%b %d %Y", mod_time)
        results.append(
            (filemode(s.st_mode), str(s.st_nlink), uid_to_username(s.st_uid),
             gid_to_groupname(s.st_gid), mod_time_str, filename))
    if not results:
        # max() over an empty sequence would raise for an empty directory.
        return
    col_size = [max(len(el[i]) for el in results) for i in range(6)]
    for line in results:
        self.log.info("%*s %*s %*s %*s %*s %s",
                      col_size[0], line[0], col_size[1], line[1],
                      col_size[2], line[2], col_size[3], line[3],
                      col_size[4], line[4], line[5])
def augment_item_meta(repo, item, include_size=False):
    """Ensure item has a Metadata instance for item.meta.

    If item.meta is currently a mode, replace it with a compatible "fake"
    Metadata instance.  If include_size is true, ensure item.meta.size is
    correct, computing it if needed.  If item.meta is a Metadata instance,
    this call may modify it in place or replace it.
    """
    # If we actually had parallelism, we'd need locking...
    assert repo
    m = item.meta
    if isinstance(m, Metadata):
        if include_size and m.size is None:
            m.size = _compute_item_size(repo, item)
        return item._replace(meta=m)
    # Otherwise m is a bare mode int: synthesize minimal metadata.
    meta = Metadata()
    meta.mode = m
    meta.uid = meta.gid = None
    meta.atime = meta.mtime = meta.ctime = 0
    if S_ISLNK(m):
        # Symlinks always know their size: the target string length.
        if isinstance(item, FakeLink):
            target = item.target
        else:
            target = _readlink(repo, item.oid)
        meta.symlink_target = target
        meta.size = len(target)
    elif include_size:
        meta.size = _compute_item_size(repo, item)
    return item._replace(meta=meta)
def dup_ovfs_object(inode_map, source_obj, target_dir):
    """Given a relative pathname to copy, and a new target root, duplicate
    the source object in the target root, using hardlinks for regular files."""
    source_full_path = os.path.join(OVERLAY_MERGED, source_obj)
    st = os.lstat(source_full_path)
    target_full_path = os.path.join(target_dir, source_obj)
    if S_ISREG(st[ST_MODE]):
        # Hardlink all regular files; ownership and permissions are shared.
        # inode_map maps source inode -> previously linked path, so files
        # that were hardlinked in the source stay hardlinked in the copy.
        link(inode_map[st[ST_INO]], target_full_path)
    else:
        # Recreate all directories and symlinks; copy ownership and permissions.
        if S_ISDIR(st[ST_MODE]):
            # NOTE(review): directories are created under FINALIZED while
            # every other operation here uses target_dir -- these appear to
            # be assumed identical; confirm FINALIZED == target_dir at the
            # call sites or this mkdir lands in the wrong tree.
            os.mkdir(os.path.join(FINALIZED, source_obj), S_IMODE(st[ST_MODE]))
        elif S_ISLNK(st[ST_MODE]):
            os.symlink(os.readlink(source_full_path), target_full_path)
            os.chmod(target_full_path, S_IMODE(st[ST_MODE]),
                     follow_symlinks=False)
        else:
            # Ran into a FIFO, socket, etc. Should not happen in OP install dir.
            # Ignore without copying for the time being; revisit later if needed.
            cloudlog.error("can't copy this file type: %s" % source_full_path)
    os.chown(target_full_path, st[ST_UID], st[ST_GID], follow_symlinks=False)
    # Sync target mtimes to the cached lstat() value from each source object.
    # Restores shared inode mtimes after linking, fixes symlinks and dirs.
    os.utime(target_full_path, (st[ST_ATIME], st[ST_MTIME]),
             follow_symlinks=False)
def walk(self, top: "_SPATH", topdown: bool = True, onerror=None,
         followlinks: bool = False) -> "_WALK":
    """SFTP analogue of os.walk: yield (path, dirs, files) triples.

    With *followlinks*, symlinked entries are classified by their target's
    mode.  Listing errors go to *onerror* when supplied.
    """
    root = self.c._path2str(top)
    regular = []
    subdirs = []
    for attr in self.c.sftp.listdir_attr(root):
        try:
            mode = attr.st_mode
            if S_ISLNK(mode) and followlinks:
                # Classify by the link target instead of the link itself.
                mode = self.c.os.stat(
                    os.path.join(root, attr.filename)).st_mode
            if S_ISDIR(mode):
                subdirs.append(attr.filename)
            else:
                regular.append(attr.filename)
        except OSError as e:
            if onerror is not None:
                onerror(e)
    # TODO join might be wrong for some host systems
    if topdown:
        # Yield before expanding so callers may prune `subdirs` in place.
        yield root, subdirs, regular
        descend = [os.path.join(root, f) for f in subdirs]
    else:
        descend = [os.path.join(root, f) for f in subdirs]
        yield root, subdirs, regular
    for child in descend:
        for triple in self.walk(child, topdown, onerror, followlinks):
            yield triple
def walk_files(self, directory):
    """Yield the full remote path of every regular file under *directory*,
    resolving symlinked entries and recursing into subdirectories.

    Raises DvcException when the remote directory cannot be listed.
    """
    from stat import S_ISLNK, S_ISDIR, S_ISREG

    self._sftp_connect()
    try:
        dir_entries = self._sftp.listdir_attr(directory)
    except IOError as exc:
        raise DvcException(
            "couldn't get the '{}' remote directory files list".format(
                directory),
            cause=exc,
        )
    for entry in dir_entries:
        path = posixpath.join(directory, entry.filename)
        if S_ISLNK(entry.st_mode):
            # BUG FIX: resolve the symlinked entry itself; the old code
            # called readlink on the parent directory being listed.
            path = self._sftp.readlink(path)
            entry = self._sftp.stat(path)
        if S_ISDIR(entry.st_mode):
            for inner_path in self.walk_files(path):
                yield inner_path
        elif S_ISREG(entry.st_mode):
            yield path
def _default_mode_for_gitmode(gitmode): if S_ISREG(gitmode): return default_file_mode if S_ISDIR(gitmode): return default_dir_mode if S_ISLNK(gitmode): return default_symlink_mode raise Exception('unexpected git mode ' + oct(gitmode))
def _compute_item_size(repo, item):
    """Return the size in bytes of a VFS item: file content length for
    regular files, target-string length for symlinks, 0 otherwise."""
    mode = item_mode(item)
    if S_ISLNK(mode):
        return len(_readlink(repo, item.oid))
    if S_ISREG(mode):
        return _normal_or_chunked_file_size(repo, item.oid)
    return 0
def islink(self):
    """Return whether this path's stat info says it is a symbolic link.

    Re-stats once (without following links) when no cached statinfo is
    available; returns False if the path still cannot be statted.
    """
    st = self.statinfo
    if not st:
        self.restat(False)
        st = self.statinfo
    if not st:
        return False
    return S_ISLNK(st[ST_MODE])
def __eq__(self, other):
    """Compare this hash entry with an os stat_result.

    Equal when size and mtime match and the entry type agrees with the
    stat mode (regular file vs. symlink).  Any other operand falls back
    to the parent class comparison.
    """
    if not isinstance(other, stat_result):
        return super().__eq__(other)
    if self.size != other.st_size or self.mtime != other.st_mtime:
        return False
    if self.type == HashEntryType.TYPE_FILE:
        return S_ISREG(other.st_mode)
    if self.type == HashEntryType.TYPE_SYMLINK:
        return S_ISLNK(other.st_mode)
    return False
def islink(self, path):
    """Return True if *path* on the remote SFTP server is a symbolic link.

    BUG FIX: uses lstat instead of stat.  stat follows the link and
    reports the target's mode, so a live symlink was never detected
    (and a dangling one raised IOError).  Returns False when the path
    does not exist or cannot be statted.
    """
    from stat import S_ISLNK

    self._sftp_connect()
    try:
        return S_ISLNK(self._sftp.lstat(path).st_mode)
    except IOError:
        return False
def stat_mode_to_index_mode(mode):
    """Convert the given mode from a stat call to the corresponding index
    mode and return it."""
    if S_ISLNK(mode):
        # Symlink entries carry no permission bits in the index.
        return S_IFLNK
    if S_ISDIR(mode) or S_IFMT(mode) == S_IFGITLINK:
        # Directories in the index are submodule (gitlink) entries.
        return S_IFGITLINK
    # Regular blob: normalized permissions, preserving executable bits.
    return S_IFREG | 0o644 | (mode & 0o111)
def resolve(repo, path, parent=None, want_meta=True, follow=True):
    """Follow the path in the virtual filesystem and return a tuple
    representing the location, if any, denoted by the path.  Each element
    in the result tuple will be (name, info), where info will be a VFS
    item that can be passed to functions like item_mode().

    If follow is false, and if the final path element is a symbolic link,
    don't follow it, just return it in the result.

    If a path segment that does not exist is encountered during
    resolution, the result will represent the location of the missing
    item, and that item in the result will be None.

    Any attempt to traverse a non-directory will raise a VFS ENOTDIR
    IOError exception.

    Any symlinks along the path, including at the end, will be resolved.
    A VFS IOError with the errno attribute set to ELOOP will be raised
    if too many symlinks are traversed while following the path.  That
    exception is effectively like a normal ELOOP IOError exception, but
    will include a terminus element describing the location of the
    failure, which will be a tuple of (name, info) elements.

    The parent, if specified, must be a sequence of (name, item) tuples,
    and will provide the starting point for the resolution of the path.
    If no parent is specified, resolution will start at '/'.

    The result may include elements of parent directly, so they must not
    be modified later.  If this is a concern, pass in "name,
    copy_item(item) for name, item in parent" instead.

    When want_meta is true, detailed metadata will be included in each
    result item if it's available, otherwise item.meta will be an integer
    mode.  The metadata size may or may not be provided, but can be
    computed by item_size() or augment_item_meta(...,
    include_size=True).  Setting want_meta=False is rarely desirable
    since it can limit the VFS to just the metadata git itself can
    represent, and so, as an example, fifos and sockets will appear to be
    regular files (e.g. S_ISREG(item_mode(item)) will be true).  But the
    option is provided because it may be more efficient when only the
    path names or the more limited metadata is sufficient.

    Do not modify any item.meta Metadata instances directly.  If needed,
    make a copy via item.meta.copy() and modify that instead.

    """
    if repo.is_remote():
        # Redirect to the more efficient remote version
        return repo.resolve(path, parent=parent, want_meta=want_meta,
                            follow=follow)
    result = _resolve_path(repo, path, parent=parent, want_meta=want_meta,
                           follow=follow)
    _, leaf_item = result[-1]
    if leaf_item and follow:
        # _resolve_path must have fully resolved any trailing symlink.
        assert not S_ISLNK(item_mode(leaf_item))
    return result
def analyze(src, length=io.DEFAULT_BUFFER_SIZE):
    # Record metadata for *src* via upsert_file_metadata and return its MD5.
    #
    # Classifies the path by stat type (following symlinks); falls back to
    # lstat when the stat target is missing so dangling symlinks can be
    # recorded as BROKEN_SYMLINK.  For anything that is not a regular file
    # the MD5 of the empty string is returned.  *length* is the read chunk
    # size used while hashing.
    md5 = hashlib.md5()
    src = os.path.abspath(src)
    try:
        mode = os.stat(src).st_mode
        if S_ISREG(mode):
            upsert_file_metadata(src, stat_types['REGULAR'],
                                 size=os.path.getsize(src),
                                 extension=os.path.splitext(src)[1])
        elif S_ISDIR(mode):
            upsert_file_metadata(src, stat_types['DIRECTORY'])
        elif S_ISCHR(mode):
            upsert_file_metadata(src, stat_types['CHAR'])
        elif S_ISBLK(mode):
            upsert_file_metadata(src, stat_types['BLOCK'])
        elif S_ISFIFO(mode):
            upsert_file_metadata(src, stat_types['FIFO'])
        elif S_ISLNK(mode):
            upsert_file_metadata(src, stat_types['SYMLINK'])
        elif S_ISSOCK(mode):
            upsert_file_metadata(src, stat_types['SOCKET'])
        else:
            upsert_file_metadata(src, stat_types['UNKNOWN'])
    except FileNotFoundError:
        # stat followed a symlink to a missing target; examine the link.
        mode = os.stat(src, follow_symlinks=False).st_mode
        if S_ISLNK(mode):
            upsert_file_metadata(src, stat_types['BROKEN_SYMLINK'])
    # Just return the MD5 hash of an empty string for non-regular files
    if not S_ISREG(mode):
        return md5
    try:
        # Record MIME info, then hash the content in fixed-size chunks.
        upsert_file_metadata(src, mime_type=(magic.from_file(src, mime=True)),
                             mime_detail=magic.from_file(src))
        with io.open(src, mode="rb") as fd:
            for chunk in iter(lambda: fd.read(length), b''):
                md5.update(chunk)
    except OSError:
        # Best-effort: mark the error and still return the partial hash.
        upsert_file_metadata(src, stat_types['ERROR'])
        pass
    return md5
def gen_obj(path, stat=None, chksum_handlers=None, real_location=None,
            stat_func=os.lstat, **overrides):
    """
    given a fs path, and an optional stat, create an appropriate fs obj.

    :param stat: stat object to reuse if available
    :param real_location: real path to the object if path is the desired
        location, rather then existent location.
    :raise KeyError: if no obj type matches the stat checks
    :return: :obj:`pkgcore.fs.fs.fsBase` derivative
    """
    if real_location is None:
        real_location = path
    if stat is None:
        try:
            stat = stat_func(real_location)
        except EnvironmentError as e:
            # A stat_func other than lstat may fail on a dangling symlink;
            # retry with lstat so the link itself can still be represented.
            if stat_func == os.lstat or e.errno != errno.ENOENT:
                raise
            stat = os.lstat(real_location)
    mode = stat.st_mode
    # Attributes shared by every fs object type.
    d = {
        "mtime": stat.st_mtime,
        "mode": S_IMODE(mode),
        "uid": stat.st_uid,
        "gid": stat.st_gid}
    if S_ISREG(mode):
        d["size"] = stat.st_size
        d["data"] = local_source(real_location)
        d["dev"] = stat.st_dev
        d["inode"] = stat.st_ino
        if chksum_handlers is not None:
            d["chf_types"] = chksum_handlers
        # Caller-supplied overrides win over stat-derived values.
        d.update(overrides)
        return fsFile(path, **d)
    d.update(overrides)
    if S_ISDIR(mode):
        return fsDir(path, **d)
    elif S_ISLNK(mode):
        d["target"] = os.readlink(real_location)
        return fsSymlink(path, **d)
    elif S_ISFIFO(mode):
        return fsFifo(path, **d)
    else:
        # Device node: keep the full mode (type bits included), plus
        # the major/minor numbers.
        major, minor = get_major_minor(stat)
        d["minor"] = minor
        d["major"] = major
        d["mode"] = mode
        return fsDev(path, **d)
def recursive_remove(self, host, remotedir):
    """Delete *remotedir* and everything below it on *host* via SFTP.

    Raises IOError when an entry is neither a directory, a regular file,
    nor a symlink.
    """
    for entry in host.sftp.listdir_attr(remotedir):
        target = path.join(remotedir, entry.filename)
        if S_ISDIR(entry.st_mode):
            self.recursive_remove(host, target)
        elif S_ISREG(entry.st_mode) or S_ISLNK(entry.st_mode):
            host.sftp.remove(target)
        else:
            raise IOError("Cannot remove remote file:" + str(entry))
    # Directory is empty now; remove it last.
    host.sftp.rmdir(remotedir)
def is_symlink(self):
    """
    Return True if this entry is a symbolic link.

    Delegates to ``self.stat()``; any caching of the result is handled
    there (presumably by the underlying os.DirEntry -- TODO confirm).

    Returns:
        bool: True if symbolic link.
    """
    mode = self.stat().st_mode
    return bool(S_ISLNK(mode))
def symlink_exists(p):
    """Check if symlink exists and raise if another type of file exists"""
    # Returns True when p is a symlink; returns None (falsy) when p does
    # not exist at all.  Any lstat error other than ENOENT is re-raised.
    try:
        st = os.lstat(p)
        if not S_ISLNK(st.st_mode):
            raise Exception('Path exists and is not a symlink: %s' % p)
        return True
    except OSError, e:
        # ENOENT just means "no symlink here"; anything else is unexpected.
        if e.errno != errno.ENOENT:
            raise
def readlink(repo, item):
    """Return the link target of item, which must be a symlink.  Reads the
    target from the repository if necessary."""
    assert repo
    assert S_ISLNK(item_mode(item))
    meta = item.meta
    # Prefer the cached Metadata target; fall back to reading the repo.
    if isinstance(meta, Metadata) and meta.symlink_target:
        return meta.symlink_target
    return _readlink(repo, item.oid)
def _compute_item_size(repo, item):
    """Return an item's size in bytes: content length for regular files,
    target length for symlinks (FakeLinks carry their target inline),
    0 for everything else."""
    mode = item_mode(item)
    if S_ISREG(mode):
        return _normal_or_chunked_file_size(repo, item.oid)
    if S_ISLNK(mode):
        if isinstance(item, FakeLink):
            target = item.target
        else:
            target = _readlink(repo, item.oid)
        return len(target)
    return 0