def __init__(self, entry: DirEntry):
    """Build a listing record (name, paths, timestamps, size) from a
    scandir entry rooted under ``models_dir``.

    :param entry: os.DirEntry whose name/path are bytes (hence .decode()).
    """
    # NOTE(review): entry.name/.path are bytes here, which means the
    # directory was scanned with a bytes path — confirm against callers.
    self.name = entry.name.decode()
    self.path = entry.path.decode()
    self.rel_path = relpath(self.path, models_dir)
    self.is_dir = entry.is_dir()
    # stat() once instead of twice (the original issued two calls).
    stat_result = entry.stat()
    # save created_time and modified_time as date objects to provide
    # better date comparison
    self.created_time = datetime.fromtimestamp(stat_result.st_ctime)
    self.modified_time = datetime.fromtimestamp(stat_result.st_mtime)
    self.size = self._human_readable_size(self._get_size(entry.path))
def _insert_sorted(self, item: os.DirEntry, sort_by: SortBy) -> None:
    """Insert one scanned entry into ``self._items`` so the list stays
    ordered by the ``sort_by`` criterion, updating running totals.

    :param item: DirEntry produced during the (async) directory scan.
    :type item: posix.DirEntry
    :param sort_by: SortBy enum attribute used for ordering.
    :type sort_by: SortBy
    :rtype: None
    """
    attrs = self._get_attributes(item)
    # An empty folder aggregates to all-zero timestamps; fall back to
    # the folder's own stat() values instead.
    if not (attrs["atime"] or attrs["mtime"] or attrs["ctime"]):
        own_stat = item.stat(follow_symlinks=False)
        attrs["atime"] = int(own_stat.st_atime)
        attrs["mtime"] = int(own_stat.st_mtime)
        attrs["ctime"] = int(own_stat.st_ctime)
    summary = {"name": os.path.relpath(item.path, self._root)}
    for key in ("size", "depth", "num_of_files", "atime", "mtime", "ctime"):
        summary[key] = attrs[key]
    # Locate the slot first, then bump the counters, then insert —
    # same order as the original implementation.
    index = self._find_index(summary, sort_by)
    self._total_size += summary["size"]
    self._items_len += 1
    self._items.insert(index, summary)
def should_copy_file_DirEntry(self, src: os.DirEntry, dst: Path):
    """Return True when ``src`` should be copied over ``dst``.

    Skips the copy when source and destination share an inode or have
    identical size and mtime; otherwise prepares ``dst`` (chmod/chflags)
    for being overwritten.  Any stat/prepare failure means "copy it".
    """
    if self.top_destination_does_not_exist:
        return True
    try:
        dst_stats = dst.stat()
        src_stats = src.stat()
        if src_stats.st_ino == 0:
            # on windows os.DirEntry.stat sets st_ino to zero and os.stat should be called
            # see https://docs.python.org/3.6/library/os.html#os.DirEntry
            src_stats = os.stat(src.path, follow_symlinks=False)
        if src_stats.st_ino == dst_stats.st_ino:
            log.debug(
                f"{self.progress_msg()} skip copy file, same inode '{src.path}' to '{dst}'"
            )
            return False
        if src_stats.st_size == dst_stats.st_size and src_stats.st_mtime == dst_stats.st_mtime:
            log.debug(
                f"{self.progress_msg()} skip copy file, same time and size '{src.path}' to '{dst}'"
            )
            return False
        # destination exists and file should be copied, so make sure it's writable
        with Chmod(dst, "a+rw", own_progress_count=0) as mod_changer:
            mod_changer()
        if self.should_no_flags_file(dst):
            with ChFlags(dst, "nohidden", "nosystem", "unlocked", ignore_all_errors=True, own_progress_count=0) as flags_changer:
                flags_changer()
    except Exception:
        # most likely dst.stat() failed because dst does not exist
        pass
    return True
def _dirEntryToFileInfo(dirEntry: os.DirEntry, path: str, realpath: str):
    """Translate a scandir entry into a FileInfo (lstat-based).

    ``linkname`` is the symlink target, or "" for regular entries or
    when the link cannot be read.
    """
    linkname = ""
    try:
        if dirEntry.is_symlink():
            linkname = os.readlink(realpath)
    except OSError:
        # unreadable/dangling link: treat it as having no target
        linkname = ""
    lstat_result = dirEntry.stat(follow_symlinks=False)
    return FolderMountSource._statsToFileInfo(lstat_result, linkname, path)
def from_dir_entry(self, dir_entry: os.DirEntry, file_hash: "str | None" = None, dont_hash: bool = False) -> None:
    """Populate this record's metadata from a scandir entry.

    :param dir_entry: entry to read name/path/size/times/inode from.
    :param file_hash: pre-computed hash to store (None if unknown).
    :param dont_hash: when True, skip the calculate_hash() call.
    :raises ValueError: if dir_entry is not an os.DirEntry.
    """
    if not isinstance(dir_entry, os.DirEntry):
        raise ValueError(
            'Unsupported data type {type}'.format(type=type(dir_entry)))
    self.hash = file_hash
    self.name = dir_entry.name
    self.path = dir_entry.path
    # stat() once — the original called it four times per entry.
    stat_result = dir_entry.stat()
    self.size = stat_result.st_size
    self.ctime = self._strip_millis(stat_result.st_ctime)
    self.mtime = self._strip_millis(stat_result.st_mtime)
    self.inode = stat_result.st_ino
    if not dont_hash:
        self.calculate_hash()
def print_dir_entry(entry: DirEntry, *, path: bool = False, path_field_size=70):
    """Print one scandir entry as an aligned table row.

    Shows the full path instead of just the name when ``path`` is True;
    size is printed only for regular files.
    """
    stat = entry.stat()
    label = entry.path if path else entry.name
    kind = 'DIR' if entry.is_dir() else 'FILE'
    size_text = str(stat.st_size) if entry.is_file() else ''
    print(f"{label:<{path_field_size}s} {kind:<4s} {size_text:>8s}")
def get_file_info(entry: DirEntry):
    """Return size and formatted access/modification times for an entry.

    :param entry: scandir entry to describe.
    :return: dict with 'size' (bytes) and 'lastAccessed'/'lastModified'
        as local-time strings like "Monday, June 01, 2020 12:00:00".
    """
    # NOTE(review): removed a dead `os.stat(entry.path)` call whose
    # result was discarded in the original.
    stat = entry.stat()
    fmt = "%A, %B %d, %Y %I:%M:%S"
    return {
        # named attributes instead of fragile tuple indices 6/7/8
        'size': stat.st_size,
        'lastAccessed': datetime.fromtimestamp(stat.st_atime).strftime(fmt),
        'lastModified': datetime.fromtimestamp(stat.st_mtime).strftime(fmt),
    }
def from_dir_entry(cls, dir: "File", entry: os.DirEntry) -> "File":
    """Build a File for *entry* found inside *dir*.

    A stat failure (typically a dangling symlink) is logged and recorded
    as ``stat=None`` rather than raised.
    """
    full_rel = os.path.join(dir.relpath, entry.name)
    full_abs = os.path.join(dir.abspath, entry.name)
    try:
        stat_result = entry.stat()
    except FileNotFoundError:
        log.warning("%s: cannot stat() file: broken symlink?", full_abs)
        stat_result = None
    return cls(relpath=full_rel, abspath=full_abs, stat=stat_result)
def _get_attributes(self, item: os.DirEntry) -> dict: """Parse entire item and subdirectories. Returns: * Total size in bytes * Maximum folder depth of item * Total number of files this item contains * Access timestamp * Modification timestamp * Change timestamp in the same order as tuple. :param item: DirEntry object :type item: posix.DirEntry :return: Dictionary of {size, depth, num_of_files, atime, mtime, ctime} :rtype: dict """ # it's a file or symlink, size is already on item stat if not item.is_dir(follow_symlinks=False): stat = item.stat(follow_symlinks=False) return { "size": stat.st_size, "depth": self._get_depth(item.path) - self._level, "num_of_files": 1, "atime": int(stat.st_atime), "mtime": int(stat.st_mtime), "ctime": int(stat.st_ctime), } # It is a folder, recursive size check else: total_size = num_of_files = depth = 0 atime = mtime = ctime = 0 # TODO: try/except catch PermissionError with os.scandir(item.path) as directory: for i in directory: attrs = self._get_attributes(i) total_size += attrs["size"] num_of_files += attrs["num_of_files"] atime = max(atime, attrs["atime"]) mtime = max(mtime, attrs["mtime"]) ctime = max(ctime, attrs["ctime"]) depth = max(depth, attrs["depth"]) return { "size": total_size, "depth": depth, "num_of_files": num_of_files, "atime": atime, "mtime": mtime, "ctime": ctime, }
def os_dir_entry_to_directory_list_entry(
        virtual_path: str, dir_entry: os.DirEntry) -> DirectoryListEntry:
    """Convert an `os.DirEntry` instance to a `DirectoryListEntry`."""
    stat_result: os.stat_result = dir_entry.stat()
    entry_type = (DirectoryEntryType.DIRECTORY if dir_entry.is_dir()
                  else DirectoryEntryType.FILE)
    # entry names are bytes here; decode for display, join raw for the path
    return DirectoryListEntry(
        dir_entry.name.decode("utf8"),
        utf8_path_join(virtual_path, dir_entry.name),
        entry_type,
        DiskSource(),
        datetime.datetime.fromtimestamp(stat_result.st_mtime),
    )
def __init__(self, dir_entry: os.DirEntry):
    """Load an article/series markdown file and parse its front matter."""
    self.file_path = dir_entry.path
    self.file_name = dir_entry.name
    # Only the special filenames mark a directory-backed entry.
    self.is_dir = self.file_name in (ARTICLE_FILENAME, SERIES_FILENAME)
    if self.is_dir:
        self.dir_path = os.path.dirname(self.file_path)
        self.dir_name = self.dir_path.rsplit(os.path.sep, 1)[1]
    else:
        self.dir_path = None
        self.dir_name = None
    self.last_updated = timestamp_to_datetime(dir_entry.stat().st_mtime,
                                              tzlocal.get_localzone())
    with open(self.file_path) as f:
        data = frontmatter.load(f)
    self.frontmatter = data.metadata
    self.markdown = data.content
def copy_file_to_file_DirEntry(self, src: os.DirEntry, dst: Path, follow_symlinks=True):
    """ copy the file src to the file dst. dst should either be an existing file or not exists at all - i.e. dst cannot be a folder. The parent folder of dst is assumed to exist. src is assumed to be of type os.DirEntry """
    # Record what we are about to do for progress/error reporting.
    self.last_src, self.last_dst = os.fspath(src), os.fspath(dst)
    self.doing = f"""copy file '{self.last_src}' to '{self.last_dst}'"""
    if self.should_copy_file_DirEntry(src, dst):
        try:
            if not self.should_hard_link_file_DirEntry(src):
                # plain copy path: contents then metadata
                log.debug(
                    f"copy file '{self.last_src}' to '{self.last_dst}'")
                if not self.dry_run:
                    _fast_copy_file(src, dst)
                    shutil.copystat(src, dst, follow_symlinks=follow_symlinks)
            else:
                # try to create hard link
                try:
                    # `dry_run or ...` short-circuits the link in dry runs
                    self.dry_run or os.link(src, dst)
                    log.debug(
                        f"hard link file '{self.last_src}' to '{self.last_dst}'"
                    )
                    self.statistics['hard_links'] += 1
                except OSError as ose:
                    # hard link failed (e.g. cross-device): fall back to
                    # an ordinary copy and remember the failure
                    self.hard_links_failed = True
                    log.debug(
                        f"copy file '{self.last_src}' to '{self.last_dst}'"
                    )
                    if not self.dry_run:
                        _fast_copy_file(src, dst)
                        shutil.copystat(src, dst, follow_symlinks=follow_symlinks)
            if self.copy_owner and self.has_chown:
                # ownership propagation is currently disabled (see below)
                src_st = src.stat()
                # ! os.chown(dst, src_st[stat.ST_UID], src_st[stat.ST_GID])
        except Exception as ex:
            # enrich the error context (who holds the file lock) and re-raise
            self.who_locks_file_error_dict(_fast_copy_file, self.last_dst)
            raise
    else:
        # should_copy decided the destination is already up to date
        self.statistics['skipped_files'] += 1
    return dst
def digestEntry(self, entry: os.DirEntry):
    """Accumulate per-extension statistics (count, bytes, largest file)
    for one directory entry, and remember files larger than 10 MB."""
    ext = os.path.splitext(entry.name)[1]
    if ext not in self.extDicts:
        self.extDicts[ext] = {"num": 0, "bytes": 0, "maxbytes": 0, "maxname": ""}
    stats = self.extDicts[ext]
    stats["num"] += 1
    size = entry.stat().st_size
    stats["bytes"] += size
    if size > stats["maxbytes"]:
        # new largest file for this extension
        stats["maxbytes"] = size
        stats["maxname"] = entry.path
    if size > 10e6:
        # track big files separately and log them in megabytes
        self.bigFileList.append(entry)
        emb = "%.3f" % round(size / 1e6, 3)
        lgg.info(f" big file: {emb} mb - {entry.path}")
def validate_file(self, dir_entry: os.DirEntry) -> bool:
    """Validates given DirEntry. Returns False if entry should be completely
    ignored, or True if we want to keep it for further processing.

    Ignore all zero length files. There are usually there for a purpose like
    .dummy etc, so there can be tons of it with the same name even, so by
    default, ignore them completely. Also ignore all symlinks."""
    from .log import Log

    name = dir_entry.name
    if dir_entry.is_symlink():
        Log.vv('{name}: It is the symbolic link. Skipping.'.format(
            name=name))
        return False
    # NOTE: do not call is_file() on DirEntry. It will fail in endless
    # recursion for invalid (dead) symbolic links. os.path.isfile() works).
    if not dir_entry.is_file():
        Log.vv('{name}: This is not a file. Skipping.'.format(
            name=name))
        return False
    size = dir_entry.stat().st_size
    if size == 0:
        Log.vv('{name}: File is 0 bytes long. Skipping.'.format(
            name=name))
        return False
    if self.min_size > 0 and size < self.min_size:
        Log.vv('{name}: File is shorter than min size ({size}). Skipping.'.
               format(name=name, size=size))
        return False
    if 0 < self.max_size < size:
        Log.vv('{name}: File is biger than max size ({size}). Skipping.'.
               format(name=name, size=size))
        return False
    for pattern in self._file_name_blacklist:
        if re.match(pattern, name) is not None:
            Log.vv('File "{name}" blacklisted by "{re}" rule. Skipping.'.
                   format(name=name, re=pattern))
            return False
    return True
def __init__(self, dir_entry: os.DirEntry,
             category_manager: CategoryManager = injectable,
             tag_manager: TagManager = injectable):
    """Load a blog article/series markdown file and its front matter."""
    self.category_manager = category_manager
    self.tag_manager = tag_manager
    self.file_path = dir_entry.path
    self.file_name = dir_entry.name
    # Only the special filenames mark a directory-backed entry.
    self.is_dir = self.file_name in (Config.BLOG_ARTICLE_FILENAME,
                                     Config.BLOG_SERIES_FILENAME)
    if self.is_dir:
        self.dir_path = os.path.dirname(self.file_path)
        self.dir_name = self.dir_path.rsplit(os.path.sep, 1)[1]
    else:
        self.dir_path = None
        self.dir_name = None
    self.last_updated = timestamp_to_datetime(dir_entry.stat().st_mtime,
                                              tzlocal.get_localzone())
    with open(self.file_path) as f:
        data = frontmatter.load(f)
    self.frontmatter = data.metadata
    self.markdown = data.content
def add(self, entry: os.DirEntry,
        local_zip_path: typing.Optional[str] = None) -> None:
    """Write *entry* into the archive, adding its on-disk size to the
    running byte counter for this iteration."""
    # lstat first, then acquire the handle — same order as before so a
    # handle failure still leaves the counter untouched
    lstat_result = entry.stat(follow_symlinks=False)
    handle = self._get_handle()
    self._iteration_total_bytes += lstat_result.st_size
    handle.write(entry.path, local_zip_path or None)
def create_time(f: os.DirEntry) -> float:
    """Sort key: the entry's st_ctime (creation / metadata-change time)."""
    stat_result = f.stat()
    return stat_result.st_ctime
def from_dir_entry(cls, dir: "File", entry: os.DirEntry) -> "File":
    """Build a File for *entry* located inside *dir*."""
    name = entry.name
    return cls(
        relpath=os.path.join(dir.relpath, name),
        abspath=os.path.join(dir.abspath, name),
        stat=entry.stat(),
    )
def _get_timestamp(entry: os.DirEntry) -> datetime: return datetime.fromtimestamp(entry.stat().st_mtime)
def get_file_date(entry: os.DirEntry) -> datetime.date:
    """Return the calendar date (local time) of the entry's last modification."""
    modified = datetime.fromtimestamp(entry.stat().st_mtime)
    return modified.date()
def hardlink_identical_files(*, dir_entry: os.DirEntry, args: argparse.Namespace) -> None:
    """hardlink identical files

    The purpose of this function is to hardlink files together if the files are
    the same.  To be considered the same they must be equal in the following
    criteria:
          * file size
          * file contents
          * file mode (default)
          * owner user id (default)
          * owner group id (default)
          * modified time (default)

    Also, files will only be hardlinked if they are on the same device.  This
    is because hardlink does not allow you to hardlink across file systems.

    The basic idea on how this is done is as follows:

        Walk the directory tree building up a list of the files.

       For each file, generate a simple hash based on the size and modified
       time.

       For any other files which share this hash make sure that they are not
       identical to this file.  If they are identical then hardlink the files.

       Add the file info to the list of files that have the same hash value.
    """
    # Honor user-supplied exclude patterns first.
    for exclude in args.excludes:
        if re.search(exclude, dir_entry.path):
            return

    stat_info = dir_entry.stat(follow_symlinks=False)
    # Only regular files are candidates for hardlinking.
    if not stat.S_ISREG(stat_info.st_mode):
        return

    # Cheap bucketing hash from size (and mtime unless timestamps are
    # ignored); real identity is verified later by are_files_hardlinkable.
    file_hash = hash_value(
        size=stat_info.st_size,
        time=stat_info.st_mtime,
        notimestamp=(args.notimestamp or args.content_only),
    )

    # Bump statistics count of regular files found.
    gStats.found_regular_file()
    if args.verbose >= 2:
        print(f"File: {dir_entry.path}")

    # NOTE(review): removed a dead tuple assignment to work_file_info that
    # was immediately overwritten in the original.
    work_file_info = FileInfo(filename=dir_entry.path, stat_info=stat_info)

    if file_hash not in file_hashes:
        # No other file shares this hash yet; start a new bucket.
        file_hashes[file_hash] = [work_file_info]
        return

    # If we are already hardlinked to one of the bucketed files, just
    # record that fact and stop.
    for temp_file_info in file_hashes[file_hash]:
        if is_already_hardlinked(st1=stat_info, st2=temp_file_info.stat_info):
            gStats.found_hardlink(
                temp_file_info.filename,
                dir_entry.path,
                temp_file_info.stat_info,
            )
            return

    # Not hardlinked yet: link against the first compatible file, if any.
    for temp_file_info in file_hashes[file_hash]:
        if are_files_hardlinkable(
            file_info_1=work_file_info,
            file_info_2=temp_file_info,
            args=args,
        ):
            hardlink_files(
                sourcefile=temp_file_info.filename,
                destfile=dir_entry.path,
                stat_info=temp_file_info.stat_info,
                args=args,
            )
            return

    # Same hash but not linkable to any existing file: remember it so
    # later files can be compared against it.
    file_hashes[file_hash].append(work_file_info)