def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
    """Process one directory entry during the main walk.

    * A directory without sub-directories emits the events-table suffix,
      derives a table name from ``parents`` and the directory name, records
      that name in ``_sys_event_tables`` when the directory is called
      ``sys``, and emits the events-table prefix.
    * A ``.json`` file located one to four levels below the base directory
      has its entries added through ``add_events_table_entries``.
    * Anything else is ignored.

    Args:
        parents: Directory names leading to ``item``; its length is used as
            the nesting level.
        item: The directory entry being visited.
    """
    global _sys_event_tables

    def is_leaf_dir(path: str) -> bool:
        """Return True when ``path`` contains no sub-directory."""
        # The context manager closes the directory handle even though any()
        # stops at the first sub-directory it finds.
        with os.scandir(path) as children:
            return not any(child.is_dir() for child in children)

    # model directory, reset topic
    if item.is_dir() and is_leaf_dir(item.path):
        print_events_table_suffix()
        tblname = file_name_to_table_name(parents, item.name)
        if item.name == 'sys':
            _sys_event_tables.append(tblname)
        print_events_table_prefix(tblname)
        return

    # base dir or too deep
    level = len(parents)
    if level == 0 or level > 4:
        return

    # Ignore other directories. If the file name does not have a .json
    # extension, ignore it. It could be a readme.txt for instance.
    if not item.is_file() or not item.name.endswith('.json'):
        return

    add_events_table_entries(item, get_topic(item.name))
def _dirEntryToFileInfo(dirEntry: os.DirEntry, path: str, realpath: str):
    """Build a file-info object for a scanned directory entry.

    Args:
        dirEntry: Entry whose own (not link-followed) stat data is used.
        path: Path passed through unchanged to
            ``FolderMountSource._statsToFileInfo``.
        realpath: Location handed to ``os.readlink`` when the entry is a
            symbolic link.

    Returns:
        Whatever ``FolderMountSource._statsToFileInfo`` returns for the
        entry's stats, the link target ("" when not a link or unreadable)
        and ``path``.
    """
    try:
        if dirEntry.is_symlink():
            linkname = os.readlink(realpath)
        else:
            linkname = ""
    except OSError:
        # An unreadable link is reported as having no target.
        linkname = ""

    stats = dirEntry.stat(follow_symlinks=False)
    return FolderMountSource._statsToFileInfo(stats, linkname, path)
def print_dir_entry(entry: DirEntry, *, path: bool = False, path_field_size=70):
    """Print one aligned listing line for ``entry``.

    The line holds the entry name (or its full path when ``path`` is true)
    left-aligned in ``path_field_size`` columns, the kind (``DIR`` or
    ``FILE``), and the size in bytes right-aligned in 8 columns. The size
    column is left blank for anything that is not a regular file.
    """
    info = entry.stat()

    if entry.is_dir():
        kind = 'DIR'
    else:
        kind = 'FILE'

    label = entry.path if path else entry.name
    size_text = str(info.st_size) if entry.is_file() else ''

    print(f"{label:<{path_field_size}s} {kind:<4s} {size_text:>8s}")
def button_made_from(entry: os.DirEntry) -> Gtk.Button:
    """Create the button that represents ``entry``.

    A regular file yields a ``Copy`` built from the ``Snippet`` loaded from
    the file's path; a directory yields a ``GoTo`` whose position is the
    parent directory and whose destination is the directory itself. Any
    other kind of entry yields ``None``.
    """
    if entry.is_file():
        snippet = Snippet().load(entry.path)
        return Copy(snippet)

    if not entry.is_dir():
        return None

    parent_dir = os.path.dirname(entry.path)
    return GoTo(name=entry.name, position=parent_dir, destination=entry.path)
def _get_attributes(self, item: os.DirEntry) -> dict:
    """Collect size, depth, file count and timestamps for ``item``.

    For a file or symlink the values come from the entry's own stat (links
    are not followed). For a directory the method recurses into every child
    and aggregates: sizes and file counts are summed, depth and the three
    timestamps are the maximum over all children. An empty directory
    therefore yields zeros everywhere.

    :param item: DirEntry object
    :type item: posix.DirEntry
    :return: Dictionary with the keys ``size``, ``depth``, ``num_of_files``,
        ``atime``, ``mtime`` and ``ctime`` (in that order)
    :rtype: dict
    """
    if item.is_dir(follow_symlinks=False):
        totals = {
            "size": 0,
            "depth": 0,
            "num_of_files": 0,
            "atime": 0,
            "mtime": 0,
            "ctime": 0,
        }
        # TODO: try/except catch PermissionError
        with os.scandir(item.path) as children:
            for child in children:
                child_attrs = self._get_attributes(child)
                totals["size"] += child_attrs["size"]
                totals["num_of_files"] += child_attrs["num_of_files"]
                for key in ("atime", "mtime", "ctime", "depth"):
                    totals[key] = max(totals[key], child_attrs[key])
        return totals

    # Not a directory: everything needed is on the entry's own stat.
    info = item.stat(follow_symlinks=False)
    return {
        "size": info.st_size,
        "depth": self._get_depth(item.path) - self._level,
        "num_of_files": 1,
        "atime": int(info.st_atime),
        "mtime": int(info.st_mtime),
        "ctime": int(info.st_ctime),
    }
def _get_entry_attributes(entry: os.DirEntry):
    """Build the ``FileAttributes`` value describing ``entry``.

    Starts from ``FileAttributes.NONE`` and adds ``IS_DIR`` or ``IS_FILE``
    (directory takes precedence), ``IS_LINK`` for symbolic links and
    ``IS_HIDDEN`` for names starting with a dot.
    """
    applicable = []

    if entry.is_dir():
        applicable.append(FileAttributes.IS_DIR)
    elif entry.is_file():
        applicable.append(FileAttributes.IS_FILE)

    if entry.is_symlink():
        applicable.append(FileAttributes.IS_LINK)

    if entry.name.startswith('.'):
        applicable.append(FileAttributes.IS_HIDDEN)

    attrs = FileAttributes.NONE
    for flag in applicable:
        attrs |= flag
    return attrs
def os_dir_entry_to_directory_list_entry(
        virtual_path: str, dir_entry: os.DirEntry) -> DirectoryListEntry:
    """Convert an `os.DirEntry` instance to a `DirectoryListEntry`.

    The entry name is decoded as UTF-8 for display (so the entry is expected
    to carry a bytes name), joined onto ``virtual_path`` for the entry's
    path, and the modification time is taken from the entry's stat.
    """
    stat_result: os.stat_result = dir_entry.stat()

    display_name = dir_entry.name.decode("utf8")
    full_virtual_path = utf8_path_join(virtual_path, dir_entry.name)

    if dir_entry.is_dir():
        entry_type = DirectoryEntryType.DIRECTORY
    else:
        entry_type = DirectoryEntryType.FILE

    source = DiskSource()
    modified = datetime.datetime.fromtimestamp(stat_result.st_mtime)

    return DirectoryListEntry(
        display_name,
        full_virtual_path,
        entry_type,
        source,
        modified,
    )
def __init__(self, entry: DirEntry):
    """Populate the instance from a scanned directory entry.

    The entry's name and path are decoded to ``str`` (so the entry is
    expected to carry bytes), the path relative to ``models_dir`` is
    recorded, and stat-derived times plus a human-readable size are stored.
    """
    decoded_name = entry.name.decode()
    decoded_path = entry.path.decode()

    self.name = decoded_name
    self.path = decoded_path
    self.rel_path = relpath(decoded_path, models_dir)
    self.is_dir = entry.is_dir()

    # Keep both timestamps as datetime objects so they compare naturally.
    # NOTE(review): st_ctime is the metadata-change time on POSIX systems,
    # not necessarily the creation time - confirm the intended meaning.
    stat_result = entry.stat()
    self.created_time = datetime.fromtimestamp(stat_result.st_ctime)
    self.modified_time = datetime.fromtimestamp(stat_result.st_mtime)

    raw_size = self._get_size(entry.path)
    self.size = self._human_readable_size(raw_size)
def create_link(entry: os.DirEntry, dotconfig: bool):
    """Symlink ``entry`` into ``DOT_CONFIG`` or ``HOME``.

    Args:
        entry: The file or directory to link to.
        dotconfig: When true the link is created under ``DOT_CONFIG``,
            otherwise under ``HOME``.

    Behaviour by state of the destination:

    * symlink already pointing at ``entry`` - nothing to do;
    * symlink pointing elsewhere - removed and recreated when
      ``ARGS.remove_symlinks`` is set, otherwise left alone with a warning
      (previously this case fell through to ``os.symlink`` and raised
      ``FileExistsError``);
    * existing non-link file or directory - moved to ``<dst>.backup`` first;
    * nothing there - the link is simply created.
    """
    src = entry.path
    dst = os.path.join(DOT_CONFIG if dotconfig else HOME, entry.name)

    if os.path.islink(dst):
        # If dst links to src, all is good
        if Path(os.readlink(dst)) == Path(src):
            logger.debug(f"{dst} already linked correctly, skipping ...")
            return
        if not ARGS.remove_symlinks:
            # Creating the link over an existing one would raise
            # FileExistsError, so skip it explicitly instead.
            logger.warning(
                f"{dst} is a symlink to another target, skipping ..."
            )
            return
        logger.info(f"Removing {dst}")
        os.remove(dst)
    elif os.path.exists(dst):
        # Not a symlink, but something exists at the destination
        logger.warning(f"Destination: {dst} already exists (dir)")
        logger.warning(f"Moving {dst} to {dst}.backup")
        shutil.move(dst, dst + ".backup")

    os.symlink(src, dst, target_is_directory=entry.is_dir())
    logger.info(f"{src} -> {dst}")
def _is_restaurant_module(entry: DirEntry) -> bool:
    """Tell whether ``entry`` is a ``.py`` file whose name has no leading underscore."""
    if not entry.is_file():
        return False
    file_name = entry.name
    if file_name.startswith('_'):  # type: ignore
        return False
    return file_name.endswith('.py')  # type: ignore
def isToBeIgnored(self,entry:os.DirEntry):
    """Return True when ``entry`` is a directory listed in ``self.ignoreDirList``.

    A matching directory is also reported through ``lgg.info``. Files are
    never ignored by this check.
    """
    if entry.is_dir() and entry.name in self.ignoreDirList:
        lgg.info(f" Ignoring {entry.name}",lgg.cR)
        return True
    return False
def _insert_sorted(self, item: os.DirEntry, sort_by: SortBy) -> None:
    """Summarise ``item`` and insert it into ``self._items`` at its sorted position.

    The attributes come from ``self._get_attributes``. When all three
    timestamps are zero (an empty folder) the entry's own timestamps are
    used instead. The running totals ``self._total_size`` and
    ``self._items_len`` are updated as well.

    :param item: DirEntry object from `_iter_items()` async iteration
        within the async parallel scanning.
    :type item: posix.DirEntry
    :param sort_by: SortBy enum attribute
    :type sort_by: SortBy
    :rtype: None
    """
    attrs = self._get_attributes(item)

    time_keys = ("atime", "mtime", "ctime")
    if all(attrs[key] == 0 for key in time_keys):
        # It is an empty folder, grab folder timestamps
        own_stat = item.stat(follow_symlinks=False)
        attrs["atime"] = int(own_stat.st_atime)
        attrs["mtime"] = int(own_stat.st_mtime)
        attrs["ctime"] = int(own_stat.st_ctime)

    summary = {"name": os.path.relpath(item.path, self._root)}
    for key in ("size", "depth", "num_of_files") + time_keys:
        summary[key] = attrs[key]

    position = self._find_index(summary, sort_by)
    self._total_size += summary["size"]
    self._items_len += 1
    self._items.insert(position, summary)
def is_game_dir(self, entry: os.DirEntry, ignore_dirs: Optional[List[str]] = None) -> bool:
    """Tell whether ``entry`` is a directory that counts as a game folder.

    The entry must be a directory, its name must satisfy
    ``self._is_game_match`` and its lower-cased name must not appear in
    ``ignore_dirs`` (``self.ignore_dirs`` is used when none is given).
    """
    excluded = self.ignore_dirs if ignore_dirs is None else ignore_dirs

    if not entry.is_dir():
        return False

    matched = self._is_game_match(entry.name)
    if not matched:
        # Hand back the matcher's own falsy value, as an ``and`` chain would.
        return matched

    return entry.name.lower() not in excluded
def should_copy_file_DirEntry(self, src: os.DirEntry, dst: Path):
    """Decide whether ``src`` has to be copied over ``dst``.

    Returns True when the top destination does not exist at all, or when
    anything in the comparison fails (most commonly because ``dst`` itself
    is missing). Returns False when both paths share an inode, or when size
    and modification time are equal. When the destination exists and must
    be overwritten it is first made writable and, if
    ``self.should_no_flags_file`` says so, has its flags cleared.
    """
    if self.top_destination_does_not_exist:
        return True

    must_copy = True
    try:
        dst_info = dst.stat()
        src_info = src.stat()
        if src_info.st_ino == 0:
            # on windows os.DirEntry.stat sets st_ino to zero and os.stat should be called
            # see https://docs.python.org/3.6/library/os.html#os.DirEntry
            src_info = os.stat(src.path, follow_symlinks=False)

        same_inode = src_info.st_ino == dst_info.st_ino
        if same_inode:
            must_copy = False
            log.debug(
                f"{self.progress_msg()} skip copy file, same inode '{src.path}' to '{dst}'"
            )
        elif src_info.st_size == dst_info.st_size and src_info.st_mtime == dst_info.st_mtime:
            must_copy = False
            log.debug(
                f"{self.progress_msg()} skip copy file, same time and size '{src.path}' to '{dst}'"
            )

        if must_copy:
            # destination exists and file should be copied, so make sure it's writable
            with Chmod(dst, "a+rw", own_progress_count=0) as mod_changer:
                mod_changer()
            if self.should_no_flags_file(dst):
                with ChFlags(dst, "nohidden", "nosystem", "unlocked",
                             ignore_all_errors=True, own_progress_count=0) as flags_changer:
                    flags_changer()
    except Exception:
        # most likely dst.stat() failed because dst does not exist
        must_copy = True

    return must_copy
def validate_file(self, dir_entry: os.DirEntry) -> bool:
    """Decide whether ``dir_entry`` is kept for further processing.

    Returns False (after a verbose log line) for symbolic links, for
    anything that is not a regular file, for zero-length files, for files
    smaller than ``self.min_size`` or larger than ``self.max_size`` (each
    limit applies only when positive), and for names matched by any pattern
    in ``self._file_name_blacklist``. Returns True otherwise.

    Zero-length files are usually placeholders such as ``.dummy`` and can
    exist in large numbers under the same name, hence they are skipped.
    """
    from .log import Log

    name = dir_entry.name

    if dir_entry.is_symlink():
        Log.vv('{name}: It is the symbolic link. Skipping.'.format(name=name))
        return False

    # Symlinks were rejected above, so is_file() only ever sees non-link
    # entries here.
    if not dir_entry.is_file():
        Log.vv('{name}: This is not a file. Skipping.'.format(name=name))
        return False

    item_size = dir_entry.stat().st_size

    if item_size == 0:
        Log.vv('{name}: File is 0 bytes long. Skipping.'.format(name=name))
        return False

    if self.min_size > 0 and item_size < self.min_size:
        Log.vv('{name}: File is shorter than min size ({size}). Skipping.'.
               format(name=name, size=item_size))
        return False

    if self.max_size > 0 and item_size > self.max_size:
        Log.vv('{name}: File is biger than max size ({size}). Skipping.'.
               format(name=name, size=item_size))
        return False

    for pattern in self._file_name_blacklist:
        if re.match(pattern, name) is not None:
            Log.vv('File "{name}" blacklisted by "{re}" rule. Skipping.'.
                   format(name=name, re=pattern))
            return False

    return True
def filter_only_tif_files(item: os.DirEntry) -> bool:
    """Return True only for regular files whose extension is ``.tif`` (any letter case)."""
    if not item.is_file():
        return False
    extension = os.path.splitext(item.name)[1]
    return extension.lower() == ".tif"
def _is_backup_entity(backup_entry: os.DirEntry) -> bool:
    """Check whether ``backup_entry`` is a single backup directory.

    That is the case when the entry is a directory and its name parses as a
    timestamp in the ``BACKUP_ENT_FMT`` format.
    """
    if backup_entry.is_dir():
        try:
            datetime.strptime(backup_entry.name, BACKUP_ENT_FMT)
        except ValueError:
            return False
        return True
    return False
def filter_alebmic_ini(entry: os.DirEntry) -> bool:
    """Return True only for a regular file named exactly ``alembic.ini``."""
    is_regular_file = entry.is_file()
    return is_regular_file and entry.name == "alembic.ini"
def from_dir_entry(self, dir_entry: os.DirEntry, file_hash: str or None = None, dont_hash: bool = False) -> None:
    """Fill this object's fields from a scanned directory entry.

    Args:
        dir_entry: Source entry; anything else raises ``ValueError``.
        file_hash: Value stored as ``self.hash`` (may be ``None``).
        dont_hash: When false, ``self.calculate_hash()`` is invoked after
            the fields are set.

    Raises:
        ValueError: ``dir_entry`` is not an ``os.DirEntry``.
    """
    if not isinstance(dir_entry, os.DirEntry):
        raise ValueError(
            'Unsupported data type {type}'.format(type=type(dir_entry)))

    self.hash = file_hash
    self.name = dir_entry.name
    self.path = dir_entry.path

    # DirEntry caches its stat result, so one lookup serves every field.
    info = dir_entry.stat()
    self.size = info.st_size
    self.ctime = self._strip_millis(info.st_ctime)
    self.mtime = self._strip_millis(info.st_mtime)
    self.inode = info.st_ino

    if dont_hash:
        return
    self.calculate_hash()
def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
    """Collect the C strings of every event in one JSON file.

    Only ``.json`` files located one to four levels below the base
    directory are read; directories and every other entry are skipped. Each
    event read from the file has its C string added to ``_bcs``.

    Args:
        parents: Directory names leading to ``item``; its length is used as
            the nesting level.
        item: The directory entry being visited.
    """
    if item.is_dir():
        return

    # base dir (level 0) or too deep (more than 4 levels)
    if not 1 <= len(parents) <= 4:
        return

    # Anything that is not a .json file is skipped; it could be a
    # readme.txt for instance.
    if not (item.is_file() and item.name.endswith('.json')):
        return

    topic = get_topic(item.name)
    for event in read_json_events(item.path, topic):
        _bcs.add(event.build_c_string())
def _component_filter(self, entry: os.DirEntry):
    """Accept regular files whose name, minus extension, fully matches ``self._component_mask``."""
    if not entry.is_file():
        return False
    stem = os.path.splitext(entry.name)[0]
    return bool(self._component_mask.fullmatch(stem))
def is_editor_file(entry: os.DirEntry) -> bool:
    """Tell whether ``entry`` is an editor artefact.

    Directories count when they are named ``.idea`` or ``.vscode``; any
    other entry counts when its name ends in ``.sw`` plus one character or
    in a tilde.
    """
    editor_folders = ('.idea', '.vscode')
    editor_file_regexes = (r'\.sw.$', r'~$')

    if entry.is_dir():
        return entry.name in editor_folders

    for regex in editor_file_regexes:
        if re.search(regex, entry.name):
            return True
    return False
def filter_bib_id_folders(item: os.DirEntry):
    """Return True for directories whose name looks like a bib id.

    A directory qualifies when its name contains the letter ``v`` or when
    the whole name parses as an integer. Files never qualify.

    NOTE: the name used to be passed to ``eval()``. That executed arbitrary
    directory names as Python code and raised ``NameError``/``SyntaxError``
    for ordinary names such as ``abc`` or ``0123``. The name is now parsed
    with ``int()`` and anything non-numeric is simply rejected.
    """
    if not item.is_dir():
        return False

    if "v" in item.name:
        return True

    try:
        int(item.name)
    except ValueError:
        return False
    return True
def __try_read_file(dir_entry: os.DirEntry, scan_dir: str, base_path: str):
    """Attempt to read a file. Internal.

    If the file given by ``dir_entry`` is a Maven config object, return a
    tuple of (True, JSON dict) representing that object. If the object could
    be read but wasn't a part (wrong extension, a directory, malformed JSON,
    JSON that is not an object, or a required key is missing), return
    (False, None). Files that cannot be decoded as text produce warnings and
    also return (False, None). Other I/O errors propagate to the caller.

    :param dir_entry: A DirEntry object, as returned by os.scandir
    :param scan_dir: The directory on which os.scandir was called. May be
                     an empty string.
    :param base_path: The base path of the config, where "/" is the root of
                      the config.
    """
    name, ext = os.path.splitext(dir_entry.path)
    if dir_entry.is_dir() or ext != ".part":
        return (False, None)

    file_path = os.path.join(base_path, scan_dir, dir_entry.path)
    try:
        with open(file_path, 'r') as f:
            data = json.load(f)

        # Valid JSON that is not an object (a list, string, number, ...)
        # cannot be a part; indexing it below would raise TypeError.
        if not isinstance(data, dict):
            return (False, None)

        path = ConfigObjectTracker.__norm_path(file_path, base_path)
        # return a corrected dict
        obj = {
            # todo: Should we generate a uuid1 from the file's inode?
            #  Technically IDs should be unique to a config and
            #  immutable across a given object's lifetime, however
            #  inodes are only unique to a block device and may change
            #  with renames/moves. This means a config with a symlink to
            #  a network drive or another disk might have duplicate
            #  inodes, and with uuid1s, they will have duplicate IDs.
            "id": data["id"],
            "typeName": data["typeName"],
            # Because this is a directory store, we can safely ignore
            # whatever the file says is true, because the filesystem
            # is more accurate in these regards
            "lastModified": os.path.getmtime(file_path),
            "name": os.path.basename(name),
            "path": path,
        }
        if "data" in data:
            # this is a FileConfigObject, attach the data
            obj["data"] = data["data"]
        if "functionType" in data and "arguments" in data:
            # this is a UDF or UDP, attach those args
            obj["functionType"] = data["functionType"]
            obj["arguments"] = data["arguments"]
        return (True, obj)
    except (json.JSONDecodeError, KeyError):
        # Not a part, or a malformed part
        return (False, None)
    except UnicodeDecodeError as e:
        # Corrupted file, could not read
        warnings.warn("Failed to read file " + file_path)
        # warn() expects text (or a Warning instance), not a bare exception
        warnings.warn(str(e))
        return (False, None)
def get_file_info(entry: DirEntry):
    """Return size and formatted access/modification times for ``entry``.

    Returns:
        A dict with ``size`` (bytes) plus ``lastAccessed`` and
        ``lastModified`` rendered as e.g.
        ``"Monday, January 12, 1970 02:46:40"`` (12-hour clock, local time).

    The stat data comes from ``entry.stat()`` only; the earlier extra
    ``os.stat(entry.path)`` call discarded its result and has been dropped.
    """
    timestamp_format = "%A, %B %d, %Y %I:%M:%S"
    stat = entry.stat()

    # Tuple positions 7 and 8 of a stat result hold the access and
    # modification times as whole seconds.
    accessed = datetime.fromtimestamp(stat[7])
    modified = datetime.fromtimestamp(stat[8])

    return {
        'size': stat.st_size,
        'lastAccessed': accessed.strftime(timestamp_format),
        'lastModified': modified.strftime(timestamp_format),
    }
def _survives(self, base_path: str, entry: os.DirEntry) -> bool:
    """Determine whether a single entry survives the ignore filter.

    An entry is dropped as soon as its path matches one of
    ``self._processed_patterns``. A pattern ending in ``/`` additionally
    matches directories against the pattern without the trailing slash.
    ``base_path`` is accepted but not used here.
    """
    candidate = entry.path

    def is_ignored_by(pattern: str) -> bool:
        """Return True when ``pattern`` excludes the entry."""
        directory_only_hit = (
            entry.is_dir()
            and pattern.endswith("/")
            and fnmatch.fnmatch(candidate, pattern[:-1])
        )
        if directory_only_hit:
            return True
        return fnmatch.fnmatch(candidate, pattern)

    return not any(is_ignored_by(p) for p in self._processed_patterns)
def from_dir_entry(cls, dir: "File", entry: os.DirEntry) -> "File":
    """Create an instance for ``entry``, a child of the directory ``dir``.

    The relative and absolute paths are the parent's paths joined with the
    entry name. When the entry cannot be stat'ed because the file is missing
    (for instance a broken symlink) a warning is logged and ``stat`` is
    passed as ``None``.
    """
    child_abspath = os.path.join(dir.abspath, entry.name)
    child_relpath = os.path.join(dir.relpath, entry.name)

    try:
        stat_result = entry.stat()
    except FileNotFoundError:
        log.warning("%s: cannot stat() file: broken symlink?", child_abspath)
        stat_result = None

    return cls(relpath=child_relpath, abspath=child_abspath, stat=stat_result)
def directory_only_filter(item: os.DirEntry):
    """Return True only for directories that exist and are readable by the current process."""
    if not item.is_dir():
        return False
    # Existence is checked before readability.
    return all(os.access(item.path, mode) for mode in (os.F_OK, os.R_OK))
def _process_file_or_dir(context: str, dir_entry: os.DirEntry) -> [[AssetFile]]:
    """Build asset files for ``dir_entry``, recursing into directories.

    A regular file is handed to ``AssetFile.build`` together with
    ``context``. Anything else is scanned as a directory: every child whose
    name does not start with ``.`` or ``_`` is processed recursively with
    the directory name appended to ``context``, and the per-child results
    are returned as a list.
    """
    if dir_entry.is_file():
        return AssetFile.build(dir_entry=dir_entry, context=context)

    nested_context = os.path.join(context, dir_entry.name)
    collected = []
    for child in os.scandir(dir_entry.path):
        # hidden (".") and private ("_") children are skipped
        if child.name[:1] in (".", "_"):
            continue
        collected.append(_process_file_or_dir(nested_context, child))
    return collected
def should_hard_link_file_DirEntry(self, a_file: os.DirEntry):
    """Decide whether ``a_file`` may be hard-linked rather than copied.

    Hard-linking is allowed only when it is enabled (``self.hard_links``),
    has not failed before (``self.hard_links_failed``), the entry is not a
    symbolic link, and its path matches none of the no-hard-link patterns.

    Args:
        a_file: The scanned entry; must be an ``os.DirEntry``.

    Returns:
        True when the file should be hard-linked, False otherwise.
    """
    assert isinstance(a_file, os.DirEntry)

    if not self.hard_links or self.hard_links_failed or a_file.is_symlink():
        return False

    # Path.match needs a Path; build it once for all patterns (and not at
    # all when there are no patterns) instead of once per pattern.
    file_path = None
    for no_hard_link_pattern in self.__all_no_hard_link_patterns:
        if file_path is None:
            file_path = Path(a_file)
        if file_path.match(no_hard_link_pattern):
            log.debug(f"not hard linking {a_file.path} because it matches pattern {no_hard_link_pattern}")
            return False
    return True