def iterfiles(self, include_pattern=None, abspath=False, force_refresh=False):
    """ Yield every cached file path, optionally filtered by a pattern.

    The cache is (re)populated via ``populate_dir`` before iterating.

    Args:
        include_pattern (str): Unix style (glob like/gitignore like) pattern.
        abspath (bool): Whether to yield absolute or relative (default) paths.
        force_refresh (bool): Whether to refresh from disk or use the cache.
    """
    self.populate_dir(force_refresh)
    matcher = Globster([include_pattern]) if include_pattern is not None else None
    for rel in self._files_cache:
        if matcher is not None and not matcher.match(rel):
            continue
        yield os.path.join(self.path, rel) if abspath else rel
def __init__(self):
    """ Initialize from the enclosing git repository root (cwd fallback).

    Paths, exclusion patterns (built-ins plus .gitignore content when
    present) and the pattern matcher are all set up here.
    """
    self.directory = os.path.basename(self.getRootPath())
    self.path = os.path.abspath(self.getRootPath())
    self.parent = os.path.dirname(self.path)
    self.exclude_file = self.load_ignore()
    # Built-in exclusions first; ignore-file patterns are appended after.
    self.patterns = ['.git/', '.hg/', '.svn/', 'node_modules']
    if self.exclude_file is not None:
        for pat in self.load_patterns(self.exclude_file):
            self.patterns.append(pat)
    self.globster = Globster(self.patterns)
def __init__(self, directory=".", exclude_file=".exclude",
             excludes=('.git/', '.hg/', '.svn/')):
    """ Wrap `directory' and load its exclusion patterns.

    :type directory: str
    :param directory: Root directory for initialization

    :type exclude_file: str
    :param exclude_file: File containing exclusion patterns, relative to
        `directory'; .exclude by default (a .gitignore also works).

    :type excludes: iterable
    :param excludes: Additional patterns for exclusion,
        by default: ('.git/', '.hg/', '.svn/')

    :raises TypeError: if `directory' is not an existing directory.
    """
    if not os.path.isdir(directory):
        raise TypeError("Directory must be a directory.")
    self.directory = os.path.basename(directory)
    self.path = os.path.abspath(directory)
    self.parent = os.path.dirname(self.path)
    self.exclude_file = os.path.join(self.path, exclude_file)
    # BUG FIX: copy instead of aliasing `excludes`. The old code extended
    # the (mutable default) list in place, leaking one instance's
    # file-loaded patterns into every later instance.
    self.patterns = list(excludes)
    if os.path.isfile(self.exclude_file):
        self.patterns.extend(load_patterns(self.exclude_file))
    self.globster = Globster(self.patterns)
def __init__(self, directory=".", exclude_file=".exclude",
             excludes=('.git/', '.hg/', '.svn/'),
             directory_filter=lambda root, dirs, files: True):
    """ Wrap `directory', loading exclusion patterns and a walk filter.

    Args:
        directory (str): Root directory to wrap.
        exclude_file (str): Name of the exclusion-pattern file inside
            `directory' (.exclude by default; a .gitignore also works).
        excludes (tuple): Additional exclusion patterns.
        directory_filter (callable): Predicate over (root, dirs, files).

    Raises:
        TypeError: if `directory' is not an existing directory.
    """
    if not os.path.isdir(directory):
        raise TypeError("Directory must be a directory.")
    abs_root = os.path.abspath(directory)
    self.directory = os.path.basename(directory)
    self.path = abs_root
    self.parent = os.path.dirname(abs_root)
    self.exclude_file = os.path.join(abs_root, exclude_file)
    patterns = list(excludes)
    if os.path.isfile(self.exclude_file):
        patterns += load_patterns(self.exclude_file)
    self.patterns = patterns
    self.globster = Globster(patterns)
    self.directory_filter = directory_filter
def itersubdirs(self, pattern=None, abspath=False):
    """ Yield every non-excluded subdir found by walking the tree.

    :type pattern: str
    :param pattern: Unix style (glob like/gitignore like) pattern
    :param abspath: yield absolute paths instead of relative ones.
    """
    matcher = Globster([pattern]) if pattern is not None else None
    for root, subdirs, _files in self.walk():
        for name in subdirs:
            if matcher is not None and not matcher.match(name):
                continue
            full = os.path.join(root, name)
            yield full if abspath else self.relpath(full)
def iterfiles(self, pattern=None, abspath=False):
    """ Yield every non-excluded file found by walking the tree.

    Paths are relative to the root unless `abspath' is set.

    :type pattern: str
    :param pattern: Unix style (glob like/gitignore like) pattern
    """
    matcher = Globster([pattern]) if pattern is not None else None
    for root, _subdirs, filenames in self.walk():
        for name in filenames:
            if matcher is not None and not matcher.match(name):
                continue
            full = os.path.join(root, name)
            yield full if abspath else self.relpath(full)
def __init__(self, directory=".", exclude_file=None,
             excludes=('.git/', '.hg/', '.svn/')):
    """ Wrap `directory' and prepare the content caches.

    Args:
        directory (str): Root directory to wrap.
        exclude_file (str): Name of an exclusion-pattern file inside
            `directory' (None by default; a .gitignore also works).
        excludes (iterable): Additional exclusion patterns.

    Raises:
        TypeError: if `directory' is not an existing directory.
    """
    if not os.path.isdir(directory):
        raise TypeError("Directory must be a directory.")
    self.directory = os.path.basename(directory)
    self.path = os.path.abspath(directory)
    self.parent = os.path.dirname(self.path)
    # BUG FIX: copy instead of aliasing `excludes`; extending a shared
    # mutable default list leaked patterns across instances.
    self.patterns = list(excludes)
    self._files_cache = []
    self._sub_dirs_cache = []
    self._is_populated = False
    # Always define the attribute so later access is safe even when no
    # exclude file was given.
    self.exclude_file = None
    if exclude_file:
        self.exclude_file = os.path.join(self.path, exclude_file)
        if os.path.isfile(self.exclude_file):
            # BUG FIX: read the resolved path (self.exclude_file) that was
            # just checked with isfile(), not the raw argument (which broke
            # whenever cwd != directory), and close the handle.
            with open(self.exclude_file) as fp:
                self.patterns.extend(filter(None, fp.read().split("\n")))
    self.globster = Globster(self.patterns)
def itersubdirs(self, pattern=None, abspath=False, force_refresh=False):
    """ Generator for all subdirs matching pattern and not excluded.
    Uses cached dir list if available.

    Args:
        pattern (str): Unix style (glob like/gitignore like) pattern
        abspath (bool): whether to use absolute or relative (default) paths.
        force_refresh (bool): Whether to refresh from disk or use the cache.
    """
    self.populate_dir(force_refresh)
    if pattern is not None:
        globster = Globster([pattern])
    for d in self._sub_dirs_cache:
        if pattern is None or globster.match(d):
            if abspath:
                # BUG FIX: join against the absolute root (self.path), not
                # the bare basename (self.directory) — the old join produced
                # relative paths and disagreed with iterfiles().
                yield os.path.join(self.path, d)
            else:
                yield d
class Dir(object):
    """ Wrapper for dirtools arround a path.

    Try to load a .exclude file, ready to compute hashdir.

    :type directory: str
    :param directory: Root directory for initialization

    :type exclude_file: str
    :param exclude_file: File containing exclusion pattern,
        .exclude by default, you can also load .gitignore files.

    :type excludes: iterable
    :param excludes: List of additionals patterns for exclusion,
        by default: ('.git/', '.hg/', '.svn/')
    """
    def __init__(self, directory=".", exclude_file=".exclude",
                 excludes=('.git/', '.hg/', '.svn/')):
        if not os.path.isdir(directory):
            raise TypeError("Directory must be a directory.")
        self.directory = os.path.basename(directory)
        self.path = os.path.abspath(directory)
        self.parent = os.path.dirname(self.path)
        self.exclude_file = os.path.join(self.path, exclude_file)
        # BUG FIX: copy instead of aliasing `excludes`; extending the shared
        # mutable default list leaked patterns across instances.
        self.patterns = list(excludes)
        if os.path.isfile(self.exclude_file):
            self.patterns.extend(load_patterns(self.exclude_file))
        self.globster = Globster(self.patterns)

    def hash(self, index_func=os.path.getmtime):
        """ Hash for the entire directory (except excluded files) recursively.

        Use mtime instead of sha256 by default for a faster hash.

        >>> dir.hash(index_func=dirtools.filehash)
        """
        shadir = hashlib.sha256()
        for f in self.files():
            try:
                # BUG FIX: hashlib.update() requires bytes on Python 3, so
                # the stringified index value must be encoded.
                shadir.update(str(index_func(os.path.join(self.path, f))).encode('utf-8'))
            except (IOError, OSError):
                # Unreadable/vanished files are skipped: best-effort hash.
                pass
        return shadir.hexdigest()

    def iterfiles(self, pattern=None, abspath=False):
        """ Generator for all the files not excluded recursively.

        Return relative path.

        :type pattern: str
        :param pattern: Unix style (glob like/gitignore like) pattern
        """
        if pattern is not None:
            globster = Globster([pattern])
        for root, dirs, files in self.walk():
            for f in files:
                if pattern is None or globster.match(f):
                    if abspath:
                        yield os.path.join(root, f)
                    else:
                        yield self.relpath(os.path.join(root, f))

    def files(self, pattern=None, sort_key=lambda k: k, sort_reverse=False, abspath=False):
        """ Return a sorted list containing relative path of all files (recursively).

        :type pattern: str
        :param pattern: Unix style (glob like/gitignore like) pattern
        :param sort_key: key argument for sorted
        :param sort_reverse: reverse argument for sorted

        :rtype: list
        :return: List of all relative files paths.
        """
        return sorted(self.iterfiles(pattern, abspath=abspath),
                      key=sort_key,
                      reverse=sort_reverse)

    def get(self, pattern, sort_key=lambda k: k, sort_reverse=False, abspath=False):
        """ Return the first file matching `pattern' (sorted), or None. """
        res = self.files(pattern,
                         sort_key=sort_key,
                         sort_reverse=sort_reverse,
                         abspath=abspath)
        if res:
            return res[0]

    def itersubdirs(self, pattern=None, abspath=False):
        """ Generator for all subdirs (except excluded).

        :type pattern: str
        :param pattern: Unix style (glob like/gitignore like) pattern
        """
        if pattern is not None:
            globster = Globster([pattern])
        for root, dirs, files in self.walk():
            for d in dirs:
                if pattern is None or globster.match(d):
                    if abspath:
                        yield os.path.join(root, d)
                    else:
                        yield self.relpath(os.path.join(root, d))

    def subdirs(self, pattern=None, sort_key=lambda k: k, sort_reverse=False, abspath=False):
        """ Return a sorted list containing relative path of all subdirs (recursively).

        :type pattern: str
        :param pattern: Unix style (glob like/gitignore like) pattern
        :param sort_key: key argument for sorted
        :param sort_reverse: reverse argument for sorted

        :rtype: list
        :return: List of all relative subdirs paths.
        """
        return sorted(self.itersubdirs(pattern, abspath=abspath),
                      key=sort_key,
                      reverse=sort_reverse)

    def size(self):
        """ Return directory size in bytes.

        :rtype: int
        :return: Total directory size in bytes.
        """
        dir_size = 0
        for f in self.iterfiles(abspath=True):
            dir_size += os.path.getsize(f)
        return dir_size

    def is_excluded(self, path):
        """ Return True if `path' should be excluded
        given patterns in the `exclude_file'. """
        match = self.globster.match(self.relpath(path))
        if match:
            log.debug("{0} matched {1} for exclusion".format(path, match))
            return True
        return False

    def walk(self):
        """ Walk the directory like os.path
            (yields a 3-tuple (dirpath, dirnames, filenames)
        except it exclude all files/directories on the fly. """
        for root, dirs, files in os.walk(self.path, topdown=True):
            # TODO relative walk, recursive call if root excluder found???
            ndirs = []
            # First we exclude directories; pruning `dirs` in place stops
            # os.walk from descending into excluded subtrees.
            for d in list(dirs):
                if self.is_excluded(os.path.join(root, d)):
                    dirs.remove(d)
                elif not os.path.islink(os.path.join(root, d)):
                    ndirs.append(d)

            nfiles = []
            for fpath in (os.path.join(root, f) for f in files):
                if not self.is_excluded(fpath) and not os.path.islink(fpath):
                    nfiles.append(os.path.relpath(fpath, root))

            yield root, ndirs, nfiles

    def find_projects(self, file_identifier=".project"):
        """ Search all directory recursively for subdirs
        with `file_identifier' in it.

        :type file_identifier: str
        :param file_identifier: File identier, .project by default.

        :rtype: list
        :return: The list of subdirs with a `file_identifier' in it.
        """
        projects = []
        for d in self.subdirs():
            # BUG FIX: join against the absolute root (self.path); the bare
            # basename (self.directory) only resolved when the cwd happened
            # to be the parent of the wrapped directory.
            project_file = os.path.join(self.path, d, file_identifier)
            if os.path.isfile(project_file):
                projects.append(d)
        return projects

    def relpath(self, path):
        """ Return a relative filepath to path from Dir path. """
        return os.path.relpath(path, start=self.path)

    def compress_to(self, archive_path=None):
        """ Compress the directory with gzip using tarlib.

        :type archive_path: str
        :param archive_path: Path to the archive, if None, a tempfile is created
        """
        if archive_path is None:
            archive = tempfile.NamedTemporaryFile(delete=False)
            tar_args = []
            tar_kwargs = {'fileobj': archive}
            _return = archive.name
        else:
            tar_args = [archive_path]
            tar_kwargs = {}
            _return = archive_path
        tar_kwargs.update({'mode': 'w:gz'})

        def _exclude_filter(tarinfo):
            # BUG FIX: TarFile.add()'s `exclude` keyword was removed in
            # Python 3.7; `filter` is the supported replacement. Dropping a
            # member is done by returning None.
            if self.is_excluded(os.path.join(self.path, tarinfo.name)):
                return None
            return tarinfo

        with closing(tarfile.open(*tar_args, **tar_kwargs)) as tar:
            tar.add(self.path, arcname='', filter=_exclude_filter)

        return _return
class Dir(object):
    """ Wrapper for dirstate for a path.

    Allow exclusions of files, listing of contents, caching of iteration,
    size, walking, hashing of files and entire folder, compressing.

    Args:
        directory (str): Path of the dir to wrap.
        exclude_file (str): Path to file containing exclusion pattern,
            None by default, you can also load .gitignore files.
        excludes (iterable): List of additional patterns for exclusion,
            by default: ('.git/', '.hg/', '.svn/')
    """
    def __init__(self, directory=".", exclude_file=None,
                 excludes=('.git/', '.hg/', '.svn/')):
        if not os.path.isdir(directory):
            raise TypeError("Directory must be a directory.")
        self.directory = os.path.basename(directory)
        self.path = os.path.abspath(directory)
        self.parent = os.path.dirname(self.path)
        # BUG FIX: copy instead of aliasing `excludes`; extending the shared
        # mutable default list leaked patterns across instances.
        self.patterns = list(excludes)
        self._files_cache = []
        self._sub_dirs_cache = []
        self._is_populated = False
        # Always define the attribute so later access is safe even when no
        # exclude file was given.
        self.exclude_file = None
        if exclude_file:
            self.exclude_file = os.path.join(self.path, exclude_file)
            if os.path.isfile(self.exclude_file):
                # BUG FIX: read the resolved path (self.exclude_file) that
                # was just checked with isfile(), not the raw argument
                # (which broke whenever cwd != directory), and close the
                # handle deterministically.
                with open(self.exclude_file) as fp:
                    self.patterns.extend(filter(None, fp.read().split("\n")))
        self.globster = Globster(self.patterns)

    def is_excluded(self, path) -> bool:
        """ Return whether 'path' is ignored based on exclude patterns """
        match = self.globster.match(self.relpath(path))
        if match:
            log.debug("{0} matched {1} for exclusion".format(path, match))
            return True
        return False

    def walk(self):
        """ Walk the directory like os.path
            (yields a 3-tuple (dirpath, dirnames, filenames)
        except it exclude all files/directories on the fly. """
        for root, dirs, files in os.walk(self.path, topdown=True):
            # TODO relative walk, recursive call if root excluder found???
            ndirs = []
            # First we exclude directories; pruning `dirs` in place stops
            # os.walk from descending into excluded subtrees.
            for d in list(dirs):
                if self.is_excluded(os.path.join(root, d)):
                    dirs.remove(d)
                elif not os.path.islink(os.path.join(root, d)):
                    ndirs.append(d)

            nfiles = []
            for fpath in (os.path.join(root, f) for f in files):
                if not self.is_excluded(fpath) and not os.path.islink(fpath):
                    nfiles.append(os.path.relpath(fpath, root))

            yield root, ndirs, nfiles

    def populate_dir(self, force_refresh=False) -> None:
        """ Walk the directory recursively and populate a cache of it's contents.

        Dir.patterns are used for exclusion.
        Paths are stored as relative to the Dir.path

        Args:
            force_refresh (bool): Whether to refresh from disk if cache is
                already populated.
        """
        if not force_refresh and self._is_populated:
            return
        self._files_cache.clear()
        self._sub_dirs_cache.clear()
        for root, dirs, files in self.walk():
            for f in files:
                self._files_cache.append(self.relpath(os.path.join(root, f)))
            for d in dirs:
                self._sub_dirs_cache.append(self.relpath(os.path.join(root, d)))
        self._is_populated = True

    def depopulate(self) -> None:
        """ Clear the cached lists of files and folders, set depopulated state. """
        self._files_cache.clear()
        self._sub_dirs_cache.clear()
        self._is_populated = False

    def iterfiles(self, include_pattern=None, abspath=False, force_refresh=False):
        """ Generator for all the files matching pattern and not already excluded.
        Uses cached file list if available.

        Args:
            include_pattern (str): Unix style (glob like/gitignore like) pattern
            abspath (bool): Whether to use absolute or relative (default) paths.
            force_refresh (bool): Whether to refresh from disk or use the cache.
        """
        self.populate_dir(force_refresh)
        if include_pattern is not None:
            globster = Globster([include_pattern])
        for f in self._files_cache:
            if include_pattern is None or globster.match(f):
                if abspath:
                    yield os.path.join(self.path, f)
                else:
                    yield f

    def itersubdirs(self, pattern=None, abspath=False, force_refresh=False):
        """ Generator for all subdirs matching pattern and not excluded.
        Uses cached dir list if available.

        Args:
            pattern (str): Unix style (glob like/gitignore like) pattern
            abspath (bool): whether to use absolute or relative (default) paths.
            force_refresh (bool): Whether to refresh from disk or use the cache.
        """
        self.populate_dir(force_refresh)
        if pattern is not None:
            globster = Globster([pattern])
        for d in self._sub_dirs_cache:
            if pattern is None or globster.match(d):
                if abspath:
                    # BUG FIX: join against the absolute root (self.path),
                    # not the bare basename (self.directory), so the result
                    # is actually absolute and matches iterfiles().
                    yield os.path.join(self.path, d)
                else:
                    yield d

    def files(self, pattern=None, sort_key=lambda k: k, sort_reverse=False,
              abspath=False, force_refresh=False) -> list:
        """ Return a sorted list containing relative path of all files (recursively).
        Uses cached file list if available.

        Args:
            pattern (str): Unix style (glob like/gitignore like) pattern.
            sort_key (lambda): key argument for sorted
            sort_reverse (bool): reverse argument for sorted
            abspath (bool): whether to use absolute or relative (default) paths.
            force_refresh (bool): Whether to refresh from disk or use the cache.

        Return:
            List of all relative file paths.
        """
        return sorted(self.iterfiles(pattern, abspath, force_refresh),
                      key=sort_key,
                      reverse=sort_reverse)

    def subdirs(self, pattern=None, sort_key=lambda k: k, sort_reverse=False,
                abspath=False, force_refresh=False) -> list:
        """ Return a sorted list containing relative path of all subdirs (recursively).
        Uses cached dir list if available.

        Args:
            pattern (str): Unix style (glob like/gitignore like) pattern.
            sort_key (lambda): key argument for sorted
            sort_reverse (bool): reverse argument for sorted
            abspath (bool): whether to use absolute or relative (default) paths.
            force_refresh (bool): Whether to refresh from disk or use the cache.

        Return:
            List of all relative subdirs paths.
        """
        return sorted(self.itersubdirs(pattern, abspath, force_refresh),
                      key=sort_key,
                      reverse=sort_reverse)

    def relpath(self, path) -> str:
        """ Return a relative filepath to path from Dir path. """
        return os.path.relpath(path, start=self.path)

    def abspath(self, relpath) -> str:
        """ Return an absolute filepath from a relative to the root dir one. """
        # BUG FIX: the old body referenced the nonexistent `self.dir.path`,
        # which raised AttributeError on every call.
        return os.path.join(self.path, relpath)

    def size(self) -> int:
        """ Return total directory size in bytes.

        Return:
            int: Total directory size in bytes.
        """
        dir_size = 0
        for f in self.iterfiles(abspath=True):
            dir_size += os.path.getsize(f)
        return dir_size

    def compress_to(self, archive_path=None):
        """ Compress the directory with gzip using tarlib.

        :type archive_path: str
        :param archive_path: Path to the archive, if None, a tempfile is created
        """
        if archive_path is None:
            archive = tempfile.NamedTemporaryFile(delete=False)
            tar_args = []
            tar_kwargs = {'fileobj': archive}
            _return = archive.name
        else:
            tar_args = [archive_path]
            tar_kwargs = {}
            _return = archive_path
        tar_kwargs.update({'mode': 'w:gz'})

        def _exclude_filter(tarinfo):
            # BUG FIX: TarFile.add()'s `exclude` keyword was removed in
            # Python 3.7; `filter` is the supported replacement. Dropping a
            # member is done by returning None.
            if self.is_excluded(os.path.join(self.path, tarinfo.name)):
                return None
            return tarinfo

        with closing(tarfile.open(*tar_args, **tar_kwargs)) as tar:
            tar.add(self.path, arcname='', filter=_exclude_filter)

        return _return
class Dir(object):
    """ Wrapper around the enclosing git repository (cwd as a fallback).

    Combines built-in exclusions with the repository's .gitignore (when
    present) and offers exclusion-aware file listing and walking.
    """
    def __init__(self):
        # Always-excluded patterns, on top of any .gitignore content.
        excludes = ['.git/', '.hg/', '.svn/', 'node_modules']
        self.directory = os.path.basename(self.getRootPath())
        self.path = os.path.abspath(self.getRootPath())
        self.parent = os.path.dirname(self.path)
        self.exclude_file = self.load_ignore()
        self.patterns = excludes
        if self.exclude_file is not None:
            self.patterns.extend(self.load_patterns(self.exclude_file))
        self.globster = Globster(self.patterns)

    def getRootPath(self):
        """ Return the git toplevel dir, or '.' when not inside a repo. """
        try:
            return check_output(["git", "rev-parse", "--show-toplevel"]).decode().strip('\n')
        except CalledProcessError:
            return '.'

    def load_ignore(self):
        """ Return the path of the root .gitignore, or None if absent. """
        if os.path.isfile(os.path.join(self.path, '.gitignore')):
            return os.path.join(self.path, '.gitignore')
        else:
            return None

    def load_patterns(self, exclude_file):
        """ Read exclusion patterns from `exclude_file`.

        Blank lines and lines starting with '#' (comments) are skipped.
        """
        # BUG FIX: use a context manager so the file handle is closed
        # deterministically instead of leaking until garbage collection.
        with open(exclude_file) as fp:
            lines = fp.read().split("\n")
        return [line for line in lines if line and not line.startswith('#')]

    def iterfiles(self):
        """ Yield the root-relative path of every non-excluded file. """
        for root, dirs, files in self.walk():
            for f in files:
                yield self.relpath(os.path.join(root, f))

    def files(self):
        """ Return a sorted list of all non-excluded relative file paths. """
        return sorted(self.iterfiles())

    def is_excluded(self, path):
        """ Return True when `path` matches an exclusion pattern. """
        match = self.globster.match(self.relpath(path))
        if match:
            return True
        return False

    def walk(self):
        """ Like os.walk over the root, pruning excluded directories. """
        for root, dirs, files in os.walk(self.path, topdown=True):
            ndirs = []
            # First we exclude directories; pruning `dirs` in place stops
            # os.walk from descending into excluded subtrees.
            for d in list(dirs):
                if self.is_excluded(os.path.join(root, d)):
                    dirs.remove(d)
                # NOTE(review): the trailing '/' makes islink() resolve the
                # link target, so symlinked dirs are NOT skipped here,
                # unlike other Dir implementations — confirm this is
                # intentional.
                elif not os.path.islink(os.path.join(root, d) + '/'):
                    ndirs.append(d)

            nfiles = []
            for fpath in (os.path.join(root, f) for f in files):
                if not self.is_excluded(fpath):
                    nfiles.append(os.path.relpath(fpath, root))

            yield root, ndirs, nfiles

    def relpath(self, path):
        """ Return `path` relative to the wrapped root. """
        return os.path.relpath(path, start=self.path)