class ExperimentFileSource(object):
    """Treat an experiment directory as a potential source of files for
    copying to a temp directory as part of a deployment (debug or otherwise).

    Files are discovered by walking ``root_dir``. Entries rejected by the
    general ``exclusion_policy`` are skipped, and when the git client
    reports any files, only the files it reports are kept.
    """

    def __init__(self, root_dir="."):
        """Remember the directory to walk and create the git client.

        :param root_dir: directory whose files may be copied; defaults to
            the current working directory.
        """
        self.root = root_dir
        self.git = GitClient()

    @property
    def files(self):
        """A Set of all files copyable in the source directory, accounting
        for exclusions.
        """
        return set(self._walk())

    @property
    def size(self):
        """Combined size in bytes of all files, accounting for exclusions."""
        return sum(os.path.getsize(path) for path in self._walk())

    def selective_copy_to(self, destination):
        """Write files from the source directory to another directory,
        skipping files excluded by the general exclusion_policy, plus any
        files ignored by git configuration.

        The layout relative to the source root is reproduced underneath
        ``destination``.

        :param destination: directory that receives the copied files.
        """
        for path in self.files:
            subpath = os.path.relpath(path, start=self.root)
            target_folder = os.path.join(destination, os.path.dirname(subpath))
            ensure_directory(target_folder)
            # copy2 keeps the file name (target is a directory) and also
            # copies file metadata.
            shutil.copy2(path, target_folder)

    @staticmethod
    def _nfc(path):
        """Return ``path`` as NFC-normalized text, for comparison only."""
        import sys
        import unicodedata

        # unicodedata.normalize() only accepts text, so decode byte paths
        # (possible on Python 2) before normalizing.
        if isinstance(path, bytes):
            path = path.decode(sys.getfilesystemencoding() or "utf-8")
        return unicodedata.normalize("NFC", path)

    def _walk(self):
        """Yield the path of every copyable file under ``self.root``.

        A file is yielded when its name is not rejected by the exclusion
        policy for its directory and, if the git client reports any files
        at all, when it is one of those files. When the git client reports
        nothing, no git-based filtering is applied.
        """
        # The git client and os.walk may return different representations
        # of the same unicode characters (composed vs. decomposed), so both
        # sides are NFC-normalized before they are compared. The paths that
        # are yielded are always the ones os.walk produced.
        exclusions = exclusion_policy()
        git_files = {
            self._nfc(os.path.join(self.root, f)) for f in self.git.files()
        }
        for dirpath, dirnames, filenames in os.walk(self.root, topdown=True):
            current_exclusions = exclusions(dirpath, os.listdir(dirpath))

            # Modifying dirnames in-place will prune the subsequent files and
            # directories visited by os.walk. This is only possible when
            # topdown = True
            dirnames[:] = [d for d in dirnames if d not in current_exclusions]

            for name in filenames:
                if name in current_exclusions:
                    continue
                path = os.path.join(dirpath, name)
                if git_files and self._nfc(path) not in git_files:
                    continue
                yield path
class ExperimentFileSource(object):
    """Treat an experiment directory as a potential source of files for
    copying to a temp directory as part of a deployment (debug or otherwise).

    Files are discovered by walking ``root_dir``. Entries rejected by the
    general ``exclusion_policy`` are skipped, and when the git client
    reports any files, only the files it reports are kept.
    """

    def __init__(self, root_dir="."):
        """Remember the directory to walk and create the git client.

        :param root_dir: directory whose files may be copied; defaults to
            the current working directory.
        """
        self.root = root_dir
        self.git = GitClient()

    @property
    def files(self):
        """A Set of all files copyable in the source directory, accounting
        for exclusions.
        """
        return set(self._walk())

    @property
    def size(self):
        """Combined size in bytes of all files, accounting for exclusions."""
        return sum(os.path.getsize(path) for path in self._walk())

    def selective_copy_to(self, destination):
        """Write files from the source directory to another directory,
        skipping files excluded by the general exclusion_policy, plus any
        files ignored by git configuration.

        The layout relative to the source root is reproduced underneath
        ``destination``.

        :param destination: directory that receives the copied files.
        """
        for path in self.files:
            subpath = os.path.relpath(path, start=self.root)
            target_folder = os.path.join(destination, os.path.dirname(subpath))
            ensure_directory(target_folder)
            # copy2 keeps the file name (target is a directory) and also
            # copies file metadata.
            shutil.copy2(path, target_folder)

    def _walk(self):
        """Yield the path of every copyable file under ``self.root``.

        A file is yielded when its name is not rejected by the exclusion
        policy for its directory and, if the git client reports any files
        at all, when it is one of those files. When the git client reports
        nothing, no git-based filtering is applied.
        """
        # The GitClient and os.walk may return different representations of
        # the same unicode characters, so we use unicodedata.normalize() for
        # comparisons:
        #   list(name_from_git)
        #   ['å', ' ', 'f', 'i', 'l', 'e', '.', 't', 'x', 't']
        #   list(from_os_walk)
        #   ['a', '̊', ' ', 'f', 'i', 'l', 'e', '.', 't', 'x', 't']
        #
        # The *joined* path is normalized on both sides. Normalizing only the
        # git-relative name would leave a decomposed character in self.root
        # un-normalized here but normalized on the os.walk side, so nothing
        # would ever match.
        exclusions = exclusion_policy()
        git_files = {
            normalize("NFC", six.text_type(os.path.join(self.root, f)))
            for f in self.git.files()
        }
        for dirpath, dirnames, filenames in os.walk(self.root, topdown=True):
            current_exclusions = exclusions(dirpath, os.listdir(dirpath))

            # Modifying dirnames in-place will prune the subsequent files and
            # directories visited by os.walk. This is only possible when
            # topdown = True
            dirnames[:] = [d for d in dirnames if d not in current_exclusions]

            for name in filenames:
                if name in current_exclusions:
                    continue
                # Yield the path exactly as os.walk produced it; the
                # normalized form is used for the comparison only.
                path = os.path.join(dirpath, name)
                if git_files:
                    comparable = normalize("NFC", six.text_type(path))
                    if comparable not in git_files:
                        continue
                yield path