def _get_all_paths(self):
    """Collect the paths of all directories and files found by the walk.

    The walk starts at ``self.dvc.root_dir`` on ``self.dvc.tree`` and is
    given a ``DvcIgnoreFileHandler`` as ``ignore_file_handler``. For each
    visited directory, the paths of its sub-directories are listed before
    the paths of its files.

    Returns:
        list: joined ``root``/name paths, in walk order.
    """
    collected = []
    handler = DvcIgnoreFileHandler(self.dvc.tree)
    walker = self.dvc.tree.walk(
        self.dvc.root_dir, ignore_file_handler=handler
    )
    for current, subdirs, filenames in walker:
        # Directories first, then files, for every visited root.
        collected.extend(os.path.join(current, name) for name in subdirs)
        collected.extend(os.path.join(current, name) for name in filenames)
    return collected
def stages(self, from_directory=None, check_dag=True):
    """
    Walks down the root directory looking for Dvcfiles,
    skipping the directories that are related with
    any SCM (e.g. `.git`), DVC itself (`.dvc`), or directories
    tracked by DVC (e.g. `dvc add data` would skip `data/`)

    NOTE: For large repos, this could be an expensive
          operation. Consider using some memoization.

    Args:
        from_directory: directory to start the walk from; falls back to
            ``self.root_dir`` when falsy.
        check_dag: when true, ``self.check_dag`` is called with the
            collected stages before they are returned.

    Returns:
        list: the stages loaded from every valid stage file found.

    Raises:
        TargetNotDirectoryError: if ``from_directory`` is given but is
            not an existing directory.
    """
    from dvc.stage import Stage

    if not from_directory:
        from_directory = self.root_dir
    elif not os.path.isdir(from_directory):
        raise TargetNotDirectoryError(from_directory)

    stages = []
    # Output paths (with a trailing separator) of the stages loaded so far;
    # used to avoid descending into directories tracked by DVC.
    outs = []

    def is_searchable(path):
        """Return True if the walk should descend into directory `path`."""
        # Only SCM/DVC internals and DVC-tracked outputs are pruned; no
        # directory is skipped merely because of its name.
        if path in (self.dvc_dir, self.scm.dir):
            return False
        return not any(
            path == os.path.normpath(out) or path.startswith(out)
            for out in outs
        )

    ignore_file_handler = DvcIgnoreFileHandler(self.tree)
    for root, dirs, files in self.tree.walk(
        from_directory, ignore_file_handler=ignore_file_handler
    ):
        # Load stage files first so that the outputs they declare are
        # already known when this directory's sub-directories are pruned.
        for fname in files:
            path = os.path.join(root, fname)
            if not Stage.is_valid_filename(path):
                continue
            stage = Stage.load(self, path)
            for out in stage.outs:
                outs.append(out.path + out.sep)
            stages.append(stage)

        # Assign in place: the walker is handed the same list object, which
        # is how sub-directories get pruned from the traversal.
        dirs[:] = [
            dname for dname in dirs
            if is_searchable(os.path.join(root, dname))
        ]

    if check_dag:
        self.check_dag(stages)

    return stages
def setUp(self):
    """Run the parent fixture, then build the ignore-file handler.

    The handler is created from ``self.dvc.tree`` and stored on
    ``self.ignore_file_handler``.
    """
    super(TestDvcIgnore, self).setUp()
    tree = self.dvc.tree
    self.ignore_file_handler = DvcIgnoreFileHandler(tree)