def _collect_paths(
    repo: "Repo",
    targets: Iterable[str],
    recursive: bool = False,
    rev: str = None,
):
    """Turn user-supplied targets into path infos that exist in the repo tree.

    Every target is made absolute and wrapped in a ``PathInfo``. The result
    preserves target order and is not de-duplicated.

    Args:
        repo: Repository handed to ``RepoTree``.
        targets: Paths to look up.
        recursive: When true, a target that is a directory also contributes
            every file ``tree.walk_files`` yields for it (the directory
            itself is still appended afterwards). Missing targets are then
            skipped without a warning.
        rev: Revision name used only to word the "not found" warning;
            ``"workspace"`` and ``""`` get the workspace-specific message.

    Returns:
        A list of path infos.
    """
    from dvc.tree.repo import RepoTree

    candidates = [PathInfo(os.path.abspath(target)) for target in targets]
    tree = RepoTree(repo)

    found = []
    for candidate in candidates:
        if recursive and tree.isdir(candidate):
            found.extend(tree.walk_files(candidate))

        if tree.exists(candidate):
            found.append(candidate)
            continue

        # The target is missing: only non-recursive lookups report it.
        if recursive:
            continue

        if rev in ("workspace", ""):
            logger.warning(
                "'%s' was not found in current workspace.", candidate,
            )
        else:
            logger.warning(
                "'%s' was not found at: '%s'.", candidate, rev,
            )

    return found
def _collect_metrics(repo, targets, recursive):
    """Return the path infos of metrics, either from targets or from stages.

    Args:
        repo: Repository object; its ``stages`` are scanned when no targets
            are given, otherwise it is handed to ``RepoTree``.
        targets: Optional paths. When truthy, only these are considered.
        recursive: With targets, also include every file found under any
            target that is a directory.

    Returns:
        A list of path infos. With targets: the targets that are files (in
        target order) followed by the recursively walked files. Without
        targets: the ``path_info`` of every stage output whose ``metric``
        is truthy, de-duplicated, in no guaranteed order.
    """
    if not targets:
        # No explicit targets: fall back to every metric output in the repo.
        metric_paths = {
            out.path_info
            for stage in repo.stages
            for out in stage.outs
            if out.metric
        }
        return list(metric_paths)

    candidates = [PathInfo(os.path.abspath(target)) for target in targets]
    tree = RepoTree(repo)

    # Directories are expanded first, before the plain-file filter below.
    walked = []
    if recursive:
        for candidate in candidates:
            if tree.isdir(candidate):
                walked.extend(tree.walk_files(candidate))

    plain_files = [
        candidate for candidate in candidates if tree.isfile(candidate)
    ]
    return plain_files + walked
def _collect_paths(
    repo: Repo,
    targets: Iterable[str],
    recursive: bool = False,
    rev: str = None,
):
    """Resolve targets to the set of path infos that exist in the repo tree.

    Every target is made absolute and wrapped in a ``PathInfo``; duplicates
    collapse because both the candidates and the result are sets.

    Args:
        repo: Repository handed to ``RepoTree``.
        targets: Paths to look up.
        recursive: When true, a target that is a directory also contributes
            every file ``tree.walk_files`` yields for it, and missing
            targets are skipped without a warning.
        rev: Revision name included in the "not found" warning.

    Returns:
        A set of path infos.
    """
    candidates = {PathInfo(os.path.abspath(target)) for target in targets}
    tree = RepoTree(repo)

    found = set()
    for candidate in candidates:
        if recursive and tree.isdir(candidate):
            found.update(tree.walk_files(candidate))

        if tree.exists(candidate):
            found.add(candidate)
            continue

        # The target is missing: only non-recursive lookups report it.
        if not recursive:
            logger.warning(
                "'%s' was not found at: '%s'.", candidate, rev,
            )

    return found
def collect(
    self,
    targets: List[str] = None,
    revs: List[str] = None,
    recursive: bool = False,
) -> Dict[str, Dict]:
    """Gather plot props and raw file contents for each revision.

    The result is keyed by revision and then by the plot file's path
    relative to the repo root:

        {rev: {"plots.csv": {
            "props": {x: ..., "header": ..., ...},
            "data": "...file contents as read...",
        }}}

    Contents are stored unparsed because parsing depends on the props.
    The "data" key is missing for a file that could not be opened.

    Args:
        targets: Plot paths to collect; a single string is accepted and
            treated as a one-element list. ``None`` means no targets.
        revs: Revisions passed to ``self.repo.brancher``.
        recursive: Forwarded to ``_collect_plots``.

    Returns:
        The nested dict described above. A revision only appears if at
        least one plot was collected for it.
    """
    from dvc.tree.repo import RepoTree

    if isinstance(targets, str):
        targets = [targets]
    else:
        targets = targets or []

    data = {}
    for rev in self.repo.brancher(revs=revs):
        # .brancher() can yield the workspace even when it was not asked for
        if revs is not None and rev not in revs:
            continue
        rev = rev or "workspace"

        tree = RepoTree(self.repo)
        plots = _collect_plots(self.repo, targets, rev, recursive)
        for path_info, props in plots.items():
            rev_data = data.setdefault(rev, {})

            # A directory plot expands to one entry per file beneath it.
            if tree.isdir(path_info):
                plot_files = [
                    (file_info, relpath(file_info, self.repo.root_dir))
                    for file_info in tree.walk_files(path_info)
                ]
            else:
                plot_files = [
                    (path_info, relpath(path_info, self.repo.root_dir))
                ]

            for path, repo_path in plot_files:
                entry = {"props": props}
                rev_data[repo_path] = entry

                # Load data from git or dvc cache
                try:
                    with tree.open(path) as fd:
                        entry["data"] = fd.read()
                except FileNotFoundError:
                    # This might happen simply because cache is absent
                    pass

    return data
def collect_files(tree: BaseTree, repo_tree: RepoTree):
    """Yield re-rooted paths for files in ``repo_tree`` that fail ``isdvc``.

    Walks every file under ``repo_tree.root_dir`` (with ``dvcfiles=True``).
    Each file for which ``repo_tree.isdvc`` is falsy is yielded as its path
    relative to ``repo_tree.root_dir``, joined onto ``tree.path_info``.

    Args:
        tree: Tree whose ``path_info`` is the base of the yielded paths.
        repo_tree: Tree that is walked and queried with ``isdvc``.

    Yields:
        ``tree.path_info / <path relative to repo_tree.root_dir>``.
    """
    for fname in repo_tree.walk_files(repo_tree.root_dir, dvcfiles=True):
        if repo_tree.isdvc(fname):
            continue
        destination_root = tree.path_info
        yield destination_root / fname.relative_to(repo_tree.root_dir)