def imp_url(
    self,
    url,
    out=None,
    fname=None,
    erepo=None,
    frozen=True,
    no_exec=False,
    desc=None,
    jobs=None,
):
    """Build a single-output import stage for ``url`` and write its DVC file.

    Args:
        url: source to import; used as the stage's only dependency.
        out: requested output location, resolved against ``url`` through
            ``resolve_output``.
        fname: explicit stage file path; when falsy the path returned by
            ``resolve_paths`` is used.
        erepo: forwarded unchanged to ``create_stage``.
        frozen: value assigned to ``stage.frozen`` before dumping.
        no_exec: when truthy the stage is not run; ``stage.ignore_outs()``
            is called instead.
        desc: optional description stored on the first output.
        jobs: forwarded to ``stage.run``.

    Returns:
        The created stage, or ``None`` when ``stage.can_be_skipped`` is true.

    Raises:
        OutputDuplicationError: re-raised from the graph check with the
            stage being created removed from the reported stage set.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage, restore_meta

    resolved_out = resolve_output(url, out)
    default_path, wdir, resolved_out = resolve_paths(self, resolved_out)

    # NOTE: when the user imports something that lives inside their own
    # repository, the dependency is recorded relative to the stage's wdir.
    source_is_inside_repo = (
        erepo is None
        and os.path.exists(url)
        and path_isin(os.path.abspath(url), self.root_dir)
    )
    if source_is_inside_repo:
        url = relpath(url, wdir)

    stage_path = fname or default_path
    stage = create_stage(
        Stage,
        self,
        stage_path,
        wdir=wdir,
        deps=[url],
        outs=[resolved_out],
        erepo=erepo,
    )
    restore_meta(stage)

    if stage.can_be_skipped:
        return None

    if desc:
        stage.outs[0].desc = desc

    # Drop any existing stage file before validating the graph.
    dvcfile = Dvcfile(self, stage.path)
    dvcfile.remove()

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the *other* stages that collide on this output.
        conflicting = set(exc.stages)
        conflicting.discard(stage)
        raise OutputDuplicationError(exc.output, conflicting)

    if not no_exec:
        stage.run(jobs=jobs)
    else:
        stage.ignore_outs()

    stage.frozen = frozen
    dvcfile.dump(stage)
    return stage
def create(
    self,
    single_stage: bool = False,
    validate: bool = True,
    fname: str = None,
    force: bool = False,
    **stage_data,
) -> Union["Stage", "PipelineStage"]:
    """Build a new stage object from ``stage_data``.

    Args:
        single_stage: when true a ``Stage`` is built at ``fname`` (or at the
            path produced by ``prepare_file_path``); otherwise a
            ``PipelineStage`` is built at ``PIPELINE_FILE``.
        validate: when true the stage is added to the repo index and the
            resulting index's graph is checked.
        fname: stage file path, consulted only when ``single_stage`` is true.
        force: when true the ``check_stage_exists`` check performed during
            validation is skipped.
        stage_data: keyword data passed through ``validate_kwargs`` and then
            on to ``create_stage``.

    Returns:
        The newly created stage, after ``restore_meta`` has been applied.

    Raises:
        InvalidStageName: for a pipeline stage whose ``name`` entry is falsy
            or rejected by ``is_valid_name``.
    """
    from dvc.stage import PipelineStage, Stage, create_stage, restore_meta
    from dvc.stage.exceptions import InvalidStageName
    from dvc.stage.utils import (
        is_valid_name,
        prepare_file_path,
        validate_kwargs,
    )

    stage_data = validate_kwargs(
        single_stage=single_stage, fname=fname, **stage_data
    )

    if not single_stage:
        stage_cls, path = PipelineStage, PIPELINE_FILE
        name = stage_data["name"]
        if not name or not is_valid_name(name):
            raise InvalidStageName
    else:
        stage_cls = Stage
        path = fname or prepare_file_path(stage_data)

    stage = create_stage(stage_cls, repo=self.repo, path=path, **stage_data)

    if validate:
        if not force:
            from dvc.stage.utils import check_stage_exists

            check_stage_exists(self.repo, stage, stage.path)

        # Graph validation runs on the index returned by ``add``.
        candidate_index = self.repo.index.add(stage)
        candidate_index.check_graph()

    restore_meta(stage)
    return stage
def _create_stages(
    repo,
    targets,
    fname,
    pbar=None,
    external=False,
    glob=False,
    desc=None,
    transfer=False,
    **kwargs,
):
    """Create one single-output stage per expanded target.

    Args:
        repo: repository object handed to ``resolve_paths``, ``create_stage``
            and ``Dvcfile``; its ``_reset()`` is called after every target.
        targets: raw targets, expanded through ``glob_targets``.
        fname: explicit stage file path; when falsy the per-target path from
            ``resolve_paths`` is used.
        pbar: optional progress object; ``update_msg`` is called for each
            stage that is kept.
        external: forwarded to ``create_stage``.
        glob: forwarded to ``glob_targets``.
        desc: optional description stored on each stage's first output.
        transfer: combined with the absence of ``kwargs["out"]`` to form the
            ``always_local`` flag for ``resolve_paths``.
        **kwargs: only the ``"out"`` key is read here.

    Returns:
        List of the created stages.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage, restore_meta

    expanded_targets = glob_targets(targets, glob=glob)

    # These depend only on the call arguments, so compute them once.
    out_override = kwargs.get("out")
    always_local = transfer and not out_override

    progress = Tqdm(
        expanded_targets,
        desc="Creating DVC files",
        disable=len(expanded_targets) < LARGE_DIR_SIZE,
        unit="file",
    )

    stages = []
    for target in progress:
        if out_override:
            target = resolve_output(target, out_override)

        default_path, wdir, out = resolve_paths(
            repo, target, always_local=always_local
        )
        stage = create_stage(
            Stage,
            repo,
            fname or default_path,
            wdir=wdir,
            outs=[out],
            external=external,
        )
        restore_meta(stage)
        Dvcfile(repo, stage.path).remove()

        if desc:
            stage.outs[0].desc = desc

        repo._reset()  # pylint: disable=protected-access

        if stage:
            stages.append(stage)
            if pbar is not None:
                pbar.update_msg(out)
        elif pbar is not None:
            # NOTE(review): reached only if a stage object is falsy -
            # confirm whether that can still happen.
            pbar.total -= 1

    return stages
def _create_stages(
    repo, targets, fname, pbar=None, external=False, glob=False, desc=None,
):
    """Create one single-output stage per target, dropping skippable ones.

    Args:
        repo: repository object handed to ``resolve_paths``, ``create_stage``
            and ``Dvcfile``; its ``_reset()`` is called after every target.
        targets: target paths; expanded with a recursive ``iglob`` when
            ``glob`` is true, otherwise used as given.
        fname: explicit stage file path; when falsy the per-target path from
            ``resolve_paths`` is used.
        pbar: optional progress object; its ``total`` is decremented for each
            dropped stage and ``update_msg`` is called for each kept one.
        external: forwarded to ``create_stage``.
        glob: whether to glob-expand ``targets``.
        desc: optional description stored on each kept stage's first output.

    Returns:
        List of the stages that were not skipped.
    """
    from glob import iglob

    from dvc.stage import Stage, create_stage, restore_meta

    if not glob:
        expanded_targets = targets
    else:
        expanded_targets = []
        for pattern in targets:
            expanded_targets.extend(iglob(pattern, recursive=True))

    progress = Tqdm(
        expanded_targets,
        desc="Creating DVC-files",
        disable=len(expanded_targets) < LARGE_DIR_SIZE,
        unit="file",
    )

    stages = []
    for target in progress:
        default_path, wdir, out = resolve_paths(repo, target)
        stage = create_stage(
            Stage,
            repo,
            fname or default_path,
            wdir=wdir,
            outs=[out],
            external=external,
        )
        restore_meta(stage)

        skipped = stage.can_be_skipped
        if not skipped:
            # Only stages that will be kept replace their stage file.
            Dvcfile(repo, stage.path).remove()
            if desc:
                stage.outs[0].desc = desc

        repo._reset()  # pylint: disable=protected-access

        if skipped or not stage:
            if pbar is not None:
                pbar.total -= 1
            continue

        stages.append(stage)
        if pbar is not None:
            pbar.update_msg(out)

    return stages
def imp_url(
    self,
    url,
    out=None,
    fname=None,
    erepo=None,
    frozen=True,
    no_exec=False,
    remote=None,
    to_remote=False,
    desc=None,
    jobs=None,
):
    """Build a single-output import stage for ``url`` and write its DVC file.

    Args:
        url: source to import; used as the stage's only dependency.
        out: requested output location, resolved against ``url`` through
            ``resolve_output``.
        fname: explicit stage file path; when falsy the path returned by
            ``resolve_paths`` is used.
        erepo: forwarded unchanged to ``create_stage``.
        frozen: value assigned to ``stage.frozen`` before dumping.
        no_exec: when truthy the stage is not run; ``stage.ignore_outs()``
            is called instead.
        remote: remote name passed to ``self.cloud.get_remote_odb``; only
            accepted together with ``to_remote``.
        to_remote: when truthy (and ``no_exec`` is falsy) the first output is
            transferred to the remote object database instead of running the
            stage.
        desc: optional description stored on the first output.
        jobs: forwarded to ``stage.run`` or to the output's ``transfer``.

    Returns:
        The created stage.

    Raises:
        InvalidArgumentError: if ``to_remote`` is combined with ``no_exec``,
            or ``remote`` is given without ``to_remote``.
        OutputDuplicationError: re-raised from the graph check with the
            stage being created removed from the reported stage set.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage, restore_meta

    resolved_out = resolve_output(url, out)
    # NOTE(review): ``always_local`` is derived from the value returned by
    # resolve_output(), not from the caller's original ``out`` - confirm
    # that this is intended.
    default_path, wdir, resolved_out = resolve_paths(
        self, resolved_out, always_local=to_remote and not resolved_out
    )

    if to_remote and no_exec:
        raise InvalidArgumentError(
            "--no-exec can't be combined with --to-remote"
        )
    if remote and not to_remote:
        raise InvalidArgumentError(
            "--remote can't be used without --to-remote"
        )

    # NOTE: when the user imports something that lives inside their own
    # repository, the dependency is recorded relative to the stage's wdir.
    source_is_inside_repo = (
        erepo is None
        and os.path.exists(url)
        and path_isin(os.path.abspath(url), self.root_dir)
    )
    if source_is_inside_repo:
        url = relpath(url, wdir)

    stage_path = fname or default_path
    stage = create_stage(
        Stage,
        self,
        stage_path,
        wdir=wdir,
        deps=[url],
        outs=[resolved_out],
        erepo=erepo,
    )
    restore_meta(stage)

    if desc:
        stage.outs[0].desc = desc

    # Drop any existing stage file before validating the graph.
    dvcfile = Dvcfile(self, stage.path)
    dvcfile.remove()

    try:
        candidate_index = self.index.add(stage)
        candidate_index.check_graph()
    except OutputDuplicationError as exc:
        # Report only the *other* stages that collide on this output.
        conflicting = set(exc.stages)
        conflicting.discard(stage)
        raise OutputDuplicationError(exc.output, conflicting)

    if no_exec:
        stage.ignore_outs()
    elif not to_remote:
        stage.run(jobs=jobs)
    else:
        # Transfer path: push the data to the remote, then record the
        # dependency state and stage checksum without running the stage.
        remote_odb = self.cloud.get_remote_odb(remote, "import-url")
        stage.outs[0].transfer(url, odb=remote_odb, jobs=jobs)
        stage.save_deps()
        stage.md5 = stage.compute_md5()

    stage.frozen = frozen
    dvcfile.dump(stage)
    return stage
def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):
    """Create a stage from CLI-style arguments, optionally run it, and dump it.

    Args:
        fname: stage file path; only valid together with ``single_stage``.
        no_exec: when truthy the stage is not run (``stage.ignore_outs()`` is
            called instead) and the dump is done with ``update_lock=False``.
        single_stage: build a ``Stage`` in its own file instead of a named
            ``PipelineStage`` in ``PIPELINE_FILE``.
        **kwargs: stage data forwarded to ``create_stage``. Keys read here:
            ``cmd`` (required), ``name``, ``params``, ``run_cache``,
            ``force`` and ``no_commit``.

    Returns:
        The created stage, or ``None`` when the run cache is enabled and
        ``stage.can_be_skipped`` is true.

    Raises:
        InvalidArgumentError: on a missing command or an invalid combination
            of ``name``, ``fname`` and ``single_stage``.
        InvalidStageName: if the pipeline stage name fails ``is_valid_name``.
        OutputDuplicationError: re-raised with the stage being created
            removed from the reported stage set.
    """
    from dvc.dvcfile import PIPELINE_FILE, Dvcfile
    from dvc.exceptions import InvalidArgumentError, OutputDuplicationError
    from dvc.stage import PipelineStage, Stage, create_stage, restore_meta
    from dvc.stage.exceptions import InvalidStageName

    if not kwargs.get("cmd"):
        raise InvalidArgumentError("command is not specified")

    name = kwargs.get("name")
    if name:
        if single_stage:
            raise InvalidArgumentError(
                "`-n|--name` is incompatible with `--single-stage`"
            )
        if fname:
            raise InvalidArgumentError(
                "`--file` is currently incompatible with `-n|--name` "
                "and requires `--single-stage`"
            )
    elif not single_stage:
        raise InvalidArgumentError("`-n|--name` is required")

    if single_stage:
        # The name key is dropped before the file path is derived.
        kwargs.pop("name", None)
        stage_cls = Stage
        path = fname or _get_file_path(kwargs)
    else:
        if not is_valid_name(name):
            raise InvalidStageName
        stage_cls, path = PipelineStage, PIPELINE_FILE

    cli_params = kwargs.pop("params", [])
    params = chunk_dict(parse_params_from_cli(cli_params))

    stage = create_stage(
        stage_cls, repo=self, path=path, params=params, **kwargs
    )
    restore_meta(stage)

    use_run_cache = kwargs.get("run_cache", True)
    if use_run_cache and stage.can_be_skipped:
        return None

    dvcfile = Dvcfile(self, stage.path)
    try:
        if not kwargs.get("force", True):
            _check_stage_exists(dvcfile, stage)
        else:
            # Forced: remove the stage from the repo's stage list,
            # ignoring the ValueError raised when it is not there.
            with suppress(ValueError):
                self.stages.remove(stage)
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the *other* stages that collide on this output.
        conflicting = set(exc.stages)
        conflicting.discard(stage)
        raise OutputDuplicationError(exc.output, conflicting)

    if not no_exec:
        stage.run(
            no_commit=kwargs.get("no_commit", False),
            run_cache=use_run_cache,
        )
    else:
        stage.ignore_outs()

    dvcfile.dump(stage, update_lock=not no_exec)
    return stage