def run(self, fname=None, no_exec=False, **kwargs):
    """Create a stage from ``kwargs``, optionally run it, and dump it.

    When ``kwargs`` carries a truthy ``name`` a ``PipelineStage`` is created
    at ``PIPELINE_FILE``; otherwise a plain ``Stage`` is created at ``fname``
    (or, if that is falsy, at the path returned by ``_get_file_path``).

    Args:
        fname: Target file for a nameless stage.
        no_exec: When truthy, the stage is dumped without being run.
        **kwargs: Forwarded to ``create_stage``; ``name``, ``overwrite`` and
            ``no_commit`` are also read here.

    Returns:
        The created stage, or ``None`` if ``create_stage`` returned ``None``.

    Raises:
        InvalidStageName: The given name fails ``is_valid_name``.
        DuplicateStageName: The name is already present in the existing file.
        OutputDuplicationError: Re-raised from the graph check with the new
            stage removed from the reported stages.
    """
    from dvc.stage import PipelineStage, Stage, create_stage
    from dvc.dvcfile import Dvcfile, PIPELINE_FILE

    stage_name = kwargs.get("name")
    named = bool(stage_name)
    if named:
        if not is_valid_name(stage_name):
            raise InvalidStageName
        stage_cls = PipelineStage
        path = PIPELINE_FILE
    else:
        # A falsy "name" must not leak into the single-stage constructor.
        kwargs.pop("name", None)
        stage_cls = Stage
        path = fname or _get_file_path(kwargs)

    stage = create_stage(stage_cls, repo=self, path=path, **kwargs)
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    if dvcfile.exists():
        if named and stage_name in dvcfile.stages:
            raise DuplicateStageName(stage_name, dvcfile)
        if not named:
            overwrite = kwargs.get("overwrite", True)
            dvcfile.remove_with_prompt(force=overwrite)

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the *other* stages that produce the same output.
        conflicting = set(exc.stages) - {stage}
        raise OutputDuplicationError(exc.output, conflicting)

    if not no_exec:
        no_commit = kwargs.get("no_commit", False)
        stage.run(no_commit=no_commit)

    dvcfile.dump(stage, update_pipeline=True)
    return stage
def modify(self, path, props=None, unset=None):
    """Update the plot properties of the single output found at ``path``.

    Args:
        path: Path passed to ``self.repo.find_outs_by_path``; exactly one
            output must be returned (the tuple unpacking enforces this).
        props: Mapping merged into ``out.plot``; a falsy value means ``{}``.
        unset: Forwarded to ``self._unset`` when truthy.

    Raises:
        NotAPlotError: ``unset`` was given (not ``None``) but the output has
            a falsy ``plot`` attribute.
    """
    from dvc.dvcfile import Dvcfile

    if not props:
        props = {}

    template_name = props.get("template")
    if template_name:
        # Return value is unused; presumably this call only validates that
        # the template exists -- confirm against the templates object.
        self.templates.get_template(template_name)

    (out,) = self.repo.find_outs_by_path(path)

    is_plot = bool(out.plot)
    if not is_plot and unset is not None:
        raise NotAPlotError(out)

    # This out will become a plot unless it is one already
    if not isinstance(out.plot, dict):
        out.plot = {}

    if unset:
        self._unset(out, unset)

    out.plot.update(props)

    # Empty dict will move it to non-plots
    if not out.plot:
        out.plot = True

    out.verify_metric()

    Dvcfile(self.repo, out.stage.path).dump(out.stage, update_lock=False)
def run(self, fname=None, no_exec=False, **kwargs):
    """Create a stage via ``Stage.create``, optionally run it, and dump it.

    The default DVC-file name is derived from the first declared output
    (its basename plus ``DVC_FILE_SUFFIX``); with no outputs at all it is
    ``DVC_FILE``. ``fname`` overrides the default when truthy.

    Returns:
        The created stage, or ``None`` if ``Stage.create`` returned ``None``.
    """
    from dvc.stage import Stage
    from dvc.dvcfile import Dvcfile, DVC_FILE_SUFFIX, DVC_FILE

    output_keys = (
        "outs",
        "outs_no_cache",
        "metrics",
        "metrics_no_cache",
        "outs_persist",
        "outs_persist_no_cache",
    )
    groups = [kwargs.get(key, []) for key in output_keys]
    # Left-to-right concatenation with ``+`` (never ``+=``) so the lists
    # held in ``kwargs`` are not mutated.
    outs = groups[0]
    for group in groups[1:]:
        outs = outs + group

    if not outs:
        default_path = DVC_FILE
    else:
        first_out = os.path.normpath(outs[0])
        default_path = os.path.basename(first_out) + DVC_FILE_SUFFIX

    stage = Stage.create(self, fname or default_path, **kwargs)
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    overwrite = kwargs.get("overwrite", True)
    dvcfile.overwrite_with_prompt(force=overwrite)

    self.check_modified_graph([stage])

    if not no_exec:
        no_commit = kwargs.get("no_commit", False)
        stage.run(no_commit=no_commit)

    dvcfile.dump(stage)
    return stage
def imp_url(self, url, out=None, fname=None, erepo=None, locked=True):
    """Create, run and dump a stage whose only dependency is ``url``.

    Args:
        url: Source to import; rewritten relative to ``wdir`` when it is an
            existing path inside ``self.root_dir``.
        out: Output path, resolved through ``resolve_output``.
        fname: DVC-file path; falls back to the path from ``resolve_paths``.
        erepo: Forwarded unchanged to ``Stage.create``.
        locked: Value stored on ``stage.locked`` after the stage has run.

    Returns:
        The created stage, or ``None`` if ``Stage.create`` returned ``None``.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage

    out = resolve_output(url, out)
    path, wdir, out = resolve_paths(self, out)

    # NOTE: when user is importing something from within their own repository
    if os.path.exists(url):
        if path_isin(os.path.abspath(url), self.root_dir):
            url = relpath(url, wdir)

    stage_options = {
        "wdir": wdir,
        "deps": [url],
        "outs": [out],
        "erepo": erepo,
    }
    stage = Stage.create(self, fname or path, **stage_options)
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    dvcfile.overwrite_with_prompt(force=True)

    self.check_modified_graph([stage])

    stage.run()
    # The flag is set only after the run, right before the stage is dumped.
    stage.locked = locked

    dvcfile.dump(stage)
    return stage
def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):
    """Create a stage from ``kwargs``, optionally run it, and dump it.

    Args:
        fname: Target file for a single-stage file.
        no_exec: When truthy the stage is not run; its outputs are ignored
            via ``stage.ignore_outs()`` and the dump is made with
            ``no_lock=True``.
        single_stage: Create a plain ``Stage`` instead of a named
            ``PipelineStage`` in ``PIPELINE_FILE``.
        **kwargs: Forwarded to ``create_stage``; ``cmd``, ``name``,
            ``params``, ``overwrite``, ``no_commit`` and ``run_cache`` are
            also read here.

    Returns:
        The created stage, or ``None`` if ``create_stage`` returned ``None``.

    Raises:
        InvalidArgumentError: Missing ``cmd``, a name combined with
            ``single_stage``, or no name without ``single_stage``.
        InvalidStageName: The name fails ``is_valid_name``.
        StageFileAlreadyExistsError / DuplicateStageName: The target already
            exists and ``overwrite`` is falsy.
        OutputDuplicationError: Re-raised from the graph check with the new
            stage removed from the reported stages.
    """
    from dvc.stage import PipelineStage, Stage, create_stage
    from dvc.dvcfile import Dvcfile, PIPELINE_FILE

    if not kwargs.get("cmd"):
        raise InvalidArgumentError("command is not specified")

    stage_name = kwargs.get("name")
    if single_stage:
        if stage_name:
            raise InvalidArgumentError(
                "`-n|--name` is incompatible with `--single-stage`"
            )
        kwargs.pop("name", None)
        stage_cls = Stage
        path = fname or _get_file_path(kwargs)
    else:
        if not stage_name:
            raise InvalidArgumentError("`-n|--name` is required")
        if not is_valid_name(stage_name):
            raise InvalidStageName
        stage_cls = PipelineStage
        path = PIPELINE_FILE

    # "params" is parsed here and passed explicitly, so it is removed from
    # kwargs before they are forwarded.
    params = parse_params(kwargs.pop("params", []))
    stage = create_stage(
        stage_cls, repo=self, path=path, params=params, **kwargs
    )
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    if dvcfile.exists():
        if kwargs.get("overwrite", True):
            dvcfile.remove_stage(stage)
        elif stage_cls != PipelineStage:
            raise StageFileAlreadyExistsError(dvcfile.relpath)
        elif stage_name and stage_name in dvcfile.stages:
            raise DuplicateStageName(stage_name, dvcfile)

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the *other* stages that produce the same output.
        conflicting = set(exc.stages) - {stage}
        raise OutputDuplicationError(exc.output, conflicting)

    if no_exec:
        stage.ignore_outs()
    else:
        run_options = {
            "no_commit": kwargs.get("no_commit", False),
            "run_cache": kwargs.get("run_cache", True),
        }
        stage.run(**run_options)

    dvcfile.dump(stage, update_pipeline=True, no_lock=no_exec)
    return stage
def imp_url(
    self,
    url,
    out=None,
    fname=None,
    erepo=None,
    frozen=True,
    no_exec=False,
    desc=None,
    jobs=None,
):
    """Create an import stage for ``url``, optionally run it, and dump it.

    Args:
        url: Source to import; rewritten relative to ``wdir`` when ``erepo``
            is ``None`` and it is an existing path inside ``self.root_dir``.
        out: Output path, resolved through ``resolve_output``.
        fname: DVC-file path; falls back to the path from ``resolve_paths``.
        erepo: Forwarded unchanged to ``create_stage``.
        frozen: Value stored on ``stage.frozen`` before the dump.
        no_exec: When truthy, ``stage.ignore_outs()`` is called instead of
            running the stage.
        desc: When truthy, stored as ``desc`` on the stage's first output.
        jobs: Forwarded to ``stage.run``.

    Returns:
        The stage, or ``None`` when ``stage.can_be_skipped`` is truthy.

    Raises:
        OutputDuplicationError: Re-raised from the graph check with the new
            stage removed from the reported stages.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage, restore_meta

    out = resolve_output(url, out)
    path, wdir, out = resolve_paths(self, out)

    # NOTE: when user is importing something from within their own repository
    if erepo is None and os.path.exists(url):
        if path_isin(os.path.abspath(url), self.root_dir):
            url = relpath(url, wdir)

    stage_options = {
        "wdir": wdir,
        "deps": [url],
        "outs": [out],
        "erepo": erepo,
    }
    stage = create_stage(Stage, self, fname or path, **stage_options)
    restore_meta(stage)
    if stage.can_be_skipped:
        return None

    if desc:
        stage.outs[0].desc = desc

    dvcfile = Dvcfile(self, stage.path)
    dvcfile.remove()

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the *other* stages that produce the same output.
        conflicting = set(exc.stages) - {stage}
        raise OutputDuplicationError(exc.output, conflicting)

    if not no_exec:
        stage.run(jobs=jobs)
    else:
        stage.ignore_outs()

    stage.frozen = frozen

    dvcfile.dump(stage)
    return stage
def add(self, tag, target=None, with_deps=False, recursive=False):
    """Record ``tag`` on every collected output that has ``info``.

    For each stage returned by ``self.collect`` a copy of ``out.info`` is
    stored under ``out.tags[tag]``. Outputs with falsy ``info`` are skipped
    with a warning. A stage is dumped back to its DVC-file only if at least
    one of its outputs was tagged.
    """
    collected = self.collect(target, with_deps=with_deps, recursive=recursive)
    for stage in collected:
        tagged_any = False
        for out in stage.outs:
            if out.info:
                out.tags[tag] = copy(out.info)
                tagged_any = True
            else:
                logger.warning("missing checksum info for '{}'".format(out))

        if not tagged_any:
            continue

        Dvcfile(self, stage.path).dump(stage)
def remove(self, tag, target=None, with_deps=False, recursive=False):
    """Delete ``tag`` from every collected output that carries it.

    Outputs without the tag are skipped with a warning. A stage is dumped
    back to its DVC-file only if the tag was removed from at least one of
    its outputs.
    """
    collected = self.collect(target, with_deps=with_deps, recursive=recursive)
    for stage in collected:
        removed_any = False
        for out in stage.outs:
            if tag in out.tags.keys():
                del out.tags[tag]
                removed_any = True
            else:
                logger.warning("tag '{}' not found for '{}'".format(tag, out))

        if not removed_any:
            continue

        Dvcfile(self, stage.path).dump(stage)
def test_meta_is_preserved(tmp_dir, dvc):
    """A custom ``meta`` section survives a Dvcfile load/dump round trip."""
    (stage,) = tmp_dir.dvc_gen("foo", "foo content")
    stage_path = stage.path

    # Add meta to DVC-file
    original = load_stage_file(stage_path)
    original["meta"] = {"custom_key": 42}
    dump_stage_file(stage_path, original)

    # Loading and dumping to test that it works and meta is retained
    dvcfile = Dvcfile(dvc, stage_path)
    reloaded_stage = dvcfile.load()
    dvcfile.dump(reloaded_stage)

    round_tripped = load_stage_file(stage_path)
    assert round_tripped["meta"] == original["meta"]
def modify(repo, path, delete=False):
    """Re-verify (and optionally clear) the metric of the output at ``path``.

    Args:
        repo: Object providing ``find_outs_by_path``.
        path: Path of the output; exactly one output must match.
        delete: When truthy, ``out.metric`` is set to ``None`` before
            verification.

    Raises:
        AssertionError: The lookup did not return exactly one output.
        DvcException: The output's scheme is not ``"local"``.
    """
    matches = repo.find_outs_by_path(path)
    assert len(matches) == 1
    out = matches[0]

    if out.scheme != "local":
        raise DvcException(
            "output '{}' scheme '{}' is not supported for metrics".format(
                out.path, out.path_info.scheme
            )
        )

    if delete:
        out.metric = None

    out.verify_metric()

    Dvcfile(repo, out.stage.path).dump(out.stage)
def imp_url(self, url, out=None, fname=None, erepo=None, frozen=True):
    """Create, run and dump a stage whose only dependency is ``url``.

    Args:
        url: Source to import; rewritten relative to ``wdir`` when ``erepo``
            is ``None`` and it is an existing path inside ``self.root_dir``.
        out: Output path, resolved through ``resolve_output``.
        fname: DVC-file path; falls back to the path from ``resolve_paths``.
        erepo: Forwarded unchanged to ``create_stage``.
        frozen: Value stored on ``stage.frozen`` after the stage has run.

    Returns:
        The created stage, or ``None`` if ``create_stage`` returned ``None``.

    Raises:
        OutputDuplicationError: Re-raised from the graph check with the new
            stage removed from the reported stages.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage

    out = resolve_output(url, out)
    path, wdir, out = resolve_paths(self, out)

    # NOTE: when user is importing something from within their own repository
    if erepo is None and os.path.exists(url):
        if path_isin(os.path.abspath(url), self.root_dir):
            url = relpath(url, wdir)

    stage_options = {
        "wdir": wdir,
        "deps": [url],
        "outs": [out],
        "erepo": erepo,
    }
    stage = create_stage(Stage, self, fname or path, **stage_options)
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    dvcfile.remove_with_prompt(force=True)

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the *other* stages that produce the same output.
        conflicting = set(exc.stages) - {stage}
        raise OutputDuplicationError(exc.output, conflicting)

    stage.run()
    stage.frozen = frozen

    dvcfile.dump(stage)
    return stage
def test_remove_stage_on_lockfile_format_error(tmp_dir, dvc, run_copy):
    """``remove_stage`` raises when either file holds an unexpected key."""
    tmp_dir.gen("foo", "foo")
    stage = run_copy("foo", "bar", name="copy-foo-bar")

    pipeline_file = Dvcfile(dvc, stage.path)
    lockfile = pipeline_file._lockfile

    pipeline_data = pipeline_file._load()[0]
    lockfile_data = lockfile.load()
    lockfile_data["gibberish"] = True
    pipeline_data["gibberish"] = True

    # First case: only the lockfile on disk is corrupted.
    dump_yaml(lockfile.relpath, lockfile_data)
    with pytest.raises(StageFileFormatError):
        pipeline_file.remove_stage(stage)

    # Recreate the lockfile from the stage before the second case.
    lockfile.remove()
    pipeline_file.dump(stage, update_pipeline=False)

    # Second case: the pipeline file on disk is corrupted.
    dump_yaml(pipeline_file.relpath, pipeline_data)
    with pytest.raises(StageFileFormatError):
        pipeline_file.remove_stage(stage)
def test(self):
    """A hand-edited md5 survives a Dvcfile load/dump round trip."""
    stages = self.dvc.add(self.FOO)
    self.assertEqual(len(stages), 1)
    stage = stages[0]
    self.assertIsNotNone(stage)

    contents = load_stage_file(stage.relpath)

    # NOTE: checking that reloaded stage didn't change its checksum
    fake_md5 = "11111111111111111111111111111111"
    contents[stage.PARAM_MD5] = fake_md5
    dump_stage_file(stage.relpath, contents)

    dvcfile = Dvcfile(self.dvc, stage.relpath)
    stage = dvcfile.load()
    self.assertIsNotNone(stage)
    dvcfile.dump(stage)

    contents = load_stage_file(stage.relpath)
    self.assertEqual(contents[stage.PARAM_MD5], fake_md5)
def modify(repo, path, typ=None, xpath=None, delete=False):
    """Change the metric settings of the single output found at ``path``.

    Args:
        repo: Object providing ``find_outs_by_path``.
        path: Path of the output; exactly one output must match.
        typ: Metric type; lower-cased and stripped, then validated against
            the supported types. ``None`` leaves the type untouched.
        xpath: Metric xpath to store. ``None`` leaves it untouched.
        delete: When truthy, ``out.metric`` is reset to ``None`` after any
            ``typ``/``xpath`` update, i.e. deletion wins.

    Raises:
        AssertionError: The lookup did not return exactly one output.
        DvcException: The output's scheme is not ``"local"`` or ``typ`` is
            not a supported metric type.
    """
    # Single source of truth for both the validation and the error message.
    supported_types = ["raw", "json", "csv", "tsv", "hcsv", "htsv"]

    outs = repo.find_outs_by_path(path)
    assert len(outs) == 1
    out = outs[0]

    if out.scheme != "local":
        msg = "output '{}' scheme '{}' is not supported for metrics"
        raise DvcException(msg.format(out.path, out.path_info.scheme))

    if typ is not None:
        typ = typ.lower().strip()
        if typ not in supported_types:
            msg = (
                "metric type '{typ}' is not supported, "
                "must be one of [{types}]"
            )
            raise DvcException(
                msg.format(typ=typ, types=", ".join(supported_types))
            )

    if typ is not None or xpath is not None:
        # Only reached after validation passed, so a rejected ``typ`` never
        # mutates the output.
        if not isinstance(out.metric, dict):
            out.metric = {}
        if typ is not None:
            out.metric[out.PARAM_METRIC_TYPE] = typ
        if xpath is not None:
            out.metric[out.PARAM_METRIC_XPATH] = xpath

    if delete:
        out.metric = None

    out.verify_metric()

    dvcfile = Dvcfile(repo, out.stage.path)
    dvcfile.dump(out.stage)
def test_dump_stage(tmp_dir, dvc):
    """``Dvcfile.dump`` writes the files selected by its keyword flags."""
    pipeline_path = tmp_dir / PIPELINE_FILE
    lock_path = tmp_dir / PIPELINE_LOCK

    stage = PipelineStage(
        dvc, cmd="command", name="stage_name", path="dvc.yaml"
    )
    dvcfile = Dvcfile(dvc, "dvc.yaml")

    # no_lock=True and no update_pipeline: nothing is written.
    dvcfile.dump(stage, no_lock=True)
    assert not pipeline_path.exists()
    assert not lock_path.exists()

    # no_lock=False: lock data becomes loadable, still no pipeline file.
    dvcfile.dump(stage, no_lock=False)
    assert not pipeline_path.exists()
    assert dvcfile._lockfile.load()

    # update_pipeline=True: both files exist and the stage is listed.
    dvcfile.dump(stage, update_pipeline=True, no_lock=False)
    assert pipeline_path.exists()
    assert lock_path.exists()
    assert list(dvcfile.stages.values()) == [stage]
def imp_url(
    self,
    url,
    out=None,
    fname=None,
    erepo=None,
    frozen=True,
    no_exec=False,
    remote=None,
    to_remote=False,
    desc=None,
    jobs=None,
):
    """Create an import stage for ``url``, materialise it, and dump it.

    Depending on the flags the stage is left unexecuted (``no_exec``),
    transferred straight to a remote (``to_remote``), or run normally.

    Args:
        url: Source to import; rewritten relative to ``wdir`` when ``erepo``
            is ``None`` and it is an existing path inside ``self.root_dir``.
        out: Output path, resolved through ``resolve_output``.
        fname: DVC-file path; falls back to the path from ``resolve_paths``.
        erepo: Forwarded unchanged to ``create_stage``.
        frozen: Value stored on ``stage.frozen`` before the dump.
        no_exec: Call ``stage.ignore_outs()`` instead of running the stage.
        remote: Remote name passed to ``self.cloud.get_remote_odb``; only
            valid together with ``to_remote``.
        to_remote: Transfer the first output to the remote instead of
            running the stage.
        desc: When truthy, stored as ``desc`` on the stage's first output.
        jobs: Forwarded to the transfer or to ``stage.run``.

    Returns:
        The created stage.

    Raises:
        InvalidArgumentError: ``to_remote`` with ``no_exec``, or ``remote``
            without ``to_remote``.
        OutputDuplicationError: Re-raised from the graph check with the new
            stage removed from the reported stages.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage, restore_meta

    out = resolve_output(url, out)
    # NOTE(review): ``out`` has already been resolved at this point, so
    # ``not out`` looks at the resolved value, not the caller's argument.
    path, wdir, out = resolve_paths(
        self, out, always_local=to_remote and not out
    )

    # The two invalid combinations are mutually exclusive on ``to_remote``.
    if to_remote:
        if no_exec:
            raise InvalidArgumentError(
                "--no-exec can't be combined with --to-remote"
            )
    elif remote:
        raise InvalidArgumentError(
            "--remote can't be used without --to-remote"
        )

    # NOTE: when user is importing something from within their own repository
    if erepo is None and os.path.exists(url):
        if path_isin(os.path.abspath(url), self.root_dir):
            url = relpath(url, wdir)

    stage_options = {
        "wdir": wdir,
        "deps": [url],
        "outs": [out],
        "erepo": erepo,
    }
    stage = create_stage(Stage, self, fname or path, **stage_options)
    restore_meta(stage)

    if desc:
        stage.outs[0].desc = desc

    dvcfile = Dvcfile(self, stage.path)
    dvcfile.remove()

    try:
        updated_index = self.index.add(stage)
        updated_index.check_graph()
    except OutputDuplicationError as exc:
        # Report only the *other* stages that produce the same output.
        conflicting = set(exc.stages) - {stage}
        raise OutputDuplicationError(exc.output, conflicting)

    if no_exec:
        stage.ignore_outs()
    elif to_remote:
        remote_odb = self.cloud.get_remote_odb(remote, "import-url")
        stage.outs[0].transfer(url, odb=remote_odb, jobs=jobs)
        # The stage was not run, so deps and md5 are filled in explicitly.
        stage.save_deps()
        stage.md5 = stage.compute_md5()
    else:
        stage.run(jobs=jobs)

    stage.frozen = frozen

    dvcfile.dump(stage)
    return stage
def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):
    """Create a stage from ``kwargs``, optionally run it, and dump it.

    Args:
        fname: Target file; only allowed together with ``single_stage``.
        no_exec: When truthy the stage is not run; its outputs are ignored
            via ``stage.ignore_outs()`` and the dump is made with
            ``update_lock=False``.
        single_stage: Create a plain ``Stage`` instead of a named
            ``PipelineStage`` in ``PIPELINE_FILE``.
        **kwargs: Forwarded to ``create_stage``; ``cmd``, ``name``,
            ``params``, ``force``, ``no_commit`` and ``run_cache`` are also
            read here.

    Returns:
        The created stage, or ``None`` if ``create_stage`` returned ``None``.

    Raises:
        InvalidArgumentError: Missing ``cmd``, a name combined with
            ``single_stage`` or with ``fname``, or no name without
            ``single_stage``.
        InvalidStageName: The name fails ``is_valid_name``.
        OutputDuplicationError: Re-raised with the new stage removed from
            the reported stages.
    """
    from dvc.dvcfile import PIPELINE_FILE, Dvcfile

    # PipelineStage is used below as the default stage class, so it has to
    # be imported here alongside Stage and create_stage.
    from dvc.stage import PipelineStage, Stage, create_stage

    if not kwargs.get("cmd"):
        raise InvalidArgumentError("command is not specified")

    stage_cls = PipelineStage
    path = PIPELINE_FILE
    stage_name = kwargs.get("name")

    if stage_name and single_stage:
        raise InvalidArgumentError(
            "`-n|--name` is incompatible with `--single-stage`"
        )

    if stage_name and fname:
        raise InvalidArgumentError(
            "`--file` is currently incompatible with `-n|--name` "
            "and requires `--single-stage`"
        )

    if not stage_name and not single_stage:
        raise InvalidArgumentError("`-n|--name` is required")

    if single_stage:
        kwargs.pop("name", None)
        stage_cls = Stage
        path = fname or _get_file_path(kwargs)
    else:
        if not is_valid_name(stage_name):
            raise InvalidStageName

    # "params" is parsed here and passed explicitly, so it is removed from
    # kwargs before they are forwarded.
    params = parse_params(kwargs.pop("params", []))
    stage = create_stage(
        stage_cls, repo=self, path=path, params=params, **kwargs
    )
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    try:
        if kwargs.get("force", True):
            # A ValueError from the removal is deliberately ignored;
            # presumably it signals that the stage was not present.
            with suppress(ValueError):
                self.stages.remove(stage)
        else:
            _check_stage_exists(dvcfile, stage)
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the *other* stages that produce the same output.
        raise OutputDuplicationError(exc.output, set(exc.stages) - {stage})

    if no_exec:
        stage.ignore_outs()
    else:
        stage.run(
            no_commit=kwargs.get("no_commit", False),
            run_cache=kwargs.get("run_cache", True),
        )

    dvcfile.dump(stage, update_lock=not no_exec)
    return stage