def test_run_load_one_for_multistage_non_existing(tmp_dir, dvc):
    """Fetching a stage through ``PIPELINE_FILE`` must fail when nothing was run.

    The test body creates no stages before the lookup and expects
    ``StageFileDoesNotExistError`` to be raised by it.
    """
    with pytest.raises(StageFileDoesNotExistError):
        pipeline_file = Dvcfile(dvc, PIPELINE_FILE)
        assert pipeline_file.stages.get("copy-foo-foo2")
def test_load_stage_outs_with_flags(dvc, stage_data, lock_data):
    """An output declared with ``cache: False`` is loaded with ``use_cache`` off."""
    stage_data["outs"] = [{"foo": {"cache": False}}]

    loaded = StageLoader.load_stage(
        Dvcfile(dvc, PIPELINE_FILE), "stage-1", stage_data, lock_data
    )

    first_out = loaded.outs[0]
    assert first_out.use_cache is False
def test_remove_stage(tmp_dir, dvc, run_copy):
    """Removing a named stage drops only that entry from the pipeline file.

    Also checks that ``remove_stage`` can be repeated once the entry is gone
    and again after the whole file has been removed.
    """
    tmp_dir.gen("foo", "foo")
    first = run_copy("foo", "bar", name="copy-foo-bar")
    second = run_copy("bar", "foobar", name="copy-bar-foobar")

    dvc_file = Dvcfile(dvc, PIPELINE_FILE)

    def stage_names():
        # Re-read the file on every call so each assertion sees fresh data.
        return dvc_file._load()[0]["stages"].keys()

    assert dvc_file.exists()
    assert set(stage_names()) == {"copy-bar-foobar", "copy-foo-bar"}

    dvc_file.remove_stage(first)
    assert list(stage_names()) == ["copy-bar-foobar"]

    # Sanity check: the remaining stage can still be reloaded.
    second.reload()

    # Removing again must not fail when the stage entry is already gone ...
    dvc_file.remove_stage(first)
    dvc_file.remove(force=True)
    # ... nor when there is no file at all.
    dvc_file.remove_stage(first)
def test_remove_stage_dvcfiles(tmp_dir, dvc, run_copy):
    """Removing the stage of a single-stage file leaves no file behind.

    Further ``remove_stage`` calls, before and after an explicit forced
    ``remove``, are expected to complete without raising.
    """
    tmp_dir.gen("foo", "foo")
    copy_stage = run_copy("foo", "bar", single_stage=True)

    stage_file = Dvcfile(dvc, copy_stage.path)
    assert stage_file.exists()

    stage_file.remove_stage(copy_stage)
    assert not stage_file.exists()

    # Removing again must not fail when the stage entry is already gone ...
    stage_file.remove_stage(copy_stage)
    stage_file.remove(force=True)
    # ... nor when there is no file at all.
    stage_file.remove_stage(copy_stage)
def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):
    """Create a stage from ``kwargs``, optionally execute it, and dump it.

    Args:
        fname: explicit stage-file path; rejected together with a stage
            name, used as the path when ``single_stage`` is set.
        no_exec: when true the stage is not run (``stage.ignore_outs()`` is
            called instead) and it is dumped with ``update_lock=False``.
        single_stage: build a ``Stage`` in its own file instead of a named
            ``PipelineStage`` in ``PIPELINE_FILE``.
        **kwargs: stage options forwarded to ``create_stage``. Keys read
            here: ``cmd``, ``name``, ``params``, ``run_cache``, ``force``
            and ``no_commit``.

    Returns:
        The created stage, or ``None`` when ``run_cache`` is enabled and
        ``stage.can_be_skipped`` is true.

    Raises:
        InvalidArgumentError: missing command, or an invalid combination of
            ``name`` / ``fname`` / ``single_stage``.
        InvalidStageName: the pipeline stage name fails ``is_valid_name``.
        OutputDuplicationError: re-raised with the new stage excluded from
            the reported stages.
    """
    from dvc.dvcfile import PIPELINE_FILE, Dvcfile
    from dvc.stage import PipelineStage, Stage, create_stage, restore_meta

    if not kwargs.get("cmd"):
        raise InvalidArgumentError("command is not specified")

    stage_name = kwargs.get("name")
    if stage_name:
        if single_stage:
            raise InvalidArgumentError(
                "`-n|--name` is incompatible with `--single-stage`"
            )
        if fname:
            raise InvalidArgumentError(
                "`--file` is currently incompatible with `-n|--name` "
                "and requires `--single-stage`"
            )
    elif not single_stage:
        raise InvalidArgumentError("`-n|--name` is required")

    if single_stage:
        # Drop the name before the file path is derived from kwargs.
        kwargs.pop("name", None)
        stage_cls = Stage
        path = fname or _get_file_path(kwargs)
    else:
        stage_cls = PipelineStage
        path = PIPELINE_FILE
        if not is_valid_name(stage_name):
            raise InvalidStageName

    params = parse_params(kwargs.pop("params", []))
    use_run_cache = kwargs.get("run_cache", True)

    stage = create_stage(
        stage_cls, repo=self, path=path, params=params, **kwargs
    )
    restore_meta(stage)
    if use_run_cache and stage.can_be_skipped:
        return None

    dvcfile = Dvcfile(self, stage.path)
    try:
        if not kwargs.get("force", True):
            _check_stage_exists(dvcfile, stage)
        else:
            # The stage may not be registered yet; that is not an error.
            with suppress(ValueError):
                self.stages.remove(stage)
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        duplicated_output = exc.output
        other_stages = set(exc.stages) - {stage}
        raise OutputDuplicationError(duplicated_output, other_stages)

    if no_exec:
        stage.ignore_outs()
    else:
        stage.run(
            no_commit=kwargs.get("no_commit", False),
            run_cache=use_run_cache,
        )

    dvcfile.dump(stage, update_lock=not no_exec)
    return stage
def _collect_from_default_dvcfile(self, target):
    """Look up ``target`` among the stages of the default pipeline file.

    Returns the result of ``stages.get(target)`` on
    ``Dvcfile(self, PIPELINE_FILE)``, or ``None`` when that file does not
    exist.
    """
    default_file = Dvcfile(self, PIPELINE_FILE)
    if not default_file.exists():
        return None
    return default_file.stages.get(target)
def test_run_dump_on_multistage(tmp_dir, dvc, run_head):
    """Two successive runs are both dumped into the same pipeline file.

    First a ``copy-foo-foo2`` stage is created through ``dvc.run`` and the
    loaded file content is compared verbatim. Then a ``head-files`` stage is
    added through ``run_head`` and the file is expected to contain the new
    entry alongside the previously dumped one.
    """
    from dvc.dvcfile import PIPELINE_FILE, Dvcfile

    tmp_dir.gen(
        {
            "dir": {
                "foo": "foo\nfoo",
                "bar": "bar\nbar",
                # Previously "foobar\foobar": "\f" is a form-feed escape,
                # which swallowed the "f" and produced a one-line file,
                # unlike the two-line "foo" and "bar" fixtures.
                "foobar": "foobar\nfoobar",
            }
        }
    )

    dvc.run(
        cmd="cp foo foo2",
        deps=["foo"],
        name="copy-foo-foo2",
        wdir="dir",
        outs_persist=["foo2"],
        always_changed=True,
    )
    data = Dvcfile(dvc, PIPELINE_FILE)._load()[0]
    assert data == {
        "stages": {
            "copy-foo-foo2": {
                "cmd": "cp foo foo2",
                "deps": ["foo"],
                "outs": [{"foo2": {"persist": True}}],
                "always_changed": True,
                "wdir": "dir",
            },
        },
    }

    run_head(
        "foo",
        "bar",
        "foobar",
        name="head-files",
        outs=["bar-1"],
        outs_persist=["foo-1"],
        metrics_no_cache=["foobar-1"],
        wdir="dir",
    )

    head_script = (tmp_dir / "head.py").resolve()
    assert Dvcfile(dvc, PIPELINE_FILE)._load()[0] == {
        "stages": {
            "head-files": {
                "cmd": f"python {head_script} foo bar foobar",
                "wdir": "dir",
                "deps": ["bar", "foo", "foobar"],
                "outs": ["bar-1", {"foo-1": {"persist": True}}],
                "metrics": [{"foobar-1": {"cache": False}}],
            },
            # The stage dumped by the first run must still be present.
            **data["stages"],
        },
    }
def test_update_recursive(tmp_dir, dvc, erepo_dir):
    """``dvc.update(["dir"], recursive=True)`` refreshes every import below it.

    Three files are imported from branch ``branch`` of ``erepo_dir`` into
    ``dir`` and ``dir/subdir``. After new content is committed on that
    branch, a recursive update of ``dir`` is expected to move the
    ``rev_lock`` of all three imports to the new revision.
    """
    with erepo_dir.branch("branch", new=True), erepo_dir.chdir():
        erepo_dir.scm_gen(
            {"foo1": "text1", "foo2": "text2", "foo3": "text3"},
            commit="add foo files",
        )
        old_rev = erepo_dir.scm.get_rev()

    tmp_dir.gen({"dir": {"subdir": {}}})

    # (file in the external repo, destination in the workspace, content)
    imports = [
        ("foo1", os.path.join("dir", "foo1"), "text1"),
        ("foo2", os.path.join("dir", "subdir", "foo2"), "text2"),
        ("foo3", os.path.join("dir", "subdir", "foo3"), "text3"),
    ]

    stages = [
        dvc.imp(os.fspath(erepo_dir), src, dest, rev="branch")
        for src, dest, _ in imports
    ]

    for _, dest, expected_text in imports:
        assert (tmp_dir / dest).read_text() == expected_text

    for imported in stages:
        assert imported.deps[0].def_repo["rev_lock"] == old_rev

    with erepo_dir.branch("branch", new=False), erepo_dir.chdir():
        erepo_dir.scm_gen(
            {"foo1": "updated1", "foo2": "updated2", "foo3": "updated3"},
            "",
            "update foo content",
        )
        new_rev = erepo_dir.scm.get_rev()

    assert old_rev != new_rev

    dvc.update(["dir"], recursive=True)

    # Reload all stages from disk first, then check their locked revision.
    reloaded = [Dvcfile(dvc, imported.path).stage for imported in stages]
    for fresh in reloaded:
        assert fresh.deps[0].def_repo["rev_lock"] == new_rev
def test_load_stage_no_lock(dvc, stage_data):
    """Loading a stage without lock data yields paths but no hash info."""
    loaded = StageLoader.load_stage(
        Dvcfile(dvc, PIPELINE_FILE), "stage-1", stage_data
    )

    first_dep = loaded.deps[0]
    assert first_dep.def_path == "foo"
    first_out = loaded.outs[0]
    assert first_out.def_path == "bar"

    assert not loaded.deps[0].hash_info
    assert not loaded.outs[0].hash_info
def test_load_stage_cmd_with_list(dvc, stage_data, lock_data):
    """A ``cmd`` given as a list of commands is kept as a list on the stage."""
    commands = ["cmd-0", "cmd-1"]
    stage_data["cmd"] = commands

    loaded = StageLoader.load_stage(
        Dvcfile(dvc, PIPELINE_FILE), "stage-1", stage_data, lock_data
    )

    assert loaded.cmd == ["cmd-0", "cmd-1"]
def test_stage_load_on_non_file(tmp_dir, dvc, file):
    """Reading stages from a path that is a directory must raise.

    ``file`` is created as a directory under ``tmp_dir``; accessing the
    stages of a ``Dvcfile`` built on it is expected to raise
    ``StageFileIsNotDvcFileError``.
    """
    target = tmp_dir / file
    target.mkdir()

    dvcfile = Dvcfile(dvc, file)
    with pytest.raises(StageFileIsNotDvcFileError):
        assert dvcfile.stages.values()
def test_pipelines_single_stage_file(path):
    """``Dvcfile`` built from ``path`` must be a ``SingleStageFile``.

    ``path`` is supplied from outside this function (presumably a pytest
    parametrization that is not visible here).
    """
    # A bare object() stands in for the repo argument.
    assert isinstance(Dvcfile(object(), path), SingleStageFile)
def test_pipelines_file(path):
    """``Dvcfile`` built from ``path`` must be a ``PipelineFile``.

    ``path`` is supplied from outside this function (presumably a pytest
    parametrization that is not visible here).
    """
    # A bare object() stands in for the repo argument.
    assert isinstance(Dvcfile(object(), path), PipelineFile)