def test_remove_stage_removes_dvcfiles_if_no_stages_left(
    tmp_dir, dvc, run_copy
):
    """Removing the only stage deletes both the pipeline file and its lock."""
    tmp_dir.gen("foo", "foo")
    run_copy("foo", "bar", name="run_copy")

    pipeline_file = Dvcfile(dvc, PIPELINE_FILE)
    lock_path = tmp_dir / PIPELINE_LOCK

    # Preconditions: the run produced the pipeline file and the lock file,
    # and the generated source file is in place.
    assert pipeline_file.exists()
    assert lock_path.exists()
    assert (tmp_dir / "foo").exists()

    only_stage = pipeline_file.stages["run_copy"]
    pipeline_file.remove_stage(only_stage)

    # With no stages left, neither file should remain.
    assert not pipeline_file.exists()
    assert not lock_path.exists()
def test_remove_stage_dvcfiles(tmp_dir, dvc, run_copy):
    """Removing a single-stage file's stage deletes the file, repeatably."""
    tmp_dir.gen("foo", "foo")
    single = run_copy("foo", "bar", single_stage=True)
    stage_file = Dvcfile(dvc, single.path)

    assert stage_file.exists()
    stage_file.remove_stage(single)
    assert not stage_file.exists()

    # Removing again must be harmless when the stage entry is already gone.
    stage_file.remove_stage(single)

    stage_file.remove(force=True)
    # ...and equally harmless when there is no file at all.
    stage_file.remove_stage(single)
def test_remove_stage_preserves_comment(tmp_dir, dvc, run_copy):
    """Removing one stage keeps YAML comments attached to the other stages.

    A two-stage ``dvc.yaml`` is written by hand with a comment inside the
    ``generate-foo`` stage, the pipeline is reproduced, and then the
    ``copy-foo-bar`` stage is removed. The comment must still be present in
    the rewritten file, and the removed stage must be gone.
    """
    comment = "# This copies 'foo' text to 'foo' file."

    # The fixture must be real multi-line YAML; dedent strips the common
    # indentation so the file starts at column 0.
    tmp_dir.gen(
        "dvc.yaml",
        textwrap.dedent(
            """\
            stages:
                generate-foo:
                    cmd: "echo foo > foo"
                    # This copies 'foo' text to 'foo' file.
                    outs:
                    - foo
                copy-foo-bar:
                    cmd: "python copy.py foo bar"
                    deps:
                    - foo
                    outs:
                    - bar"""
        ),
    )

    dvc.reproduce(PIPELINE_FILE)

    dvc_file = Dvcfile(dvc, PIPELINE_FILE)

    # Preconditions: the pipeline ran and produced the lock and both outputs.
    assert dvc_file.exists()
    assert (tmp_dir / PIPELINE_LOCK).exists()
    assert (tmp_dir / "foo").exists()
    assert (tmp_dir / "bar").exists()

    dvc_file.remove_stage(dvc_file.stages["copy-foo-bar"])

    remaining_text = (tmp_dir / PIPELINE_FILE).read_text()
    assert comment in remaining_text
    # Guard against a no-op removal trivially "preserving" the comment.
    assert "copy-foo-bar" not in remaining_text
def run(self, fname=None, no_exec=False, **kwargs):
    """Create a stage from ``kwargs``, execute it, and dump it to its file.

    When ``kwargs`` carries a truthy ``name`` the stage is a
    ``PipelineStage`` stored in ``PIPELINE_FILE``; otherwise it is a plain
    ``Stage`` stored at ``fname`` (or the path derived by
    ``_get_file_path``).

    Args:
        fname: target file for a stage without a name.
        no_exec: when true, the stage is dumped without being run.
        **kwargs: forwarded to ``create_stage``; ``name``, ``overwrite`` and
            ``no_commit`` are also read here.

    Returns:
        The created stage, or ``None`` when ``create_stage`` returns ``None``.

    Raises:
        InvalidStageName: the given name fails ``is_valid_name``.
        DuplicateStageName: the name already exists in the target file.
        OutputDuplicationError: re-raised with the new stage excluded from
            the reported stages.
    """
    from dvc.stage import PipelineStage, Stage, create_stage
    from dvc.dvcfile import Dvcfile, PIPELINE_FILE

    name = kwargs.get("name")
    if name:
        if not is_valid_name(name):
            raise InvalidStageName
        cls = PipelineStage
        target = PIPELINE_FILE
    else:
        # Drop a falsy "name" so it is not forwarded to create_stage.
        kwargs.pop("name", None)
        cls = Stage
        target = fname or _get_file_path(kwargs)

    new_stage = create_stage(cls, repo=self, path=target, **kwargs)
    if new_stage is None:
        return None

    dvcfile = Dvcfile(self, new_stage.path)
    if dvcfile.exists():
        if name and name in dvcfile.stages:
            raise DuplicateStageName(name, dvcfile)
        if cls != PipelineStage:
            overwrite = kwargs.get("overwrite", True)
            dvcfile.remove_with_prompt(force=overwrite)

    try:
        self.check_modified_graph([new_stage])
    except OutputDuplicationError as exc:
        # Report only the pre-existing stages that clash with the new one.
        others = set(exc.stages) - {new_stage}
        raise OutputDuplicationError(exc.output, others)

    if not no_exec:
        new_stage.run(no_commit=kwargs.get("no_commit", False))

    dvcfile.dump(new_stage, update_pipeline=True)
    return new_stage
def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):
    """Create a stage from ``kwargs``, execute it, and dump it to its file.

    A named stage is a ``PipelineStage`` stored in ``PIPELINE_FILE``; with
    ``single_stage`` it is a plain ``Stage`` stored at ``fname`` (or the path
    derived by ``_get_file_path``). Exactly one of the two modes must be
    selected.

    Args:
        fname: target file for a single-stage file.
        no_exec: when true, outputs are ignored instead of running the
            stage, and the dump is done with ``no_lock=True``.
        single_stage: create a single-stage file instead of a pipeline stage.
        **kwargs: forwarded to ``create_stage``; ``cmd``, ``name``,
            ``params``, ``overwrite``, ``no_commit`` and ``run_cache`` are
            also read here.

    Returns:
        The created stage, or ``None`` when ``create_stage`` returns ``None``.

    Raises:
        InvalidArgumentError: no command, a name combined with
            ``single_stage``, or neither a name nor ``single_stage``.
        InvalidStageName: the given name fails ``is_valid_name``.
        StageFileAlreadyExistsError: a single-stage file exists and
            ``overwrite`` is falsy.
        DuplicateStageName: the name already exists in the target file and
            ``overwrite`` is falsy.
        OutputDuplicationError: re-raised with the new stage excluded from
            the reported stages.
    """
    from dvc.stage import PipelineStage, Stage, create_stage
    from dvc.dvcfile import Dvcfile, PIPELINE_FILE

    if not kwargs.get("cmd"):
        raise InvalidArgumentError("command is not specified")

    name = kwargs.get("name")
    if name and single_stage:
        raise InvalidArgumentError(
            "`-n|--name` is incompatible with `--single-stage`"
        )
    if not (name or single_stage):
        raise InvalidArgumentError("`-n|--name` is required")

    if single_stage:
        # Make sure a falsy "name" is not forwarded to create_stage.
        kwargs.pop("name", None)
        cls = Stage
        target = fname or _get_file_path(kwargs)
    else:
        if not is_valid_name(name):
            raise InvalidStageName
        cls = PipelineStage
        target = PIPELINE_FILE

    parsed_params = parse_params(kwargs.pop("params", []))
    new_stage = create_stage(
        cls, repo=self, path=target, params=parsed_params, **kwargs
    )
    if new_stage is None:
        return None

    dvcfile = Dvcfile(self, new_stage.path)
    if dvcfile.exists():
        if kwargs.get("overwrite", True):
            dvcfile.remove_stage(new_stage)
        elif cls != PipelineStage:
            raise StageFileAlreadyExistsError(dvcfile.relpath)
        elif name and name in dvcfile.stages:
            raise DuplicateStageName(name, dvcfile)

    try:
        self.check_modified_graph([new_stage])
    except OutputDuplicationError as exc:
        # Report only the pre-existing stages that clash with the new one.
        others = set(exc.stages) - {new_stage}
        raise OutputDuplicationError(exc.output, others)

    if no_exec:
        new_stage.ignore_outs()
    else:
        new_stage.run(
            no_commit=kwargs.get("no_commit", False),
            run_cache=kwargs.get("run_cache", True),
        )

    dvcfile.dump(new_stage, update_pipeline=True, no_lock=no_exec)
    return new_stage
def test_stage_load_on_not_existing_file(tmp_dir, dvc, file):
    """Loading stages fails for a missing path and for a directory."""
    target = Dvcfile(dvc, file)
    assert not target.exists()

    # Nothing exists at the path yet.
    with pytest.raises(StageFileDoesNotExistError):
        assert target.stages.values()

    # A directory at that path is reported with a different error.
    (tmp_dir / file).mkdir()
    with pytest.raises(StageFileIsNotDvcFileError):
        assert target.stages.values()
def test_stage_load_on_not_existing_file(tmp_dir, dvc, file, is_dvcignored):
    """Loading stages from a missing file raises with a fixed message.

    The same message is expected whether or not the path is also listed in
    ``.dvcignore``.
    """
    target = Dvcfile(dvc, file)

    if is_dvcignored:
        ignore_file = tmp_dir / ".dvcignore"
        ignore_file.write_text(file)

    assert not target.exists()

    with pytest.raises(StageFileDoesNotExistError) as exc_info:
        assert target.stages.values()

    message = str(exc_info.value)
    assert message == f"'{file}' does not exist"
def create(repo, path, **kwargs):
    """Build and validate a ``Stage``, or return ``None`` if it can be skipped.

    Args:
        repo: repository the stage belongs to.
        path: stage file path; made absolute before use.
        **kwargs: ``wdir``, ``cmd``, ``locked``, ``always_changed`` and
            ``ignore_build_cache`` are read here; the full mapping is also
            forwarded to the output and dependency fill helpers.

    Returns:
        The new stage, or ``None`` when the stage file already exists, the
        build cache is not ignored, and ``stage.can_be_skipped`` is true.
    """
    from dvc.dvcfile import Dvcfile

    requested_wdir = kwargs.get("wdir", None)
    wdir = os.path.abspath(requested_wdir or os.curdir)
    path = os.path.abspath(path)

    Dvcfile.check_dvc_filename(path)

    Stage._check_stage_path(repo, wdir, is_wdir=kwargs.get("wdir"))
    Stage._check_stage_path(repo, os.path.dirname(path))

    stage = Stage(
        repo=repo,
        path=path,
        wdir=wdir,
        cmd=kwargs.get("cmd", None),
        locked=kwargs.get("locked", False),
        always_changed=kwargs.get("always_changed", False),
    )

    stage._fill_stage_outputs(**kwargs)
    stage._fill_stage_dependencies(**kwargs)

    stage._check_circular_dependency()
    stage._check_duplicated_arguments()

    # NOTE(review): this repeats the filename check made above on the same
    # path; it looks redundant -- confirm before removing.
    Dvcfile.check_dvc_filename(path)

    dvcfile = Dvcfile(stage.repo, stage.path)
    if not dvcfile.exists():
        return stage

    persists_outputs = any(out.persist for out in stage.outs)
    skip_cache = kwargs.get("ignore_build_cache", False) or persists_outputs
    if persists_outputs:
        logger.warning("Build cache is ignored when persisting outputs.")

    # can_be_skipped is only consulted when the cache is not being ignored.
    if not skip_cache and stage.can_be_skipped:
        logger.info("Stage is cached, skipping.")
        return None

    return stage
def test_remove_stage(tmp_dir, dvc, run_copy):
    """Removing one pipeline stage leaves the other, and is repeatable."""
    tmp_dir.gen("foo", "foo")
    first = run_copy("foo", "bar", name="copy-foo-bar")
    second = run_copy("bar", "foobar", name="copy-bar-foobar")

    pipeline_file = Dvcfile(dvc, PIPELINE_FILE)
    assert pipeline_file.exists()

    names_before = set(pipeline_file._load()[0]["stages"].keys())
    assert {"copy-bar-foobar", "copy-foo-bar"} == names_before

    pipeline_file.remove_stage(first)

    names_after = list(pipeline_file._load()[0]["stages"].keys())
    assert ["copy-bar-foobar"] == names_after

    # Sanity check: the remaining stage can still be reloaded.
    second.reload()

    # Removing again must be harmless when the stage entry is already gone.
    pipeline_file.remove_stage(first)

    pipeline_file.remove(force=True)
    # ...and equally harmless when there is no file at all.
    pipeline_file.remove_stage(first)
def _collect_from_default_dvcfile(self, target):
    """Look up ``target`` among the stages of the default pipeline file.

    Returns ``None`` when the pipeline file does not exist; otherwise the
    result of ``stages.get(target)``.
    """
    default_file = Dvcfile(self, PIPELINE_FILE)
    if not default_file.exists():
        return None
    return default_file.stages.get(target)