def test_fill_from_lock_deps_outs(dvc, lock_data):
    """Check that ``fill_from_lock`` sets hash info on deps and outs."""
    stage = create_stage(
        PipelineStage, dvc, PIPELINE_FILE, deps=["foo"], outs=["bar"]
    )

    # A freshly created stage must not carry any hash info yet.
    assert all(
        not entry.hash_info for entry in chain(stage.deps, stage.outs)
    )

    StageLoader.fill_from_lock(stage, lock_data)

    expected_dep_hash = HashInfo("md5", "foo_checksum")
    expected_out_hash = HashInfo("md5", "bar_checksum")
    assert stage.deps[0].hash_info == expected_dep_hash
    assert stage.outs[0].hash_info == expected_out_hash
def test_fill_from_lock_missing_params_section(dvc, lock_data):
    """Check that params deps stay unhashed after ``fill_from_lock``.

    NOTE(review): presumably the ``lock_data`` fixture (defined outside this
    view) has no ``params`` section -- confirm against the fixture.
    """
    params_spec = ["lorem", "lorem.ipsum", {"myparams.yaml": ["ipsum"]}]
    stage = create_stage(
        PipelineStage,
        dvc,
        PIPELINE_FILE,
        deps=["foo"],
        outs=["bar"],
        params=params_spec,
    )

    # Grab the params dependencies before the lock data is applied.
    params_deps, _ = split_params_deps(stage)

    StageLoader.fill_from_lock(stage, lock_data)

    assert not params_deps[0].hash_info
    assert not params_deps[1].hash_info
def test_fill_from_lock_use_appropriate_checksum(dvc, lock_data):
    """Check that the hash name found in the lock entry is the one used.

    The dependency lock entry carries an ``etag`` key, and the resulting
    hash info is expected to be an ``etag`` hash rather than ``md5``.
    """
    stage = create_stage(
        PipelineStage,
        dvc,
        PIPELINE_FILE,
        deps=["s3://dvc-temp/foo"],
        outs=["bar"],
    )
    s3_dep_entry = {"path": "s3://dvc-temp/foo", "etag": "e-tag"}
    lock_data["deps"] = [s3_dep_entry]

    StageLoader.fill_from_lock(stage, lock_data)

    expected_dep_hash = HashInfo("etag", "e-tag")
    expected_out_hash = HashInfo("md5", "bar_checksum")
    assert stage.deps[0].hash_info == expected_dep_hash
    assert stage.outs[0].hash_info == expected_out_hash
def _create_stage(self, cache):
    """Build a ``PipelineStage`` from a cache entry and fill it from it.

    Args:
        cache: mapping with ``"cmd"``, ``"deps"`` and ``"outs"`` keys; every
            item of ``"deps"`` and ``"outs"`` must provide a ``"path"`` key.

    Returns:
        The created stage, after ``StageLoader.fill_from_lock`` has been
        applied to it with ``cache`` as the lock data.
    """
    from dvc.stage import create_stage, PipelineStage

    # Keys are read in the same order as the original keyword arguments.
    stage_kwargs = {
        "repo": self.repo,
        "path": "dvc.yaml",
        "cmd": cache["cmd"],
        "deps": [dep_entry["path"] for dep_entry in cache["deps"]],
        "outs": [out_entry["path"] for out_entry in cache["outs"]],
    }
    new_stage = create_stage(PipelineStage, **stage_kwargs)
    StageLoader.fill_from_lock(new_stage, cache)
    return new_stage
def test_fill_from_lock_missing_checksums(dvc, lock_data):
    """Check that only the first dep/out receive hash info from the lock.

    The stage declares two deps and two outs; the second of each is expected
    to remain without hash info after ``fill_from_lock``.
    """
    stage = create_stage(
        PipelineStage,
        dvc,
        PIPELINE_FILE,
        deps=["foo", "foo1"],
        outs=["bar", "bar1"],
    )

    StageLoader.fill_from_lock(stage, lock_data)

    expected_dep_hash = HashInfo("md5", "foo_checksum")
    expected_out_hash = HashInfo("md5", "bar_checksum")
    assert stage.deps[0].hash_info == expected_dep_hash
    assert stage.outs[0].hash_info == expected_out_hash

    # The second dep and out must be left untouched.
    assert not stage.deps[1].hash_info
    assert not stage.outs[1].hash_info
def test_fill_from_lock_with_missing_sections(dvc, lock_data):
    """Check ``fill_from_lock`` when the lock lacks ``deps`` or ``outs``.

    The same stage is filled twice: first from a copy of the lock data
    without ``deps``, then from a copy without ``outs``.
    """
    stage = create_stage(
        PipelineStage, dvc, PIPELINE_FILE, deps=["foo"], outs=["bar"]
    )

    # Scenario 1: no "deps" section in the lock data.
    lock_without_deps = deepcopy(lock_data)
    lock_without_deps.pop("deps")
    StageLoader.fill_from_lock(stage, lock_without_deps)

    assert not stage.deps[0].hash_info
    assert stage.outs[0].hash_info == HashInfo("md5", "bar_checksum")

    # Scenario 2: no "outs" section in the lock data.
    lock_without_outs = deepcopy(lock_data)
    lock_without_outs.pop("outs")
    StageLoader.fill_from_lock(stage, lock_without_outs)

    assert stage.deps[0].hash_info == HashInfo("md5", "foo_checksum")
    assert not stage.outs[0].hash_info
def test_load_stage_wdir_and_path_correctly(dvc, stage_data, lock_data):
    """Check that a loaded stage exposes absolute ``wdir`` and ``path``."""
    stage_data["wdir"] = "dir"
    pipeline_file = Dvcfile(dvc, PIPELINE_FILE)

    stage = StageLoader.load_stage(
        pipeline_file, "stage-1", stage_data, lock_data
    )

    expected_wdir = os.path.abspath("dir")
    assert stage.wdir == expected_wdir
    expected_path = os.path.abspath(PIPELINE_FILE)
    assert stage.path == expected_path
def test_load_stage_with_metrics_and_plots(dvc, stage_data, lock_data, typ):
    """Check loading when the outputs are declared under the ``typ`` key.

    The ``outs`` section of ``stage_data`` is moved to ``stage_data[typ]``
    before loading; the output must still be loaded with its hash info.
    """
    outs_spec = stage_data.pop("outs")
    stage_data[typ] = outs_spec
    pipeline_file = Dvcfile(dvc, PIPELINE_FILE)

    stage = StageLoader.load_stage(
        pipeline_file, "stage-1", stage_data, lock_data
    )

    assert stage.outs[0].def_path == "bar"
    expected_hash = HashInfo("md5", "bar_checksum")
    assert stage.outs[0].hash_info == expected_hash
def test_fill_from_lock_outs_isexec(dvc):
    """Check that ``isexec`` from the lock entry lands on the output meta."""
    stage = create_stage(PipelineStage, dvc, PIPELINE_FILE, outs=["foo"])

    # Before filling, the output is not flagged as executable.
    assert not stage.outs[0].meta.isexec

    out_entry = {"path": "foo", "md5": "foo_checksum", "isexec": True}
    lock_entry = {"cmd": "command", "outs": [out_entry]}
    StageLoader.fill_from_lock(stage, lock_entry)

    assert stage.outs[0].def_path == "foo"
    expected_hash = HashInfo("md5", "foo_checksum")
    assert stage.outs[0].hash_info == expected_hash
    assert stage.outs[0].meta.isexec
def stages(self):
    """Return a ``StageLoader`` for the stages defined in this file.

    When ``self.repo.config["feature"]["parametrization"]`` is truthy, the
    loaded data is first passed through ``DataResolver(...).resolve()``; that
    step runs inside ``log_durations`` with the label "resolving values".
    The lockfile data is loaded afterwards and handed to the loader.
    """
    loaded, _ = self._load()

    parametrization_enabled = self.repo.config["feature"]["parametrization"]
    if parametrization_enabled:
        with log_durations(logger.debug, "resolving values"):
            loaded = DataResolver(loaded).resolve()

    lock_contents = self._lockfile.load()
    stage_definitions = loaded.get("stages", {})
    return StageLoader(self, stage_definitions, lock_contents)
def test_load_stage_with_params(dvc, stage_data, lock_data):
    """Check that params are loaded together with regular deps and outs."""
    stage_data["params"] = ["lorem"]
    locked_params = {"params.yaml": {"lorem": "ipsum"}}
    lock_data["params"] = locked_params
    pipeline_file = Dvcfile(dvc, PIPELINE_FILE)

    stage = StageLoader.load_stage(
        pipeline_file, "stage-1", stage_data, lock_data
    )
    params, deps = split_params_deps(stage)

    # Definition paths of the regular dep, the output and the params file.
    assert deps[0].def_path == "foo"
    assert stage.outs[0].def_path == "bar"
    assert params[0].def_path == "params.yaml"

    # Hash info restored from the lock data.
    assert params[0].hash_info == HashInfo("params", {"lorem": "ipsum"})
    assert deps[0].hash_info == HashInfo("md5", "foo_checksum")
    assert stage.outs[0].hash_info == HashInfo("md5", "bar_checksum")
def test_load_stage(dvc, stage_data, lock_data):
    """Check the basic attributes of a stage loaded with lock data."""
    pipeline_file = Dvcfile(dvc, PIPELINE_FILE)
    stage = StageLoader.load_stage(
        pipeline_file, "stage-1", stage_data, lock_data
    )

    # Stage-level attributes.
    expected_wdir = os.path.abspath(os.curdir)
    assert stage.wdir == expected_wdir
    assert stage.name == "stage-1"
    assert stage.cmd == "command"
    expected_path = os.path.abspath(PIPELINE_FILE)
    assert stage.path == expected_path

    # Dependency and output, including the hashes taken from the lock.
    assert stage.deps[0].def_path == "foo"
    expected_dep_hash = HashInfo("md5", "foo_checksum")
    assert stage.deps[0].hash_info == expected_dep_hash
    assert stage.outs[0].def_path == "bar"
    expected_out_hash = HashInfo("md5", "bar_checksum")
    assert stage.outs[0].hash_info == expected_out_hash
def get_used_cache(self, used_run_cache, *args, **kwargs):
    """Gather the used cache of every loadable run-cache entry.

    Args:
        used_run_cache: iterable of ``(key, value)`` pairs; each pair is
            passed to ``self._load_cache``.
        *args: forwarded to each stage's ``get_used_cache``.
        **kwargs: forwarded to each stage's ``get_used_cache``.

    Returns:
        A ``NamedCache`` updated with the used cache of every stage built
        from an entry that ``self._load_cache`` returned as truthy.
    """
    from dvc.cache import NamedCache
    from dvc.stage import create_stage, PipelineStage

    collected = NamedCache()
    for cache_key, cache_value in used_run_cache:
        entry = self._load_cache(cache_key, cache_value)
        if not entry:
            # Entries that could not be loaded are skipped.
            continue

        # Keys are read in the same order as the original keyword arguments.
        stage_kwargs = {
            "repo": self.repo,
            "path": "dvc.yaml",
            "cmd": entry["cmd"],
            "deps": [dep_entry["path"] for dep_entry in entry["deps"]],
            "outs": [out_entry["path"] for out_entry in entry["outs"]],
        }
        entry_stage = create_stage(PipelineStage, **stage_kwargs)
        StageLoader.fill_from_lock(entry_stage, entry)

        stage_cache = entry_stage.get_used_cache(*args, **kwargs)
        collected.update(stage_cache)
    return collected
def test_load_stage_no_lock(dvc, stage_data):
    """Check that loading without lock data leaves deps/outs unhashed."""
    pipeline_file = Dvcfile(dvc, PIPELINE_FILE)
    stage = StageLoader.load_stage(pipeline_file, "stage-1", stage_data)

    assert stage.deps[0].def_path == "foo"
    assert stage.outs[0].def_path == "bar"

    # No lock data was supplied, so nothing may carry hash info.
    assert not stage.deps[0].hash_info
    assert not stage.outs[0].hash_info
def test_load_stage_outs_with_flags(dvc, stage_data, lock_data):
    """Check that a ``cache: False`` flag on an output is honoured."""
    uncached_out = {"foo": {"cache": False}}
    stage_data["outs"] = [uncached_out]
    pipeline_file = Dvcfile(dvc, PIPELINE_FILE)

    stage = StageLoader.load_stage(
        pipeline_file, "stage-1", stage_data, lock_data
    )

    first_out = stage.outs[0]
    assert first_out.use_cache is False
def restore(self, stage):
    """Fill ``stage`` from whatever ``self._load(stage)`` returns.

    Nothing happens when ``self._load`` returns a falsy value; otherwise the
    loaded data is applied with ``StageLoader.fill_from_lock``. The method
    always returns ``None``.
    """
    cached_entry = self._load(stage)
    if cached_entry:
        StageLoader.fill_from_lock(stage, cached_entry)
def stages(self):
    """Return a ``StageLoader`` for the stages defined in this file.

    The loader receives the ``"stages"`` mapping of the loaded data (an
    empty dict when that key is absent) and the loaded lockfile data.
    """
    loaded, _ = self._load()
    lock_contents = self._lockfile.load()
    stage_definitions = loaded.get("stages", {})
    return StageLoader(self, stage_definitions, lock_contents)
def test_load_stage_cmd_with_list(dvc, stage_data, lock_data):
    """Check that a list-valued ``cmd`` is kept as a list on the stage."""
    commands = ["cmd-0", "cmd-1"]
    stage_data["cmd"] = commands
    pipeline_file = Dvcfile(dvc, PIPELINE_FILE)

    stage = StageLoader.load_stage(
        pipeline_file, "stage-1", stage_data, lock_data
    )

    assert stage.cmd == ["cmd-0", "cmd-1"]