Example #1
0
def test_repro_when_new_out_overlaps_others_stage_outs(tmp_dir, dvc):
    """Reproduce must reject a stage output nested in an added directory.

    ``dir`` is added with ``dvc.add`` first; the ``run-copy`` stage written
    to the pipeline file then declares ``dir/foo`` as its own output, and
    ``dvc.reproduce`` is expected to raise ``OverlappingOutputPathsError``.
    """
    from dvc.exceptions import OverlappingOutputPathsError

    tmp_dir.gen({"dir": {"file1": "file1"}, "foo": "foo"})
    dvc.add("dir")

    source, destination = "foo", "dir/foo"
    stage_entry = {
        "cmd": "python copy {} {}".format(source, destination),
        "deps": [source],
        "outs": [destination],
    }
    dump_yaml(PIPELINE_FILE, {"stages": {"run-copy": stage_entry}})

    with pytest.raises(OverlappingOutputPathsError):
        dvc.reproduce(":run-copy")
Example #2
0
def test_repro_when_new_outs_added_does_not_exist(tmp_dir, dvc):
    """Reproduce must fail when a declared output is never produced.

    The stage command copies ``foo`` to ``foobar`` only, yet the stage also
    lists ``bar`` under ``outs``; ``dvc.reproduce`` is expected to raise
    ``ReproductionError``.
    """
    from dvc.exceptions import ReproductionError

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen("foo", "foo")

    stage_entry = {
        "cmd": "python copy.py {} {}".format("foo", "foobar"),
        "deps": ["foo"],
        # "bar" is listed as an output but the command never writes it.
        "outs": ["foobar", "bar"],
    }
    dump_yaml(PIPELINE_FILE, {"stages": {"run-copy": stage_entry}})

    with pytest.raises(ReproductionError):
        dvc.reproduce(":run-copy")
Example #3
0
def test_remove_stage_on_lockfile_format_error(tmp_dir, dvc, run_copy):
    """``remove_stage`` must raise on a malformed lockfile or pipeline file.

    An unknown top-level ``gibberish`` key is written first into the
    lockfile and then, after the lockfile has been regenerated, into the
    pipeline file. Both attempts to remove the stage are expected to raise
    ``StageFileFormatError``.
    """
    tmp_dir.gen("foo", "foo")
    stage = run_copy("foo", "bar", name="copy-foo-bar")
    pipeline_file = Dvcfile(dvc, stage.path)
    lockfile = pipeline_file._lockfile

    # Take both snapshots before any file on disk is corrupted.
    pipeline_data, _ = pipeline_file._load()
    lockfile_data = lockfile.load()
    lockfile_data["gibberish"] = True
    pipeline_data["gibberish"] = True

    # Case 1: the lockfile carries the unexpected key.
    dump_yaml(lockfile.relpath, lockfile_data)
    with pytest.raises(StageFileFormatError):
        pipeline_file.remove_stage(stage)

    # Drop the corrupted lockfile and dump the stage again before case 2.
    lockfile.remove()
    pipeline_file.dump(stage)

    # Case 2: the pipeline file carries the unexpected key.
    dump_yaml(pipeline_file.relpath, pipeline_data)
    with pytest.raises(StageFileFormatError):
        pipeline_file.remove_stage(stage)
Example #4
0
def test_repro_when_new_outs_is_added_in_dvcfile(tmp_dir, dvc):
    """Adding an output to an existing stage must trigger reproduction.

    The stage is first created without outputs and reproduces as a no-op.
    After ``foobar`` is written into the stage's ``outs`` in the pipeline
    file, ``dvc.reproduce`` is expected to return that stage.
    """
    from dvc.dvcfile import Dvcfile

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen({"foo": "foo", "bar": "bar"})

    stage = dvc.run(
        name="copy-file",
        cmd="python copy.py {} {}".format("foo", "foobar"),
        deps=["foo"],
        outs=[],  # scenario where user forgot to add
    )
    target = ":copy-file"
    assert not dvc.reproduce(target)

    # Edit the pipeline file by hand to declare the missing output.
    pipeline_data, _ = Dvcfile(dvc, stage.path)._load()
    pipeline_data["stages"]["copy-file"]["outs"] = ["foobar"]
    dump_yaml(stage.path, pipeline_data)

    reproduced = dvc.reproduce(target)
    assert reproduced[0] == stage
Example #5
0
def test_repro_when_new_deps_is_added_in_dvcfile(tmp_dir, dvc, run_copy):
    """Adding a dependency to an existing stage must trigger reproduction.

    The stage initially depends on ``foo`` only and reproduces as a no-op.
    After ``copy.py`` is appended to its ``deps`` in the pipeline file,
    ``dvc.reproduce`` is expected to return that stage.
    """
    from dvc.dvcfile import Dvcfile

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen({"foo": "foo", "bar": "bar"})

    stage = dvc.run(
        name="copy-file",
        cmd="python copy.py {} {}".format("foo", "foobar"),
        deps=["foo"],
        outs=["foobar"],
    )
    target = PIPELINE_FILE + ":copy-file"
    assert not dvc.reproduce(target)

    # Edit the pipeline file by hand to declare the extra dependency.
    pipeline_data, _ = Dvcfile(dvc, stage.path)._load()
    stage_entry = pipeline_data["stages"]["copy-file"]
    stage_entry["deps"] += ["copy.py"]
    dump_yaml(stage.path, pipeline_data)

    reproduced = dvc.reproduce(target)
    assert reproduced[0] == stage
Example #6
0
def test_repro_when_cmd_changes(tmp_dir, dvc, run_copy, mocker):
    """A changed ``cmd`` in a single-stage file must cause a re-run.

    The stage file is rewritten with the command tokens joined by two
    spaces. Status is then expected to report ``changed checksum`` and
    ``dvc.reproduce`` to invoke ``cmd_run`` exactly once with the stage.
    """
    from dvc.dvcfile import SingleStageFile
    from dvc.stage.run import cmd_run

    tmp_dir.gen("foo", "foo")
    stage = run_copy("foo", "bar", single_stage=True)
    assert not dvc.reproduce(stage.addressing)

    # Wrap the real runner so the command still executes while calls
    # are recorded.
    run_spy = mocker.patch("dvc.stage.run.cmd_run", wraps=cmd_run)

    stage_data = SingleStageFile(dvc, stage.path)._load()[0]
    # change cmd spacing by two
    cmd_tokens = stage.cmd.split()
    stage_data["cmd"] = "  ".join(cmd_tokens)
    dump_yaml(stage.path, stage_data)

    expected_status = {stage.addressing: ["changed checksum"]}
    assert dvc.status([stage.addressing]) == expected_status

    reproduced = dvc.reproduce(stage.addressing)
    assert reproduced[0] == stage
    run_spy.assert_called_once_with(stage)
Example #7
0
    def test(self):
        """Reloading and re-dumping a stage file must keep its stored md5.

        The stage file's md5 field is overwritten by hand, the stage is
        loaded through ``SingleStageFile`` and dumped again, and the value
        on disk is expected to be unchanged.
        """
        added = self.dvc.add(self.FOO)
        self.assertEqual(len(added), 1)
        stage = added[0]
        self.assertIsNotNone(stage)

        # NOTE: checking that reloaded stage didn't change its checksum
        fake_md5 = "11111111111111111111111111111111"
        contents = load_yaml(stage.relpath)
        contents[stage.PARAM_MD5] = fake_md5
        dump_yaml(stage.relpath, contents)

        dvcfile = SingleStageFile(self.dvc, stage.relpath)
        stage = dvcfile.stage
        self.assertIsNotNone(stage)
        dvcfile.dump(stage)

        reloaded = load_yaml(stage.relpath)
        self.assertEqual(reloaded[stage.PARAM_MD5], fake_md5)
Example #8
0
    def _test(self):
        """``status -c`` must warn about an output with no md5 recorded.

        A single-stage file is created and pushed, the ``md5`` of its first
        output is deleted from the stage file, and the warning text is then
        looked up in the captured ``dvc`` log.
        """
        remote_url = Local.get_url()
        self.main(["remote", "add", "-d", TEST_REMOTE, remote_url])

        stage = self.dvc.run(
            cmd="echo bar > bar",
            outs=["bar"],
            single_stage=True,
        )
        self.main(["push"])

        # Strip the hash from the first output entry of the stage file.
        stage_file = stage.relpath
        stage_data = load_yaml(stage_file)
        del stage_data["outs"][0]["md5"]
        dump_yaml(stage_file, stage_data)

        expected_warning = (
            "Output 'bar'(stage: 'bar.dvc') is missing version info. "
            "Cache for it will not be collected. "
            "Use `dvc repro` to get your pipeline up to date."
        )

        with self._caplog.at_level(logging.WARNING, logger="dvc"):
            self._caplog.clear()
            self.main(["status", "-c"])

            assert expected_warning in self._caplog.text
Example #9
0
def test_repro_when_lockfile_gets_deleted(tmp_dir, dvc):
    """Deleting the lockfile must make the stage reproduce again.

    After a first reproduction the lockfile exists and a second reproduce
    is a no-op. Once the lockfile is unlinked, reproduce is expected to
    return the ``run-copy`` stage from the pipeline file.
    """
    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen("foo", "foo")

    stage_entry = {
        "cmd": "python copy.py {} {}".format("foo", "foobar"),
        "deps": ["foo"],
        "outs": ["foobar"],
    }
    dump_yaml(PIPELINE_FILE, {"stages": {"run-copy": stage_entry}})

    target = ":run-copy"
    assert dvc.reproduce(target)
    assert os.path.exists(PIPELINE_LOCK)

    # Nothing has changed, so the second run must not reproduce anything.
    assert not dvc.reproduce(target)

    os.unlink(PIPELINE_LOCK)
    stages = dvc.reproduce(target)
    assert stages
    assert stages[0].relpath == PIPELINE_FILE
    assert stages[0].name == "run-copy"
Example #10
0
    def _dump_pipeline_file(self, stage):
        """Write ``stage`` into the pipeline file at ``self.path``.

        Existing file contents are loaded with ``parse_yaml_for_update``;
        a missing file is created empty first. If the file already has a
        truthy entry under the stage's name, the serialized stage is
        applied onto that entry with ``apply_diff``; otherwise the new
        entry is inserted. The result is dumped back to ``self.path`` and
        the file is handed to ``self.repo.scm.track_file``.
        """
        if self.exists():
            with open(self.path) as fd:
                data = parse_yaml_for_update(fd.read(), self.path)
        else:
            data = {}
            logger.info("Creating '%s'", self.relpath)
            # Create an empty file on disk before anything is dumped.
            open(self.path, "w+").close()

        data["stages"] = data.get("stages", {})
        stages = data["stages"]
        serialized = serialize.to_pipeline_file(stage)

        existing_entry = stages.get(stage.name)
        if existing_entry:
            # NOTE(review): presumably apply_diff updates the existing
            # entry in place so the loaded structure is kept - confirm.
            apply_diff(serialized[stage.name], existing_entry)
        else:
            stages.update(serialized)

        logger.info(
            "Adding stage '%s' to '%s'", stage.name, self.relpath,
        )
        dump_yaml(self.path, data)
        self.repo.scm.track_file(self.relpath)
Example #11
0
    def save(self, stage):
        """Store a cache entry for ``stage`` unless one already exists.

        Nothing happens when ``_get_stage_hash`` yields no key. Otherwise
        the outputs reported by ``self._uncached_outs`` are committed, and
        when no entry could be loaded for the key/value pair, the freshly
        built entry is validated against the lockfile stage schema and
        dumped as YAML to the path from ``self._get_cache_path``.
        """
        cache_key = _get_stage_hash(stage)
        if not cache_key:
            return

        fresh_entry = to_single_stage_lockfile(stage)
        cache_value = _get_cache_hash(fresh_entry)

        stored_entry = self._load_cache(cache_key, cache_value)
        # Prefer the entry that was already stored, when there is one.
        entry = stored_entry if stored_entry else fresh_entry

        for out in self._uncached_outs(stage, entry):
            out.commit()

        if stored_entry:
            return

        # sanity check
        COMPILED_LOCK_FILE_STAGE_SCHEMA(fresh_entry)

        path = self._get_cache_path(cache_key, cache_value)
        makedirs(os.path.dirname(path), exist_ok=True)
        dump_yaml(path, fresh_entry)
Example #12
0
    def test(self):
        """Reproducing a stage file that closes a dependency loop must fail.

        Two stages chain ``self.FOO`` -> ``bar.txt`` -> ``baz.txt``; a
        hand-written ``cycle.dvc`` then declares ``baz.txt`` as a dependency
        and ``self.FOO`` as an output, and ``reproduce`` is expected to
        raise ``CyclicGraphError``.
        """
        self._run(
            name="copybarbar-txt",
            cmd="echo bar > bar.txt",
            deps=[self.FOO],
            outs=["bar.txt"],
        )
        self._run(
            name="copybazbaz-txt",
            cmd="echo baz > baz.txt",
            deps=["bar.txt"],
            outs=["baz.txt"],
        )

        # This stage outputs the first stage's dependency, closing the loop.
        cycle_file = "cycle.dvc"
        dump_yaml(
            cycle_file,
            {
                "cmd": "echo baz > foo",
                "deps": [{"path": "baz.txt"}],
                "outs": [{"path": self.FOO}],
            },
        )

        with self.assertRaises(CyclicGraphError):
            self.dvc.reproduce(cycle_file)