Exemple #1
0
    def test(self):
        """Reproducing a stage file placed inside another stage's output fails.

        Layout built by the test:

            .
            |-- dir1
            |  |__ dir2.dvc         (out.path == ../dir2)
            |__ dir2
                |__ something.dvc    (stage.cwd == ./dir2)
        """
        os.mkdir(os.path.join(self.dvc.root_dir, "dir1"))

        # The first stage runs from dir1 and declares ../dir2 as its output.
        sibling_dir = os.path.join("..", "dir2")
        self.dvc.run(
            cwd="dir1",
            outs=["../dir2"],
            cmd="mkdir {path}".format(path=sibling_dir),
        )

        # Hand-write a second stage file that lives inside that output.
        output = os.path.join("..", "something")
        faulty_stage_path = os.path.join("dir2", "something.dvc")
        dump_stage_file(
            faulty_stage_path,
            {
                "cmd": "echo something > {}".format(output),
                "outs": [{"path": output}],
            },
        )

        with self.assertRaises(StagePathAsOutputError):
            self.dvc.reproduce(faulty_stage_path)
Exemple #2
0
    def test(self):
        """Run a forced checkout after stripping checksums from the file."""
        stage_data = load_stage_file(self.file1_stage)

        # Remove the checksum entry from the first output, then from the
        # first dependency.
        first_out = stage_data[Stage.PARAM_OUTS][0]
        del first_out[RemoteLOCAL.PARAM_CHECKSUM]
        first_dep = stage_data[Stage.PARAM_DEPS][0]
        del first_dep[RemoteLOCAL.PARAM_CHECKSUM]

        dump_stage_file(self.file1_stage, stage_data)

        self.dvc.checkout(force=True)
Exemple #3
0
    def dump(self):
        """Write this stage to ``self.path`` and hand the file to the SCM.

        When the text the stage was loaded from is available, that text is
        re-parsed and updated in place so that comments and formatting are
        retained in the written file.
        """
        path = self.path
        self._check_dvc_filename(path)

        logger.debug(
            "Saving information to '{file}'.".format(file=relpath(path))
        )

        fresh_state = self.dumpd()
        if self._stage_text is None:
            to_write = fresh_state
        else:
            # Stages are loaded with a fast YAML parser that drops comments
            # and formatting. To keep them on update, the original text is
            # parsed again with the slower ruamel-based parser, the changes
            # are applied to that structure, and that structure is what gets
            # serialized.
            to_write = parse_stage_for_update(self._stage_text, path)
            # dumpd() never contains "meta" because Stage does not work with
            # it, so it is carried over from the file as-is.
            if "meta" in to_write:
                fresh_state["meta"] = to_write["meta"]
            apply_diff(fresh_state, to_write)

        dump_stage_file(path, to_write)

        self.repo.scm.track_file(relpath(path))
Exemple #4
0
def test_repro_when_lockfile_gets_deleted(tmp_dir, dvc):
    """A stage is reproduced again after its lock file has been removed."""
    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen("foo", "foo")

    pipeline = {
        "stages": {
            "run-copy": {
                "cmd": "python copy.py {} {}".format("foo", "foobar"),
                "deps": ["foo"],
                "outs": ["foobar"],
            }
        }
    }
    dump_stage_file(PIPELINE_FILE, pipeline)

    # The first reproduction returns stages and leaves a lock file behind.
    assert dvc.reproduce(":run-copy")
    assert os.path.exists(PIPELINE_LOCK)

    # With the lock file in place, a second reproduction returns nothing.
    assert not dvc.reproduce(":run-copy")

    os.unlink(PIPELINE_LOCK)
    rerun = dvc.reproduce(":run-copy")
    assert rerun
    first = rerun[0]
    assert first.relpath == PIPELINE_FILE
    assert first.name == "run-copy"
Exemple #5
0
def test_repro_when_new_deps_is_moved(tmp_dir, dvc):
    """The stage is reproduced after its dependency is moved and re-declared."""
    from shutil import move

    from dvc.dvcfile import Dvcfile

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen({"foo": "foo", "bar": "foo"})
    stage = dvc.run(
        cmd="python copy.py {} {}".format("foo", "foobar"),
        outs=["foobar"],
        deps=["foo"],
        name="copy-file",
    )
    target = ":copy-file"
    assert not dvc.reproduce(target)

    # Regenerate copy.py from COPY_SCRIPT_FORMAT with 'bar' and 'foobar',
    # then move the old dependency onto the new name.
    tmp_dir.gen("copy.py", COPY_SCRIPT_FORMAT.format("'bar'", "'foobar'"))
    move("foo", "bar")

    # Rewrite the stage's dependency list in the pipeline file by hand.
    pipeline, _ = Dvcfile(dvc, stage.path)._load()
    pipeline["stages"]["copy-file"]["deps"] = ["bar"]
    dump_stage_file(stage.path, pipeline)

    reproduced = dvc.reproduce(target)
    assert reproduced[0] == stage
Exemple #6
0
    def test(self):
        """Forced checkout raises once checksums are stripped from the file."""
        stage_data = load_stage_file(self.file1_stage)

        # Remove the checksum entry from the first output, then from the
        # first dependency.
        first_out = stage_data[Stage.PARAM_OUTS][0]
        del first_out[LocalRemote.PARAM_CHECKSUM]
        first_dep = stage_data[Stage.PARAM_DEPS][0]
        del first_dep[LocalRemote.PARAM_CHECKSUM]

        dump_stage_file(self.file1_stage, stage_data)

        with pytest.raises(CheckoutError):
            self.dvc.checkout(force=True)
Exemple #7
0
    def test(self):
        """Reproduce returns exactly one stage after checksums are removed."""
        stage_data = load_stage_file(self.file1_stage)

        # Remove the checksum entry from the first output, then from the
        # first dependency.
        first_out = stage_data[Stage.PARAM_OUTS][0]
        del first_out[RemoteLOCAL.PARAM_CHECKSUM]
        first_dep = stage_data[Stage.PARAM_DEPS][0]
        del first_dep[RemoteLOCAL.PARAM_CHECKSUM]

        dump_stage_file(self.file1_stage, stage_data)

        reproduced = self.dvc.reproduce(self.file1_stage)
        self.assertEqual(len(reproduced), 1)
Exemple #8
0
    def dump(self, stage, **kwargs):
        """Write ``stage`` to this dvcfile in single-stage form.

        ``stage`` must not be a ``PipelineStage``. After the file is written
        it is handed to the SCM for tracking. Extra keyword arguments are
        accepted but not used here.
        """
        from dvc.stage import PipelineStage

        assert not isinstance(stage, PipelineStage)

        check_dvc_filename(self.path)

        message = "Saving information to '{file}'.".format(
            file=relpath(self.path)
        )
        logger.debug(message)

        serialized = serialize.to_single_stage_file(stage)
        dump_stage_file(self.path, serialized)

        self.repo.scm.track_file(relpath(self.path))
Exemple #9
0
def test_commit_changed_md5(tmp_dir, dvc):
    """Committing a stage whose md5 was edited by hand needs ``force=True``."""
    tmp_dir.gen({"file": "file content"})
    added = dvc.add("file", no_commit=True)
    (stage,) = added

    # Overwrite the md5 recorded in the stage file.
    content = load_stage_file(stage.path)
    content["md5"] = "1111111111"
    dump_stage_file(stage.path, content)

    with pytest.raises(StageCommitError):
        dvc.commit(stage.path)

    # The same commit goes through when forced.
    dvc.commit(stage.path, force=True)
Exemple #10
0
    def dump(self):
        """Write this stage to ``self.path`` and hand the file to the SCM.

        The freshly dumped data is merged into ``self._state`` with
        ``apply_diff`` and ``self._state`` is what gets written.
        """
        path = self.path
        self._check_dvc_filename(path)

        message = "Saving information to '{file}'.".format(file=relpath(path))
        logger.info(message)

        apply_diff(self.dumpd(), self._state)
        dump_stage_file(path, self._state)

        self.repo.scm.track_file(relpath(path))
Exemple #11
0
    def dump(self, stage, **kwargs):
        """Write the lock data of ``stage`` into this lock file.

        An existing file is parsed and updated with the new data; otherwise
        an empty file is created first and the new data becomes the whole
        content. The file is then handed to the SCM for tracking. Extra
        keyword arguments are accepted but not used here.
        """
        stage_data = serialize.to_lockfile(stage)

        if self.exists():
            with self.repo.tree.open(self.path, "r") as fd:
                data = parse_stage_for_update(fd.read(), self.path)
            data.update(stage_data)
        else:
            data = stage_data
            # Create (or truncate) the file before it is written below.
            with open(self.path, "w+"):
                pass

        dump_stage_file(self.path, data)
        self.repo.scm.track_file(relpath(self.path))
Exemple #12
0
def test_commit_changed_md5(dvc_repo, repo_dir):
    """Committing a stage whose md5 was edited by hand needs ``force=True``."""
    added = dvc_repo.add(repo_dir.FOO, no_commit=True)
    assert len(added) == 1
    stage = added[0]

    # Overwrite the md5 recorded in the stage file.
    content = load_stage_file(stage.path)
    content["md5"] = "1111111111"
    dump_stage_file(stage.path, content)

    with pytest.raises(StageCommitError):
        dvc_repo.commit(stage.path)

    # The same commit goes through when forced.
    dvc_repo.commit(stage.path, force=True)
Exemple #13
0
def test_meta_is_preserved(dvc_repo):
    """A hand-added ``meta`` section survives a load/dump round trip."""
    (stage,) = dvc_repo.add("foo")

    # Inject a "meta" section into the stage file on disk.
    data = load_stage_file(stage.path)
    data["meta"] = {"custom_key": 42}
    dump_stage_file(stage.path, data)

    # Load the stage from that file and write it straight back.
    reloaded = Stage.load(dvc_repo, stage.path)
    reloaded.dump()

    written = load_stage_file(stage.path)
    assert written["meta"] == data["meta"]
Exemple #14
0
def test_meta_is_preserved(tmp_dir, dvc):
    """A hand-added ``meta`` section survives a load/dump round trip."""
    generated = tmp_dir.dvc_gen("foo", "foo content")
    (stage,) = generated

    # Inject a "meta" section into the DVC-file on disk.
    data = load_stage_file(stage.path)
    data["meta"] = {"custom_key": 42}
    dump_stage_file(stage.path, data)

    # Load the stage from that file and write it straight back.
    reloaded = Stage.load(dvc, stage.path)
    reloaded.dump()

    written = load_stage_file(stage.path)
    assert written["meta"] == data["meta"]
Exemple #15
0
    def test_nested(self):
        """A stage file nested inside another stage's output is rejected.

        Layout built by the test:

            .
            |-- a
            |  |__ nested
            |     |__ dir
            |       |__ error.dvc     (stage.cwd == 'a/nested/dir')
            |__ b
               |__ nested.dvc         (stage.out == 'a/nested')
        """
        from dvc.stage import Stage

        dir1 = "b"
        dir2 = "a"
        os.mkdir(dir1)
        os.mkdir(dir2)

        nested_dir = os.path.join(dir2, "nested")
        # Path of a/nested as seen from b, i.e. ../a/nested.
        out_dir = os.path.relpath(nested_dir, dir1)

        nested_stage = self.dvc.run(
            cwd=dir1,
            outs=[out_dir],
            cmd="mkdir {path}".format(path=out_dir),
        )

        error_dir = os.path.join(nested_dir, "dir")
        os.mkdir(error_dir)
        error_stage_path = os.path.join(error_dir, "error.dvc")

        output = os.path.join("..", "..", "something")
        dump_stage_file(
            error_stage_path,
            {
                "cmd": "echo something > {}".format(output),
                "outs": [{"path": output}],
            },
        )

        # NOTE: os.walk() walks in a sorted order and we need dir2 subdirs to
        # be processed before dir1 to load error.dvc first.
        with patch.object(DvcRepo, "stages") as mock_stages:
            error_stage = Stage.load(self.dvc, error_stage_path)
            mock_stages.return_value = [nested_stage, error_stage]

            with self.assertRaises(StagePathAsOutputError):
                self.dvc.reproduce(error_stage_path)
Exemple #16
0
def test_cyclic_graph_error(tmp_dir, dvc, run_copy):
    """Adding a stage that closes a dependency loop makes reproduce fail."""
    tmp_dir.gen("foo", "foo")
    # Chain of copy stages: foo -> bar -> baz -> foobar.
    run_copy("foo", "bar", name="copy-foo-bar")
    run_copy("bar", "baz", name="copy-bar-baz")
    run_copy("baz", "foobar", name="copy-baz-foobar")

    with open(PIPELINE_FILE, "r") as f:
        pipeline = parse_stage(f.read(), PIPELINE_FILE)

    # This stage depends on baz and writes foo, which closes the loop.
    pipeline["stages"]["copy-baz-foo"] = {
        "cmd": "echo baz > foo",
        "deps": ["baz"],
        "outs": ["foo"],
    }
    dump_stage_file(PIPELINE_FILE, pipeline)

    with pytest.raises(CyclicGraphError):
        dvc.reproduce(":copy-baz-foo")
Exemple #17
0
    def test_nested(self):
        """A stage file nested inside another stage's output is rejected.

        Layout built by the test:

            .
            |-- a
            |  |__ nested
            |     |__ dir
            |       |__ error.dvc     (stage.cwd == 'a/nested/dir')
            |__ b
               |__ nested.dvc         (stage.out == 'a/nested')
        """
        dir1 = "b"
        dir2 = "a"
        os.mkdir(dir1)
        os.mkdir(dir2)

        nested_dir = os.path.join(dir2, "nested")
        # Path of a/nested as seen from b, i.e. ../a/nested.
        out_dir = relpath(nested_dir, dir1)

        nested_stage = self.dvc.run(
            fname=os.path.join(dir1, "b.dvc"),
            wdir=dir1,
            outs=[out_dir],
            cmd="mkdir {path}".format(path=out_dir),
        )

        error_dir = os.path.join(nested_dir, "dir")
        os.mkdir(error_dir)
        error_stage_path = os.path.join(error_dir, "error.dvc")

        output = os.path.join("..", "..", "something")
        dump_stage_file(
            error_stage_path,
            {
                "cmd": "echo something > {}".format(output),
                "outs": [{"path": output}],
            },
        )

        # NOTE: os.walk() walks in a sorted order and we need dir2 subdirs to
        # be processed before dir1 to load error.dvc first.
        error_stage = Stage.load(self.dvc, error_stage_path)
        self.dvc.stages = [nested_stage, error_stage]

        # `_reset` is patched out to prevent `stages` resetting.
        with patch.object(self.dvc, "_reset"):
            with self.assertRaises(StagePathAsOutputError):
                self.dvc.reproduce(error_stage_path)
Exemple #18
0
 def dump(self, stage, **kwargs):
     """Write the lock data of ``stage`` into this lock file.

     The file is always written. It is handed to the SCM for tracking only
     when it was newly created or when the entry for ``stage.name`` differs
     from the one already stored. Extra keyword arguments are accepted but
     not used here.
     """
     stage_data = serialize.to_lockfile(stage)
     if self.exists():
         with self.repo.tree.open(self.path, "r") as fd:
             data = parse_stage_for_update(fd.read(), self.path)
         # Compare only this stage's entry, before the data is merged in.
         stored_entry = data.get(stage.name, {})
         new_entry = stage_data.get(stage.name, {})
         modified = stored_entry != new_entry
         if modified:
             logger.info("Updating lock file '%s'", self.relpath)
         data.update(stage_data)
     else:
         modified = True
         logger.info("Generating lock file '%s'", self.relpath)
         data = stage_data
         # Create (or truncate) the file before it is written below.
         with open(self.path, "w+"):
             pass
     dump_stage_file(self.path, data)
     if modified:
         self.repo.scm.track_file(self.relpath)
Exemple #19
0
    def test(self):
        """Reproducing a stage that closes a dependency loop is rejected."""
        # Chain of stages: self.FOO -> bar.txt -> baz.txt.
        self.dvc.run(
            deps=[self.FOO], outs=["bar.txt"], cmd="echo bar > bar.txt"
        )
        self.dvc.run(
            deps=["bar.txt"], outs=["baz.txt"], cmd="echo baz > baz.txt"
        )

        # A hand-written stage that depends on baz.txt and outputs self.FOO.
        cycle_stage_path = "cycle.dvc"
        dump_stage_file(
            cycle_stage_path,
            {
                "cmd": "echo baz > foo",
                "deps": [{"path": "baz.txt"}],
                "outs": [{"path": self.FOO}],
            },
        )

        with self.assertRaises(CyclicGraphError):
            self.dvc.reproduce(cycle_stage_path)
Exemple #20
0
    def _dump_pipeline_file(self, stage):
        """Insert or update ``stage`` in the pipeline file at ``self.path``.

        An existing entry with the same name is updated in place through
        ``apply_diff``; otherwise the serialized stage is added under
        ``"stages"``. The file is then handed to the SCM for tracking.
        """
        if self.exists():
            with open(self.path) as fd:
                data = parse_stage_for_update(fd.read(), self.path)
        else:
            data = {}
            # Create (or truncate) the file before it is written below.
            with open(self.path, "w+"):
                pass

        data["stages"] = data.get("stages", {})
        stage_data = serialize.to_pipeline_file(stage)

        existing_entry = data["stages"].get(stage.name)
        if existing_entry:
            # Apply the new values onto the entry already in the file.
            apply_diff(stage_data[stage.name], existing_entry)
        else:
            data["stages"].update(stage_data)

        dump_stage_file(self.path, data)
        self.repo.scm.track_file(relpath(self.path))
Exemple #21
0
    def save(self, stage):
        """Write the lock data of ``stage`` into the cache directory.

        Nothing is written when no hash can be computed for the stage or
        when ``_load_cache`` already finds an entry for the same key/value
        pair.
        """
        key = _get_stage_hash(stage)
        if not key:
            return

        lock_data = to_single_stage_lockfile(stage)
        value = _get_cache_hash(lock_data)

        already_cached = self._load_cache(key, value)
        if already_cached:
            return

        # Sanity check: validate the data against the schema before writing.
        COMPILED_LOCK_FILE_STAGE_SCHEMA(lock_data)

        target_path = self._get_cache_path(key, value)
        makedirs(os.path.dirname(target_path), exist_ok=True)
        dump_stage_file(target_path, lock_data)
Exemple #22
0
def test_repro_when_new_outs_added_does_not_exist(tmp_dir, dvc):
    """Reproduction fails when a declared output is not produced.

    The stage runs the generated ``copy.py`` with ``foo`` and ``foobar`` and
    declares both ``foobar`` and ``bar`` as outputs; ``bar`` is never passed
    to the script.
    """
    from dvc.exceptions import ReproductionError

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen("foo", "foo")
    dump_stage_file(
        PIPELINE_FILE,
        {
            "stages": {
                "run-copy": {
                    # Run the script generated above. Without the ".py"
                    # suffix the interpreter cannot find the file, so the
                    # stage would fail before output checks are reached.
                    "cmd": "python copy.py {} {}".format("foo", "foobar"),
                    "deps": ["foo"],
                    "outs": ["foobar", "bar"],
                }
            }
        },
    )
    with pytest.raises(ReproductionError):
        dvc.reproduce(":run-copy")
Exemple #23
0
def test_repro_when_new_out_overlaps_others_stage_outs(tmp_dir, dvc):
    """An output located under an already-added directory is rejected."""
    from dvc.exceptions import OverlappingOutputPathsError

    tmp_dir.gen({"dir": {"file1": "file1"}, "foo": "foo"})
    dvc.add("dir")

    # The new stage declares dir/foo, which lies inside the added "dir".
    pipeline = {
        "stages": {
            "run-copy": {
                "cmd": "python copy {} {}".format("foo", "dir/foo"),
                "deps": ["foo"],
                "outs": ["dir/foo"],
            }
        }
    }
    dump_stage_file(PIPELINE_FILE, pipeline)

    with pytest.raises(OverlappingOutputPathsError):
        dvc.reproduce(":run-copy")
Exemple #24
0
    def test(self):
        """A hand-edited md5 is kept when the stage is loaded and dumped."""
        added = self.dvc.add(self.FOO)
        self.assertEqual(len(added), 1)
        stage = added[0]
        self.assertTrue(stage is not None)

        content = load_stage_file(stage.relpath)

        # NOTE: checking that reloaded stage didn't change its checksum
        md5 = "11111111111111111111111111111111"
        content[stage.PARAM_MD5] = md5
        dump_stage_file(stage.relpath, content)

        reloaded = Stage.load(self.dvc, stage.relpath)
        self.assertTrue(reloaded is not None)
        reloaded.dump()

        written = load_stage_file(reloaded.relpath)
        self.assertEqual(written[reloaded.PARAM_MD5], md5)
Exemple #25
0
def test_repro_when_new_outs_is_added_in_dvcfile(tmp_dir, dvc):
    """The stage is reproduced after an output is added to it by hand."""
    from dvc.dvcfile import Dvcfile

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen({"foo": "foo", "bar": "bar"})
    stage = dvc.run(
        cmd="python copy.py {} {}".format("foo", "foobar"),
        outs=[],  # scenario where user forgot to add
        deps=["foo"],
        name="copy-file",
    )
    target = ":copy-file"
    assert not dvc.reproduce(target)

    # Declare the missing output directly in the pipeline file.
    pipeline, _ = Dvcfile(dvc, stage.path)._load()
    pipeline["stages"]["copy-file"]["outs"] = ["foobar"]
    dump_stage_file(stage.path, pipeline)

    reproduced = dvc.reproduce(target)
    assert reproduced[0] == stage
Exemple #26
0
def test_repro_when_new_deps_is_added_in_dvcfile(tmp_dir, dvc, run_copy):
    """The stage is reproduced after a dependency is added to it by hand."""
    from dvc.dvcfile import Dvcfile

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen({"foo": "foo", "bar": "bar"})
    stage = dvc.run(
        cmd="python copy.py {} {}".format("foo", "foobar"),
        outs=["foobar"],
        deps=["foo"],
        name="copy-file",
    )
    target = PIPELINE_FILE + ":copy-file"
    assert not dvc.reproduce(target)

    # Append copy.py to the stage's dependencies in the pipeline file.
    pipeline, _ = Dvcfile(dvc, stage.path)._load()
    pipeline["stages"]["copy-file"]["deps"] += ["copy.py"]
    dump_stage_file(stage.path, pipeline)

    reproduced = dvc.reproduce(target)
    assert reproduced[0] == stage
Exemple #27
0
    def _test(self):
        """``status -c`` warns about an output whose md5 was removed."""
        remote_url = Local.get_url()
        self.main(["remote", "add", "-d", TEST_REMOTE, remote_url])

        stage = self.dvc.run(outs=["bar"], cmd="echo bar > bar")
        self.main(["push"])

        # Strip the md5 of the first output from the stage file on disk.
        stage_file_path = stage.relpath
        content = load_stage_file(stage_file_path)
        del content["outs"][0]["md5"]
        dump_stage_file(stage_file_path, content)

        expected_warning = (
            "Output 'bar'(Stage: 'bar.dvc') is missing version info."
            " Cache for it will not be collected."
            " Use dvc repro to get your pipeline up to date."
        )

        with self._caplog.at_level(logging.WARNING, logger="dvc"):
            # Start from an empty capture so only this command is inspected.
            self._caplog.clear()
            self.main(["status", "-c"])

            assert expected_warning in self._caplog.text
Exemple #28
0
    def test_ignored_in_checksum(self):
        """Removing wdir from the stage file does not mark the stage changed."""
        stage = self.dvc.run(
            cmd="echo test > {}".format(self.FOO),
            deps=[self.BAR],
            outs=[self.FOO],
        )

        # Both the in-memory dump and the file on disk record wdir as ".".
        dumped = stage.dumpd()
        self.assertEqual(dumped[stage.PARAM_WDIR], ".")

        on_disk = load_stage_file(stage.relpath)
        self.assertEqual(on_disk[stage.PARAM_WDIR], ".")

        # Drop the key and check that it is really gone from the file.
        del on_disk[stage.PARAM_WDIR]
        dump_stage_file(stage.relpath, on_disk)

        on_disk = load_stage_file(stage.relpath)
        self.assertIsNone(on_disk.get(stage.PARAM_WDIR))

        with self.dvc.state:
            stage = Stage.load(self.dvc, stage.relpath)
            self.assertFalse(stage.changed())
Exemple #29
0
    def test_similar_paths(self):
        """A directory that only shares a name prefix with an output is fine.

        Layout built by the test:

            .
            |-- something.dvc   (out.path == something)
            |-- something
            |__ something-1
               |-- a
               |__ a.dvc        (stage.cwd == something-1)
        """
        self.dvc.run(outs=["something"], cmd="mkdir something")

        os.mkdir("something-1")

        # Hand-write a stage file inside the similarly named directory.
        stage_path = os.path.join("something-1", "a.dvc")
        dump_stage_file(
            stage_path,
            {"cmd": "echo a > a", "outs": [{"path": "a"}]},
        )

        try:
            self.dvc.reproduce(stage_path)
        except StagePathAsOutputError:
            self.fail("should not raise StagePathAsOutputError")