Esempio n. 1
0
    def is_hardlink(path):
        """Tell whether ``path`` has more than one link pointing at it.

        On Unix the link count is taken from ``os.stat``; otherwise it is
        read from the ``nNumberOfLinks`` field of ``System._getdirinfo``.
        """
        path = fspath(path)

        if not System.is_unix():
            return System._getdirinfo(path).nNumberOfLinks > 1

        return os.stat(path).st_nlink > 1
Esempio n. 2
0
def test_non_cached_output(tmp_dir, erepo_dir, monkeypatch):
    """``Repo.get`` can fetch an output declared with ``outs_no_cache``."""
    source = "non_cached_file"
    target = source + "_imported"
    stage_file = source + ".dvc"

    with monkeypatch.context() as patcher:
        # The stage is run and committed from inside the external repo.
        patcher.chdir(fspath(erepo_dir))
        erepo_dir.dvc.run(
            cmd="echo hello > non_cached_file", outs_no_cache=[source]
        )
        erepo_dir.scm.add([source, stage_file])
        erepo_dir.scm.commit("add non-cached output")

    Repo.get(fspath(erepo_dir), source, target)

    fetched = tmp_dir / target
    assert fetched.is_file()
    # NOTE: `echo` output differs on win and *nix, hence the strip()
    assert fetched.read_text().strip() == "hello"
Esempio n. 3
0
def test_import_rev(tmp_dir, scm, dvc, erepo_dir):
    """Import ``foo`` at ``rev="branch"``; check its content and git-ignore."""
    with erepo_dir.chdir(), erepo_dir.branch("branch", new=True):
        erepo_dir.dvc_gen("foo", "foo content", commit="create foo on branch")

    imported_name = "foo_imported"
    dvc.imp(fspath(erepo_dir), "foo", imported_name, rev="branch")

    imported = tmp_dir / imported_name
    assert imported.read_text() == "foo content"
    assert scm.repo.git.check_ignore(imported_name)
Esempio n. 4
0
def test_get_repo_file(tmp_dir, erepo_dir):
    """``Repo.get`` copies a ``dvc_gen``-created file out of the source repo."""
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("file", "contents", commit="create file")

    out_name = "file_imported"
    Repo.get(fspath(erepo_dir), "file", out_name)

    # The first check resolves the name relative to the current directory.
    assert os.path.isfile(out_name)
    assert (tmp_dir / out_name).read_text() == "contents"
Esempio n. 5
0
def test_import_git_file(erepo_dir, tmp_dir, dvc, scm, src_is_dvc):
    """Import a file created with ``scm_gen`` in the source repo.

    When ``src_is_dvc`` is false, ``.dvc`` is first removed from the source
    repo's git index and that removal is committed.
    """
    if not src_is_dvc:
        source_scm = erepo_dir.dvc.scm
        source_scm.repo.index.remove([".dvc"], r=True)
        source_scm.commit("remove .dvc")

    src, dst = "some_file", "some_file_imported"

    erepo_dir.scm_gen({src: "hello"}, commit="add a regular file")

    tmp_dir.dvc.imp(fspath(erepo_dir), src, dst)

    imported = tmp_dir / dst
    original_path = fspath(erepo_dir / src)
    imported_path = fspath(imported)

    assert imported.is_file()
    assert filecmp.cmp(original_path, imported_path, shallow=False)
    assert tmp_dir.scm.repo.git.check_ignore(fspath(imported))
Esempio n. 6
0
def test_should_remove_outs_before_import(tmp_dir, dvc, mocker, erepo_dir):
    """``import-url`` via the CLI calls ``Stage.remove_outs`` exactly once."""
    erepo_dir.gen({"foo": "foo"})

    spy = mocker.spy(Stage, "remove_outs")
    argv = ["import-url", fspath(erepo_dir / "foo")]
    exit_code = main(argv)

    assert exit_code == 0
    assert spy.mock.call_count == 1
Esempio n. 7
0
def local_remote(request, tmp_dir, dvc, make_tmp_dir):
    """Register a fresh directory as the default remote ``upstream``.

    The repo-level config file is committed when the ``scm`` fixture is
    also requested. Returns the remote directory.
    """
    remote_dir = make_tmp_dir("local-remote")

    with dvc.config.edit() as config:
        config["remote"]["upstream"] = {"url": fspath(remote_dir)}
        config["core"]["remote"] = "upstream"

    uses_scm = "scm" in request.fixturenames
    if uses_scm:
        repo_config = dvc.config.files["repo"]
        tmp_dir.scm_add([repo_config], commit="add remote")

    return remote_dir
Esempio n. 8
0
def test_pull_non_workspace(tmp_dir, scm, dvc, erepo_dir):
    """``fetch(all_tags=True)`` restores cache for a tagged, replaced import."""
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("foo", "master content", commit="create foo")

        with erepo_dir.branch("branch", new=True):
            erepo_dir.dvc_gen("foo", "branch content", commit="modify foo")

    repo_url = fspath(erepo_dir)
    tagged_stage = dvc.imp(repo_url, "foo", "foo_imported", rev="branch")
    tmp_dir.scm_add([tagged_stage.relpath], commit="imported branch")
    scm.tag("ref-to-branch")

    # Replace the workspace import with the one from master.
    dvc.imp(repo_url, "foo", "foo_imported", rev="master")

    cached_file = tagged_stage.outs[0].cache_path
    os.remove(cached_file)
    dvc.fetch(all_tags=True)
    assert os.path.exists(cached_file)
Esempio n. 9
0
def test_update_import(tmp_dir, dvc, erepo_dir, cached):
    """Update an import after new commits land on the imported branch.

    ``cached`` picks whether the source file is produced with ``dvc_gen``
    or with ``scm_gen`` in the external repo.
    """
    if cached:
        make_file = erepo_dir.dvc_gen
    else:
        make_file = erepo_dir.scm_gen
    repo_url = fspath(erepo_dir)

    with erepo_dir.branch("branch", new=True), erepo_dir.chdir():
        make_file("version", "branch", "add version file")
        old_rev = erepo_dir.scm.get_rev()

    stage = dvc.imp(repo_url, "version", "version", rev="branch")

    imported = tmp_dir / "version"
    assert imported.is_file()
    assert imported.read_text() == "branch"
    expected_before = {"url": repo_url, "rev": "branch", "rev_lock": old_rev}
    assert stage.deps[0].def_repo == expected_before

    with erepo_dir.branch("branch", new=False), erepo_dir.chdir():
        make_file("version", "updated", "update version content")
        new_rev = erepo_dir.scm.get_rev()

    assert old_rev != new_rev

    # Caching in external repos doesn't see upstream updates within single
    # cli call, so we need to clean the caches to see the changes.
    clean_repos()

    (status,) = dvc.status([stage.path])["version.dvc"]
    (changed_dep,) = list(status["changed deps"].items())
    dep_name, dep_state = changed_dep[0], changed_dep[1]
    assert dep_name.startswith("version ")
    assert dep_state == "update available"

    dvc.update(stage.path)

    assert dvc.status([stage.path]) == {}

    assert imported.is_file()
    assert imported.read_text() == "updated"

    # Reload the stage from disk to inspect the rewritten lock.
    stage = Stage.load(dvc, stage.path)
    expected_after = {"url": repo_url, "rev": "branch", "rev_lock": new_rev}
    assert stage.deps[0].def_repo == expected_after
Esempio n. 10
0
def dvc(tmp_dir, request):
    """Initialise a DVC repo in ``tmp_dir``, yield it, and close it afterwards.

    When the ``scm`` fixture is requested too, the repo is initialised with
    SCM support (running ``_git_init()`` first if ``tmp_dir`` has no ``scm``
    attribute) and the initialisation is committed. Otherwise the repo is
    created with ``no_scm=True``.
    """
    from dvc.repo import Repo

    if "scm" not in request.fixturenames:
        repo = Repo.init(fspath(tmp_dir), no_scm=True)
    else:
        if not hasattr(tmp_dir, "scm"):
            _git_init()

        repo = Repo.init(fspath(tmp_dir))
        repo.scm.commit("init dvc")

    try:
        # Expose the repo on the directory helper as ``tmp_dir.dvc``.
        tmp_dir.dvc = repo
        yield repo
    finally:
        repo.close()
Esempio n. 11
0
def test_get_url_requires_dvc(tmp_dir, scm):
    """``api.get_url`` raises for a repo given as a path or a file:// URL."""
    tmp_dir.scm_gen({"foo": "foo"}, commit="initial")

    plain_path = fspath(tmp_dir)
    with pytest.raises(UrlNotDvcRepoError, match="not a DVC repository"):
        api.get_url("foo", repo=plain_path)

    file_url = "file://{}".format(tmp_dir)
    with pytest.raises(UrlNotDvcRepoError):
        api.get_url("foo", repo=file_url)
Esempio n. 12
0
def test_get_url_positive(tmp_dir, erepo_dir, caplog):
    """``get --show-url`` exits with 0 and logs nothing at ERROR level."""
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("foo", "foo")

    caplog.clear()
    argv = ["get", fspath(erepo_dir), "foo", "--show-url"]
    with caplog.at_level(logging.ERROR, logger="dvc"):
        exit_code = main(argv)
        assert exit_code == 0
        assert caplog.text == ""
Esempio n. 13
0
def test_cache_type_is_properly_overridden(tmp_dir, erepo_dir, monkeypatch):
    """A file symlinked in the source repo is not a symlink after ``get``."""
    source_repo = erepo_dir.dvc

    with monkeypatch.context() as patcher:
        patcher.chdir(fspath(erepo_dir))
        source_repo.config.set(
            Config.SECTION_CACHE, Config.SECTION_CACHE_TYPE, "symlink"
        )
        # Rebuild the cache object after changing the cache type setting.
        source_repo.cache = Cache(source_repo)
        erepo_dir.scm_add(
            [source_repo.config.config_file], "set cache type to symlinks"
        )
        erepo_dir.dvc_gen("file", "contents", "create file")
    assert System.is_symlink(erepo_dir / "file")

    out_name = "file_imported"
    Repo.get(fspath(erepo_dir), "file", out_name)

    assert not System.is_symlink(out_name)
    assert (tmp_dir / out_name).read_text() == "contents"
Esempio n. 14
0
def test_update_import_after_remote_updates_to_dvc(tmp_dir, dvc, erepo_dir):
    """Update an import whose source file moved from git to DVC tracking."""
    repo_url = fspath(erepo_dir)

    old_rev = None
    with erepo_dir.branch("branch", new=True), erepo_dir.chdir():
        erepo_dir.scm_gen("version", "branch", commit="add version file")
        old_rev = erepo_dir.scm.get_rev()

    stage = dvc.imp(repo_url, "version", "version", rev="branch")

    imported = tmp_dir / "version"
    assert imported.is_file()
    assert imported.read_text() == "branch"
    expected_before = {"url": repo_url, "rev": "branch", "rev_lock": old_rev}
    assert stage.deps[0].def_repo == expected_before

    new_rev = None
    with erepo_dir.branch("branch", new=False), erepo_dir.chdir():
        # Drop the file from the git index, then regenerate it via DVC.
        erepo_dir.scm.repo.index.remove(["version"])
        erepo_dir.dvc_gen("version", "updated")
        erepo_dir.scm.add(["version", "version.dvc"])
        erepo_dir.scm.commit("upgrade to DVC tracking")
        new_rev = erepo_dir.scm.get_rev()

    assert old_rev != new_rev

    (status,) = dvc.status([stage.path])["version.dvc"]
    (changed_dep,) = list(status["changed deps"].items())
    dep_name, dep_state = changed_dep[0], changed_dep[1]
    assert dep_name.startswith("version ")
    assert dep_state == "update available"

    dvc.update(stage.path)

    assert dvc.status([stage.path]) == {}

    assert imported.is_file()
    assert imported.read_text() == "updated"

    # Reload the stage from disk to inspect the rewritten lock.
    stage = Stage.load(dvc, stage.path)
    expected_after = {"url": repo_url, "rev": "branch", "rev_lock": new_rev}
    assert stage.deps[0].def_repo == expected_after
Esempio n. 15
0
def test_plot_multiple_revs(tmp_dir, scm, dvc):
    """Plot one metric file across ``HEAD``, ``v2`` and ``v1``."""
    default_template = tmp_dir / ".dvc" / "plot" / "default.json"
    shutil.copy(fspath(default_template), "template.json")

    metric_1 = [{"y": 2}, {"y": 3}]
    _write_json(tmp_dir, metric_1, "metric.json")
    _run_with_metric(tmp_dir, "metric.json", "init", "v1")

    metric_2 = [{"y": 3}, {"y": 5}]
    _write_json(tmp_dir, metric_2, "metric.json")
    _run_with_metric(tmp_dir, "metric.json", "second", "v2")

    metric_3 = [{"y": 5}, {"y": 6}]
    _write_json(tmp_dir, metric_3, "metric.json")
    _run_with_metric(tmp_dir, "metric.json", "third")

    plot_string = dvc.plot(
        "metric.json",
        template="template.json",
        revisions=["HEAD", "v2", "v1"],
    )

    # Expected rows, in the order the revisions were requested; every row
    # carries its position within its own revision.
    expected_values = []
    for rev, ys in (("HEAD", (5, 6)), ("v2", (3, 5)), ("v1", (2, 3))):
        for index, y in enumerate(ys):
            expected_values.append(
                {"y": y, PlotData.INDEX_FIELD: index, "rev": rev}
            )

    plot_content = json.loads(plot_string)
    assert plot_content["data"]["values"] == expected_values

    encoding = plot_content["encoding"]
    assert encoding["x"]["field"] == PlotData.INDEX_FIELD
    assert encoding["y"]["field"] == "y"
Esempio n. 16
0
def test_external_dir_resource_on_no_cache(tmp_dir, dvc, tmp_path_factory):
    # https://github.com/iterative/dvc/issues/2647: in some situations
    # (external dir dependency) the cache is required to calculate dir md5
    outside_dir = tmp_path_factory.mktemp("external_dir")
    outside_dir.joinpath("file").write_text("content")

    dvc.cache.local = None
    dependencies = [fspath(outside_dir)]
    with pytest.raises(RemoteCacheRequiredError):
        dvc.run(deps=dependencies)
Esempio n. 17
0
    def pull_to(self, path, to_info):
        """Copy ``path`` (relative to ``self.root_dir``) to ``to_info``.

        Raises ``PathMissingError`` when ``path`` is absolute or when the
        copy raises ``FileNotFoundError``.
        """
        try:
            # Absolute paths are rejected: git-handled files can't have them.
            if os.path.isabs(path):
                raise FileNotFoundError

            source = os.path.join(self.root_dir, path)
            destination = fspath(to_info)
            fs_copy(source, destination)
        except FileNotFoundError:
            raise PathMissingError(path, self.url)
Esempio n. 18
0
def test_import(tmp_dir, scm, dvc, erepo_dir):
    """Import ``foo``; check that it exists, its content, and its git-ignore."""
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("foo", "foo content", commit="create foo")

    imported_name = "foo_imported"
    dvc.imp(fspath(erepo_dir), "foo", imported_name)

    # The first check resolves the name relative to the current directory.
    assert os.path.isfile(imported_name)
    assert (tmp_dir / imported_name).read_text() == "foo content"
    assert scm.repo.git.check_ignore(imported_name)
Esempio n. 19
0
def test_import_cached_file(erepo_dir, tmp_dir, dvc, scm, monkeypatch):
    """Import succeeds while ``DataCloud.pull`` is patched to raise."""
    src, dst = "some_file", "some_file_imported"

    with erepo_dir.chdir():
        erepo_dir.dvc_gen({src: "hello"}, commit="add a regular file")

    # Generate the same content locally, then delete the workspace copy.
    tmp_dir.dvc_gen({dst: "hello"})
    imported = tmp_dir / dst
    imported.unlink()

    pull_failure = NoRemoteError("dvc import")
    with patch.object(cloud.DataCloud, "pull", side_effect=pull_failure):
        tmp_dir.dvc.imp(fspath(erepo_dir), src, dst)

    assert imported.is_file()
    original_path = fspath(erepo_dir / src)
    assert filecmp.cmp(original_path, fspath(imported), shallow=False)
Esempio n. 20
0
    def reflink(source, link_name):
        """Create a reflink at ``link_name`` for ``source``.

        Dispatches on ``platform.system()`` to the matching
        ``System._reflink_*`` helper. Raises ``DvcException`` when the
        platform has no helper, the helper raises ``IOError``, or the helper
        returns a non-zero value.
        """
        source, link_name = fspath(source), fspath(link_name)

        helper_names = {
            "Windows": "_reflink_windows",
            "Darwin": "_reflink_darwin",
            "Linux": "_reflink_linux",
        }
        helper_name = helper_names.get(platform.system())

        try:
            if helper_name is None:
                status = -1
            else:
                # Look the helper up only for the platform actually in use.
                status = getattr(System, helper_name)(source, link_name)
        except IOError:
            status = -1

        if status != 0:
            raise DvcException("reflink is not supported")
Esempio n. 21
0
def test_ls_repo_with_target_dir(tmp_dir, dvc, scm):
    """``Repo.ls`` with ``target="model"`` returns that directory's entries."""
    tmp_dir.scm_gen(FS_STRUCTURE, commit="init")
    tmp_dir.dvc_gen(DVC_STRUCTURE, commit="dvc")

    listing = Repo.ls(fspath(tmp_dir), target="model")

    names = ("script.py", "train.py", "people.csv", "people.csv.dvc")
    expected = tuple((name,) for name in names)
    match_files(listing, expected)
Esempio n. 22
0
def test_import_stage_accompanies_target(tmp_dir, dvc, erepo_dir):
    """The ``.dvc`` file is created next to an output imported into ``dir``."""
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("file1", "file1 content", commit="commit file")

    tmp_dir.gen({"dir": {}})
    target = os.path.join("dir", "imported_file")
    dvc.imp_url("file1", out=target, erepo={"url": fspath(erepo_dir)})

    out_dir = tmp_dir / "dir"
    assert (out_dir / "imported_file").exists()
    assert (out_dir / "imported_file.dvc").exists()
Esempio n. 23
0
def test_cache_type_is_properly_overridden(tmp_dir, scm, dvc, erepo_dir,
                                           monkeypatch):
    """A file symlinked in the source repo is not a symlink after import."""
    source_repo = erepo_dir.dvc

    with monkeypatch.context() as patcher:
        patcher.chdir(fspath(erepo_dir))
        source_repo.config.set(
            Config.SECTION_CACHE, Config.SECTION_CACHE_TYPE, "symlink"
        )
        # Rebuild the cache object after changing the cache type setting.
        source_repo.cache = Cache(source_repo)
        erepo_dir.scm_add(
            [source_repo.config.config_file],
            "set source repo cache type to symlink",
        )
        erepo_dir.dvc_gen("foo", "foo content", "create foo")
    assert System.is_symlink(erepo_dir / "foo")

    imported_name = "foo_imported"
    dvc.imp(fspath(erepo_dir), "foo", imported_name)

    assert not System.is_symlink(imported_name)
    assert (tmp_dir / imported_name).read_text() == "foo content"
    assert scm.repo.git.check_ignore(imported_name)
Esempio n. 24
0
def test_import_git_dir(erepo_dir, tmp_dir, dvc, scm, src_is_dvc):
    """Import a directory created with ``scm_gen`` in the source repo.

    When ``src_is_dvc`` is false, ``.dvc`` is first removed from the source
    repo's git index and that removal is committed.
    """
    if not src_is_dvc:
        source_scm = erepo_dir.dvc.scm
        source_scm.repo.index.remove([".dvc"], r=True)
        source_scm.commit("remove .dvc")

    src, dst = "some_directory", "some_directory_imported"

    erepo_dir.scm_gen({src: {"file.txt": "hello"}}, commit="add a dir")

    repo_url = fspath(erepo_dir)
    stage = dvc.imp(repo_url, src, dst)

    imported = tmp_dir / dst
    assert imported.is_dir()
    trees_equal(fspath(erepo_dir / src), fspath(imported))
    assert tmp_dir.scm.repo.git.check_ignore(fspath(imported))

    expected_repo = {"url": repo_url, "rev_lock": erepo_dir.scm.get_rev()}
    assert stage.deps[0].def_repo == expected_repo
Esempio n. 25
0
def test_get_to_dir(tmp_dir, erepo_dir, dname):
    """``Repo.get`` into an existing directory places the file inside it."""
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("file", "contents", commit="create file")

    makedirs(dname, exist_ok=True)

    Repo.get(fspath(erepo_dir), "file", dname)

    out_dir = tmp_dir / dname
    assert out_dir.is_dir()
    assert (out_dir / "file").read_text() == "contents"
Esempio n. 26
0
def test_get_git_file(tmp_dir, erepo_dir):
    """``Repo.get`` fetches a file created with ``scm_gen`` in the source."""
    src, dst = "some_file", "some_file_imported"

    erepo_dir.scm_gen({src: "hello"}, commit="add a regular file")

    Repo.get(fspath(erepo_dir), src, dst)

    fetched = tmp_dir / dst
    assert fetched.is_file()
    assert fetched.read_text() == "hello"
Esempio n. 27
0
def test_ls_repo_with_path_subdir_outs_only_recursive(tmp_dir, dvc, scm):
    """Recursive, outs-only ``Repo.ls`` of the ``data/subcontent`` path."""
    tmp_dir.scm_gen(FS_STRUCTURE, commit="init")
    tmp_dir.dvc_gen(DVC_STRUCTURE, commit="dvc")

    subdir = os.path.join("data", "subcontent")
    listing = Repo.ls(fspath(tmp_dir), subdir, outs_only=True, recursive=True)

    expected = (
        (("data.xml",), True),
        (("statistics", "data.csv"), True),
    )
    match_files(listing, expected)
Esempio n. 28
0
def test_gc_cloud_positive(tmp_dir, dvc, tmp_path_factory):
    """``gc`` exits with 0 for each cloud flag combo with a local remote set."""
    with dvc.config.edit() as config:
        remote_url = fspath(tmp_path_factory.mktemp("test_remote_base"))
        config["remote"]["local_remote"] = {"url": remote_url}
        config["core"]["remote"] = "local_remote"

    dvc.push()

    cloud_flags = ("-cw", "-ca", "-cT", "-caT", "-cwT")
    for flag in cloud_flags:
        exit_code = main(["gc", "-vf", flag])
        assert exit_code == 0
Esempio n. 29
0
def test_ls_repo_with_target_subdir_outs_only(tmp_dir, dvc, scm):
    """Outs-only, non-recursive ``Repo.ls`` of ``data/subcontent``."""
    tmp_dir.scm_gen(FS_STRUCTURE, commit="init")
    tmp_dir.dvc_gen(DVC_STRUCTURE, commit="dvc")

    subdir = os.path.join("data", "subcontent")
    listing = Repo.ls(fspath(tmp_dir), subdir, outs_only=True)

    expected = (
        (("data.xml",), True),
        (("statistics",), False),
    )
    match_files(listing, expected)
Esempio n. 30
0
def test_update_import_url(tmp_dir, dvc, tmp_path_factory):
    """``dvc.update`` refreshes a URL import after its source file changes."""
    source_dir = tmp_path_factory.mktemp("import_url_source")
    source_file = source_dir / "file"
    source_file.write_text("file content")

    imported = tmp_dir / "imported_file"
    stage = dvc.imp_url(fspath(source_file), fspath(imported))

    assert imported.is_file()
    assert imported.read_text() == "file content"

    # Change the data at the source.
    source_file.write_text("updated file content")

    # Status is expected to be empty both before and after the update.
    targets = [stage.path]
    assert dvc.status(targets) == {}
    dvc.update(stage.path)
    assert dvc.status(targets) == {}

    assert imported.is_file()
    assert imported.read_text() == "updated file content"