Example #1
0
def test_verify_checksums(tmp_dir, scm, dvc, mocker, tmp_path_factory):
    """Checksums are recomputed on pull only when the remote has verify on."""
    tmp_dir.dvc_gen({"file": "file1 content"}, commit="add file")
    tmp_dir.dvc_gen({"dir": {"subfile": "file2 content"}}, commit="add dir")

    remote_path = fspath(tmp_path_factory.mktemp("local_remote"))
    RemoteConfig(dvc.config).add("local_remote", remote_path, default=True)
    dvc.push()

    # Drop the workspace artifacts and the cache so pull has to fetch.
    os.remove("file")
    shutil.rmtree("dir")
    shutil.rmtree(dvc.cache.local.cache_dir)

    spy = mocker.spy(dvc.cache.local, "get_file_checksum")

    # Without verification, pull trusts the remote and computes nothing.
    dvc.pull()
    assert spy.call_count == 0

    # Wiping the cache again invalidates the saved state entries.
    shutil.rmtree(dvc.cache.local.cache_dir)

    dvc.config.set(
        Config.SECTION_REMOTE_FMT.format("local_remote"),
        Config.SECTION_REMOTE_VERIFY,
        "True",
    )

    # With verification on, every downloaded file gets checksummed.
    dvc.pull()
    assert spy.call_count == 3
Example #2
0
def test_partial_push_n_pull(tmp_dir, dvc, tmp_path_factory):
    """A failed transfer still moves the other files and counts failures."""
    rconfig = RemoteConfig(dvc.config)
    rconfig.add(
        "upstream", fspath(tmp_path_factory.mktemp("upstream")), default=True
    )

    foo = tmp_dir.dvc_gen({"foo": "foo content"})[0].outs[0]
    bar = tmp_dir.dvc_gen({"bar": "bar content"})[0].outs[0]

    original_upload = RemoteLOCAL._upload

    # Upload stub that fails only for "foo" and delegates everything else.
    def unreliable_upload(self, from_file, to_info, name=None, **kwargs):
        if name == "foo":
            raise Exception("stop foo")
        return original_upload(self, from_file, to_info, name, **kwargs)

    with patch.object(RemoteLOCAL, "_upload", unreliable_upload):
        with pytest.raises(UploadError) as upload_error_info:
            dvc.push()
        # Exactly one file (foo) failed to upload.
        assert upload_error_info.value.amount == 1

        remote = dvc.cloud.get_remote("upstream")
        assert not remote.exists(remote.checksum_to_path_info(foo.checksum))
        assert remote.exists(remote.checksum_to_path_info(bar.checksum))

    # Push for real, then drop the local cache to force downloads.
    dvc.push()
    shutil.rmtree(dvc.cache.local.cache_dir)

    with patch.object(RemoteLOCAL, "_download", side_effect=Exception):
        with pytest.raises(DownloadError) as download_error_info:
            dvc.pull()
        # Both files failed to download.
        assert download_error_info.value.amount == 2
Example #3
0
def _external_repo(url=None, rev=None, cache_dir=None):
    """Prepare and return a path to a working copy of ``url`` at ``rev``.

    Results are memoized in the module-level ``REPO_CACHE``, keyed by
    ``(url, rev, cache_dir)``. A pristine clone is additionally cached
    under ``(url, None, None)`` so later calls for other revs can copy
    it instead of cloning from scratch.
    """
    from dvc.config import Config
    from dvc.cache import CacheConfig
    from dvc.repo import Repo

    key = (url, rev, cache_dir)
    if key in REPO_CACHE:
        return REPO_CACHE[key]

    new_path = tempfile.mkdtemp("dvc-erepo")

    # Copy and adjust existing clone
    if (url, None, None) in REPO_CACHE:
        old_path = REPO_CACHE[url, None, None]

        # This one unlike shutil.copytree() works with an existing dir
        copy_tree(old_path, new_path)
    else:
        # Create a new clone
        _clone_repo(url, new_path)

        # Save clean clone dir so that we will have access to a default branch
        clean_clone_path = tempfile.mkdtemp("dvc-erepo")
        copy_tree(new_path, clean_clone_path)
        REPO_CACHE[url, None, None] = clean_clone_path

    # Adjust new clone/copy to fit rev and cache_dir

    # Checkout needs to be done first because current branch might not be
    # DVC repository
    if rev is not None:
        _git_checkout(new_path, rev)

    repo = Repo(new_path)
    try:
        # check if the URL is local and no default remote is present
        # add default remote pointing to the original repo's cache location
        if os.path.isdir(url):
            rconfig = RemoteConfig(repo.config)
            if not _default_remote_set(rconfig):
                original_repo = Repo(url)
                try:
                    rconfig.add(
                        "auto-generated-upstream",
                        original_repo.cache.local.cache_dir,
                        default=True,
                        level=Config.LEVEL_LOCAL,
                    )
                finally:
                    # Close the source repo even if adding the remote fails.
                    original_repo.close()

        if cache_dir is not None:
            cache_config = CacheConfig(repo.config)
            cache_config.set_dir(cache_dir, level=Config.LEVEL_LOCAL)
    finally:
        # Need to close/reopen repo to force config reread
        repo.close()

    REPO_CACHE[key] = new_path
    return new_path
Example #4
0
def test_raise_on_too_many_open_files(tmp_dir, dvc, tmp_path_factory, mocker):
    """EMFILE raised during upload must propagate out of ``dvc.push()``.

    Fixes two defects in the original test:
    - the assertion was inside the ``pytest.raises`` block, *after* the
      raising call, so it was dead code and never executed;
    - it read ``e.errno`` off the ``ExceptionInfo`` wrapper instead of
      the actual exception (``e.value.errno``).
    """
    storage = tmp_path_factory.mktemp("test_remote_base")
    remote_config = RemoteConfig(dvc.config)
    remote_config.add("local_remote", fspath(storage), default=True)

    tmp_dir.dvc_gen({"file": "file content"})

    mocker.patch.object(
        RemoteLOCAL,
        "_upload",
        side_effect=OSError(errno.EMFILE, "Too many open files"),
    )

    with pytest.raises(OSError) as e:
        dvc.push()
    # Assert outside the ``with`` block; the exception is inspected via
    # the ExceptionInfo's ``value`` attribute.
    assert e.value.errno == errno.EMFILE
Example #5
0
 def _set_upstream(self):
     """For a local source repo with no default remote configured,
     register its cache directory as an auto-generated default remote."""
     # Remote (non-directory) URLs keep whatever remotes they define.
     if not os.path.isdir(self.url):
         return
     rconfig = RemoteConfig(self.config)
     if rconfig.has_default():
         return
     src_repo = Repo(self.url)
     try:
         rconfig.add(
             "auto-generated-upstream",
             src_repo.cache.local.cache_dir,
             default=True,
             level=Config.LEVEL_LOCAL,
         )
     finally:
         # Close the source repo even if adding the remote fails.
         src_repo.close()
Example #6
0
def _external_repo(url=None, rev=None, cache_dir=None):
    """Prepare and return a path to a working copy of ``url`` at ``rev``.

    Prepared copies are memoized in the module-level ``REPO_CACHE``,
    keyed by ``(url, rev, cache_dir)``.
    """
    from dvc.config import Config
    from dvc.cache import CacheConfig
    from dvc.repo import Repo

    key = (url, rev, cache_dir)
    try:
        return REPO_CACHE[key]
    except KeyError:
        pass

    new_path = cached_clone(url, rev=rev)

    repo = Repo(new_path)
    try:
        # When the URL is a local directory and no default remote is set,
        # point a default remote at the source repo's cache location.
        if os.path.isdir(url):
            rconfig = RemoteConfig(repo.config)
            if not _default_remote_set(rconfig):
                original_repo = Repo(url)
                try:
                    rconfig.add(
                        "auto-generated-upstream",
                        original_repo.cache.local.cache_dir,
                        default=True,
                        level=Config.LEVEL_LOCAL,
                    )
                finally:
                    # Close the source repo even if adding the remote fails.
                    original_repo.close()

        if cache_dir is not None:
            cache_config = CacheConfig(repo.config)
            cache_config.set_dir(cache_dir, level=Config.LEVEL_LOCAL)
    finally:
        # Close so a later Repo() on this path re-reads the updated config.
        repo.close()

    REPO_CACHE[key] = new_path
    return new_path
Example #7
0
    def test_pre_push_hook(self, tmp_dir, scm, dvc, tmp_path_factory):
        """`git push` runs the installed pre-push hook, which pushes to DVC."""
        scm.install()

        external = tmp_path_factory.mktemp("external")
        git_remote = external / "project.git"
        storage_path = external / "dvc_storage"

        RemoteConfig(dvc.config).add(
            "store", fspath(storage_path), default=True
        )
        tmp_dir.dvc_gen("file", "file_content", "commit message")

        checksum = file_md5("file")[0]
        # Cache layout: first two checksum chars form the subdirectory.
        expected_storage_path = storage_path / checksum[:2] / checksum[2:]

        scm.repo.clone(fspath(git_remote))
        scm.repo.create_remote("origin", fspath(git_remote))

        assert not expected_storage_path.is_file()
        scm.repo.git.push("origin", "master")
        # The hook must have uploaded the file to the DVC remote.
        assert expected_storage_path.is_file()
        assert expected_storage_path.read_text() == "file_content"
Example #8
0
def test_modify_missing_remote(dvc):
    """Modifying a remote that was never added raises a ConfigError."""
    rconfig = RemoteConfig(dvc.config)
    with pytest.raises(ConfigError, match=r"Unable to find remote section"):
        rconfig.modify("myremote", "gdrive_client_id", "xxx")