def test_partial_push_n_pull(tmp_dir, dvc, tmp_path_factory):
    remote_config = RemoteConfig(dvc.config)
    remote_config.add(
        "upstream", fspath(tmp_path_factory.mktemp("upstream")), default=True
    )

    foo = tmp_dir.dvc_gen({"foo": "foo content"})[0].outs[0]
    bar = tmp_dir.dvc_gen({"bar": "bar content"})[0].outs[0]

    # Faulty upload version, failing on foo
    original = RemoteLOCAL._upload

    def unreliable_upload(self, from_file, to_info, name=None, **kwargs):
        if name == "foo":
            raise Exception("stop foo")
        return original(self, from_file, to_info, name, **kwargs)

    with patch.object(RemoteLOCAL, "_upload", unreliable_upload):
        with pytest.raises(UploadError) as upload_error_info:
            dvc.push()
        assert upload_error_info.value.amount == 1

        remote = dvc.cloud.get_remote("upstream")
        assert not remote.exists(remote.checksum_to_path_info(foo.checksum))
        assert remote.exists(remote.checksum_to_path_info(bar.checksum))

    # Push everything and delete local cache
    dvc.push()
    shutil.rmtree(dvc.cache.local.cache_dir)

    with patch.object(RemoteLOCAL, "_download", side_effect=Exception):
        with pytest.raises(DownloadError) as download_error_info:
            dvc.pull()
        assert download_error_info.value.amount == 2
def _external_repo(url=None, rev=None, cache_dir=None):
    from dvc.config import Config
    from dvc.cache import CacheConfig
    from dvc.repo import Repo

    key = (url, rev, cache_dir)
    if key in REPO_CACHE:
        return REPO_CACHE[key]

    new_path = tempfile.mkdtemp("dvc-erepo")

    # Copy and adjust existing clone
    if (url, None, None) in REPO_CACHE:
        old_path = REPO_CACHE[url, None, None]

        # This one unlike shutil.copytree() works with an existing dir
        copy_tree(old_path, new_path)
    else:
        # Create a new clone
        _clone_repo(url, new_path)

        # Save clean clone dir so that we will have access to a default branch
        clean_clone_path = tempfile.mkdtemp("dvc-erepo")
        copy_tree(new_path, clean_clone_path)
        REPO_CACHE[url, None, None] = clean_clone_path

    # Adjust new clone/copy to fit rev and cache_dir

    # Checkout needs to be done first because current branch might not be
    # DVC repository
    if rev is not None:
        _git_checkout(new_path, rev)

    repo = Repo(new_path)
    try:
        # check if the URL is local and no default remote is present
        # add default remote pointing to the original repo's cache location
        if os.path.isdir(url):
            rconfig = RemoteConfig(repo.config)
            if not _default_remote_set(rconfig):
                original_repo = Repo(url)
                try:
                    rconfig.add(
                        "auto-generated-upstream",
                        original_repo.cache.local.cache_dir,
                        default=True,
                        level=Config.LEVEL_LOCAL,
                    )
                finally:
                    original_repo.close()

        if cache_dir is not None:
            cache_config = CacheConfig(repo.config)
            cache_config.set_dir(cache_dir, level=Config.LEVEL_LOCAL)
    finally:
        # Need to close/reopen repo to force config reread
        repo.close()

    REPO_CACHE[key] = new_path
    return new_path
def test_verify_checksums(tmp_dir, scm, dvc, mocker, tmp_path_factory):
    tmp_dir.dvc_gen({"file": "file1 content"}, commit="add file")
    tmp_dir.dvc_gen({"dir": {"subfile": "file2 content"}}, commit="add dir")

    RemoteConfig(dvc.config).add(
        "local_remote",
        fspath(tmp_path_factory.mktemp("local_remote")),
        default=True,
    )
    dvc.push()

    # remove artifacts and cache to trigger fetching
    os.remove("file")
    shutil.rmtree("dir")
    shutil.rmtree(dvc.cache.local.cache_dir)

    checksum_spy = mocker.spy(dvc.cache.local, "get_file_checksum")

    dvc.pull()
    assert checksum_spy.call_count == 0

    # Removing cache will invalidate existing state entries
    shutil.rmtree(dvc.cache.local.cache_dir)

    dvc.config.set(
        Config.SECTION_REMOTE_FMT.format("local_remote"),
        Config.SECTION_REMOTE_VERIFY,
        "True",
    )

    dvc.pull()
    assert checksum_spy.call_count == 3
def test_raise_on_too_many_open_files(tmp_dir, dvc, tmp_path_factory, mocker):
    storage = tmp_path_factory.mktemp("test_remote_base")
    remote_config = RemoteConfig(dvc.config)
    remote_config.add("local_remote", fspath(storage), default=True)

    tmp_dir.dvc_gen({"file": "file content"})

    mocker.patch.object(
        RemoteLOCAL,
        "_upload",
        side_effect=OSError(errno.EMFILE, "Too many open files"),
    )

    with pytest.raises(OSError) as e:
        dvc.push()

    # The raised exception is exposed on the ExceptionInfo via .value,
    # so the assertion must run outside the `with` block.
    assert e.value.errno == errno.EMFILE
def _set_upstream(self):
    # check if the URL is local and no default remote is present
    # add default remote pointing to the original repo's cache location
    if os.path.isdir(self.url):
        rconfig = RemoteConfig(self.config)
        if not rconfig.has_default():
            src_repo = Repo(self.url)
            try:
                rconfig.add(
                    "auto-generated-upstream",
                    src_repo.cache.local.cache_dir,
                    default=True,
                    level=Config.LEVEL_LOCAL,
                )
            finally:
                src_repo.close()
def _external_repo(url=None, rev=None, cache_dir=None):
    from dvc.config import Config
    from dvc.cache import CacheConfig
    from dvc.repo import Repo

    key = (url, rev, cache_dir)
    if key in REPO_CACHE:
        return REPO_CACHE[key]

    new_path = cached_clone(url, rev=rev)

    repo = Repo(new_path)
    try:
        # check if the URL is local and no default remote is present
        # add default remote pointing to the original repo's cache location
        if os.path.isdir(url):
            rconfig = RemoteConfig(repo.config)
            if not _default_remote_set(rconfig):
                original_repo = Repo(url)
                try:
                    rconfig.add(
                        "auto-generated-upstream",
                        original_repo.cache.local.cache_dir,
                        default=True,
                        level=Config.LEVEL_LOCAL,
                    )
                finally:
                    original_repo.close()

        if cache_dir is not None:
            cache_config = CacheConfig(repo.config)
            cache_config.set_dir(cache_dir, level=Config.LEVEL_LOCAL)
    finally:
        # Need to close/reopen repo to force config reread
        repo.close()

    REPO_CACHE[key] = new_path
    return new_path
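# Illustrative sketch, not taken from the file above: one way a caller-facing
# context manager could wrap _external_repo(), which only returns a cached
# working-tree path kept in REPO_CACHE. The wrapper name and exact shape are
# assumptions; only Repo(...) and repo.close() mirror usage shown above.
from contextlib import contextmanager


@contextmanager
def external_repo_sketch(url=None, rev=None, cache_dir=None):
    from dvc.repo import Repo

    path = _external_repo(url=url, rev=rev, cache_dir=cache_dir)
    repo = Repo(path)
    try:
        yield repo
    finally:
        # The clone path stays cached for reuse; only the Repo handle is
        # closed here.
        repo.close()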
def test_pre_push_hook(self, tmp_dir, scm, dvc, tmp_path_factory):
    scm.install()

    temp = tmp_path_factory.mktemp("external")
    git_remote = temp / "project.git"
    storage_path = temp / "dvc_storage"

    RemoteConfig(dvc.config).add("store", fspath(storage_path), default=True)
    tmp_dir.dvc_gen("file", "file_content", "commit message")

    file_checksum = file_md5("file")[0]
    expected_storage_path = (
        storage_path / file_checksum[:2] / file_checksum[2:]
    )

    scm.repo.clone(fspath(git_remote))
    scm.repo.create_remote("origin", fspath(git_remote))

    assert not expected_storage_path.is_file()
    scm.repo.git.push("origin", "master")
    assert expected_storage_path.is_file()
    assert expected_storage_path.read_text() == "file_content"
def test_modify_missing_remote(dvc):
    remote_config = RemoteConfig(dvc.config)

    with pytest.raises(ConfigError, match=r"Unable to find remote section"):
        remote_config.modify("myremote", "gdrive_client_id", "xxx")
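# Minimal usage sketch summarizing the RemoteConfig calls exercised by the
# snippets above (add / has_default / modify). The remote name and URL below
# are hypothetical placeholders, not values from the source.
def _example_remote_config_usage(repo):
    rconfig = RemoteConfig(repo.config)
    if not rconfig.has_default():
        # Register a default remote, as the tests above do for push/pull.
        rconfig.add("myremote", "/tmp/dvc-storage", default=True)
    # Modify an option on an existing remote section.
    rconfig.modify("myremote", "verify", "True")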