def test_open_openml_url_cache(monkeypatch, gzip_response, tmpdir): data_id = 61 _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) openml_path = sklearn.datasets._openml._DATA_FILE.format(data_id) cache_directory = str(tmpdir.mkdir('scikit_learn_data')) # first fill the cache response1 = _open_openml_url(openml_path, cache_directory) # assert file exists location = _get_local_path(openml_path, cache_directory) assert os.path.isfile(location) # redownload, to utilize cache response2 = _open_openml_url(openml_path, cache_directory) assert response1.read() == response2.read()
def test_open_openml_url_unlinks_local_path(monkeypatch, gzip_response, tmpdir, write_to_disk): data_id = 61 openml_path = sklearn.datasets._openml._DATA_FILE.format(data_id) cache_directory = str(tmpdir.mkdir('scikit_learn_data')) location = _get_local_path(openml_path, cache_directory) def _mock_urlopen(request): if write_to_disk: with open(location, "w") as f: f.write("") raise ValueError("Invalid request") monkeypatch.setattr(sklearn.datasets._openml, 'urlopen', _mock_urlopen) with pytest.raises(ValueError, match="Invalid request"): _open_openml_url(openml_path, cache_directory) assert not os.path.exists(location)
def test_retry_with_clean_cache(tmpdir): data_id = 61 openml_path = sklearn.datasets._openml._DATA_FILE.format(data_id) cache_directory = str(tmpdir.mkdir('scikit_learn_data')) location = _get_local_path(openml_path, cache_directory) os.makedirs(os.path.dirname(location)) with open(location, 'w') as f: f.write("") @_retry_with_clean_cache(openml_path, cache_directory) def _load_data(): # The first call will raise an error since location exists if os.path.exists(location): raise Exception("File exist!") return 1 warn_msg = "Invalid cache, redownloading file" with pytest.warns(RuntimeWarning, match=warn_msg): result = _load_data() assert result == 1