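# Helper used by the tests below. A minimal sketch assuming the recipe's
# canonical manual-execution sequence: cache every input, prepare the target,
# store every chunk, then finalize.
def _manually_execute_recipe(r):
    for input_key in r.iter_inputs():
        r.cache_input(input_key)
    r.prepare_target()
    for chunk_key in r.iter_chunks():
        r.store_chunk(chunk_key)
    r.finalize_target()
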
def test_NetCDFtoZarrSequentialRecipe(
    daily_xarray_dataset,
    netcdf_local_paths,
    tmp_target,
    tmp_cache,
    process_input,
    process_chunk,
    inputs_per_chunk,
    target_chunks,
    chunk_expectation,
):
    # the same recipe is created as a fixture in conftest.py;
    # it is repeated here explicitly because that makes the test easier to read
    paths, items_per_file = netcdf_local_paths
    with chunk_expectation as excinfo:
        r = recipe.NetCDFtoZarrSequentialRecipe(
            input_urls=paths,
            sequence_dim="time",
            inputs_per_chunk=inputs_per_chunk,
            nitems_per_input=items_per_file,
            target=tmp_target,
            input_cache=tmp_cache,
            process_input=process_input,
            process_chunk=process_chunk,
            target_chunks=target_chunks,
        )
    if excinfo:
        # don't continue if we got an exception
        return

    _manually_execute_recipe(r)

    ds_target = xr.open_zarr(tmp_target.get_mapper(), consolidated=True)

    # chunk validation
    sequence_chunks = ds_target.chunks["time"]
    if target_chunks is None:
        target_chunks = {}
    seq_chunk_len = target_chunks.pop("time", None) or (items_per_file * inputs_per_chunk)
    # we expect all chunks but the last to have the expected size
    assert all([item == seq_chunk_len for item in sequence_chunks[:-1]])
    for other_dim, chunk_len in target_chunks.items():
        assert all([item == chunk_len for item in ds_target.chunks[other_dim][:-1]])

    ds_target.load()
    ds_expected = daily_xarray_dataset.compute()

    if process_input is not None:
        # check that the process_input hook made some changes
        assert not ds_target.identical(ds_expected)
        # apply these changes to the expected dataset
        ds_expected = process_input(ds_expected)
    if process_chunk is not None:
        # check that the process_chunk hook made some changes
        assert not ds_target.identical(ds_expected)
        # apply these changes to the expected dataset
        ds_expected = process_chunk(ds_expected)

    assert ds_target.identical(ds_expected)

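# A minimal, hypothetical sketch of what a process_input / process_chunk hook
# could look like (the real hook fixtures live in conftest.py and may differ):
# each hook takes an xarray.Dataset and returns a modified Dataset, which is
# why the test above can apply the same callable to ds_expected.
def _example_process_hook(ds):
    # tag the dataset so the hook's effect is detectable in the target
    return ds.assign_attrs(hook_applied="yes")
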
def test_NetCDFtoZarrSequentialRecipeHttpAuth(
    daily_xarray_dataset, netcdf_http_server, tmp_target, tmp_cache, username, password
):
    url, fnames, items_per_file = netcdf_http_server("foo", "bar")
    urls = [f"{url}/{fname}" for fname in fnames]
    r = recipe.NetCDFtoZarrSequentialRecipe(
        input_urls=urls,
        sequence_dim="time",
        inputs_per_chunk=1,
        nitems_per_input=items_per_file,
        target=tmp_target,
        input_cache=tmp_cache,
        fsspec_open_kwargs={"client_kwargs": {"auth": aiohttp.BasicAuth(username, password)}},
    )

    if password == "wrong":
        with pytest.raises(aiohttp.client_exceptions.ClientResponseError):
            r.cache_input(next(r.iter_inputs()))
    else:
        _manually_execute_recipe(r)
        ds_target = xr.open_zarr(tmp_target.get_mapper(), consolidated=True).load()
        ds_expected = daily_xarray_dataset.compute()
        assert ds_target.identical(ds_expected)

def test_NetCDFtoZarrSequentialRecipeNoTarget(
    daily_xarray_dataset, netcdf_local_paths, tmp_target, tmp_cache
):
    paths, items_per_file = netcdf_local_paths
    r = recipe.NetCDFtoZarrSequentialRecipe(
        input_urls=paths,
        sequence_dim="time",
        inputs_per_chunk=1,
        nitems_per_input=items_per_file,
    )
    with pytest.raises(UninitializedTargetError):
        r.cache_input(next(r.iter_inputs()))

def netCDFtoZarr_sequential_recipe(
    daily_xarray_dataset, netcdf_local_paths, tmp_target, tmp_cache
):
    paths, items_per_file = netcdf_local_paths
    r = recipe.NetCDFtoZarrSequentialRecipe(
        input_urls=paths,
        sequence_dim="time",
        inputs_per_chunk=1,
        nitems_per_input=items_per_file,
        target=tmp_target,
        input_cache=tmp_cache,
    )
    return r, daily_xarray_dataset, tmp_target

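# A hedged usage sketch for the helper above: it rebuilds the simple
# one-input-per-chunk round trip using the manual-execution helper at the top
# of this module. Only the existing fixtures are assumed.
def test_netCDFtoZarr_sequential_recipe_helper(
    daily_xarray_dataset, netcdf_local_paths, tmp_target, tmp_cache
):
    r, ds_expected, target = netCDFtoZarr_sequential_recipe(
        daily_xarray_dataset, netcdf_local_paths, tmp_target, tmp_cache
    )
    _manually_execute_recipe(r)
    ds_target = xr.open_zarr(target.get_mapper(), consolidated=True).load()
    assert ds_target.identical(ds_expected.compute())
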