def test_NetCDFtoZarrSequentialRecipe(daily_xarray_dataset, netcdf_local_paths,
                                      tmp_target, tmp_cache):
    """Execute a sequential NetCDF-to-Zarr recipe by hand and verify the result.

    The recipe is built from the local NetCDF paths with one input per chunk,
    run step by step, and the Zarr store it writes is compared against
    ``daily_xarray_dataset``.
    """
    # An equivalent recipe also exists as a fixture in conftest.py; it is
    # spelled out here so the test can be read on its own.
    rec = recipe.NetCDFtoZarrSequentialRecipe(
        input_urls=netcdf_local_paths,
        sequence_dim="time",
        inputs_per_chunk=1,
        nitems_per_input=daily_xarray_dataset.attrs["items_per_file"],
        target=tmp_target,
        input_cache=tmp_cache,
    )

    # Canonical manual execution order: cache every input, prepare the
    # target, store every chunk, then finalize the target.
    for key in rec.iter_inputs():
        rec.cache_input(key)
    rec.prepare_target()
    for key in rec.iter_chunks():
        rec.store_chunk(key)
    rec.finalize_target()

    store = tmp_target.get_mapper()
    actual = xr.open_zarr(store, consolidated=True).load()
    expected = daily_xarray_dataset.compute()
    assert actual.identical(expected)
Exemple #2
0
def test_NetCDFtoZarrSequentialRecipe(
    daily_xarray_dataset,
    netcdf_local_paths,
    tmp_target,
    tmp_cache,
    process_input,
    process_chunk,
    inputs_per_chunk,
    target_chunks,
    chunk_expectation,
):
    """Build, run and validate a sequential NetCDF-to-Zarr recipe.

    The test covers three things:

    * recipe construction happens inside ``chunk_expectation``; if that
      context yields a truthy value the test stops there,
    * the chunk sizes of the written Zarr store match ``target_chunks``
      (or ``items_per_file * inputs_per_chunk`` along ``"time"`` when no
      explicit size is given),
    * the stored data is identical to ``daily_xarray_dataset`` after the
      optional ``process_input`` / ``process_chunk`` hooks are applied to it.
    """
    # the same recipe is created as a fixture in conftest.py
    # I left it here explicitly because it makes the test easier to read.
    paths, items_per_file = netcdf_local_paths
    # NOTE(review): chunk_expectation is presumably either pytest.raises(...)
    # or a no-op context manager yielding None -- confirm against conftest.py.
    with chunk_expectation as excinfo:
        r = recipe.NetCDFtoZarrSequentialRecipe(
            input_urls=paths,
            sequence_dim="time",
            inputs_per_chunk=inputs_per_chunk,
            nitems_per_input=items_per_file,
            target=tmp_target,
            input_cache=tmp_cache,
            process_input=process_input,
            process_chunk=process_chunk,
            target_chunks=target_chunks,
        )
    if excinfo:
        # don't continue if we got an exception
        return

    _manually_execute_recipe(r)

    ds_target = xr.open_zarr(tmp_target.get_mapper(), consolidated=True)

    # chunk validation
    # Work on a copy: popping "time" from the dict that was passed in would
    # mutate an object this test does not own.
    expected_chunks = {} if target_chunks is None else dict(target_chunks)
    seq_chunk_len = expected_chunks.pop("time", None) or (items_per_file * inputs_per_chunk)
    sequence_chunks = ds_target.chunks["time"]
    # we expect all chunks but the last to have the expected size
    assert all(item == seq_chunk_len for item in sequence_chunks[:-1])
    for other_dim, chunk_len in expected_chunks.items():
        # Same rule for every other dimension with a requested chunk size.
        # Without the assert this check would be computed and thrown away.
        assert all(item == chunk_len for item in ds_target.chunks[other_dim][:-1])

    ds_target.load()
    ds_expected = daily_xarray_dataset.compute()

    if process_input is not None:
        # check that the process_input hook made some changes
        assert not ds_target.identical(ds_expected)
        # apply these changes to the expected dataset
        ds_expected = process_input(ds_expected)
    if process_chunk is not None:
        # check that the process_chunk hook made some changes
        assert not ds_target.identical(ds_expected)
        # apply these changes to the expected dataset
        ds_expected = process_chunk(ds_expected)

    assert ds_target.identical(ds_expected)
Exemple #3
0
def test_NetCDFtoZarrSequentialRecipeHttpAuth(
    daily_xarray_dataset, netcdf_http_server, tmp_target, tmp_cache, username, password
):
    """Run the recipe against an HTTP server using basic-auth credentials.

    The server is requested with the credentials ``"foo"`` / ``"bar"``, while
    the recipe authenticates with the ``username`` / ``password`` test
    parameters. When ``password`` is ``"wrong"``, caching the first input
    must raise ``ClientResponseError``; otherwise the recipe is executed and
    its output compared with ``daily_xarray_dataset``.
    """
    base_url, file_names, nitems = netcdf_http_server("foo", "bar")
    input_urls = [f"{base_url}/{name}" for name in file_names]
    auth = aiohttp.BasicAuth(username, password)
    rec = recipe.NetCDFtoZarrSequentialRecipe(
        input_urls=input_urls,
        sequence_dim="time",
        inputs_per_chunk=1,
        nitems_per_input=nitems,
        target=tmp_target,
        input_cache=tmp_cache,
        fsspec_open_kwargs={"client_kwargs": {"auth": auth}},
    )

    if password != "wrong":
        # Valid credentials: the whole recipe must run and reproduce the data.
        _manually_execute_recipe(rec)

        store = tmp_target.get_mapper()
        actual = xr.open_zarr(store, consolidated=True).load()
        expected = daily_xarray_dataset.compute()
        assert actual.identical(expected)
        return

    # Bad credentials: fetching the very first input is expected to fail.
    with pytest.raises(aiohttp.client_exceptions.ClientResponseError):
        rec.cache_input(next(rec.iter_inputs()))
Exemple #4
0
def test_NetCDFtoZarrSequentialRecipeNoTarget(
    daily_xarray_dataset, netcdf_local_paths, tmp_target, tmp_cache
):
    """Caching an input on a recipe built without a target must fail.

    The recipe is created with neither ``target`` nor ``input_cache``, and
    the test expects ``UninitializedTargetError`` when the first input is
    cached.
    """
    input_paths, nitems = netcdf_local_paths
    rec = recipe.NetCDFtoZarrSequentialRecipe(
        input_urls=input_paths,
        sequence_dim="time",
        inputs_per_chunk=1,
        nitems_per_input=nitems,
    )

    with pytest.raises(UninitializedTargetError):
        rec.cache_input(next(rec.iter_inputs()))
Exemple #5
0
def netCDFtoZarr_sequential_recipe(daily_xarray_dataset, netcdf_local_paths, tmp_target, tmp_cache):
    """Build a sequential NetCDF-to-Zarr recipe bundled with its test data.

    Returns a 3-tuple ``(recipe, daily_xarray_dataset, tmp_target)`` where the
    recipe reads the local NetCDF paths one input per chunk, writes to
    ``tmp_target`` and caches inputs in ``tmp_cache``.
    """
    input_paths, nitems = netcdf_local_paths
    recipe_kwargs = {
        "input_urls": input_paths,
        "sequence_dim": "time",
        "inputs_per_chunk": 1,
        "nitems_per_input": nitems,
        "target": tmp_target,
        "input_cache": tmp_cache,
    }
    built = recipe.NetCDFtoZarrSequentialRecipe(**recipe_kwargs)
    return built, daily_xarray_dataset, tmp_target
Exemple #6
0
def test_NetCDFtoZarrSequentialRecipeNoTarget(daily_xarray_dataset,
                                              netcdf_local_paths, tmp_target,
                                              tmp_cache):
    """Caching an input on a recipe built without a target must fail.

    The recipe is created with neither ``target`` nor ``input_cache``, and
    the test expects ``UninitializedTargetError`` when the first input is
    cached.
    """
    rec = recipe.NetCDFtoZarrSequentialRecipe(
        input_urls=netcdf_local_paths,
        sequence_dim="time",
        inputs_per_chunk=1,
        nitems_per_input=daily_xarray_dataset.attrs["items_per_file"],
    )

    # Only the first step of a manual run is attempted; it should raise.
    with pytest.raises(UninitializedTargetError):
        rec.cache_input(next(rec.iter_inputs()))