Example No. 1
def test_apply_non_polar_dtr_ceiling():
    """Test diurnal temperature range (DTR) non polar dtr ceiling"""

    # case 1 : non polar regions, should be applied
    # Make some fake dtr data
    ceiling = 0.5
    x = np.random.rand(2, 361)
    lon = np.arange(-0.5, 0.5, 0.5)
    lat = np.arange(-90, 90.5, 0.5)
    ds_dtr = xr.Dataset(
        {"fakevariable": xr.DataArray(x, {"lon": lon, "lat": lat}, ["lon", "lat"])}
    )

    in_url = "memory://test_correct_small_dtr/an/input/path.zarr"
    out_url = "memory://test_correct_small_dtr/an/output/path.zarr"
    repository.write(in_url, ds_dtr)

    apply_non_polar_dtr_ceiling(in_url, out=out_url, ceiling=ceiling)
    ds_dtr_corrected = repository.read(out_url)

    are_not_capped = xr.ufuncs.logical_or(
        ds_dtr <= ceiling,
        xr.ufuncs.logical_or(ds_dtr["lat"] <= -60, ds_dtr["lat"] >= 60),
    )

    are_capped = xr.ufuncs.logical_not(are_not_capped)

    # check values that should be capped
    assert (
        ds_dtr_corrected["fakevariable"].values[are_capped["fakevariable"].values]
        == ceiling
    ).all()

    # check values that should not be capped
    corrected = ds_dtr_corrected["fakevariable"].values[
        are_not_capped["fakevariable"].values
    ]
    not_corrected = ds_dtr["fakevariable"].values[are_not_capped["fakevariable"].values]
    np.testing.assert_equal(corrected, not_corrected)

    # case 2 : all polar regions, shouldn't be applied
    # Make some fake dtr data
    ceiling = 0.5
    x = np.random.rand(2, 58)
    lon = np.arange(-0.5, 0.5, 0.5)
    lat = np.arange(-90, -61, 0.5)
    ds_dtr = xr.Dataset(
        {"fakevariable": xr.DataArray(x, {"lon": lon, "lat": lat}, ["lon", "lat"])}
    )

    in_url = "memory://test_correct_small_dtr/an/input/path.zarr"
    out_url = "memory://test_correct_small_dtr/an/output/path.zarr"
    repository.write(in_url, ds_dtr)

    apply_non_polar_dtr_ceiling(in_url, out=out_url, ceiling=ceiling)
    ds_dtr_corrected = repository.read(out_url)

    np.testing.assert_equal(
        ds_dtr["fakevariable"].values, ds_dtr_corrected["fakevariable"].values
    )
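
The repository module used throughout these examples is not shown. The tests round-trip datasets through Zarr stores addressed by memory:// URLs, so a minimal stand-in could look like the sketch below, assuming fsspec is installed so that the memory:// scheme resolves to an in-memory filesystem (names here are illustrative, not the project's actual implementation).

import xarray as xr


def write(url, ds):
    """Hypothetical stand-in: write a Dataset to a Zarr store at the given URL."""
    ds.to_zarr(url, mode="w")


def read(url):
    """Hypothetical stand-in: open the Zarr store at the given URL as a Dataset."""
    return xr.open_zarr(url)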
Example No. 2
def test_train_qdm(kind):
    """Test that train_qdm outputs store giving sdba.adjustment.QuantileDeltaMapping

    Checks that output is consistent if we do "additive" or "multiplicative"
    QDM kinds.
    """
    # Setup input data.
    n_years = 10
    n = n_years * 365

    model_bias = 2
    ts = np.sin(np.linspace(-10 * 3.14, 10 * 3.14, n)) * 0.5
    hist = _datafactory(ts + model_bias)
    ref = _datafactory(ts)

    output_key = "memory://test_train_qdm/test_output.zarr"
    hist_key = "memory://test_train_qdm/hist.zarr"
    ref_key = "memory://test_train_qdm/ref.zarr"

    # Load up a fake repo with our input data in the place of big data and cloud
    # storage.
    repository.write(hist_key, hist)
    repository.write(ref_key, ref)

    train_qdm(
        historical=hist_key,
        reference=ref_key,
        out=output_key,
        variable="fakevariable",
        kind=kind,
    )

    assert QuantileDeltaMapping.from_dataset(repository.read(output_key))
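
Several of these tests build single-point daily series through a _datafactory helper that is not included in the excerpt. A minimal sketch of what such a factory might look like, with the signature and defaults inferred from the call sites (the start date and dimension order are assumptions), is:

import numpy as np
import xarray as xr


def _datafactory(x, variable_name="fakevariable", start_time="1994-01-01"):
    """Hypothetical sketch: wrap a 1-D series as a daily, single-point Dataset."""
    time = xr.cftime_range(
        start=start_time, periods=len(x), freq="D", calendar="noleap"
    )
    return xr.Dataset(
        {variable_name: (["time", "lon", "lat"], np.asarray(x)[:, None, None])},
        coords={
            "time": time,
            "lon": (["lon"], [1.0]),
            "lat": (["lat"], [1.0]),
        },
    )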
Example No. 3
def test_prime_qplad_output_zarrstore():
    """
    Test that prime_qplad_output_zarrstore creates a Zarr store with the expected variables, shapes, and attrs
    """
    # Make fake simulation data for test.
    target_variable = "fakevariable"
    sim = _datafactory(np.ones(365, dtype=np.float32), variable_name=target_variable)
    sim.attrs["foo"] = "bar"
    goal_shape = sim[target_variable].shape
    sim_key = "memory://test_prime_qplad_output_zarrstore/sim.zarr"
    repository.write(sim_key, sim)

    primed_url = "memory://test_prime_qplad_output_zarrstore/primed.zarr"

    prime_qplad_output_zarrstore(
        simulation=sim_key,
        variable=target_variable,
        out=primed_url,
        zarr_region_dims=["lat"],
    )

    primed_ds = repository.read(primed_url)

    assert target_variable in primed_ds.variables
    assert primed_ds[target_variable].shape == goal_shape
    assert primed_ds.attrs["foo"] == "bar"
Example No. 4
def test_validation(variable, data_type, time_period):
    """Tests that validate passes for fake output data"""
    # Setup input data
    start_time = None
    end_time = None
    if data_type == "bias_corrected" or data_type == "downscaled":
        if time_period == "historical":
            start_time = "1950-01-01"
            end_time = "2014-12-31"
        else:
            start_time = "2015-01-01"
            end_time = "2100-12-31"
    elif data_type == "cmip6":
        if time_period == "historical":
            start_time = "1950-01-01"
            end_time = "2025-12-31"
        else:
            start_time = "2004-01-01"
            end_time = "2100-12-31"

    # create test data
    ds = _modeloutputfactory(
        start_time=start_time, end_time=end_time, variable_name=variable
    )

    # write test data
    in_url = "memory://test_validate/an/input/path.zarr"
    repository.write(in_url, ds)

    validate(in_url, variable, data_type, time_period)
Example No. 5
def test_get_attrs_variable():
    """Test that services.get_attrs returns json of variable attrs"""
    url = "memory://test_get_attrs/x.zarr"
    variable_name = "bar"
    ds_in = xr.Dataset({variable_name: "SPAM"}, attrs={"fish": "chips"})
    ds_in[variable_name].attrs["carrot"] = "sticks"

    repository.write(url, ds_in)
    out = get_attrs(url, variable=variable_name)

    assert out == '{"carrot": "sticks"}'
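
get_attrs is exercised here only through its JSON output. Assuming it is a thin wrapper that serializes either the global or the per-variable .attrs mapping, a behaviorally equivalent sketch (illustrative only, not the service's actual code) would be:

import json

import xarray as xr


def get_attrs_sketch(url, variable=None):
    """Hypothetical stand-in: return global or per-variable attrs as a JSON string."""
    ds = xr.open_zarr(url)
    attrs = ds[variable].attrs if variable is not None else ds.attrs
    return json.dumps(dict(attrs))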
Example No. 6
def test_qplad_train(tmpdir, monkeypatch, kind):
    """Tests that the shape of adjustment factors matches the expected shape"""
    monkeypatch.setenv(
        "HDF5_USE_FILE_LOCKING", "FALSE"
    )  # Avoid thread lock conflicts with dask scheduler
    # make test data
    np.random.seed(0)
    lon = [-99.83, -99.32, -99.79, -99.23]
    lat = [42.25, 42.21, 42.63, 42.59]
    time = xr.cftime_range(start="1994-12-17", end="2015-01-15", calendar="noleap")
    data_ref = 15 + 8 * np.random.randn(len(time), 4, 4)

    ref_fine = xr.Dataset(
        data_vars=dict(
            scen=(["time", "lat", "lon"], data_ref),
        ),
        coords=dict(
            time=time,
            lon=(["lon"], lon),
            lat=(["lat"], lat),
        ),
        attrs=dict(description="Weather related data."),
    )
    # need to set variable units to pass xclim 0.29 check on units
    ref_fine["scen"].attrs["units"] = "K"

    # take the mean across space to represent coarse reference data for AFs
    ds_ref_coarse = ref_fine.mean(["lat", "lon"])
    # tile the fine resolution grid with the coarse resolution ref data
    ref_coarse = ds_ref_coarse.broadcast_like(ref_fine)
    ref_coarse["scen"].attrs["units"] = "K"

    # write test data
    ref_coarse_url = "memory://test_qplad_downscaling/a/ref_coarse/path.zarr"
    ref_fine_url = "memory://test_qplad_downscaling/a/ref_fine/path.zarr"
    train_out_url = "memory://test_qplad_downscaling/a/train_output/path.zarr"

    repository.write(
        ref_coarse_url,
        ref_coarse.chunk({"time": -1}),
    )
    repository.write(ref_fine_url, ref_fine.chunk({"time": -1}))

    # now train QPLAD model
    train_qplad(ref_coarse_url, ref_fine_url, train_out_url, "scen", kind)

    # load adjustment factors
    qplad_model = repository.read(train_out_url)

    af_expected_shape = (len(lon), len(lat), 365, 620)

    assert qplad_model.af.shape == af_expected_shape
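
For reference, the 620 in af_expected_shape is consistent with the roughly 20 years of daily reference data (1994-12-17 through 2015-01-15 on a noleap calendar) being grouped by day of year with a 31-day window: 20 x 31 = 620 samples, and hence quantiles, per day-of-year group, while 365 is simply the number of days of year in the noleap calendar. This reading is inferred from the test setup rather than stated in the source.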
Example No. 7
def test_clean_cmip6():
    """Tests that cmip6 cleanup removes extra dimensions on dataset"""
    # Setup input data
    n = 1500  # need over four years of daily data
    ts = np.sin(np.linspace(-10 * np.pi, 10 * np.pi, n)) * 0.5
    ds_gcm = _gcmfactory(ts, start_time="1950-01-01")

    in_url = "memory://test_clean_cmip6/an/input/path.zarr"
    out_url = "memory://test_clean_cmip6/an/output/path.zarr"
    repository.write(in_url, ds_gcm)

    clean_cmip6(in_url, out_url, leapday_removal=True)
    ds_cleaned = repository.read(out_url)

    assert "height" not in ds_cleaned.coords
    assert "member_id" not in ds_cleaned.coords
    assert "time_bnds" not in ds_cleaned.coords
Example No. 8
def test_remove_leapdays():
    """Test that leapday removal service removes leap days"""
    # Setup input data
    n = 1500  # need over four years of daily data
    ts = np.sin(np.linspace(-10 * np.pi, 10 * np.pi, n)) * 0.5
    ds_leap = _gcmfactory(ts, start_time="1950-01-01")

    in_url = "memory://test_remove_leapdays/an/input/path.zarr"
    out_url = "memory://test_remove_leapdays/an/output/path.zarr"
    repository.write(in_url, ds_leap)

    remove_leapdays(in_url, out_url)
    ds_noleap = repository.read(out_url)
    ds_leapyear = ds_noleap.loc[dict(time=slice("1952-01-01", "1952-12-31"))]

    # check to be sure that leap days have been removed
    assert len(ds_leapyear.time) == 365
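
A standalone sketch of the leap-day removal itself (illustrative; not necessarily how the remove_leapdays service is implemented):

import numpy as np
import pandas as pd
import xarray as xr

# Build one leap year of daily data, then drop Feb 29 with a boolean mask.
time = pd.date_range("1952-01-01", "1952-12-31", freq="D")  # 366 days
ds = xr.Dataset({"x": ("time", np.arange(len(time)))}, coords={"time": time})
ds_noleap = ds.sel(time=~((ds["time"].dt.month == 2) & (ds["time"].dt.day == 29)))
assert len(ds_noleap["time"]) == 365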
Example No. 9
def test_correct_wet_day_frequency(process):
    """Test that wet day frequency correction corrects the frequency of wet days"""
    # Make some fake precip data
    ts = np.array([0.5, 1, 1.5, 0.1])
    ds_precip = _datafactory(ts, start_time="1950-01-01")
    in_url = "memory://test_correct_wet_day_frequency/an/input/path.zarr"
    out_url = "memory://test_correct_wet_day_frequency/an/output/path.zarr"
    repository.write(in_url, ds_precip)

    seed = 1
    np.random.seed(seed)
    correct_wet_day_frequency(
        in_url, out=out_url, process=process, variable="fakevariable"
    )
    ds_precip_corrected = repository.read(out_url)

    if process == "pre":
        np.random.seed(seed)
        expected = np.random.uniform(0.5, 1, 4)
        assert (
            ds_precip_corrected.isel(time=0)["fakevariable"].values.item()
            == expected[0]
        )
        assert (
            ds_precip_corrected.isel(time=3)["fakevariable"].values.item()
            == expected[3]
        )
        expected = ds_precip.isel(time=slice(1, 3))["fakevariable"].values
        np.testing.assert_almost_equal(
            ds_precip_corrected.isel(time=slice(1, 3))["fakevariable"].values, expected
        )

    elif process == "post":
        expected = 0.0
        assert (
            ds_precip_corrected.isel(time=0)["fakevariable"].values.item() == expected
        )
        assert (
            ds_precip_corrected.isel(time=3)["fakevariable"].values.item() == expected
        )
        expected = ds_precip.isel(time=slice(1, 3))["fakevariable"].values
        np.testing.assert_almost_equal(
            ds_precip_corrected.isel(time=slice(1, 3))["fakevariable"].values, expected
        )
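
The process argument implies this test is parametrized; the decorator is not part of the excerpt, but a plausible form (assumed, not taken from the source) is:

import pytest


@pytest.mark.parametrize("process", ["pre", "post"])
def test_correct_wet_day_frequency(process):
    ...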
Example No. 10
def test_train_qplad_isel_slice():
    """Tests that services.train_qplad subsets with isel_slice"""
    lon = [-99.83, -99.32, -99.79, -99.23]
    lat = [42.25, 42.21, 42.63, 42.59]
    time = xr.cftime_range(start="1994-12-17", end="2015-01-15", calendar="noleap")
    data_ref = 15 + 8 * np.ones((len(time), 4, 4))
    ref_fine = xr.Dataset(
        data_vars=dict(
            scen=(["time", "lat", "lon"], data_ref),
        ),
        coords=dict(
            time=time,
            lon=(["lon"], lon),
            lat=(["lat"], lat),
        ),
        attrs=dict(description="Weather related data."),
    )
    # need to set variable units to pass xclim 0.29 check on units
    ref_fine["scen"].attrs["units"] = "K"
    # take the mean across space to represent coarse reference data for AFs
    ds_ref_coarse = ref_fine.mean(["lat", "lon"])
    # tile the fine resolution grid with the coarse resolution ref data
    ref_coarse = ds_ref_coarse.broadcast_like(ref_fine)
    ref_coarse["scen"].attrs["units"] = "K"

    # write test data
    ref_coarse_url = "memory://train_qplad_isel_slice/a/ref_coarse/path.zarr"
    ref_fine_url = "memory://train_qplad_isel_slice/a/ref_fine/path.zarr"
    train_out_url = "memory://train_qplad_isel_slice/a/train_output/path.zarr"

    repository.write(ref_coarse_url, ref_coarse.chunk({"time": -1}))
    repository.write(ref_fine_url, ref_fine.chunk({"time": -1}))

    # now train QPLAD model
    train_qplad(
        coarse_reference=ref_coarse_url,
        fine_reference=ref_fine_url,
        out=train_out_url,
        variable="scen",
        kind="additive",
        isel_slice={"lat": slice(0, 3)},
    )

    qplad_model = repository.read(train_out_url)
    assert qplad_model["lat"].shape == (3,)
Example No. 11
def test_regrid_resolution(domain_file, expected_shape):
    """Smoke test that services.regrid outputs with different grid resolutions

    The expected shape is the same, but change in methods should not error.
    """
    # Make fake input data.
    ds_in = grid_global(30, 20)
    ds_in["fakevariable"] = wave_smooth(ds_in["lon"], ds_in["lat"])

    in_url = "memory://test_regrid_resolution/an/input/path.zarr"
    domain_file_url = "memory://test_regrid_resolution/a/domainfile/path.zarr"
    out_url = "memory://test_regrid_resolution/an/output/path.zarr"
    repository.write(in_url, ds_in)
    repository.write(domain_file_url, domain_file)

    regrid(in_url, out=out_url, method="bilinear", domain_file=domain_file_url)
    actual_shape = repository.read(out_url)["fakevariable"].shape
    assert actual_shape == expected_shape
Example No. 12
def test_regrid_dtype(domain_file, expected_dtype):
    """Tests that services.regrid casts output to different dtypes"""
    # Make fake input data.
    ds_in = grid_global(5, 10)
    ds_in["fakevariable"] = wave_smooth(ds_in["lon"], ds_in["lat"])

    in_url = "memory://test_regrid_dtype/an/input/path.zarr"
    domain_file_url = "memory://test_regrid_dtype/a/domainfile/path.zarr"
    out_url = "memory://test_regrid_dtype/an/output/path.zarr"
    repository.write(in_url, ds_in)
    repository.write(domain_file_url, domain_file)

    regrid(
        in_url,
        out=out_url,
        method="bilinear",
        domain_file=domain_file_url,
        astype=expected_dtype,
    )
    actual_dtype = repository.read(out_url)["fakevariable"].dtype
    assert actual_dtype == expected_dtype
Example No. 13
def test_apply_dtr_floor():
    """Test diurnal temperature range (DTR) floor constraint"""
    # Make some fake dtr data
    n = 10
    floor = 5.0
    ts = np.linspace(0.0, 10, num=n)
    ds_dtr = _datafactory(ts, start_time="1950-01-01")
    in_url = "memory://test_correct_small_dtr/an/input/path.zarr"
    out_url = "memory://test_correct_small_dtr/an/output/path.zarr"
    repository.write(in_url, ds_dtr)

    apply_dtr_floor(in_url, out=out_url, floor=floor)
    ds_dtr_corrected = repository.read(out_url)

    # all values below floor should have been set to the floor value
    assert all(
        x == floor
        for x in ds_dtr_corrected["fakevariable"].where(
            ds_dtr["fakevariable"] < floor, drop=True
        )
    )
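
The flooring behavior asserted above can be reproduced with a plain xarray clip; a minimal, self-contained sketch of that operation (again, not the service's actual code):

import numpy as np
import xarray as xr

floor = 5.0
da = xr.DataArray(np.linspace(0.0, 10.0, 10), dims="time")
da_floored = da.clip(min=floor)  # equivalently: da.where(da >= floor, floor)
assert float(da_floored.min()) == floor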
Example No. 14
def test_adjust_maximum_precipitation():
    """Test that maximum precipitation adjustment corrects precipitation values above a set threshold"""
    # make some fake precip data
    n = 700
    ts = np.linspace(0.0, 4000, num=n)
    threshold = 3000  # mm/day
    ds_precip = _datafactory(ts, start_time="1950-01-01")
    in_url = "memory://test_adjust_maximum_precipitation/an/input/path.zarr"
    out_url = "memory://test_adjust_maximum_precipitation/an/output/path.zarr"
    repository.write(in_url, ds_precip)

    adjust_maximum_precipitation(in_url, out=out_url, threshold=threshold)
    ds_precip_corrected = repository.read(out_url)

    # all values above threshold should have been set to the threshold value
    assert (
        ds_precip_corrected["fakevariable"]
        .where(ds_precip["fakevariable"] > threshold, drop=True)
        .all()
        <= threshold
    )
Example No. 15
def test_train_qdm_isel_slice():
    """Test that train_qdm outputs subset data when passed isel_slice"""
    # Setup input data.
    n_years = 10
    n = n_years * 365

    # Lazy way to make fake data for 2 latitudes...
    model_bias = 2
    ts = np.sin(np.linspace(-10 * 3.14, 10 * 3.14, n)) * 0.5
    hist1 = _datafactory(ts + model_bias)
    hist2 = _datafactory(ts + model_bias).assign_coords(
        {"lat": hist1["lat"].data + 1.0}
    )
    hist = xr.concat([hist1, hist2], dim="lat")
    ref1 = _datafactory(ts)
    ref2 = _datafactory(ts + model_bias).assign_coords({"lat": ref1["lat"].data + 1.0})
    ref = xr.concat([ref1, ref2], dim="lat")

    output_key = "memory://test_train_qdm_isel_slice/test_output.zarr"
    hist_key = "memory://test_train_qdm_isel_slice/hist.zarr"
    ref_key = "memory://test_train_qdm_isel_slice/ref.zarr"

    repository.write(hist_key, hist)
    repository.write(ref_key, ref)

    train_qdm(
        historical=hist_key,
        reference=ref_key,
        out=output_key,
        variable="fakevariable",
        kind="additive",
        isel_slice={"lat": slice(0, 1)},  # select only 1 of 2 lats by idx...
    )

    # Check that we can read the output and that it contains only the selected value.
    ds_result = repository.read(output_key)
    np.testing.assert_equal(ds_result["lat"].data, ref["lat"].data[0])
    assert QuantileDeltaMapping.from_dataset(ds_result)
Example No. 16
def test_cmip6_precip_unitconversion(gcm_variable):
    """Tests that precip units are converted in CMIP6 cleanup if variable is precip"""
    # Setup input data
    n = 1500  # need over four years of daily data
    ts = np.sin(np.linspace(-10 * np.pi, 10 * np.pi, n)) * 0.5
    ds_gcm = _gcmfactory(ts, gcm_variable=gcm_variable, start_time="1950-01-01")

    if gcm_variable == "pr":
        # assign units to typical GCM pr units so they can be cleaned
        ds_gcm["pr"].attrs["units"] = "kg m-2 s-1"

    in_url = "memory://test_clean_cmip6/an/input/path.zarr"
    out_url = "memory://test_clean_cmip6/an/output/path.zarr"
    repository.write(in_url, ds_gcm)

    clean_cmip6(in_url, out_url, leapday_removal=True)
    ds_cleaned = repository.read(out_url)

    assert "height" not in ds_cleaned.coords
    assert "member_id" not in ds_cleaned.coords
    assert "time_bnds" not in ds_cleaned.coords

    if "pr" in ds_cleaned.variables:
        assert ds_cleaned["pr"].units == "mm day-1"
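
The expected "mm day-1" units follow from the usual conversion for liquid water: 1 kg m-2 of water corresponds to 1 mm of depth (density of roughly 1000 kg m-3), so a flux of 1 kg m-2 s-1 equals 1 mm s-1, and multiplying by 86400 s day-1 gives mm day-1. The cleanup step presumably scales the values by 86400 accordingly; the exact implementation is not shown in this excerpt.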
Example No. 17
def test_rechunk():
    """Test that rechunk service rechunks"""
    chunks_goal = {"time": 4, "lon": 1, "lat": 1}
    test_ds = xr.Dataset(
        {"fakevariable": (["time", "lon", "lat"], np.ones((4, 4, 4)))},
        coords={
            "time": [1, 2, 3, 4],
            "lon": (["lon"], [1.0, 2.0, 3.0, 4.0]),
            "lat": (["lat"], [1.5, 2.5, 3.5, 4.5]),
        },
    )

    in_url = "memory://test_rechunk/input_ds.zarr"
    out_url = "memory://test_rechunk/output_ds.zarr"
    repository.write(in_url, test_ds)

    rechunk(
        in_url,
        target_chunks=chunks_goal,
        out=out_url,
    )
    actual_chunks = repository.read(out_url)["fakevariable"].data.chunksize

    assert actual_chunks == tuple(chunks_goal.values())
Example No. 18
def test_regrid_attrs(domain_file, regrid_method):
    """Tests that services.regrid copies attrs metadata to output"""
    # Make fake input data.
    ds_in = grid_global(5, 10)
    ds_in["fakevariable"] = wave_smooth(ds_in["lon"], ds_in["lat"])

    ds_in.attrs["foo"] = "bar"
    ds_in["fakevariable"].attrs["bar"] = "foo"

    in_url = "memory://test_regrid_attrs/an/input/path.zarr"
    domain_file_url = "memory://test_regrid_attrs/a/domainfile/path.zarr"
    out_url = "memory://test_regrid_attrs/an/output/path.zarr"
    repository.write(in_url, ds_in)
    repository.write(domain_file_url, domain_file)

    regrid(
        in_url,
        out=out_url,
        method="bilinear",
        domain_file=domain_file_url,
        astype="float32",
    )
    assert repository.read(out_url).attrs["foo"] == "bar"
    assert repository.read(out_url)["fakevariable"].attrs["bar"] == "foo"
Example No. 19
def test_apply_qdm():
    """Test to apply a trained QDM to input data and read the output.

    This is an integration test between train_qdm, apply_qdm.
    """
    # Setup input data.
    target_variable = "fakevariable"
    variable_kind = "additive"
    n_histdays = 10 * 365  # 10 years of daily historical.
    n_simdays = 50 * 365  # 50 years of daily simulation.

    model_bias = 2
    ts_ref = np.ones(n_histdays, dtype=np.float64)
    ts_sim = np.ones(n_simdays, dtype=np.float64)
    hist = _datafactory(ts_ref + model_bias, variable_name=target_variable)
    ref = _datafactory(ts_ref, variable_name=target_variable)
    sim = _datafactory(ts_sim + model_bias, variable_name=target_variable)

    # Load up a fake repo with our input data in the place of big data and cloud
    # storage.
    qdm_key = "memory://test_apply_qdm/qdm.zarr"
    hist_key = "memory://test_apply_qdm/hist.zarr"
    ref_key = "memory://test_apply_qdm/ref.zarr"
    sim_key = "memory://test_apply_qdm/sim.zarr"
    sim_adj_key = "memory://test_apply_qdm/sim_adjusted.zarr"
    repository.write(sim_key, sim)
    repository.write(hist_key, hist)
    repository.write(ref_key, ref)

    target_years = [1994, 1995]

    train_qdm(
        historical=hist_key,
        reference=ref_key,
        out=qdm_key,
        variable=target_variable,
        kind=variable_kind,
    )
    apply_qdm(
        simulation=sim_key,
        qdm=qdm_key,
        years=target_years,
        variable=target_variable,
        out=sim_adj_key,
    )
    adjusted_ds = repository.read(sim_adj_key)
    assert target_variable in adjusted_ds.variables
Example No. 20
def test_qplad_integration(kind):
    """Integration test of the QDM and QPLAD services"""
    lon = [-99.83, -99.32, -99.79, -99.23]
    lat = [42.25, 42.21, 42.63, 42.59]
    time = xr.cftime_range(start="1994-12-17", end="2015-01-15", calendar="noleap")
    data_ref = 15 + 8 * np.random.randn(len(time), 4, 4)
    data_train = 15 + 8 * np.random.randn(len(time), 4, 4)
    variable = "scen"

    ref_fine = xr.Dataset(
        data_vars=dict(
            scen=(["time", "lat", "lon"], data_ref),
        ),
        coords=dict(
            time=time,
            lon=(["lon"], lon),
            lat=(["lat"], lat),
        ),
        attrs=dict(description="Weather related data."),
    )

    ds_train = xr.Dataset(
        data_vars=dict(
            scen=(["time", "lat", "lon"], data_train),
        ),
        coords=dict(
            time=time,
            lon=(["lon"], lon),
            lat=(["lat"], lat),
        ),
        attrs=dict(description="Weather related data."),
    )

    # take the mean across space to represent coarse reference data for AFs
    ds_ref_coarse = ref_fine.mean(["lat", "lon"])
    ds_train = ds_train.mean(["lat", "lon"])

    # tile the fine resolution grid with the coarse resolution ref data
    ref_coarse = ds_ref_coarse.broadcast_like(ref_fine)
    ds_bc = ds_train + 3

    # need to set variable units to pass xclim 0.29 check on units
    ds_train["scen"].attrs["units"] = "K"
    ds_bc["scen"].attrs["units"] = "K"
    ref_coarse["scen"].attrs["units"] = "K"
    ref_fine["scen"].attrs["units"] = "K"
    ds_ref_coarse["scen"].attrs["units"] = "K"

    # write test data
    ref_coarse_coarse_url = (
        "memory://test_qplad_downscaling/a/ref_coarse_coarse/path.zarr"
    )
    ref_coarse_url = "memory://test_qplad_downscaling/a/ref_coarse/path.zarr"
    ref_fine_url = "memory://test_qplad_downscaling/a/ref_fine/path.zarr"
    qdm_train_url = "memory://test_qplad_downscaling/a/qdm_train/path.zarr"
    sim_url = "memory://test_qplad_downscaling/a/sim/path.zarr"
    qdm_train_out_url = "memory://test_qplad_downscaling/a/qdm_train_out/path.zarr"
    biascorrected_url = "memory://test_qplad_downscaling/a/biascorrected/path.zarr"
    sim_biascorrected_key = (
        "memory://test_qplad_downscaling/a/biascorrected/sim_biascorrected.zarr"
    )

    repository.write(ref_coarse_coarse_url, ds_ref_coarse)
    repository.write(
        ref_coarse_url,
        ref_coarse.chunk({"time": -1, "lat": -1, "lon": -1}),
    )
    repository.write(
        ref_fine_url,
        ref_fine.chunk({"time": -1, "lat": -1, "lon": -1}),
    )
    repository.write(qdm_train_url, ds_train)
    repository.write(sim_url, ds_bc)

    # this is an integration test between QDM and QPLAD, so use QDM services
    # for bias correction
    target_year = 2005

    train_qdm(
        historical=qdm_train_url,
        reference=ref_coarse_coarse_url,
        out=qdm_train_out_url,
        variable=variable,
        kind=kind,
    )
    apply_qdm(
        simulation=sim_url,
        qdm=qdm_train_out_url,
        years=[target_year],
        variable=variable,
        out=sim_biascorrected_key,
    )
    biascorrected_coarse = repository.read(sim_biascorrected_key)
    # make bias corrected data on the fine resolution grid
    biascorrected_fine = biascorrected_coarse.broadcast_like(
        ref_fine.sel(
            time=slice("{}-01-01".format(target_year), "{}-12-31".format(target_year))
        )
    )
    repository.write(
        biascorrected_url,
        biascorrected_fine.chunk({"time": -1, "lat": -1, "lon": -1}),
    )

    # write test data
    qplad_afs_url = "memory://test_qplad_downscaling/a/qplad_afs/path.zarr"

    # Writes NC to local disk, so diff format here:
    sim_downscaled_url = "memory://test_qplad_downscaling/a/qplad_afs/downscaled.zarr"

    # now train QPLAD model
    train_qplad(ref_coarse_url, ref_fine_url, qplad_afs_url, variable, kind)

    # downscale
    apply_qplad(biascorrected_url, qplad_afs_url, variable, sim_downscaled_url)

    # check output
    downscaled_ds = repository.read(sim_downscaled_url)

    # check that downscaled average equals bias corrected value
    bc_timestep = biascorrected_fine[variable].isel(time=100).values[0][0]
    qplad_downscaled_mean = downscaled_ds[variable].isel(time=100).mean().values
    np.testing.assert_almost_equal(bc_timestep, qplad_downscaled_mean)
Example No. 21
def test_get_attrs_global():
    """Test that services.get_attrs returns json of global attrs"""
    url = "memory://test_get_attrs_global/x.zarr"
    repository.write(url, xr.Dataset({"bar": "SPAM"}, attrs={"fish": "chips"}))
    out = get_attrs(url)
    assert out == '{"fish": "chips"}'
Example No. 22
def test_prime_qdm_output_zarrstore():
    """
    Test that prime_qdm_output_zarrstore creates a Zarr store with the expected variables, shapes, and attrs.

    We test this by running QDM (train + apply) in its usual mode and then
    using comparable parameters to prime a Zarr store. We then compare the
    two outputs.
    """
    # Setup input data.
    quantile_variable = "sim_q"
    target_variable = "fakevariable"
    variable_kind = "additive"
    n_histdays = 10 * 365  # 10 years of daily historical.
    n_simdays = 50 * 365  # 50 years of daily simulation.

    model_bias = 2
    ts_ref = np.ones(n_histdays, dtype=np.float64)
    ts_sim = np.ones(n_simdays, dtype=np.float64)
    hist = _datafactory(ts_ref + model_bias, variable_name=target_variable)
    ref = _datafactory(ts_ref, variable_name=target_variable)
    sim = _datafactory(ts_sim + model_bias, variable_name=target_variable)

    # Load up a fake repo with our input data in the place of big data and cloud
    # storage.
    qdm_key = "memory://test_prime_qdm_output_zarrstore/qdm.zarr"
    hist_key = "memory://test_prime_qdm_output_zarrstore/hist.zarr"
    ref_key = "memory://test_prime_qdm_output_zarrstore/ref.zarr"
    sim_key = "memory://test_prime_qdm_output_zarrstore/sim.zarr"
    sim_adj_key = "memory://test_prime_qdm_output_zarrstore/sim_adjusted.zarr"
    primed_url = "memory://test_prime_qdm_output_zarrstore/primed.zarr"

    repository.write(sim_key, sim)
    repository.write(hist_key, hist)
    repository.write(ref_key, ref)

    target_year = 1995

    # Let's prime a QDM output.
    prime_qdm_output_zarrstore(
        simulation=sim_key,
        variable=target_variable,
        years=[target_year],
        out=primed_url,
        zarr_region_dims=["lat"],
    )

    primed_ds = repository.read(primed_url)

    # Now train, apply actual QDM and compare outputs with primed Zarr Store
    train_qdm(
        historical=hist_key,
        reference=ref_key,
        out=qdm_key,
        variable=target_variable,
        kind=variable_kind,
    )
    apply_qdm(
        simulation=sim_key,
        qdm=qdm_key,
        years=[target_year],
        variable=target_variable,
        out=sim_adj_key,
    )
    adjusted_ds = repository.read(sim_adj_key)

    # Desired variables present?
    assert (
        quantile_variable in primed_ds.variables
        and target_variable in primed_ds.variables
    )
    # Desired shapes with dims in correct order?
    assert primed_ds[quantile_variable].shape == primed_ds[target_variable].shape
    assert primed_ds[target_variable].shape == adjusted_ds[target_variable].shape
    # Output attrs matching for root and variables?
    assert primed_ds.attrs == adjusted_ds.attrs
    assert primed_ds[target_variable].attrs == adjusted_ds[target_variable].attrs
    assert primed_ds[quantile_variable].attrs == adjusted_ds[quantile_variable].attrs
Example No. 23
def test_prime_qdm_regional_apply():
    """
    Integration test checking that prime_qdm_output_zarrstore and apply_qdm can write regionally.

    The strategy is to create input data for two latitudes and run two QDMs
    (train + apply). One doing a "vanilla", global QDM. The other using a
    "regional" strategy: training and applying QDM on each latitude and then
    writing each to the same, primed zarr store. We then compare output from
    the vanilla and regional approaches.
    """
    # Setup input data.
    target_variable = "fakevariable"
    variable_kind = "additive"
    n_histdays = 10 * 365  # 10 years of daily historical.
    n_simdays = 50 * 365  # 50 years of daily simulation.

    model_bias = 2
    ts_ref = np.ones(n_histdays, dtype=np.float64)
    ts_sim = np.ones(n_simdays, dtype=np.float64)
    hist = _datafactory(ts_ref + model_bias, variable_name=target_variable)
    ref = _datafactory(ts_ref, variable_name=target_variable)
    sim = _datafactory(ts_sim + model_bias, variable_name=target_variable)

    # Append a copy of the data at a new latitude of 2.0 rather than modifying
    # the data factories to support this directly; it gives us a way to test
    # regional writes.
    sim = xr.concat([sim, sim.assign({"lat": np.array([2.0])})], dim="lat")
    sim[target_variable][
        :, :, -1
    ] += 1  # Introducing a slight difference for different lat.
    ref = xr.concat([ref, ref.assign({"lat": np.array([2.0])})], dim="lat")
    hist = xr.concat([hist, hist.assign({"lat": np.array([2.0])})], dim="lat")

    # The data factory appends a cruft "index" coordinate. Remove it because we
    # don't need it and it's easier than tinkering with the input data fixtures.
    hist = hist.drop_vars("index")
    ref = ref.drop_vars("index")
    sim = sim.drop_vars("index")

    # Load up a fake repo with our input data in the place of big data and cloud
    # storage.
    qdm_key = "memory://test_apply_qdm/qdm_global.zarr"
    qdm_region1_key = "memory://test_apply_qdm/qdm_region1.zarr"
    qdm_region2_key = "memory://test_apply_qdm/qdm_region2.zarr"
    hist_key = "memory://test_apply_qdm/hist.zarr"
    ref_key = "memory://test_apply_qdm/ref.zarr"
    sim_key = "memory://test_apply_qdm/sim.zarr"
    primed_url = "memory://test_prime_qdm_output_zarrstore/primed.zarr"
    sim_adj_key = "memory://test_apply_qdm/sim_adjusted.zarr"

    repository.write(sim_key, sim)
    repository.write(hist_key, hist)
    repository.write(ref_key, ref)

    target_years = [1994, 1995]

    # Let's prime a QDM output.
    prime_qdm_output_zarrstore(
        simulation=sim_key,
        variable=target_variable,
        years=target_years,
        out=primed_url,
        zarr_region_dims=["lat"],
    )

    # Now train, apply QDM for two cases. One with region write to primed
    # zarr and one without.

    # Writing to regions
    region_1 = {"lat": slice(0, 1)}
    train_qdm(
        historical=hist_key,
        reference=ref_key,
        out=qdm_region1_key,
        variable=target_variable,
        kind=variable_kind,
        isel_slice=region_1,
    )
    apply_qdm(
        simulation=sim_key,
        qdm=qdm_region1_key,
        years=target_years,
        variable=target_variable,
        out=primed_url,
        isel_slice=region_1,
        out_zarr_region=region_1,
    )
    region_2 = {"lat": slice(1, 2)}
    train_qdm(
        historical=hist_key,
        reference=ref_key,
        out=qdm_region2_key,
        variable=target_variable,
        kind=variable_kind,
        isel_slice=region_2,
    )
    apply_qdm(
        simulation=sim_key,
        qdm=qdm_region2_key,
        years=target_years,
        variable=target_variable,
        out=primed_url,
        isel_slice=region_2,
        out_zarr_region=region_2,
    )
    primed_adjusted_ds = repository.read(primed_url)

    # Doing it globally, all "regions" at once.
    train_qdm(
        historical=hist_key,
        reference=ref_key,
        out=qdm_key,
        variable=target_variable,
        kind=variable_kind,
    )
    apply_qdm(
        simulation=sim_key,
        qdm=qdm_key,
        years=target_years,
        variable=target_variable,
        out=sim_adj_key,
    )
    adjusted_ds = repository.read(sim_adj_key)

    # Regional writes should reproduce the global, all-at-once output.
    xr.testing.assert_allclose(primed_adjusted_ds, adjusted_ds)
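
The prime-then-write-regions workflow used in the last two examples rests on Zarr's ability to write store metadata first and fill data by region later. A rough, self-contained sketch of the underlying xarray mechanism (illustrative only; the services wrap this behind prime_*_output_zarrstore and the out_zarr_region argument, and all names below are made up):

import dask.array as da
import numpy as np
import xarray as xr

store = "memory://prime_sketch/primed.zarr"

# Prime: lay out metadata and coordinates; dask-backed data is not computed.
template = xr.Dataset(
    {"fakevariable": (("time", "lat"), da.zeros((4, 2), chunks=(4, 1)))},
    coords={"time": np.arange(4), "lat": [1.0, 2.0]},
)
template.to_zarr(store, mode="w", compute=False)

# Later, fill one region at a time (e.g. one latitude per task or worker).
region = {"lat": slice(0, 1)}
filled = xr.Dataset(
    {"fakevariable": (("time", "lat"), np.ones((4, 1)))},
    coords={"lat": [1.0]},
)
# Coordinates that lack the region's dims (here, "time") must be left out of
# the dataset written to the region; only "lat"-bearing variables are written.
filled.to_zarr(store, region=region)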