def make_climpred_hindcast_object(hindcast, observations):
    """Build a climpred ``HindcastEnsemble`` from hindcasts and observations.

    The hindcast streamflow dataset is wrapped in a ``HindcastEnsemble`` and
    the observations are attached to it, so that the result can be used by
    the climpred toolbox for hindcast verification.

    Parameters
    ----------
    hindcast : xarray.Dataset
        The hindcasting streamflow data for a given period.
    observations : xarray.Dataset
        The streamflow observations that are used to verify the hindcasts.

    Returns
    -------
    hindcast_obj : climpred.HindcastEnsemble object
        The hindcast ensemble, with the observations attached, formatted to
        be used in climpred.
    """
    # TODO: Add verification that the variable names are the same.
    # TODO: Add verification of sizes, catch and return message.

    # Wrap the hindcast data, then attach the observations that serve as the
    # verification reference.
    return HindcastEnsemble(hindcast).add_observations(observations)
def test_HindcastEnsemble_instantiating_standard_name(
    da_lead, dim, new_dim, cf_standard_name
):
    """Test that a renamed dimension is recovered via its ``standard_name``.

    ``dim`` is renamed to ``new_dim``. Unless ``dim`` is ``"member"``,
    building a ``HindcastEnsemble`` from that dataset must raise
    ``DimensionError``. After ``standard_name`` is set on the renamed
    coordinate, construction must emit a ``UserWarning`` and ``dim`` must be
    present again in the dims of the initialized dataset.
    """
    init = (
        da_lead.to_dataset(name="var")
        .expand_dims("member")
        .assign_coords(member=[1])
    )
    init["init"] = xr.cftime_range(start="2000", periods=init.init.size, freq="YS")
    init["lead"].attrs["units"] = "years"
    # change to non CLIMPRED_DIMS
    init = init.rename({dim: new_dim})

    if dim != "member":  # member not required
        with pytest.raises(
            DimensionError,
            match="PredictionEnsemble object must contain the following dimensions",
        ):
            HindcastEnsemble(init)

    init[new_dim].attrs["standard_name"] = cf_standard_name
    # find renamed after warning
    with pytest.warns(UserWarning, match="but renamed dimension"):
        init = HindcastEnsemble(init).get_initialized()
    # Use a real message: ``print(...)`` returns None, which would leave the
    # AssertionError without any text.
    assert dim in init.dims, (
        f"{dim!r} missing after renaming: dims={init.dims}, coords={init.coords}"
    )
def test_verify_single(hind_ds_initialized_1d, reconstruction_ds_1d):
    """Check that ``verify()`` runs without arguments for one observational product."""
    # Attach exactly one named observational product.
    ensemble = HindcastEnsemble(hind_ds_initialized_1d).add_observations(
        reconstruction_ds_1d, 'reconstruction'
    )
    # Only checks that the call completes; the result is not inspected.
    ensemble.verify()
def test_HindcastEnsemble_verify_lead0_lead1(
    hindcast_hist_obs_1d, hind_ds_initialized_1d_lead0
):
    """Check that ``verify()`` agrees between the lead-0 and lead-1 frameworks.

    The skill of ``hindcast_hist_obs_1d`` is compared with the skill of an
    ensemble built from ``hind_ds_initialized_1d_lead0`` and the same
    observations; both results must be equal.
    """
    kw = dict(metric="rmse", comparison="e2o", dim="init", alignment="same_verifs")
    res = hindcast_hist_obs_1d.verify(**kw)

    hind_shifted_lead = HindcastEnsemble(
        hind_ds_initialized_1d_lead0
    ).add_observations(hindcast_hist_obs_1d.get_observations())

    # Diagnostic output (shown by pytest on failure): alignment of both ensembles.
    for ensemble in (hindcast_hist_obs_1d, hind_shifted_lead):
        print(
            ensemble.plot_alignment(
                return_xr=True, alignment=kw["alignment"], reference=[]
            )
        )

    res_lead_init_shift = hind_shifted_lead.verify(**kw)
    # Build a real failure message: ``print(...)`` returns None, which would
    # leave the AssertionError without any text.
    assert res.equals(res_lead_init_shift), (
        f"{res}\n{res_lead_init_shift}\n{res - res_lead_init_shift}"
    )
def test_hindcastEnsemble_operator_different_datasets(
    hind_ds_initialized_1d, observations_ds_1d
):
    """Test subtraction between HindcastEnsembles that hold different datasets.

    One ensemble has observations attached and the other does not; the
    difference must not equal the left-hand operand, in either order.
    """
    with_obs = HindcastEnsemble(hind_ds_initialized_1d).add_observations(
        observations_ds_1d
    )
    init_only = HindcastEnsemble(hind_ds_initialized_1d)

    init_minus_obs = init_only - with_obs
    assert not init_minus_obs.equals(init_only)

    obs_minus_init = with_obs - init_only
    assert not obs_minus_init.equals(with_obs)
def hindcast_recon_3d(hind_ds_initialized_3d, reconstruction_ds_3d):
    """Return a 3D HindcastEnsemble with a reconstruction attached as observations.

    The ensemble's own mean over the 1964-2014 ``time`` and ``init`` ranges is
    subtracted before it is returned.
    """
    period = slice("1964", "2014")
    ensemble = HindcastEnsemble(hind_ds_initialized_3d).add_observations(
        reconstruction_ds_3d
    )
    # Mean over the reference period, first along time and then along init.
    baseline = ensemble.sel(time=period).mean("time").sel(init=period).mean("init")
    return ensemble - baseline
def test_verify(hind_ds_initialized_1d, reconstruction_ds_1d):
    """Check that ``verify`` runs with explicit metric, comparison, dim and alignment."""
    ensemble = HindcastEnsemble(hind_ds_initialized_1d).add_observations(
        reconstruction_ds_1d
    )
    verify_kwargs = {
        "metric": "acc",
        "comparison": "e2o",
        "dim": "init",
        "alignment": "same_verif",
    }
    # Only checks that the call completes; the result is not inspected.
    ensemble.verify(**verify_kwargs)
def hindcast_recon_1d_mm(hindcast_recon_1d_ym, reconstruction_ds_1d_mm):
    """Return a HindcastEnsemble with monthly lead units and monthly observations.

    The initialized data is taken from ``hindcast_recon_1d_ym``, restricted to
    inits from 1964 through 1970, and its lead units are relabelled as months
    before ``reconstruction_ds_1d_mm`` is attached as observations.
    """
    initialized = hindcast_recon_1d_ym.get_initialized()
    subset = initialized.sel(init=slice("1964", "1970"))
    # Drop the existing valid_time coordinate before changing the lead units.
    del subset.coords["valid_time"]
    subset["lead"].attrs["units"] = "months"
    return HindcastEnsemble(subset).add_observations(reconstruction_ds_1d_mm)
def hindcast_hist_obs_1d(
    hind_ds_initialized_1d, hist_ds_uninitialized_1d, observations_ds_1d
):
    """Return a HindcastEnsemble with initialized, uninitialized and observations.

    The ensemble's own mean over the 1964-2014 ``time`` and ``init`` ranges is
    subtracted before it is returned.
    """
    period = slice("1964", "2014")
    ensemble = (
        HindcastEnsemble(hind_ds_initialized_1d)
        .add_uninitialized(hist_ds_uninitialized_1d)
        .add_observations(observations_ds_1d)
    )
    # Mean over the reference period, first along time and then along init.
    baseline = ensemble.sel(time=period).mean("time").sel(init=period).mean("init")
    return ensemble - baseline
def test_get_observations(hind_ds_initialized_1d, reconstruction_ds_1d):
    """Test that ``get_observations`` returns the stored observations.

    The observations are attached under the name ``'FOSI'`` and fetched both
    without and with the name; each result must equal the stored dataset.
    """
    hindcast = HindcastEnsemble(hind_ds_initialized_1d)
    hindcast = hindcast.add_observations(reconstruction_ds_1d, 'FOSI')
    expected = hindcast._datasets['observations']['FOSI']

    # ``==`` between xarray objects is element-wise, and the truth value of the
    # resulting Dataset does not reflect whether the values match, so compare
    # with ``.equals`` instead.
    # NOTE(review): assumes get_observations() returns a single Dataset when
    # only one product is attached - confirm against the API in use.

    # Without name keyword.
    obs = hindcast.get_observations()
    assert obs.equals(expected)

    # With name keyword.
    obs = hindcast.get_observations('FOSI')
    assert obs.equals(expected)
def hindcast_recon_3d(hind_ds_initialized_3d, reconstruction_ds_3d):
    """Return a 3D HindcastEnsemble with a reconstruction attached as observations.

    ``TLAT``, ``TLONG`` and ``TAREA`` are first copied from the initialized
    dataset onto the reconstruction (modifying it in place). The ensemble's
    own mean over the 1964-2014 ``time`` and ``init`` ranges is subtracted
    before it is returned.
    """
    # fix to align coords
    for coord_name in ("TLAT", "TLONG", "TAREA"):
        reconstruction_ds_3d[coord_name] = hind_ds_initialized_3d[coord_name]

    period = slice("1964", "2014")
    ensemble = HindcastEnsemble(hind_ds_initialized_3d).add_observations(
        reconstruction_ds_3d
    )
    # Mean over the reference period, first along time and then along init.
    baseline = ensemble.sel(time=period).mean("time").sel(init=period).mean("init")
    return ensemble - baseline
def test_smooth_coarsen(reconstruction_ds_3d, hind_ds_initialized_3d):
    """Test that smoothing ``nlon`` with a factor of 2 halves that dimension."""
    coarsen_dim = 'nlon'
    ensemble = (
        HindcastEnsemble(hind_ds_initialized_3d)
        .add_observations(reconstruction_ds_3d, 'reconstruction')
        .add_uninitialized(reconstruction_ds_3d)
    )
    # Keep a handle on the initialized dataset as it was before smoothing.
    before = ensemble._datasets['initialized']
    smoothed = ensemble.smooth(smooth_kws={coarsen_dim: 2})
    after = smoothed._datasets['initialized']
    assert before[coarsen_dim].size // 2 == after[coarsen_dim].size
def test_calendar_matching_uninitialized(
    hind_ds_initialized_1d, hist_ds_uninitialized_1d
):
    """Test that adding uninitialized data with another calendar raises ValueError."""
    ensemble = HindcastEnsemble(hind_ds_initialized_1d)

    # Replace the time axis (in place) with an ``all_leap`` calendar axis.
    n_times = hist_ds_uninitialized_1d.time.size
    all_leap_time = xr.cftime_range(
        start="1950", periods=n_times, freq="MS", calendar="all_leap"
    )
    hist_ds_uninitialized_1d["time"] = all_leap_time

    with pytest.raises(ValueError, match="does not match"):
        ensemble.add_uninitialized(hist_ds_uninitialized_1d)
def hindcast_recon_1d_ym(hind_ds_initialized_1d, reconstruction_ds_1d):
    """Return a HindcastEnsemble with initialized data and a reconstruction.

    The ensemble's own mean over the 1964-2014 ``time`` and ``init`` ranges is
    subtracted, after which the ``SST`` attributes of both source datasets are
    copied back onto the ensemble's datasets.
    """
    period = slice("1964", "2014")
    ensemble = HindcastEnsemble(hind_ds_initialized_1d).add_observations(
        reconstruction_ds_1d
    )
    baseline = ensemble.sel(time=period).mean("time").sel(init=period).mean("init")
    ensemble = ensemble - baseline

    # Re-apply the SST attributes taken from the unmodified source datasets.
    attr_sources = (
        ("initialized", hind_ds_initialized_1d),
        ("observations", reconstruction_ds_1d),
    )
    for key, source in attr_sources:
        ensemble._datasets[key]["SST"].attrs = source["SST"].attrs
    return ensemble
def test_calendar_matching_observations(hind_ds_initialized_1d, reconstruction_ds_1d):
    """Test that adding observations with another calendar raises ValueError."""
    ensemble = HindcastEnsemble(hind_ds_initialized_1d)

    # Replace the time axis (in place) with an ``all_leap`` calendar axis.
    n_times = reconstruction_ds_1d.time.size
    all_leap_time = xr.cftime_range(
        start="1950", periods=n_times, freq="MS", calendar="all_leap"
    )
    reconstruction_ds_1d["time"] = all_leap_time

    with pytest.raises(ValueError, match="does not match"):
        ensemble.add_observations(reconstruction_ds_1d)
def test_verify(hind_ds_initialized_1d, reconstruction_ds_1d, observations_ds_1d):
    """Check that ``verify`` can be called on an ensemble with two observation sets."""
    ensemble = (
        HindcastEnsemble(hind_ds_initialized_1d)
        .add_observations(reconstruction_ds_1d, 'reconstruction')
        .add_observations(observations_ds_1d, 'observations')
    )
    # Don't need to check for NaNs, etc. since that's handled in the prediction
    # module testing.

    # compute over all observations
    ensemble.verify()
    # compute over single observation
    ensemble.verify('reconstruction')
    # test all keywords
    ensemble.verify(metric='rmse', comparison='m2o')
def test_warn_if_chunked_along_init_member_time(
    hindcast_hist_obs_1d, perfectModelEnsemble_initialized_control
):
    """Test the chunking warning on construction and the rechunk error on verify.

    Covers a HindcastEnsemble whose initialized data is chunked along ``init``,
    one whose observations are chunked along ``time``, and a
    PerfectModelEnsemble whose initialized data is chunked along ``init``.
    """
    warn_match = "is chunked along dimensions"
    error_match = "pass ``allow_rechunk=True`` in ``dask_gufunc_kwargs``"
    verify_kwargs = dict(
        metric="rmse", dim="init", comparison="e2o", alignment="same_inits"
    )
    source = hindcast_hist_obs_1d

    # Initialized data chunked along init.
    with pytest.warns(UserWarning, match=warn_match):
        chunked = HindcastEnsemble(
            source.get_initialized().chunk({"init": 10})
        ).add_observations(source.get_observations())
        with pytest.raises(ValueError, match=error_match):
            chunked.verify(**verify_kwargs)

    # Observations chunked along time.
    with pytest.warns(UserWarning, match=warn_match):
        chunked = HindcastEnsemble(source.get_initialized()).add_observations(
            source.get_observations().chunk({"time": 10})
        )
        with pytest.raises(ValueError, match=error_match):
            chunked.verify(**verify_kwargs)

    # Same warning for a PerfectModelEnsemble.
    perfect_model = perfectModelEnsemble_initialized_control
    with pytest.warns(UserWarning, match=warn_match):
        PerfectModelEnsemble(perfect_model.get_initialized().chunk({"init": 10}))
def test_HindcastEnsemble_lead_pdTimedelta(hind_ds_initialized_1d, lead_res):
    """Test that a ``pd.Timedelta`` lead yields the expected lead units.

    The lead coordinate is replaced (in place) by ``pd.Timedelta`` values and
    the resulting ensemble must report ``lead_res`` as its lead units.
    """
    # Pentads are expressed as multiples of five days; every other resolution
    # uses the lower-cased first letter of its name as the Timedelta unit.
    step, unit = (5, "d") if lead_res == "pentads" else (1, lead_res[0].lower())

    dataset = hind_ds_initialized_1d
    dataset["lead"] = [
        pd.Timedelta(f"{lead_value * step} {unit}")
        for lead_value in dataset.lead.values
    ]

    lead_attrs = HindcastEnsemble(dataset).get_initialized().lead.attrs
    assert lead_attrs["units"] == lead_res
def test_smooth_goddard(reconstruction_ds_3d, hind_ds_initialized_3d):
    """Test that ``goddard2013`` smoothing shrinks the lead and spatial dimensions.

    After smoothing, ``lead`` must be shorter, and ``lon``/``lat`` must be
    shorter than the original ``nlon``/``nlat``.
    """
    trim_lat = {"nlat": slice(1, None)}
    ensemble = (
        HindcastEnsemble(hind_ds_initialized_3d.isel(**trim_lat))
        .add_observations(reconstruction_ds_3d.isel(**trim_lat), 'reconstruction')
        .add_uninitialized(reconstruction_ds_3d.isel(**trim_lat))
    )
    before = ensemble._datasets['initialized']
    after = ensemble.smooth(smooth_kws='goddard2013')._datasets['initialized']

    assert after['lead'].size < before['lead'].size
    for original_dim in ['nlon', 'nlat']:
        # The smoothed dataset is indexed without the leading "n".
        smoothed_dim = original_dim[1:]
        assert after[smoothed_dim].size < before[original_dim].size
def HindcastEnsemble_time_resolution(request):
    """Create HindcastEnsemble of given lead time resolution.

    ``request.param`` names the lead unit. It selects the frequency of the
    ``init`` and ``time`` axes and is stored as the ``units`` attribute of
    ``lead``. Units without an explicit entry fall back to the upper-cased
    first letter of their name.
    """
    lead_unit = request.param
    # Exact-match lookup. The former ``request.param in "months"`` was a
    # substring test, so any fragment of "months" (e.g. "m", "s", "") silently
    # selected monthly frequency.
    freq_by_unit = {
        "pentads": "5D",
        "weeks": "7D",
        "minutes": "T",
        "months": "MS",
        "seasons": "QS",
        "years": "YS",
    }
    freq = freq_by_unit.get(lead_unit)
    if freq is None:
        freq = lead_unit[0].upper()

    # create initialized
    init = xr.cftime_range(START, freq=freq, periods=NINITS)
    lead = np.arange(NLEADS)
    member = np.arange(NMEMBERS)
    initialized = xr.DataArray(
        np.random.rand(len(init), len(lead), len(member)),
        dims=["init", "lead", "member"],
        coords=[init, lead, member],
    ).to_dataset(name="var")
    initialized.lead.attrs["units"] = lead_unit

    # create observations; the time axis is NINITS + NLEADS steps long
    time = xr.cftime_range(START, freq=freq, periods=NINITS + NLEADS)
    obs = xr.DataArray(
        np.random.rand(len(time)), dims=["time"], coords=[time]
    ).to_dataset(name="var")

    # climpred.PredictionEnsemble
    hindcast = HindcastEnsemble(initialized).add_observations(obs)
    return hindcast
def test_hindcastEnsemble_init_time(init_freq, lead_unit, calendar):
    """Test that HindcastEnsemble creates a ``valid_time`` coordinate.

    For leads ``[0, 1]``, ``valid_time`` must equal ``init`` at the first lead
    and differ from ``init`` at the second lead.
    """
    n_inits = 3
    leads = [0, 1]
    init_index = xr.cftime_range(start="2000", freq=init_freq, periods=n_inits)
    values = np.random.rand(n_inits, len(leads))
    initialized = xr.DataArray(
        values,
        dims=["init", "lead"],
        coords={"init": init_index, "lead": leads},
        name="initialized",
    )
    initialized.lead.attrs["units"] = lead_unit

    coords = HindcastEnsemble(initialized).coords
    assert "valid_time" in coords

    valid_at_first_lead = coords["valid_time"].isel(lead=0, drop=True)
    valid_at_second_lead = coords["valid_time"].isel(lead=1, drop=True)
    assert (valid_at_first_lead == coords["init"]).all()
    assert (valid_at_second_lead != coords["init"]).all()
def get_data(self):
    """Load the Germany datasets and store the ensemble on ``self``.

    Calls ``_skip_slow()`` first, takes every seventh lead of the forecast
    ``t2m``, attaches the observed ``t2m`` and generates an uninitialized
    ensemble; the result is stored in ``self.PredictionEnsemble``.
    """
    _skip_slow()
    forecast_t2m = load_dataset("ECMWF_S2S_Germany").t2m
    init = forecast_t2m.isel(lead=slice(None, None, 7))
    obs = load_dataset("Observations_Germany").t2m
    ensemble = HindcastEnsemble(init).add_observations(obs)
    self.PredictionEnsemble = ensemble.generate_uninitialized()
def test_HindcastEnsemble_multidim_initialized_lessdim_verif(hindcast_hist_obs_1d):
    """Test that an extra dimension on the initialized data survives verify.

    A ``model`` dimension of length two is added to the initialized data
    only; the observations stay unchanged. The resulting skill must still
    carry the ``model`` dimension.
    """
    initialized = hindcast_hist_obs_1d.get_initialized()
    obs = hindcast_hist_obs_1d.get_observations()

    # Duplicate the data along a new "model" dimension.
    two_models = initialized.expand_dims("model").isel(model=[0] * 2)
    ensemble = HindcastEnsemble(two_models).add_observations(obs)

    verify_kwargs = dict(
        metric="acc", dim="init", comparison="e2o", alignment="same_inits"
    )
    skill = ensemble.verify(**verify_kwargs)
    assert "model" in skill.dims
def test_smooth_temporal(fosi_3d, dple_3d):
    """Test that smoothing along ``lead`` reduces the size of the lead dimension."""
    smooth_dim = 'lead'
    ensemble = HindcastEnsemble(dple_3d)
    # NOTE(review): the return values of add_reference / add_uninitialized /
    # smooth are discarded, so this test appears to rely on these methods
    # modifying the ensemble in place - confirm against the API in use.
    ensemble.add_reference(fosi_3d, 'reconstruction')
    ensemble.add_uninitialized(fosi_3d)

    before = ensemble.initialized
    ensemble.smooth(smooth_kws={smooth_dim: 4})
    after = ensemble.initialized

    assert before[smooth_dim].size > after[smooth_dim].size
def hindcast_NMME_Nino34():
    """Return a HindcastEnsemble of NMME Nino34 SST hindcasts with OIv2 observations.

    Both datasets get ``"C"`` as the units of ``sst``. The observations are
    broadcast against the hindcasts, excluding the ``L``, ``S`` and ``M``
    dimensions, before they are attached.
    """
    init = load_dataset("NMME_hindcast_Nino34_sst")
    obs = load_dataset("NMME_OIv2_Nino34_sst")
    for dataset in (init, obs):
        dataset["sst"].attrs["units"] = "C"

    # Build the ensemble before broadcasting, matching the original evaluation order.
    ensemble = HindcastEnsemble(init)
    broadcast_obs = obs.broadcast_like(init, exclude=("L", "S", "M"))
    return ensemble.add_observations(broadcast_obs)
def test_HindcastEnsemble_area_weighted_mean(hind_ds_initialized_3d):
    """Test that a manual area-weighted mean agrees with ``weighted(...).mean``.

    The mean is computed once by multiplying with ``TAREA`` and normalising by
    its sum, and once through ``weighted``; both must be close within the
    given tolerances.
    """
    ensemble = HindcastEnsemble(hind_ds_initialized_3d)
    # fake area
    cell_area = hind_ds_initialized_3d["TAREA"]
    spatial_dims = [
        name for name in hind_ds_initialized_3d.dims if name not in CLIMPRED_DIMS
    ]

    # PredictionEnsemble doesnt like other data_vars
    manual_mean = (ensemble * cell_area).sum(spatial_dims) / cell_area.sum()

    # weighted requires Dataset
    area_ds = cell_area.to_dataset(name="area")
    weighted_mean = ensemble.weighted(area_ds).mean(spatial_dims)

    assert_PredictionEnsemble(
        manual_mean, weighted_mean, how="allclose", rtol=0.03, atol=0.05
    )
def setup(self, *args, **kwargs):
    """Load data and configure the attributes read from ``self`` afterwards.

    ``get_data`` is called with ``spatial_res=360``; the ensemble is then
    assembled from ``self.initialized``, ``self.uninitialized`` and
    ``self.observations``.
    """
    self.get_data(spatial_res=360)

    ensemble = HindcastEnsemble(self.initialized)
    ensemble = ensemble.add_uninitialized(self.uninitialized)
    ensemble = ensemble.add_observations(self.observations)
    self.PredictionEnsemble = ensemble

    self.alignment = "same_inits"
    self.resample_dim = "member"
    self.reference = None
    self.iterations = ITERATIONS
def test_smooth_goddard(fosi_3d, dple_3d):
    """Test that ``goddard2013`` smoothing shrinks the lead and spatial dimensions.

    After smoothing, ``lead`` must be shorter, and ``lon``/``lat`` must be
    shorter than the original ``nlon``/``nlat``.
    """
    trim_lat = {"nlat": slice(1, None)}
    ensemble = HindcastEnsemble(dple_3d.isel(**trim_lat))
    # NOTE(review): the return values of add_reference / add_uninitialized /
    # smooth are discarded, so this test appears to rely on these methods
    # modifying the ensemble in place - confirm against the API in use.
    ensemble.add_reference(fosi_3d.isel(**trim_lat), 'reconstruction')
    ensemble.add_uninitialized(fosi_3d.isel(**trim_lat))

    before = ensemble.initialized
    ensemble.smooth(smooth_kws='goddard2013')
    after = ensemble.initialized

    assert after['lead'].size < before['lead'].size
    for original_dim in ['nlon', 'nlat']:
        # The smoothed dataset is indexed without the leading "n".
        smoothed_dim = original_dim[1:]
        assert after[smoothed_dim].size < before[original_dim].size
def test_HindcastEnsemble_multidim_verif_lessdim_initialized(hindcast_hist_obs_1d):
    """Test that observations with more dims than initialized raise DimensionError.

    A ``model`` dimension of length two is added to the observations only;
    attaching them to an ensemble built from the unchanged initialized data
    must fail.
    """
    initialized = hindcast_hist_obs_1d.get_initialized()
    obs = hindcast_hist_obs_1d.get_observations()
    expected_message = "Verification contains more dimensions than initialized"

    with pytest.raises(DimensionError, match=expected_message):
        ensemble = HindcastEnsemble(initialized)
        ensemble.add_observations(obs.expand_dims("model").isel(model=[0] * 2))
def test_hindcastEnsemble_plus_broadcast(hind_ds_initialized_3d, operator):
    """Test that a math operator on a HindcastEnsemble broadcasts its operand.

    Applying the operator with a dataset of ones (a single init/lead slice of
    the initialized data) must give the same ensemble as applying it with the
    scalar ``1``.
    """
    ensemble = HindcastEnsemble(hind_ds_initialized_3d)
    # ``operator`` arrives as a string; evaluate it to get the callable.
    apply_op = eval(operator)

    # minimal adding an offset or like multiplying area
    ones_slice = xr.ones_like(
        hind_ds_initialized_3d.isel(init=1, lead=1, drop=True)
    )
    with_dataset = apply_op(ensemble, ones_slice)
    with_scalar = apply_op(ensemble, 1)
    assert_PredictionEnsemble(with_dataset, with_scalar)