def test_calc_perc(self, transpose):
    """Check split percentiles against numpy at a sample point, verify ordering,
    history attribute, custom values, and equivalence of the merged output."""
    ens = ensembles.create_ensemble(self.nc_datasets_simple)
    if transpose:
        ens = ens.transpose()

    split_out = ensembles.ensemble_percentiles(ens, split=True)
    point = dict(time=0, lon=5, lat=5)
    # Each split variable must equal numpy's percentile of the realizations.
    for pct, key in ((10, "tg_mean_p10"), (50, "tg_mean_p50"), (90, "tg_mean_p90")):
        np.testing.assert_array_equal(
            np.percentile(ens["tg_mean"].isel(**point), pct),
            split_out[key].isel(**point),
        )
    # Percentile fields must be strictly ordered.
    assert np.all(split_out["tg_mean_p90"] > split_out["tg_mean_p50"])
    assert np.all(split_out["tg_mean_p50"] > split_out["tg_mean_p10"])

    # Custom percentile values are honoured.
    custom_out = ensembles.ensemble_percentiles(ens, values=(25, 75))
    assert np.all(custom_out["tg_mean_p75"] > custom_out["tg_mean_p25"])

    # Provenance is recorded in the history attribute.
    assert "Computation of the percentiles on" in split_out.attrs["xclim_history"]

    # Merged (split=False) output carries the same data along a 'percentiles' dim.
    merged_out = ensembles.ensemble_percentiles(ens, split=False)
    xr.testing.assert_equal(
        split_out["tg_mean_p10"],
        merged_out.tg_mean.sel(percentiles=10, drop=True),
    )
def test_calc_perc_dask(self, keep_chunk_size):
    """A dask-chunked input must yield the same merged percentiles as in-memory data."""
    ens = ensembles.create_ensemble(self.nc_datasets_simple)
    lazy_out = ensembles.ensemble_percentiles(
        ens.chunk({"time": 2}), keep_chunk_size=keep_chunk_size, split=False
    )
    eager_out = ensembles.ensemble_percentiles(ens.load(), split=False)
    np.testing.assert_array_equal(eager_out["tg_mean"], lazy_out["tg_mean"])
def test_calc_perc_dask(self, keep_chunk_size):
    """Chunked and fully-loaded inputs agree for each requested split percentile."""
    ens = ensembles.create_ensemble(self.nc_files_simple)
    chunked = ensembles.ensemble_percentiles(
        ens.chunk({"time": 2}), values=(10, 50, 90), keep_chunk_size=keep_chunk_size
    )
    loaded = ensembles.ensemble_percentiles(ens.load())
    for key in ("tg_mean_p10", "tg_mean_p50", "tg_mean_p90"):
        np.testing.assert_array_equal(loaded[key], chunked[key])
def test_calc_perc_blocks(self):
    """Block-wise processing (time_block) matches the default whole-array path."""
    ens = ensembles.create_ensemble(self.nc_files_simple)
    default_out = ensembles.ensemble_percentiles(ens)
    blocked_out = ensembles.ensemble_percentiles(
        ens, values=(10, 50, 90), time_block=10
    )
    for key in ("tg_mean_p10", "tg_mean_p50", "tg_mean_p90"):
        np.testing.assert_array_equal(default_out[key], blocked_out[key])
def test_calc_perc(self):
    """Default percentiles match numpy at a sample point and are ordered."""
    ens = ensembles.create_ensemble(self.nc_files_simple)
    out = ensembles.ensemble_percentiles(ens)
    # Compare each output variable with numpy's percentile over the realizations.
    for pct, key in ((10, "tg_mean_p10"), (50, "tg_mean_p50"), (90, "tg_mean_p90")):
        np.testing.assert_array_equal(
            np.percentile(ens["tg_mean"][:, 0, 5, 5], pct),
            out[key][0, 5, 5],
        )
    assert np.all(out["tg_mean_p90"] > out["tg_mean_p50"])
    assert np.all(out["tg_mean_p50"] > out["tg_mean_p10"])

    # Custom percentile values are honoured.
    out = ensembles.ensemble_percentiles(ens, values=(25, 75))
    assert np.all(out["tg_mean_p75"] > out["tg_mean_p25"])
def make_ensemble(
    files: List[Path],
    percentiles: List[int],
    average_dims: Optional[Tuple[str]] = None,
) -> "xr.Dataset":
    """Build an ensemble from *files* and compute the requested percentiles.

    Parameters
    ----------
    files : List[Path]
        NetCDF files, one per ensemble realization.
    percentiles : List[int]
        Percentile values to compute (e.g. [10, 50, 90]).
    average_dims : Optional[Tuple[str]]
        If given, dimensions to average over before computing percentiles.

    Returns
    -------
    xr.Dataset
        The loaded dataset of ensemble percentiles.
        (Fixed: the annotation previously said ``None`` although the function
        returns the dataset.)
    """
    ensemble = ensembles.create_ensemble(files)
    # Make sure we have data starting in 1950.
    ensemble = ensemble.sel(time=(ensemble.time.dt.year >= 1950))

    # If data is in day of year, percentiles won't make sense.
    # Convert to "days since" (base will be the time coordinate).
    for v in ensemble.data_vars:
        if ensemble[v].attrs.get('is_dayofyear', 0) == 1:
            ensemble[v] = doy_to_days_since(ensemble[v])

    if average_dims is not None:
        ensemble = ensemble.mean(dim=average_dims)

    ensemble_percentiles = ensembles.ensemble_percentiles(
        ensemble, values=percentiles
    )

    # Doy data converted previously is converted back.
    for v in ensemble_percentiles.data_vars:
        if ensemble_percentiles[v].attrs.get('units', '').startswith('days after'):
            ensemble_percentiles[v] = days_since_to_doy(ensemble_percentiles[v])

    # Depending on the datasets, I've found that writing the netcdf could hang
    # if the dataset was not loaded explicitly previously... Not sure why.
    # The datasets should be pretty small when computing the ensembles, so this is
    # a best effort at working around what looks like a bug in either xclim or
    # xarray. The xarray documentation mentions: 'this method can be necessary
    # when working with many file objects on disk.'
    ensemble_percentiles.load()
    return ensemble_percentiles
def test_calc_perc(self, transpose):
    """Percentiles match numpy at a sample point, optionally on transposed input."""
    ens = ensembles.create_ensemble(self.nc_files_simple)
    if transpose:
        ens = ens.transpose()
    out = ensembles.ensemble_percentiles(ens)
    point = dict(time=0, lon=5, lat=5)
    for pct, key in ((10, "tg_mean_p10"), (50, "tg_mean_p50"), (90, "tg_mean_p90")):
        np.testing.assert_array_equal(
            np.percentile(ens["tg_mean"].isel(**point), pct),
            out[key].isel(**point),
        )
    # Percentile fields must be strictly ordered.
    assert np.all(out["tg_mean_p90"] > out["tg_mean_p50"])
    assert np.all(out["tg_mean_p50"] > out["tg_mean_p10"])

    # Custom percentile values are honoured.
    out = ensembles.ensemble_percentiles(ens, values=(25, 75))
    assert np.all(out["tg_mean_p75"] > out["tg_mean_p25"])
def test_calc_perc_nans(self):
    """NaN members are handled like masked values (scipy mquantiles reference)."""
    ens = ensembles.create_ensemble(self.nc_datasets_simple).load()
    # Poison one realization at two time steps of a single grid point.
    ens.tg_mean[2, 0, 5, 5] = np.nan
    ens.tg_mean[2, 7, 5, 5] = np.nan
    out = ensembles.ensemble_percentiles(ens, split=True)
    for t in (0, 7):
        masked = np.ma.fix_invalid(ens["tg_mean"][:, t, 5, 5])
        np.testing.assert_array_almost_equal(
            mquantiles(masked, 0.10, alphap=1, betap=1),
            out["tg_mean_p10"][t, 5, 5],
        )
    assert np.all(out["tg_mean_p90"] > out["tg_mean_p50"])
    assert np.all(out["tg_mean_p50"] > out["tg_mean_p10"])
def test_calc_perc_nans(self):
    """Ensemble percentiles skip NaNs (nanpercentile semantics)."""
    ens = ensembles.create_ensemble(self.nc_files_simple).load()
    # Poison one realization at two time steps of a single grid point.
    ens.tg_mean[2, 0, 5, 5] = np.nan
    ens.tg_mean[2, 7, 5, 5] = np.nan
    out = ensembles.ensemble_percentiles(ens)
    for t in (0, 7):
        # Plain np.percentile is poisoned by the NaN...
        np.testing.assert_array_equal(
            np.percentile(ens['tg_mean'][:, t, 5, 5], 10), np.nan
        )
        # ...while the ensemble result matches the NaN-skipping reference.
        np.testing.assert_array_equal(
            np.nanpercentile(ens['tg_mean'][:, t, 5, 5], 10),
            out['tg_mean_p10'][t, 5, 5],
        )
    assert np.all(out['tg_mean_p90'] > out['tg_mean_p50'])
    assert np.all(out['tg_mean_p50'] > out['tg_mean_p10'])
def test_calc_perc_nans(self):
    """Split ensemble percentiles skip NaNs (nanpercentile semantics)."""
    ens = ensembles.create_ensemble(self.nc_datasets_simple).load()
    # Poison one realization at two time steps of a single grid point.
    ens.tg_mean[2, 0, 5, 5] = np.nan
    ens.tg_mean[2, 7, 5, 5] = np.nan
    out = ensembles.ensemble_percentiles(ens, split=True)
    for t in (0, 7):
        # Plain np.percentile is poisoned by the NaN...
        np.testing.assert_array_equal(
            np.percentile(ens["tg_mean"][:, t, 5, 5], 10), np.nan
        )
        # ...while the ensemble result matches the NaN-skipping reference.
        np.testing.assert_array_equal(
            np.nanpercentile(ens["tg_mean"][:, t, 5, 5], 10),
            out["tg_mean_p10"][t, 5, 5],
        )
    assert np.all(out["tg_mean_p90"] > out["tg_mean_p50"])
    assert np.all(out["tg_mean_p50"] > out["tg_mean_p10"])
def exp_xcdef(ds, percentiles):
    """Reference implementation: delegate directly to xclim's ensemble_percentiles."""
    result = xcens.ensemble_percentiles(ds, percentiles)
    return result