def test_create_ensemble(self):
    """Check that ensembles built from paths and from datasets agree.

    The ensemble is first created from ``self.nc_files_simple`` and its
    number of realizations is compared with the number of inputs. It is then
    rebuilt from ``xr.Dataset`` objects opened from the same paths, and every
    coordinate and data variable of the two ensembles is compared.
    """
    paths = self.nc_files_simple
    ens = ensembles.create_ensemble(paths)
    assert len(ens.realization) == len(paths)

    # Create the ensemble again, this time from xr.Dataset objects.
    datasets = []
    for path in paths:
        opened = xr.open_dataset(path, decode_times=False)
        # Opened with raw times; replace them with the CF-decoded time axis.
        opened["time"] = xr.decode_cf(opened).time
        datasets.append(opened)
    ens1 = ensembles.create_ensemble(datasets)

    for name in [*ens1.coords, *ens1.data_vars]:
        np.testing.assert_array_equal(ens[name], ens1[name])
def test_calc_perc(self, transpose):
    """Compare ensemble percentiles with ``np.percentile`` at one grid point.

    Parameters
    ----------
    transpose : bool
        When truthy, the ensemble is transposed before percentiles are
        computed.
    """
    ens = ensembles.create_ensemble(self.nc_datasets_simple)
    if transpose:
        ens = ens.transpose()

    split_out = ensembles.ensemble_percentiles(ens, split=True)

    # Select by name so the check is independent of dimension order.
    point = {"time": 0, "lon": 5, "lat": 5}
    for pct in (10, 50, 90):
        np.testing.assert_array_equal(
            np.percentile(ens["tg_mean"].isel(**point), pct),
            split_out[f"tg_mean_p{pct}"].isel(**point),
        )

    assert np.all(split_out["tg_mean_p90"] > split_out["tg_mean_p50"])
    assert np.all(split_out["tg_mean_p50"] > split_out["tg_mean_p10"])

    quartiles = ensembles.ensemble_percentiles(ens, values=(25, 75))
    assert np.all(quartiles["tg_mean_p75"] > quartiles["tg_mean_p25"])

    history = split_out.attrs["xclim_history"]
    assert "Computation of the percentiles on" in history

    # The non-split output must carry the same values along "percentiles".
    stacked = ensembles.ensemble_percentiles(ens, split=False)
    xr.testing.assert_equal(
        split_out["tg_mean_p10"],
        stacked.tg_mean.sel(percentiles=10, drop=True),
    )
def make_ensemble(
    files: List[Path],
    percentiles: List[int],
    average_dims: Optional[Tuple[str, ...]] = None,
) -> "xr.Dataset":
    """Build an ensemble from ``files`` and return its percentiles.

    Parameters
    ----------
    files
        Inputs passed to ``ensembles.create_ensemble``.
    percentiles
        Percentile values passed as ``values`` to
        ``ensembles.ensemble_percentiles``.
    average_dims
        Optional dimension names to average over before the percentiles are
        computed. ``None`` skips the averaging.

    Returns
    -------
    xr.Dataset
        The percentiles of the ensemble, loaded into memory.
    """
    ensemble = ensembles.create_ensemble(files)

    # Make sure we have data starting in 1950.
    ensemble = ensemble.sel(time=(ensemble.time.dt.year >= 1950))

    # If data is in day of year, percentiles won't make sense.
    # Convert to "days since" (base will be the time coordinate).
    # Iterate over a snapshot of the names because the loop reassigns
    # variables on the dataset it is walking.
    for name in list(ensemble.data_vars):
        if ensemble[name].attrs.get('is_dayofyear', 0) == 1:
            ensemble[name] = doy_to_days_since(ensemble[name])

    if average_dims is not None:
        ensemble = ensemble.mean(dim=average_dims)

    ensemble_percentiles = ensembles.ensemble_percentiles(
        ensemble, values=percentiles)

    # Doy data converted previously is converted back.
    for name in list(ensemble_percentiles.data_vars):
        units = ensemble_percentiles[name].attrs.get('units', '')
        if units.startswith('days after'):
            ensemble_percentiles[name] = days_since_to_doy(
                ensemble_percentiles[name])

    # Depending on the datasets, I've found that writing the netcdf could hang
    # if the dataset was not loaded explicitly previously... Not sure why.
    # The datasets should be pretty small when computing the ensembles, so
    # this is a best effort at working around what looks like a bug in either
    # xclim or xarray. The xarray documentation mentions: 'this method can be
    # necessary when working with many file objects on disk.'
    ensemble_percentiles.load()

    return ensemble_percentiles
def test_calc_perc_dask(self, keep_chunk_size):
    """Check that chunked and loaded inputs give the same percentiles.

    Parameters
    ----------
    keep_chunk_size
        Forwarded to ``ensembles.ensemble_percentiles`` for the chunked run.
    """
    ens = ensembles.create_ensemble(self.nc_datasets_simple)

    # Chunk first: ``ens.load()`` below is called after the chunked run,
    # in the same order as the reference computation requires.
    chunked = ens.chunk({"time": 2})
    out2 = ensembles.ensemble_percentiles(
        chunked, keep_chunk_size=keep_chunk_size, split=False
    )

    loaded = ens.load()
    out1 = ensembles.ensemble_percentiles(loaded, split=False)

    np.testing.assert_array_equal(out1["tg_mean"], out2["tg_mean"])
def test_calc_perc_blocks(self):
    """Check that passing ``time_block=10`` does not change the percentiles.

    The default call is compared with a call using ``values=(10, 50, 90)``
    and ``time_block=10`` for the three percentile variables.
    """
    ens = ensembles.create_ensemble(self.nc_files_simple)

    reference = ensembles.ensemble_percentiles(ens)
    blocked = ensembles.ensemble_percentiles(
        ens, values=(10, 50, 90), time_block=10
    )

    for key in ("tg_mean_p10", "tg_mean_p50", "tg_mean_p90"):
        np.testing.assert_array_equal(reference[key], blocked[key])
def test_calc_perc_dask(self, keep_chunk_size):
    """Check that chunked and loaded inputs give the same percentile variables.

    Parameters
    ----------
    keep_chunk_size
        Forwarded to ``ensembles.ensemble_percentiles`` for the chunked run.
    """
    ens = ensembles.create_ensemble(self.nc_files_simple)

    # The chunked run happens before ``ens.load()`` is called.
    chunked = ens.chunk({"time": 2})
    out2 = ensembles.ensemble_percentiles(
        chunked, values=(10, 50, 90), keep_chunk_size=keep_chunk_size
    )
    out1 = ensembles.ensemble_percentiles(ens.load())

    for key in ("tg_mean_p10", "tg_mean_p50", "tg_mean_p90"):
        np.testing.assert_array_equal(out1[key], out2[key])
def test_create_unaligned_times(self, timegen, calkw):
    """Check ensemble creation from members with different time stamps.

    Parameters
    ----------
    timegen : callable
        Builds a time index from a start date, ``periods`` and ``freq``.
    calkw : dict
        Extra keyword arguments forwarded to ``timegen``.
    """
    values = np.arange(24)
    members = []
    # One member uses "M" and the other "MS" as frequency.
    for freq in ("M", "MS"):
        index = timegen("2000-01-01", periods=24, freq=freq, **calkw)
        members.append(
            xr.DataArray(values, dims=("time",), coords={"time": index}, name="tas")
        )
    d1, d2 = members

    # Without resampling the two time axes do not line up. This check is
    # skipped when the first time index has object dtype.
    if d1["time"].dtype != "O":
        ens = ensembles.create_ensemble((d1, d2))
        assert ens.time.size == 48
        np.testing.assert_equal(ens.isel(time=0), [np.nan, 0])

    # With resampling to "MS" both members share a single time axis.
    ens = ensembles.create_ensemble((d1, d2), resample_freq="MS")
    assert ens.time.size == 24
    np.testing.assert_equal(ens.isel(time=0), [0, 0])
def test_no_time(self):
    """Check ensemble creation from datasets reduced over ``time``.

    Each file is opened, its time axis decoded, and the data averaged per
    calendar month over ``time`` before the ensemble is built from the
    resulting ``xr.Dataset`` objects.
    """
    monthly_means = []
    for path in self.nc_files_simple:
        opened = xr.open_dataset(path, decode_times=False)
        opened["time"] = xr.decode_cf(opened).time
        by_month = opened.groupby(opened.time.dt.month)
        monthly_means.append(by_month.mean("time", keep_attrs=True))

    ens = ensembles.create_ensemble(monthly_means)
    assert len(ens.realization) == len(self.nc_files_simple)
def test_no_time(self, tmp_path):
    """Check ensemble creation from data reduced over ``time``.

    The monthly means of every file are used twice: as in-memory datasets
    and as netCDF files written under ``tmp_path / "notime"``. Both routes
    must yield one realization per input file.

    Parameters
    ----------
    tmp_path
        Directory under which the ``notime`` sub-directory is created.
    """
    out_dir = Path(tmp_path / "notime")
    out_dir.mkdir()

    ds_all = []
    for n in self.nc_files:
        ds = open_dataset(os.path.join("EnsembleStats", n), decode_times=False)
        ds["time"] = xr.decode_cf(ds).time
        # Compute the monthly mean once and reuse it for both the in-memory
        # list and the file on disk.
        monthly = ds.groupby(ds.time.dt.month).mean("time", keep_attrs=True)
        ds_all.append(monthly)
        monthly.to_netcdf(out_dir.joinpath(n))

    # Create the ensemble from xr.Dataset objects.
    ens = ensembles.create_ensemble(ds_all)
    assert len(ens.realization) == len(self.nc_files)

    # Create the ensemble again from the files written above.
    in_ncs = list(out_dir.glob("*.nc"))
    ens = ensembles.create_ensemble(in_ncs)
    assert len(ens.realization) == len(self.nc_files)
def test_create_ensemble(self):
    """Check that ensembles built from two kinds of input agree.

    The ensemble from ``self.nc_datasets_simple`` must have one realization
    per input and 151 time steps. A second ensemble is built from datasets
    opened from ``self.nc_files``; all coordinates and variables must match,
    and each realization must equal the dataset it was built from.
    """
    sources = self.nc_datasets_simple
    ens = ensembles.create_ensemble(sources)
    assert len(ens.realization) == len(sources)
    assert len(ens.time) == 151

    # Create the ensemble again using xr.Dataset objects.
    ds_all = []
    for fname in self.nc_files:
        member = open_dataset(
            os.path.join("EnsembleStats", fname), decode_times=False
        )
        member["time"] = xr.decode_cf(member).time
        ds_all.append(member)
    ens1 = ensembles.create_ensemble(ds_all)

    for name in [*ens1.coords, *ens1.data_vars]:
        np.testing.assert_array_equal(ens[name], ens1[name])

    # Realization i must hold the values of the i-th input dataset.
    for idx in range(len(ens1.realization)):
        np.testing.assert_array_equal(
            ens1.isel(realization=idx).tg_mean.values,
            ds_all[idx].tg_mean.values,
        )
def test_calc_mean_std_min_max(self):
    """Compare ensemble statistics with direct reductions over realizations.

    The mean, standard deviation, maximum and minimum are checked at index
    ``[0, 5, 5]`` of the output against the same reduction applied to
    ``ens['tg_mean'][:, 0, 5, 5]`` along ``realization``.
    """
    ens = ensembles.create_ensemble(self.nc_files_simple)
    out1 = ensembles.ensemble_mean_std_max_min(ens)

    # (reduction method on the input, suffix of the output variable)
    checks = (
        ("mean", "mean"),
        ("std", "stdev"),
        ("max", "max"),
        ("min", "min"),
    )
    for reducer, suffix in checks:
        expected = getattr(ens['tg_mean'][:, 0, 5, 5], reducer)(dim='realization')
        actual = getattr(out1, f"tg_mean_{suffix}")[0, 5, 5]
        np.testing.assert_array_equal(expected, actual)
def test_calc_perc(self):
    """Compare ensemble percentiles with ``np.percentile`` at one grid point.

    The 10th, 50th and 90th percentiles are checked at index ``[0, 5, 5]``,
    followed by ordering checks between percentiles, including a second run
    with ``values=(25, 75)``.
    """
    ens = ensembles.create_ensemble(self.nc_files_simple)
    defaults = ensembles.ensemble_percentiles(ens)

    for pct in (10, 50, 90):
        expected = np.percentile(ens["tg_mean"][:, 0, 5, 5], pct)
        np.testing.assert_array_equal(
            expected, defaults[f"tg_mean_p{pct}"][0, 5, 5]
        )

    assert np.all(defaults["tg_mean_p90"] > defaults["tg_mean_p50"])
    assert np.all(defaults["tg_mean_p50"] > defaults["tg_mean_p10"])

    quartiles = ensembles.ensemble_percentiles(ens, values=(25, 75))
    assert np.all(quartiles["tg_mean_p75"] > quartiles["tg_mean_p25"])
def test_calc_perc_nans(self):
    """Check percentiles when one realization holds NaNs.

    NaNs are written into realization 2 at time indices 0 and 7 of grid
    point ``(5, 5)``. The 10th percentile at those points is compared with
    ``mquantiles`` applied to the masked input values.
    """
    nan_times = (0, 7)

    ens = ensembles.create_ensemble(self.nc_datasets_simple).load()
    for t in nan_times:
        ens.tg_mean[2, t, 5, 5] = np.nan

    out1 = ensembles.ensemble_percentiles(ens, split=True)

    for t in nan_times:
        # Mask the invalid entries so mquantiles leaves them out.
        masked_arr = np.ma.fix_invalid(ens["tg_mean"][:, t, 5, 5])
        np.testing.assert_array_almost_equal(
            mquantiles(masked_arr, 0.10, alphap=1, betap=1),
            out1["tg_mean_p10"][t, 5, 5],
        )

    assert np.all(out1["tg_mean_p90"] > out1["tg_mean_p50"])
    assert np.all(out1["tg_mean_p50"] > out1["tg_mean_p10"])
def test_calc_mean_std_min_max(self):
    """Compare ensemble statistics with direct reductions over realizations.

    Mean, standard deviation, maximum and minimum are checked at index
    ``[0, 5, 5]``. The output must also record the computation in its
    ``xclim_history`` attribute.
    """
    ens = ensembles.create_ensemble(self.nc_datasets_simple)
    out1 = ensembles.ensemble_mean_std_max_min(ens)

    expected_mean = ens["tg_mean"][:, 0, 5, 5].mean(dim="realization")
    np.testing.assert_array_equal(expected_mean, out1.tg_mean_mean[0, 5, 5])

    expected_std = ens["tg_mean"][:, 0, 5, 5].std(dim="realization")
    np.testing.assert_array_equal(expected_std, out1.tg_mean_stdev[0, 5, 5])

    expected_max = ens["tg_mean"][:, 0, 5, 5].max(dim="realization")
    np.testing.assert_array_equal(expected_max, out1.tg_mean_max[0, 5, 5])

    expected_min = ens["tg_mean"][:, 0, 5, 5].min(dim="realization")
    np.testing.assert_array_equal(expected_min, out1.tg_mean_min[0, 5, 5])

    history = out1.attrs["xclim_history"]
    assert "Computation of statistics on" in history
def test_calc_perc_nans(self):
    """Check that ensemble percentiles ignore NaNs in a realization.

    NaNs are written into realization 2 at time indices 0 and 7 of grid
    point ``(5, 5)``. ``np.percentile`` must give NaN there, while the
    ensemble's 10th percentile must equal ``np.nanpercentile``.
    """
    nan_times = (0, 7)

    ens = ensembles.create_ensemble(self.nc_files_simple).load()
    for t in nan_times:
        ens.tg_mean[2, t, 5, 5] = np.nan

    out1 = ensembles.ensemble_percentiles(ens)

    # Sanity check: the plain percentile is NaN at the modified points.
    for t in nan_times:
        np.testing.assert_array_equal(
            np.percentile(ens['tg_mean'][:, t, 5, 5], 10), np.nan)

    # The ensemble result must match the NaN-aware percentile.
    for t in nan_times:
        np.testing.assert_array_equal(
            np.nanpercentile(ens['tg_mean'][:, t, 5, 5], 10),
            out1['tg_mean_p10'][t, 5, 5])

    assert np.all(out1['tg_mean_p90'] > out1['tg_mean_p50'])
    assert np.all(out1['tg_mean_p50'] > out1['tg_mean_p10'])
def test_calc_perc_nans(self):
    """Check that split ensemble percentiles ignore NaNs in a realization.

    NaNs are written into realization 2 at time indices 0 and 7 of grid
    point ``(5, 5)``. ``np.percentile`` must give NaN there, while the
    ensemble's 10th percentile must equal ``np.nanpercentile``.
    """
    ens = ensembles.create_ensemble(self.nc_datasets_simple).load()
    ens.tg_mean[2, 0, 5, 5] = np.nan
    ens.tg_mean[2, 7, 5, 5] = np.nan

    out1 = ensembles.ensemble_percentiles(ens, split=True)
    p10 = out1["tg_mean_p10"]

    # Sanity check: the plain percentile is NaN at the modified points.
    plain_t0 = np.percentile(ens["tg_mean"][:, 0, 5, 5], 10)
    np.testing.assert_array_equal(plain_t0, np.nan)
    plain_t7 = np.percentile(ens["tg_mean"][:, 7, 5, 5], 10)
    np.testing.assert_array_equal(plain_t7, np.nan)

    # The ensemble result must match the NaN-aware percentile.
    nan_aware_t0 = np.nanpercentile(ens["tg_mean"][:, 0, 5, 5], 10)
    np.testing.assert_array_equal(nan_aware_t0, p10[0, 5, 5])
    nan_aware_t7 = np.nanpercentile(ens["tg_mean"][:, 7, 5, 5], 10)
    np.testing.assert_array_equal(nan_aware_t7, p10[7, 5, 5])

    assert np.all(out1["tg_mean_p90"] > out1["tg_mean_p50"])
    assert np.all(out1["tg_mean_p50"] > p10)
def test_calc_perc(self, transpose):
    """Compare ensemble percentiles with ``np.percentile`` at one grid point.

    Parameters
    ----------
    transpose : bool
        When truthy, the ensemble is transposed before percentiles are
        computed.
    """
    ens = ensembles.create_ensemble(self.nc_files_simple)
    if transpose:
        ens = ens.transpose()

    defaults = ensembles.ensemble_percentiles(ens)

    # Select by name so the check is independent of dimension order.
    where = {"time": 0, "lon": 5, "lat": 5}
    for pct in (10, 50, 90):
        expected = np.percentile(ens["tg_mean"].isel(**where), pct)
        actual = defaults[f"tg_mean_p{pct}"].isel(**where)
        np.testing.assert_array_equal(expected, actual)

    assert np.all(defaults["tg_mean_p90"] > defaults["tg_mean_p50"])
    assert np.all(defaults["tg_mean_p50"] > defaults["tg_mean_p10"])

    quartiles = ensembles.ensemble_percentiles(ens, values=(25, 75))
    assert np.all(quartiles["tg_mean_p75"] > quartiles["tg_mean_p25"])
def test_create_unequal_times(self):
    """Check ensemble creation from files covering different periods.

    The ensemble must span 1950 to 2100. Realizations built from files with
    "1970-2050" in their name must be NaN before 1970 and after 2050, and a
    mean computed with ``skipna=False`` must be valid only from 1970 to 2050.
    """
    ens = ensembles.create_ensemble(self.nc_files)
    assert len(ens.realization) == len(self.nc_files)

    years = ens.time.dt.year
    assert years.min() == 1950
    assert years.max() == 2100

    short_members = [
        idx for idx, fname in enumerate(self.nc_files) if "1970-2050" in fname
    ]

    # Assert that the shorter members are padded with NaNs.
    padded = ens.tg_mean.isel(realization=short_members)
    assert np.all(np.isnan(padded.sel(time=years < 1970)))
    assert np.all(np.isnan(padded.sel(time=years > 2050)))

    # With skipna=False, one NaN member makes the mean NaN at that time step.
    ens_mean = ens.tg_mean.mean(dim=["realization", "lon", "lat"], skipna=False)
    valid = ens_mean.where(~np.isnan(ens_mean), drop=True)
    assert valid.time.dt.year.min() == 1970
    assert valid.time.dt.year.max() == 2050
def test_create_ensemble(self):
    """Check that the ensemble has one realization per input file."""
    sources = self.nc_files_simple
    ens = ensembles.create_ensemble(sources)

    n_realizations = len(ens.realization)
    assert n_realizations == len(sources)
ax.plot(times, mem, label=name, color=colors[name]) ax.legend() ax.set_xlabel('Computation time [s]') ax.set_ylabel('Memory usage [MiB]') ax.set_title('Memory usage of different percentile calculations') plt.show() else: if args.with_client: c = Client(n_workers=1, threads_per_worker=args.nthreads, memory_limit=args.max_mem) num_real = len(glob.glob(testfile.format(r='*', i=0))) ds = xcens.create_ensemble( [glob.glob(testfile.format(r=r, i='*')) for r in range(num_real)], mf_flag=True, combine='by_coords') print(f'Running rolling with exp: {args.exp}') ds_out = all_exps[args.exp](ds, [10, 50, 90]) print('Writing to file') r = ds_out.to_netcdf(outfile.format(args.exp), compute=False) r.compute() ds_out.close() if args.with_client: c.close()
def test_create_unequal_times(self):
    """Check the realization count and year range of the ensemble.

    The ensemble built from ``self.nc_files`` must have one realization per
    file and a time axis running from 1970 to 2050.
    """
    sources = self.nc_files
    ens = ensembles.create_ensemble(sources)
    assert len(ens.realization) == len(sources)

    years = ens.time.dt.year
    assert years.min() == 1970
    assert years.max() == 2050