Esempio n. 1
0
    def test_create_ensemble(self):
        """Build an ensemble from paths, then from opened datasets, and compare."""
        from_paths = ensembles.create_ensemble(self.nc_files_simple)
        assert len(from_paths.realization) == len(self.nc_files_simple)

        # Second pass: open each file by hand with time decoding disabled,
        # attach the decoded time coordinate, and pass the in-memory objects.
        opened = []
        for path in self.nc_files_simple:
            raw = xr.open_dataset(path, decode_times=False)
            raw["time"] = xr.decode_cf(raw).time
            opened.append(raw)
        from_datasets = ensembles.create_ensemble(opened)

        # Every coordinate and every data variable must agree between the two.
        for name in [*from_datasets.coords, *from_datasets.data_vars]:
            np.testing.assert_array_equal(from_paths[name], from_datasets[name])
Esempio n. 2
0
    def test_calc_perc(self, transpose):
        """Compare ensemble percentiles against ``np.percentile`` at one grid point."""
        ens = ensembles.create_ensemble(self.nc_datasets_simple)
        if transpose:
            ens = ens.transpose()

        # Single location used for the point-wise comparison.
        where = {"time": 0, "lon": 5, "lat": 5}

        split_out = ensembles.ensemble_percentiles(ens, split=True)
        for pct, var in ((10, "tg_mean_p10"), (50, "tg_mean_p50"), (90, "tg_mean_p90")):
            expected = np.percentile(ens["tg_mean"].isel(**where), pct)
            np.testing.assert_array_equal(expected, split_out[var].isel(**where))

        # Percentiles must be strictly ordered everywhere.
        assert np.all(split_out["tg_mean_p90"] > split_out["tg_mean_p50"])
        assert np.all(split_out["tg_mean_p50"] > split_out["tg_mean_p10"])

        custom_out = ensembles.ensemble_percentiles(ens, values=(25, 75))
        assert np.all(custom_out["tg_mean_p75"] > custom_out["tg_mean_p25"])

        history = split_out.attrs["xclim_history"]
        assert "Computation of the percentiles on" in history

        # With split=False the percentiles are selected from a "percentiles"
        # coordinate on the original variable instead of separate variables.
        stacked_out = ensembles.ensemble_percentiles(ens, split=False)
        p10_from_stacked = stacked_out.tg_mean.sel(percentiles=10, drop=True)
        xr.testing.assert_equal(split_out["tg_mean_p10"], p10_from_stacked)
Esempio n. 3
0
def make_ensemble(files: List[Path],
                  percentiles: List[int],
                  average_dims: Optional[Tuple[str, ...]] = None) -> "xr.Dataset":
    """Build an ensemble from ``files`` and return its percentiles.

    Args:
        files: Paths handed to ``ensembles.create_ensemble``.
        percentiles: Percentile values passed as ``values`` to
            ``ensembles.ensemble_percentiles``.
        average_dims: Optional dimension names to average over before the
            percentiles are computed. ``None`` skips the averaging.

    Returns:
        The result of ``ensembles.ensemble_percentiles``, explicitly loaded
        before being returned.
    """
    # NOTE(review): the return annotation is a string so it is never evaluated
    # at runtime; it assumes ``xr`` is this module's xarray alias — confirm.
    ensemble = ensembles.create_ensemble(files)
    # make sure we have data starting in 1950
    ensemble = ensemble.sel(time=(ensemble.time.dt.year >= 1950))

    # If data is in day of year, percentiles won't make sense.
    # Convert to "days since" (base will be the time coordinate)
    # Iterate over a snapshot of the names: the loop body assigns back into
    # the dataset, so the live ``data_vars`` view should not drive the loop.
    for v in list(ensemble.data_vars):
        if ensemble[v].attrs.get('is_dayofyear', 0) == 1:
            ensemble[v] = doy_to_days_since(ensemble[v])

    if average_dims is not None:
        ensemble = ensemble.mean(dim=average_dims)

    ensemble_percentiles = ensembles.ensemble_percentiles(ensemble,
                                                          values=percentiles)

    # Doy data converted previously is converted back.
    for v in list(ensemble_percentiles.data_vars):
        if ensemble_percentiles[v].attrs.get('units',
                                             '').startswith('days after'):
            ensemble_percentiles[v] = days_since_to_doy(
                ensemble_percentiles[v])

    # Depending on the datasets, I've found that writing the netcdf could hang
    # if the dataset was not loaded explicitly previously... Not sure why.
    # The datasets should be pretty small when computing the ensembles, so this is
    # a best effort at working around what looks like a bug in either xclim or xarray.
    # The xarray documentation mentions: 'this method can be necessary when working
    # with many file objects on disk.'
    ensemble_percentiles.load()

    return ensemble_percentiles
Esempio n. 4
0
 def test_calc_perc_dask(self, keep_chunk_size):
     """Percentiles of a chunked ensemble must equal those of the loaded one."""
     ens = ensembles.create_ensemble(self.nc_datasets_simple)

     # Chunked copy is taken before ``ens.load()`` is called below.
     chunked = ens.chunk({"time": 2})
     from_chunked = ensembles.ensemble_percentiles(
         chunked, keep_chunk_size=keep_chunk_size, split=False
     )

     from_loaded = ensembles.ensemble_percentiles(ens.load(), split=False)
     np.testing.assert_array_equal(from_loaded["tg_mean"], from_chunked["tg_mean"])
Esempio n. 5
0
 def test_calc_perc_blocks(self):
     """Percentiles computed with ``time_block=10`` must match the default call."""
     ens = ensembles.create_ensemble(self.nc_files_simple)
     whole = ensembles.ensemble_percentiles(ens)
     blocked = ensembles.ensemble_percentiles(ens, values=(10, 50, 90), time_block=10)

     for var in ("tg_mean_p10", "tg_mean_p50", "tg_mean_p90"):
         np.testing.assert_array_equal(whole[var], blocked[var])
Esempio n. 6
0
 def test_calc_perc_dask(self, keep_chunk_size):
     """Percentiles of a chunked ensemble must equal those of the loaded one."""
     ens = ensembles.create_ensemble(self.nc_files_simple)

     # Chunked copy is taken before ``ens.load()`` is called below.
     chunked = ens.chunk({"time": 2})
     from_chunked = ensembles.ensemble_percentiles(
         chunked, values=(10, 50, 90), keep_chunk_size=keep_chunk_size
     )

     from_loaded = ensembles.ensemble_percentiles(ens.load())
     for var in ("tg_mean_p10", "tg_mean_p50", "tg_mean_p90"):
         np.testing.assert_array_equal(from_loaded[var], from_chunked[var])
Esempio n. 7
0
    def test_create_unaligned_times(self, timegen, calkw):
        """Create an ensemble from two series on "M" and "MS" time axes."""
        axis_m = timegen("2000-01-01", periods=24, freq="M", **calkw)
        axis_ms = timegen("2000-01-01", periods=24, freq="MS", **calkw)

        def as_tas(axis):
            # Same 0..23 values on whichever time axis is given.
            return xr.DataArray(
                np.arange(24), dims=("time",), coords={"time": axis}, name="tas"
            )

        members = (as_tas(axis_m), as_tas(axis_ms))

        # NOTE(review): object dtype presumably marks the cftime case, for
        # which the un-resampled merge is not checked — confirm.
        if axis_m.dtype != "O":
            merged = ensembles.create_ensemble(members)
            assert merged.time.size == 48
            np.testing.assert_equal(merged.isel(time=0), [np.nan, 0])

        # With resampling to "MS" both members land on the same 24 steps.
        resampled = ensembles.create_ensemble(members, resample_freq="MS")
        assert resampled.time.size == 24
        np.testing.assert_equal(resampled.isel(time=0), [0, 0])
Esempio n. 8
0
    def test_no_time(self):
        """Create an ensemble from datasets reduced to monthly means over "time"."""
        monthly = []
        for path in self.nc_files_simple:
            raw = xr.open_dataset(path, decode_times=False)
            raw["time"] = xr.decode_cf(raw).time
            # Group by calendar month and average over "time".
            by_month = raw.groupby(raw.time.dt.month)
            monthly.append(by_month.mean("time", keep_attrs=True))

        ens = ensembles.create_ensemble(monthly)
        assert len(ens.realization) == len(self.nc_files_simple)
Esempio n. 9
0
    def test_no_time(self, tmp_path):
        """Create ensembles from monthly-mean datasets, in memory and from files.

        Each input file is reduced to monthly means over "time"; the reduced
        dataset is both kept in memory and written to a "notime" directory
        under ``tmp_path``. An ensemble is then built from each source and its
        number of realizations is checked.
        """
        f1 = Path(tmp_path / "notime")
        f1.mkdir()
        ds_all = []
        for n in self.nc_files:
            ds = open_dataset(os.path.join("EnsembleStats", n),
                              decode_times=False)
            ds["time"] = xr.decode_cf(ds).time
            # Compute the monthly mean once and reuse it for both the
            # in-memory list and the file written to disk.
            monthly = ds.groupby(ds.time.dt.month).mean("time",
                                                        keep_attrs=True)
            ds_all.append(monthly)
            monthly.to_netcdf(f1.joinpath(n))

        ens = ensembles.create_ensemble(ds_all)
        assert len(ens.realization) == len(self.nc_files)

        # Same check, but going through the netCDF files written above.
        in_ncs = list(f1.glob("*.nc"))
        ens = ensembles.create_ensemble(in_ncs)
        assert len(ens.realization) == len(self.nc_files)
Esempio n. 10
0
    def test_create_ensemble(self):
        """Compare an ensemble from the fixture datasets with one rebuilt by hand."""
        ens = ensembles.create_ensemble(self.nc_datasets_simple)
        assert len(ens.realization) == len(self.nc_datasets_simple)
        assert len(ens.time) == 151

        # Rebuild the inputs manually: open without time decoding, then attach
        # the decoded time coordinate.
        members = []
        for fname in self.nc_files:
            member = open_dataset(
                os.path.join("EnsembleStats", fname), decode_times=False
            )
            member["time"] = xr.decode_cf(member).time
            members.append(member)

        rebuilt = ensembles.create_ensemble(members)
        for key in [*rebuilt.coords, *rebuilt.data_vars]:
            np.testing.assert_array_equal(ens[key], rebuilt[key])

        # Each realization must carry the values of the dataset it came from.
        for idx in range(len(rebuilt.realization)):
            np.testing.assert_array_equal(
                rebuilt.isel(realization=idx).tg_mean.values,
                members[idx].tg_mean.values,
            )
Esempio n. 11
0
 def test_calc_mean_std_min_max(self):
     """Check mean/std/max/min ensemble statistics at a single grid point."""
     ens = ensembles.create_ensemble(self.nc_files_simple)
     stats = ensembles.ensemble_mean_std_max_min(ens)

     # (reduction applied over "realization", matching output variable)
     pairs = (
         ("mean", "tg_mean_mean"),
         ("std", "tg_mean_stdev"),
         ("max", "tg_mean_max"),
         ("min", "tg_mean_min"),
     )
     for reducer, var in pairs:
         expected = getattr(ens["tg_mean"][:, 0, 5, 5], reducer)(dim="realization")
         np.testing.assert_array_equal(expected, stats[var][0, 5, 5])
Esempio n. 12
0
 def test_calc_perc(self):
     """Compare ensemble percentiles against ``np.percentile`` at one grid point."""
     ens = ensembles.create_ensemble(self.nc_files_simple)
     default_out = ensembles.ensemble_percentiles(ens)

     for pct, var in ((10, "tg_mean_p10"), (50, "tg_mean_p50"), (90, "tg_mean_p90")):
         expected = np.percentile(ens["tg_mean"][:, 0, 5, 5], pct)
         np.testing.assert_array_equal(expected, default_out[var][0, 5, 5])

     # Percentiles must be strictly ordered everywhere.
     assert np.all(default_out["tg_mean_p90"] > default_out["tg_mean_p50"])
     assert np.all(default_out["tg_mean_p50"] > default_out["tg_mean_p10"])

     custom_out = ensembles.ensemble_percentiles(ens, values=(25, 75))
     assert np.all(custom_out["tg_mean_p75"] > custom_out["tg_mean_p25"])
Esempio n. 13
0
    def test_calc_perc_nans(self):
        """Check the 10th percentile against ``mquantiles`` where NaNs were injected."""
        ens = ensembles.create_ensemble(self.nc_datasets_simple).load()

        # Blank out one realization at two time steps of the same grid point.
        nan_steps = (0, 7)
        for step in nan_steps:
            ens.tg_mean[2, step, 5, 5] = np.nan

        result = ensembles.ensemble_percentiles(ens, split=True)

        for step in nan_steps:
            # Masking the invalid entries lets mquantiles ignore the NaN.
            masked = np.ma.fix_invalid(ens["tg_mean"][:, step, 5, 5])
            reference = mquantiles(masked, 0.10, alphap=1, betap=1)
            np.testing.assert_array_almost_equal(
                reference, result["tg_mean_p10"][step, 5, 5]
            )

        assert np.all(result["tg_mean_p90"] > result["tg_mean_p50"])
        assert np.all(result["tg_mean_p50"] > result["tg_mean_p10"])
Esempio n. 14
0
 def test_calc_mean_std_min_max(self):
     """Check mean/std/max/min ensemble statistics and the history attribute."""
     ens = ensembles.create_ensemble(self.nc_datasets_simple)
     stats = ensembles.ensemble_mean_std_max_min(ens)

     # (reduction applied over "realization", matching output variable)
     pairs = (
         ("mean", "tg_mean_mean"),
         ("std", "tg_mean_stdev"),
         ("max", "tg_mean_max"),
         ("min", "tg_mean_min"),
     )
     for reducer, var in pairs:
         expected = getattr(ens["tg_mean"][:, 0, 5, 5], reducer)(dim="realization")
         np.testing.assert_array_equal(expected, stats[var][0, 5, 5])

     history = stats.attrs["xclim_history"]
     assert "Computation of statistics on" in history
Esempio n. 15
0
    def test_calc_perc_nans(self):
        """Check that the 10th percentile matches ``np.nanpercentile`` with NaNs present."""
        ens = ensembles.create_ensemble(self.nc_files_simple).load()

        # Blank out one realization at two time steps of the same grid point.
        nan_steps = (0, 7)
        for step in nan_steps:
            ens.tg_mean[2, step, 5, 5] = np.nan

        result = ensembles.ensemble_percentiles(ens)

        # Sanity check: the plain percentile is NaN at the modified points.
        for step in nan_steps:
            plain = np.percentile(ens["tg_mean"][:, step, 5, 5], 10)
            np.testing.assert_array_equal(plain, np.nan)

        # The ensemble result must equal the NaN-ignoring percentile instead.
        for step in nan_steps:
            nan_aware = np.nanpercentile(ens["tg_mean"][:, step, 5, 5], 10)
            np.testing.assert_array_equal(nan_aware, result["tg_mean_p10"][step, 5, 5])

        assert np.all(result["tg_mean_p90"] > result["tg_mean_p50"])
        assert np.all(result["tg_mean_p50"] > result["tg_mean_p10"])
Esempio n. 16
0
    def test_calc_perc_nans(self):
        """Check that the split 10th percentile matches ``np.nanpercentile`` with NaNs."""
        ens = ensembles.create_ensemble(self.nc_datasets_simple).load()

        # Blank out one realization at two time steps of the same grid point.
        nan_steps = (0, 7)
        for step in nan_steps:
            ens.tg_mean[2, step, 5, 5] = np.nan

        result = ensembles.ensemble_percentiles(ens, split=True)

        # Sanity check: the plain percentile is NaN at the modified points.
        for step in nan_steps:
            plain = np.percentile(ens["tg_mean"][:, step, 5, 5], 10)
            np.testing.assert_array_equal(plain, np.nan)

        # The ensemble result must equal the NaN-ignoring percentile instead.
        for step in nan_steps:
            nan_aware = np.nanpercentile(ens["tg_mean"][:, step, 5, 5], 10)
            np.testing.assert_array_equal(nan_aware, result["tg_mean_p10"][step, 5, 5])

        assert np.all(result["tg_mean_p90"] > result["tg_mean_p50"])
        assert np.all(result["tg_mean_p50"] > result["tg_mean_p10"])
Esempio n. 17
0
 def test_calc_perc(self, transpose):
     """Compare ensemble percentiles against ``np.percentile`` at one grid point."""
     ens = ensembles.create_ensemble(self.nc_files_simple)
     if transpose:
         ens = ens.transpose()

     # Single location used for the point-wise comparison; selecting by
     # name keeps the check valid whatever the dimension order is.
     where = {"time": 0, "lon": 5, "lat": 5}

     default_out = ensembles.ensemble_percentiles(ens)
     for pct, var in ((10, "tg_mean_p10"), (50, "tg_mean_p50"), (90, "tg_mean_p90")):
         expected = np.percentile(ens["tg_mean"].isel(**where), pct)
         np.testing.assert_array_equal(expected, default_out[var].isel(**where))

     # Percentiles must be strictly ordered everywhere.
     assert np.all(default_out["tg_mean_p90"] > default_out["tg_mean_p50"])
     assert np.all(default_out["tg_mean_p50"] > default_out["tg_mean_p10"])

     custom_out = ensembles.ensemble_percentiles(ens, values=(25, 75))
     assert np.all(custom_out["tg_mean_p75"] > custom_out["tg_mean_p25"])
Esempio n. 18
0
    def test_create_unequal_times(self):
        """Check the time span and NaN padding of an ensemble with unequal periods."""
        ens = ensembles.create_ensemble(self.nc_files)
        assert len(ens.realization) == len(self.nc_files)

        years = ens.time.dt.year
        assert years.min() == 1950
        assert years.max() == 2100

        # Members whose file name contains "1970-2050" must be all-NaN
        # before 1970 and after 2050.
        short_members = [
            idx for idx, fname in enumerate(self.nc_files) if "1970-2050" in fname
        ]
        short_tg = ens.tg_mean.isel(realization=short_members)
        assert np.all(np.isnan(short_tg.sel(time=years < 1970)))
        assert np.all(np.isnan(short_tg.sel(time=years > 2050)))

        # With skipna=False the mean is only valid where no member is NaN.
        ens_mean = ens.tg_mean.mean(dim=["realization", "lon", "lat"], skipna=False)
        is_valid = ~np.isnan(ens_mean)
        assert ens_mean.where(is_valid, drop=True).time.dt.year.min() == 1970
        assert ens_mean.where(is_valid, drop=True).time.dt.year.max() == 2050
Esempio n. 19
0
 def test_create_ensemble(self):
     """The ensemble must hold one realization per input file."""
     expected_members = len(self.nc_files_simple)
     built = ensembles.create_ensemble(self.nc_files_simple)
     assert len(built.realization) == expected_members
Esempio n. 20
0
            ax.plot(times, mem, label=name, color=colors[name])
        ax.legend()
        ax.set_xlabel('Computation time [s]')
        ax.set_ylabel('Memory usage [MiB]')
        ax.set_title('Memory usage of different percentile calculations')
        plt.show()

    else:
        if args.with_client:
            c = Client(n_workers=1,
                       threads_per_worker=args.nthreads,
                       memory_limit=args.max_mem)

        num_real = len(glob.glob(testfile.format(r='*', i=0)))
        ds = xcens.create_ensemble(
            [glob.glob(testfile.format(r=r, i='*')) for r in range(num_real)],
            mf_flag=True,
            combine='by_coords')

        print(f'Running rolling with exp: {args.exp}')
        ds_out = all_exps[args.exp](ds, [10, 50, 90])

        print('Writing to file')

        r = ds_out.to_netcdf(outfile.format(args.exp), compute=False)
        r.compute()
        ds_out.close()

        if args.with_client:
            c.close()
Esempio n. 21
0
 def test_create_unequal_times(self):
     """The ensemble must have one realization per file and span 1970 to 2050."""
     built = ensembles.create_ensemble(self.nc_files)
     assert len(built.realization) == len(self.nc_files)

     years = built.time.dt.year
     assert years.min() == 1970
     assert years.max() == 2050