def test_custom_metric_passed_to_bootstrap_compute(PM_da_initialized_1d, PM_da_control_1d):
    """A user-defined metric should bootstrap close to its builtin counterpart."""
    np.random.seed(42)
    shared_kwargs = dict(comparison='e2c', iterations=ITERATIONS, dim='init')
    actual = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric=my_mse,
        **shared_kwargs,
    )
    expected = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric='mse',
        **shared_kwargs,
    )
    # loose tolerances: resampling noise remains even with a fixed seed
    assert_allclose(actual, expected, rtol=0.1, atol=1)
def test_bootstrap_pm_assign_attrs():
    """bootstrap_perfect_model should record its settings in the result attrs."""
    var = 'tos'
    metric = 'pearson_r'
    comparison = 'm2e'
    ITERATIONS = 3
    sig = 95
    initialized = load_dataset('MPI-PM-DP-1D')[var].isel(area=1, period=-1)
    control = load_dataset('MPI-control-1D')[var].isel(area=1, period=-1)
    attrs = bootstrap_perfect_model(
        initialized,
        control,
        metric=metric,
        comparison=comparison,
        iterations=ITERATIONS,
        sig=sig,
    ).attrs
    assert attrs['metric'] == metric
    assert attrs['comparison'] == comparison
    assert attrs['bootstrap_iterations'] == ITERATIONS
    # the lower CI quantile, e.g. 0.025 for sig=95
    assert str(round((1 - sig / 100) / 2, 3)) in attrs['confidence_interval_levels']
    if metric == 'pearson_r':
        assert attrs['units'] == 'None'
    assert 'bootstrap' in attrs['skill_calculated_by_function']
def test_bootstrap_pm_assign_attrs():
    """Check that bootstrap_perfect_model stores its configuration as attrs."""
    var = "tos"
    metric = "pearson_r"
    comparison = "m2e"
    ITERATIONS = 3
    sig = 95
    initialized = load_dataset("MPI-PM-DP-1D")[var].isel(area=1, period=-1)
    control = load_dataset("MPI-control-1D")[var].isel(area=1, period=-1)
    attrs = bootstrap_perfect_model(
        initialized,
        control,
        metric=metric,
        comparison=comparison,
        iterations=ITERATIONS,
        sig=sig,
    ).attrs
    assert attrs["metric"] == metric
    assert attrs["comparison"] == comparison
    assert attrs["bootstrap_iterations"] == ITERATIONS
    # lower confidence quantile must be reported, e.g. 0.025 for sig=95
    assert str(round((1 - sig / 100) / 2, 3)) in attrs["confidence_interval_levels"]
    if metric == "pearson_r":
        assert attrs["units"] == "None"
    assert "bootstrap" in attrs["skill_calculated_by_function"]
def test_bootstrap_perfect_model_da_not_nan(PM_da_ds, PM_da_control, metric, comparison):
    """Check that bootstrap_perfect_model returns no NaNs for a DataArray.

    Parametrized over ``metric`` and ``comparison`` fixtures.
    """
    actual = (
        bootstrap_perfect_model(
            PM_da_ds,
            PM_da_control,
            metric=metric,
            comparison=comparison,
            sig=50,
            bootstrap=2,
        )
        .isnull()
        .any()
    )
    # `assert actual == False` (E712) only worked incidentally on the 0-d
    # boolean result; `not actual` states the intent directly.
    assert not actual
def test_pvalue_from_bootstrapping(pm_da_ds1d, pm_da_control1d, metric):
    """Test that pvalue of initialized ensemble first lead is close to 0."""
    sig = 95
    boot = bootstrap_perfect_model(
        pm_da_ds1d,
        pm_da_control1d,
        metric=metric,
        bootstrap=20,
        comparison='e2c',
        sig=sig,
    )
    p_first_lead = boot.sel(kind='uninit', results='p').isel(lead=0)
    # two-sided threshold derived from the significance level
    assert p_first_lead < 2 * (1 - sig / 100)
def peakmem_bootstrap_perfect_model(self, metric, comparison):
    """Take memory peak for `bootstrap_perfect_model`."""
    # probabilistic metrics are verified over the member dimension
    resample_over = "member" if metric in PROBABILISTIC_METRICS else None
    result = bootstrap_perfect_model(
        self.ds,
        self.control,
        metric=metric,
        comparison=comparison,
        iterations=self.iterations,
        dim=resample_over,
    )
    ensure_loaded(result)
def Sef2018_Fig1_Different_PH_Definitions(ds, control, unit='PgC/yr', sig=95, bootstrap=1000):
    """Plot RMSE skill with two predictability-horizon (PH) definitions.

    Compares the Sef 2018 breakpoint-fit PH against the Spring 2019
    bootstrap-significance PH for the same m2e RMSE skill curve, and saves
    the figure when the module-level ``savefig`` flag is set.

    Parameters
    ----------
    ds : initialized ensemble (perfect-model layout with a ``lead`` dim)
    control : control run with a ``time`` dim
    unit : str, y-axis unit label only
    sig : int, significance level in percent for the bootstrap
    bootstrap : int, number of bootstrap iterations
    """
    # from esmtools.prediction import predictability_horizon
    from PMMPIESM.plot import _set_integer_xaxis
    # residual significance, e.g. 0.05 for sig=95
    rsig = (100 - sig)/100
    _control = control
    _ds = ds
    # deterministic skill curve (no bootstrapping)
    ss = compute_perfect_model(
        _ds, _control, metric='rmse', comparison='m2e')
    ss['lead'] = np.arange(1, ss.lead.size + 1)
    # ss.name = 'every'
    # bootstrap the same skill to get p-values and confidence bounds
    ss_boot = bootstrap_perfect_model(_ds, _control, metric='rmse', comparison='m2e', sig=sig, bootstrap=bootstrap)
    ss_p = ss_boot.sel(kind='uninit', results='p')
    # NOTE(review): selects 'low_ci' but is named ..._high — for RMSE the
    # low quantile may indeed be the stricter bound; confirm intent.
    ss_ci_high = ss_boot.sel(kind='uninit', results='low_ci')
    # PH (Spring 2019): last lead where skill beats the uninitialized p-test
    ph_Spring_2019 = predictability_horizon(
        ss.where(ss_p < rsig)).values
    # PH (Sef 2018): breakpoint fit of the skill curve
    b_m2e, ph_Sef_2018, c_m2e = fit_ph_int(ss.to_series())
    print('ph_Sef_2018', ph_Sef_2018)
    print('ph_Spring_2019', int(ph_Spring_2019))
    fig, ax = plt.subplots(figsize=(10, 4))
    # control-run standard deviation as the saturation reference
    std = _control.std('time').values
    every_color = 'mediumorchid'
    ss.name = 'skill'
    ss.to_dataframe().plot(ax=ax, label='skill', color='k', marker='o')
    t_fit = np.arange(0, _ds.lead.size)
    # skip lead 0 of the fitted curve to align with 1-based leads
    ax.plot(t_fit[1:], func(t_fit, b_m2e, ph_Sef_2018, c_m2e)[1:], linewidth=3, color=every_color, label='Sef 2018 breakpoint fit')
    ax.axvline(x=ph_Sef_2018, linestyle='-.', color=every_color, label='PH Sef 2018')
    ax.axhline(y=std, ls='--', c='k', alpha=.3, label='std control')
    ax.axhline(y=ss_ci_high.mean('lead'), ls=':', c='royalblue', label='Bootstrapped high CI')
    ax.axvline(x=ph_Spring_2019, ls='-.', c='royalblue', label='PH Spring 2019')
    ax.set_xlabel('Lead Time [time]')
    ax.set_ylabel('RMSE [' + unit + ']')
    ax.set_ylim([0, ss.max() * 1.1])
    ax.set_xlim([0, 10])
    _set_integer_xaxis(ax)
    ax.legend(frameon=False, ncol=2)
    ax.set_xticks(range(1, 11))
    ax.set_title(
        ' Global oceanic CO$_2$ flux: Differences in definitions of Predictability Horizon')
    if savefig:
        plt.tight_layout()
        plt.savefig('FigureSI_Differences_PH_definition')
def test_mpi_he_plot_bootstrapped_skill_over_leadyear_ds(
    PM_ds_initialized_1d, PM_ds_control_1d
):
    """Plotting bootstrap_perfect_model output works for a one-variable Dataset."""
    bootstrapped = bootstrap_perfect_model(
        PM_ds_initialized_1d,
        PM_ds_control_1d,
        metric="pearson_r",
        iterations=ITERATIONS,
    )
    ax = plot_bootstrapped_skill_over_leadyear(bootstrapped)
    assert ax is not None
def test_mpi_pm_plot_bootstrapped_skill_over_leadyear_da(
    PM_da_initialized_1d, PM_da_control_1d
):
    """Plotting bootstrap_perfect_model output works for an xr.DataArray."""
    bootstrapped = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric='pearson_r',
        iterations=ITERATIONS,
    )
    ax = plot_bootstrapped_skill_over_leadyear(bootstrapped)
    assert ax is not None
def test_pvalue_from_bootstrapping(PM_da_initialized_1d, PM_da_control_1d, metric):
    """Test that pvalue of initialized ensemble first lead is close to 0."""
    sig = 95
    boot = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric=metric,
        iterations=ITERATIONS,
        comparison="e2c",
        sig=sig,
        dim="init",
    )
    p_first_lead = boot.sel(skill="uninitialized", results="p").isel(lead=0)
    # two-sided threshold from the significance level
    assert p_first_lead.values < 2 * (1 - sig / 100)
def test_bootstrap_PM_keep_lead_attrs(
    PM_da_initialized_3d,
    PM_da_control_3d,
):
    """bootstrap_perfect_model must carry the lead coordinate attrs through."""
    PM_da_initialized_3d.lead.attrs['units'] = 'years'
    result = bootstrap_perfect_model(
        PM_da_initialized_3d,
        PM_da_control_3d,
        iterations=ITERATIONS,
        comparison='m2c',
        metric='mse',
    )
    assert 'units' in result.lead.attrs
    assert result.lead.attrs['units'] == PM_da_initialized_3d.lead.attrs['units']
def test_bootstrap_perfect_model_da3d_not_nan(PM_da_ds3d, PM_da_control3d, metric, comparison):
    """Checks that there are no NaNs on bootstrap init skill or uninit p of 3D da."""
    boot = bootstrap_perfect_model(
        PM_da_ds3d,
        PM_da_control3d,
        metric=metric,
        comparison=comparison,
        sig=50,
        bootstrap=2,
    )
    # initialized skill must be fully defined
    assert not boot.sel(i='init', results='skill').isnull().any()
    # uninitialized p-values must be fully defined
    assert not boot.sel(i='uninit', results='p').isnull().any()
def test_mpi_he_plot_bootstrapped_skill_over_leadyear_single_uninit_lead(
    PM_da_initialized_1d, PM_da_control_1d
):
    """Plotting still works when only the first uninit lead is defined."""
    res = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric="pearson_r",
        iterations=ITERATIONS,
    )
    # blank out every uninit lead but the first
    res[:, 2, 1:] = [np.nan] * (res.lead.size - 1)
    ax = plot_bootstrapped_skill_over_leadyear(res)
    assert ax is not None
def test_bootstrap_perfect_model_da1d_not_nan(PM_da_initialized_1d, PM_da_control_1d):
    """Checks that there are no NaNs on bootstrap perfect_model of 1D da."""
    boot = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric='rmse',
        comparison='e2c',
        sig=50,
        iterations=ITERATIONS,
    )
    assert not boot.sel(kind='init', results='skill').isnull().any()
    assert not boot.sel(kind='uninit', results='p').isnull().any()
def test_bootstrap_perfect_model_keeps_lead_units(PM_da_initialized_1d, PM_da_control_1d):
    """Test that lead units is kept in compute."""
    sig = 95
    expected_units = "years"
    PM_da_initialized_1d.lead.attrs["units"] = "years"
    result = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric="mse",
        iterations=ITERATIONS,
        comparison="e2c",
        sig=sig,
        dim="init",
    )
    assert result.lead.attrs["units"] == expected_units
def test_bootstrap_perfect_model_da1d_not_nan(pm_da_ds1d, pm_da_control1d):
    """Checks that there are no NaNs on bootstrap perfect_model of 1D da."""
    boot = bootstrap_perfect_model(
        pm_da_ds1d,
        pm_da_control1d,
        metric='rmse',
        comparison='e2c',
        sig=50,
        bootstrap=2,
    )
    assert not boot.sel(kind='init', results='skill').isnull().any()
    assert not boot.sel(kind='uninit', results='p').isnull().any()
def test_bootstrap_perfect_model_keeps_lead_units(
    PM_da_initialized_1d, PM_da_control_1d
):
    """Test that lead units is kept in compute."""
    sig = 95
    expected_units = 'years'
    PM_da_initialized_1d.lead.attrs['units'] = 'years'
    result = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric='mse',
        iterations=ITERATIONS,
        comparison='e2c',
        sig=sig,
        dim='init',
    )
    assert result.lead.attrs['units'] == expected_units
def test_mpi_pm_plot_bootstrapped_skill_over_leadyear():
    """Checks plots from bootstrap_perfect_model works."""
    initialized = load_dataset('MPI-PM-DP-1D').isel(area=1, period=-1)['tos']
    control = load_dataset('MPI-control-1D').isel(area=1, period=-1)['tos']
    bootstrap = 5
    res = bootstrap_perfect_model(
        initialized, control, metric='pearson_r', bootstrap=bootstrap
    )
    ax = plot_bootstrapped_skill_over_leadyear(res, 95)
    assert ax is not None
def test_bootstrap_perfect_model_da1d_not_nan(PM_da_initialized_1d, PM_da_control_1d):
    """Checks that there are no NaNs on bootstrap perfect_model of 1D da."""
    boot = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric="rmse",
        comparison="e2c",
        dim="init",
        sig=50,
        iterations=ITERATIONS,
    )
    assert not boot.sel(kind="initialized", results="skill").isnull().any()
    assert not boot.sel(kind="uninitialized", results="p").isnull().any()
def test_bootstrap_PM_lazy_results(
    PM_da_initialized_3d, PM_da_control_3d, chunk, comparison
):
    """Lazy (chunked) inputs must yield a lazy result; eager inputs an eager one."""
    if chunk:
        initialized = PM_da_initialized_3d.chunk({'lead': 2}).persist()
        control = PM_da_control_3d.chunk({'time': -1}).persist()
    else:
        initialized = PM_da_initialized_3d.compute()
        control = PM_da_control_3d.compute()
    result = bootstrap_perfect_model(
        initialized,
        control,
        iterations=ITERATIONS,
        comparison=comparison,
        metric='mse',
    )
    assert dask.is_dask_collection(result) == chunk
def test_bootstrap_pm_dim(pm_da_ds1d, pm_da_control1d):
    """Test whether bootstrap_hindcast calcs skill over member dim and returns init dim."""
    actual = bootstrap_perfect_model(
        pm_da_ds1d,
        pm_da_control1d,
        metric='rmse',
        dim='member',
        comparison='m2c',
        bootstrap=3,
    )
    assert 'init' in actual.dims
    for kind in ['init', 'uninit']:
        selected = actual.sel(kind=kind, results='skill')
        # collapse init if it survived as a coordinate
        if 'init' in selected.coords:
            selected = selected.mean('init')
        assert not selected.isnull().any()
def test_bootstrap_pm_dim(PM_da_initialized_1d, PM_da_control_1d):
    """Test whether bootstrap_hindcast calcs skill over member dim and returns init dim."""
    actual = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric='rmse',
        dim='member',
        comparison='m2c',
        iterations=ITERATIONS,
        resample_dim='member',
    )
    assert 'init' in actual.dims
    for kind in ['init', 'uninit']:
        selected = actual.sel(kind=kind, results='skill')
        # collapse init if it survived as a coordinate
        if 'init' in selected.coords:
            selected = selected.mean('init')
        assert not selected.isnull().any()
def test_bootstrap_perfect_model_ds1d_not_nan(PM_ds_ds1d, PM_ds_control1d, metric, comparison):
    """Checks that there are no NaNs on bootstrap init skill or uninit p of 1D ds time series."""
    boot = bootstrap_perfect_model(
        PM_ds_ds1d,
        PM_ds_control1d,
        metric=metric,
        comparison=comparison,
        sig=50,
        bootstrap=2,
    )
    # initialized skill defined for every variable
    for var in boot.data_vars:
        assert not boot[var].sel(i='init', results='skill').isnull().any()
    # uninitialized p-values defined for every variable
    for var in boot.data_vars:
        assert not boot[var].sel(i='uninit', results='p').isnull().any()
def test_bootstrap_perfect_model_da1d_not_nan_probabilistic(
    PM_da_initialized_1d, PM_da_control_1d, metric, comparison
):
    """Probabilistic perfect-model metrics on a 1D series yield non-all-NaN skill."""
    kwargs = {
        "comparison": comparison,
        "metric": metric,
    }
    category_edges = np.array([0, 0.5, 1])
    # per-metric keyword arguments
    if metric in probabilistic_metrics_requiring_logical:

        def f(x):
            return x > 0.5

        kwargs["logical"] = f
    elif metric == "threshold_brier_score":
        kwargs["threshold"] = 0.5
    elif metric == "contingency":
        kwargs["forecast_category_edges"] = category_edges
        kwargs["observation_category_edges"] = category_edges
        kwargs["score"] = "accuracy"
    elif metric == "rps":
        kwargs["category_edges"] = category_edges
    # some metrics need init in addition to member
    if metric in probabilistic_metrics_requiring_more_than_member_dim:
        kwargs["dim"] = ["member", "init"]
    else:
        kwargs["dim"] = "member"
    # sanity-check the deterministic compute first
    computed = compute_perfect_model(PM_da_initialized_1d, PM_da_control_1d, **kwargs)
    assert not computed.isnull().all()
    kwargs["iterations"] = ITERATIONS
    kwargs["resample_dim"] = "member"
    actual = bootstrap_perfect_model(PM_da_initialized_1d, PM_da_control_1d, **kwargs)
    for kind in ["initialized", "uninitialized"]:
        skill = actual.sel(kind=kind, results="skill")
        assert not skill.isnull().all()
def test_bootstrap_perfect_model_ds1d_not_nan(PM_ds_initialized_1d, PM_ds_control_1d):
    """Checks that there are no NaNs on bootstrap perfect_model of 1D ds."""
    boot = bootstrap_perfect_model(
        PM_ds_initialized_1d,
        PM_ds_control_1d,
        metric="rmse",
        comparison="e2c",
        dim="init",
        sig=50,
        iterations=ITERATIONS,
    )
    # initialized verify skill defined for every variable
    for var in boot.data_vars:
        init_skill = boot[var].sel(skill="initialized", results="verify skill")
        assert not init_skill.isnull().any()
    # uninitialized p-values defined for every variable
    for var in boot.data_vars:
        uninit_p = boot[var].sel(skill="uninitialized", results="p")
        assert not uninit_p.isnull().any()
def test_bootstrap_perfect_model_da1d_not_nan_probabilistic(
    PM_da_initialized_1d, PM_da_control_1d, metric, comparison
):
    """Probabilistic perfect-model metrics of a 1D series yield no NaNs."""
    # threshold metrics get a fixed threshold, others None
    threshold = 10.5 if 'threshold' in metric else None
    # brier_score needs a binarizing function
    func = None
    if metric == 'brier_score':

        def func(x):
            return x > 0

    actual = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        comparison=comparison,
        metric=metric,
        threshold=threshold,
        gaussian=True,
        func=func,
        iterations=ITERATIONS,
        dim='member',
        resample_dim='member',
    )
    for kind in ['init', 'uninit']:
        skill = actual.sel(kind=kind, results='skill')
        # collapse init if it survived as a coordinate
        if 'init' in skill.coords:
            skill = skill.mean('init')
        assert not skill.isnull().any()
s3d = compute_perfect_model(ds3d, control3d, metric=metric) # s3d.plot(col='lead') s3dapn = compute_perfect_model( ds3d.sel(init=anom_pos_nino34_init.values), control3d, metric=metric) s3daneun = compute_perfect_model( ds3d.sel(init=anom_neu_nino34_init), control3d, metric=metric) s3dann = compute_perfect_model( ds3d.sel(init=anom_neg_nino34_init.values), control3d, metric=metric) sa3d = xr.concat([s3d, s3dapn, s3daneun, s3dann], 'IC') sa3d['IC'] = ['all', 'pos', 'neutral', 'neg'] warnings.simplefilter("ignore") b = 100 s3d = bootstrap_perfect_model(ds3d, control3d, metric=metric, bootstrap=b) s3dapn = bootstrap_perfect_model( ds3d.sel(init=anom_pos_nino34_init.values), control3d, metric=metric, bootstrap=b) s3daneun = bootstrap_perfect_model( ds3d.sel(init=anom_neu_nino34_init), control3d, metric=metric, bootstrap=b) s3dann = bootstrap_perfect_model( ds3d.sel(init=anom_neg_nino34_init.values), control3d, metric=metric, bootstrap=b) sa3d.sel(kind='uninit', results='p').plot(col='lead', row='IC', vmax=.2) sa3d = xr.concat([s3d, s3dapn, s3daneun, s3dann], 'IC') sa3d['IC'] = ['all', 'pos', 'neutral', 'neg'] skill = sa3d.sel(kind='init', results='skill').where( sa3d.sel(results='p', kind='uninit') <= .05) #sa3d.plot(col='lead', row='IC', robust=True) #sa3d.name = 'CO$_2$ ACC'
# Bootstrapped skill for two variables x two metrics; notebook-style script.
# NOTE(review): `comparison`, `bootstrap`, `sig`, `psig`, `save_nc` and
# `data_path` are used inside the loop but (re)assigned only after it in
# this fragment — cell ordering was likely lost; verify before running
# top-to-bottom.
metric = 'pearson_r'
v = 'co2_flux'
v='CO2'
metric = 'rmse'
for metric in ['pearson_r', 'rmse']:
    for v in ['co2_flux', 'CO2']:
        # keep a copy of the variable name for the output filename
        varstring = copy(v)
        print(v, varstring)
        ds = xr.open_dataset(data_path + 'ds_' + v + '_ym.nc').load()
        control = xr.open_dataset(data_path + 'control_' + v + '_ym.nc').load()
        ds, control = comply_climpred(ds, control)
        # faster
        ds = ds.isel(lead=slice(None, 6)).isel(member=slice(1,None))
        bs = bootstrap_perfect_model(
            ds, control, metric=metric, comparison=comparison,
            bootstrap=bootstrap, sig=sig)
        if save_nc:
            bs.to_netcdf('_'.join(['results', varstring, 'ym', 'metric',
                                   metric, 'comparison', comparison, 'sig',
                                   str(sig), 'bootstrap',
                                   str(bootstrap)]) + '.nc')
        # mask skill where not significant vs. uninitialized
        skill = bs.sel(results='skill', kind='init').where(
            bs.sel(results='p', kind='uninit') <= psig)
        skill[v].plot(col='lead',col_wrap=4,yincrease=False)
# plots
bootstrap = 1000
comparison = 'm2e'
psig = .05
# prototyping
metric = 'rmse'
# Bootstrapped ACC and RMSE skill for globally diagnosed CO2; results are
# either computed and written to netCDF or read back from disk depending on
# the `compute` switch.
# NOTE(review): depends on globals defined elsewhere (post_global, save_nc,
# xr, bootstrap_perfect_model).
comparison = 'm2e'
sig = 95
# residual significance level, e.g. 0.05 for sig=95
psig = (100 - sig) / 100
bootstrap = 5000
control = xr.open_dataset(post_global + 'control_diagnosed_co2.nc')
ds = xr.open_dataset(post_global + 'ds_diagnosed_co2.nc')
# rename dims to climpred requirements
if 'ensemble' in ds.dims:
    ds = ds.rename({'ensemble': 'init', 'time': 'lead'})
# output/input directory for cached results
p = 'data/results/'
compute = False
if compute:
    bs_acc = bootstrap_perfect_model(
        ds, control, metric='pearson_r', comparison=comparison,
        bootstrap=bootstrap, sig=sig)
    if save_nc:
        bs_acc.to_netcdf(p+'_'.join(['results', 'global', 'ym', 'metric',
                                     'pearson_r', 'comparison', comparison,
                                     'sig', str(sig), 'bootstrap',
                                     str(bootstrap)]) + '.nc')
    bs_rmse = bootstrap_perfect_model(
        ds, control, metric='rmse', comparison=comparison,
        bootstrap=bootstrap, sig=sig)
    if save_nc:
        bs_rmse.to_netcdf(p+'_'.join(['results', 'global', 'ym', 'metric',
                                      'rmse', 'comparison', comparison,
                                      'sig', str(sig), 'bootstrap',
                                      str(bootstrap)]) + '.nc')
else:
    # load previously computed results from disk
    bs_acc = xr.open_dataset(p+'_'.join(['results', 'global', 'ym', 'metric',
                                         'pearson_r', 'comparison', comparison,
                                         'sig', str(sig), 'bootstrap',
                                         str(bootstrap)]) + '.nc')
    bs_rmse = xr.open_dataset(p+'_'.join(['results', 'global', 'ym', 'metric',
                                          'rmse', 'comparison', comparison,
                                          'sig', str(sig), 'bootstrap',
                                          str(bootstrap)]) + '.nc')