Example #1
def test_custom_metric_passed_to_bootstrap_compute(PM_da_initialized_1d,
                                                   PM_da_control_1d):
    """Test custom metric in bootstrap_perfect_model."""
    comparison = 'e2c'
    dim = 'init'
    np.random.seed(42)
    actual = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        comparison=comparison,
        metric=my_mse,
        iterations=ITERATIONS,
        dim=dim,
    )

    expected = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        comparison=comparison,
        metric='mse',
        iterations=ITERATIONS,
        dim=dim,
    )

    assert_allclose(actual, expected, rtol=0.1, atol=1)
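Here `my_mse` is a user-defined metric object rather than a metric name string. A minimal sketch of how it could be built, assuming climpred's `Metric` wrapper (the argument names follow climpred's documented custom-metric example; treat the details as an assumption, not the exact test fixture):

from climpred.metrics import Metric

def _mse_function(forecast, verif, dim=None, **metric_kwargs):
    # plain mean squared error, reduced over the given dimension(s)
    return ((forecast - verif) ** 2).mean(dim)

my_mse = Metric(
    name='my_mse',
    function=_mse_function,
    positive=False,       # smaller is better
    probabilistic=False,  # deterministic metric
    unit_power=2,         # result carries squared units of the variable
)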
Example #2
def test_bootstrap_pm_assign_attrs():
    """Test assigning attrs for bootstrap_perfect_model."""
    v = 'tos'
    metric = 'pearson_r'
    comparison = 'm2e'
    ITERATIONS = 3
    sig = 95
    da = load_dataset('MPI-PM-DP-1D')[v].isel(area=1, period=-1)
    control = load_dataset('MPI-control-1D')[v].isel(area=1, period=-1)
    actual = bootstrap_perfect_model(
        da,
        control,
        metric=metric,
        comparison=comparison,
        iterations=ITERATIONS,
        sig=sig,
    ).attrs
    assert actual['metric'] == metric
    assert actual['comparison'] == comparison
    assert actual['bootstrap_iterations'] == ITERATIONS
    assert str(round((1 - sig / 100) / 2,
                     3)) in actual['confidence_interval_levels']
    if metric == 'pearson_r':
        assert actual['units'] == 'None'
    assert 'bootstrap' in actual['skill_calculated_by_function']
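The substring assertion encodes the two-sided interval bounds: for sig = 95, (1 - 95 / 100) / 2 rounds to 0.025, so '0.025' (and, by symmetry, 0.975) should appear in the confidence_interval_levels attribute:

sig = 95
low = round((1 - sig / 100) / 2, 3)  # 0.025
high = round(1 - low, 3)             # 0.975
assert str(low) == '0.025' and str(high) == '0.975'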
Example #3
def test_bootstrap_pm_assign_attrs():
    """Test assigning attrs for bootstrap_perfect_model."""
    v = "tos"
    metric = "pearson_r"
    comparison = "m2e"
    ITERATIONS = 3
    sig = 95
    da = load_dataset("MPI-PM-DP-1D")[v].isel(area=1, period=-1)
    control = load_dataset("MPI-control-1D")[v].isel(area=1, period=-1)
    actual = bootstrap_perfect_model(
        da,
        control,
        metric=metric,
        comparison=comparison,
        iterations=ITERATIONS,
        sig=sig,
    ).attrs
    assert actual["metric"] == metric
    assert actual["comparison"] == comparison
    assert actual["bootstrap_iterations"] == ITERATIONS
    assert str(round((1 - sig / 100) / 2,
                     3)) in actual["confidence_interval_levels"]
    if metric == "pearson_r":
        assert actual["units"] == "None"
    assert "bootstrap" in actual["skill_calculated_by_function"]
Example #4
def test_bootstrap_perfect_model_da_not_nan(PM_da_ds, PM_da_control, metric,
                                            comparison):
    actual = bootstrap_perfect_model(PM_da_ds,
                                     PM_da_control,
                                     metric=metric,
                                     comparison=comparison,
                                     sig=50,
                                     bootstrap=2).isnull().any()
    assert not actual

def test_pvalue_from_bootstrapping(pm_da_ds1d, pm_da_control1d, metric):
    """Test that pvalue of initialized ensemble first lead is close to 0."""
    sig = 95
    actual = (bootstrap_perfect_model(
        pm_da_ds1d,
        pm_da_control1d,
        metric=metric,
        bootstrap=20,
        comparison='e2c',
        sig=sig,
    ).sel(kind='uninit', results='p').isel(lead=0))
    assert actual < 2 * (1 - sig / 100)
Example #6
def peakmem_bootstrap_perfect_model(self, metric, comparison):
    """Take memory peak for `bootstrap_perfect_model`."""
    dim = "member" if metric in PROBABILISTIC_METRICS else None
    ensure_loaded(
        bootstrap_perfect_model(
            self.ds,
            self.control,
            metric=metric,
            comparison=comparison,
            iterations=self.iterations,
            dim=dim,
        ))
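`peakmem_*` methods follow the airspeed-velocity (asv) benchmarking convention, and `ensure_loaded` is presumably a small helper that forces computation so the bootstrap's peak memory is actually exercised. A sketch under that assumption:

import dask

def ensure_loaded(res):
    # force evaluation of lazy (dask-backed) xarray objects; no-op otherwise
    return res.compute() if dask.is_dask_collection(res) else res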
Example #7
def Sef2018_Fig1_Different_PH_Definitions(ds, control, unit='PgC/yr', sig=95,
                                          bootstrap=1000):
    from esmtools.prediction import predictability_horizon
    from PMMPIESM.plot import _set_integer_xaxis
    rsig = (100 - sig) / 100
    _control = control
    _ds = ds
    ss = compute_perfect_model(
        _ds, _control, metric='rmse', comparison='m2e')
    ss['lead'] = np.arange(1, ss.lead.size + 1)
    # ss.name = 'every'
    ss_boot = bootstrap_perfect_model(_ds, _control, metric='rmse',
                                      comparison='m2e', sig=sig, bootstrap=bootstrap)
    ss_p = ss_boot.sel(kind='uninit', results='p')
    ss_ci_high = ss_boot.sel(kind='uninit', results='low_ci')

    ph_Spring_2019 = predictability_horizon(
        ss.where(ss_p < rsig)).values

    b_m2e, ph_Sef_2018, c_m2e = fit_ph_int(ss.to_series())
    print('ph_Sef_2018', ph_Sef_2018)
    print('ph_Spring_2019', int(ph_Spring_2019))

    fig, ax = plt.subplots(figsize=(10, 4))
    std = _control.std('time').values

    every_color = 'mediumorchid'
    ss.name = 'skill'
    ss.to_dataframe().plot(ax=ax, label='skill', color='k', marker='o')

    t_fit = np.arange(0, _ds.lead.size)
    ax.plot(t_fit[1:], func(t_fit, b_m2e, ph_Sef_2018, c_m2e)[1:],
            linewidth=3, color=every_color, label='Sef 2018 breakpoint fit')
    ax.axvline(x=ph_Sef_2018, linestyle='-.',
               color=every_color, label='PH Sef 2018')
    ax.axhline(y=std, ls='--', c='k', alpha=.3, label='std control')
    ax.axhline(y=ss_ci_high.mean('lead'), ls=':',
               c='royalblue', label='Bootstrapped high CI')

    ax.axvline(x=ph_Spring_2019, ls='-.', c='royalblue',
               label='PH Spring 2019')
    ax.set_xlabel('Lead Time [time]')
    ax.set_ylabel('RMSE [' + unit + ']')
    ax.set_ylim([0, ss.max() * 1.1])
    ax.set_xlim([0, 10])
    _set_integer_xaxis(ax)
    ax.legend(frameon=False, ncol=2)
    ax.set_xticks(range(1, 11))
    ax.set_title(
        'Global oceanic CO$_2$ flux: Differences in definitions of Predictability Horizon')
    if savefig:  # `savefig` is assumed to be a module-level flag, not defined in this excerpt
        plt.tight_layout()
        plt.savefig('FigureSI_Differences_PH_definition')
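`fit_ph_int` and `func` come from the author's analysis code and are not shown here; `func` is presumably the breakpoint model whose parameters `fit_ph_int` estimates from the skill series. A hypothetical sketch, assuming a piecewise-linear rise that saturates at level c beyond the predictability horizon ph:

import numpy as np

def func(t, b, ph, c):
    # assumed form: linear growth with slope b up to the breakpoint ph,
    # constant saturation level c afterwards (continuous at t == ph)
    t = np.asarray(t, dtype=float)
    return np.where(t < ph, c + b * (t - ph), c)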
def test_mpi_he_plot_bootstrapped_skill_over_leadyear_ds(
        PM_ds_initialized_1d, PM_ds_control_1d):
    """
    Checks plots from bootstrap_perfect_model works for xr.Dataset with one variable.
    """
    res = bootstrap_perfect_model(
        PM_ds_initialized_1d,
        PM_ds_control_1d,
        metric="pearson_r",
        iterations=ITERATIONS,
    )
    res_ax = plot_bootstrapped_skill_over_leadyear(res)
    assert res_ax is not None
Example #9
def test_mpi_pm_plot_bootstrapped_skill_over_leadyear_da(
        PM_da_initialized_1d, PM_da_control_1d):
    """
    Checks plots from bootstrap_perfect_model works for xr.DataArray.
    """
    res = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric='pearson_r',
        iterations=ITERATIONS,
    )
    res_ax = plot_bootstrapped_skill_over_leadyear(res)
    assert res_ax is not None

def test_pvalue_from_bootstrapping(PM_da_initialized_1d, PM_da_control_1d,
                                   metric):
    """Test that pvalue of initialized ensemble first lead is close to 0."""
    sig = 95
    actual = (bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric=metric,
        iterations=ITERATIONS,
        comparison="e2c",
        sig=sig,
        dim="init",
    ).sel(skill="uninitialized", results="p").isel(lead=0))
    assert actual.values < 2 * (1 - sig / 100)
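The asserted bound is simply the two-sided significance level: with sig = 95 the uninitialized p-value at the first lead must fall below 2 * (1 - 95 / 100) = 0.1.

sig = 95
p_bound = 2 * (1 - sig / 100)  # ~0.1 for a 95% two-sided level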
Example #11
def test_bootstrap_PM_keep_lead_attrs(
    PM_da_initialized_3d, PM_da_control_3d,
):
    """Test bootstrap_perfect_model works lazily."""
    PM_da_initialized_3d.lead.attrs['units'] = 'years'
    s = bootstrap_perfect_model(
        PM_da_initialized_3d,
        PM_da_control_3d,
        iterations=ITERATIONS,
        comparison='m2c',
        metric='mse',
    )
    assert 'units' in s.lead.attrs
    assert s.lead.attrs['units'] == PM_da_initialized_3d.lead.attrs['units']

def test_bootstrap_perfect_model_da3d_not_nan(PM_da_ds3d, PM_da_control3d,
                                              metric, comparison):
    """
    Checks that there are no NaNs on bootstrap init skill or uninit p of 3D da.
    """
    actual = bootstrap_perfect_model(PM_da_ds3d,
                                     PM_da_control3d,
                                     metric=metric,
                                     comparison=comparison,
                                     sig=50,
                                     bootstrap=2)
    actual_init_skill = actual.sel(i='init', results='skill').isnull().any()
    assert not actual_init_skill
    actual_uninit_p = actual.sel(i='uninit', results='p').isnull().any()
    assert not actual_uninit_p

def test_mpi_he_plot_bootstrapped_skill_over_leadyear_single_uninit_lead(
        PM_da_initialized_1d, PM_da_control_1d):
    """
    Checks that plots from bootstrap_perfect_model work for an xr.DataArray.
    """
    res = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric="pearson_r",
        iterations=ITERATIONS,
    )
    # set all but first uninit lead to nan
    res[:, 2, 1:] = [np.nan] * (res.lead.size - 1)
    res_ax = plot_bootstrapped_skill_over_leadyear(res)
    assert res_ax is not None
Example #14
def test_bootstrap_perfect_model_da1d_not_nan(PM_da_initialized_1d, PM_da_control_1d):
    """
    Checks that there are no NaNs on bootstrap perfect_model of 1D da.
    """
    actual = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric='rmse',
        comparison='e2c',
        sig=50,
        iterations=ITERATIONS,
    )
    actual_init_skill = actual.sel(kind='init', results='skill').isnull().any()
    assert not actual_init_skill
    actual_uninit_p = actual.sel(kind='uninit', results='p').isnull().any()
    assert not actual_uninit_p

def test_bootstrap_perfect_model_keeps_lead_units(PM_da_initialized_1d,
                                                  PM_da_control_1d):
    """Test that lead units are kept in compute."""
    sig = 95
    units = "years"
    PM_da_initialized_1d.lead.attrs["units"] = "years"
    actual = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric="mse",
        iterations=ITERATIONS,
        comparison="e2c",
        sig=sig,
        dim="init",
    )
    assert actual.lead.attrs["units"] == units

def test_bootstrap_perfect_model_da1d_not_nan(pm_da_ds1d, pm_da_control1d):
    """
    Checks that there are no NaNs on bootstrap perfect_model of 1D da.
    """
    actual = bootstrap_perfect_model(
        pm_da_ds1d,
        pm_da_control1d,
        metric='rmse',
        comparison='e2c',
        sig=50,
        bootstrap=2,
    )
    actual_init_skill = actual.sel(kind='init', results='skill').isnull().any()
    assert not actual_init_skill
    actual_uninit_p = actual.sel(kind='uninit', results='p').isnull().any()
    assert not actual_uninit_p
Example #17
def test_bootstrap_perfect_model_keeps_lead_units(
    PM_da_initialized_1d, PM_da_control_1d
):
    """Test that lead units is kept in compute."""
    sig = 95
    units = 'years'
    PM_da_initialized_1d.lead.attrs['units'] = 'years'
    actual = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric='mse',
        iterations=ITERATIONS,
        comparison='e2c',
        sig=sig,
        dim='init',
    )
    assert actual.lead.attrs['units'] == units
Example #18
def test_mpi_pm_plot_bootstrapped_skill_over_leadyear():
    """
    Checks that plots from bootstrap_perfect_model work.
    """
    da = load_dataset('MPI-PM-DP-1D').isel(area=1, period=-1)
    PM_da_ds1d = da['tos']

    da = load_dataset('MPI-control-1D').isel(area=1, period=-1)
    PM_da_control1d = da['tos']

    # sig = 95
    bootstrap = 5
    res = bootstrap_perfect_model(
        PM_da_ds1d, PM_da_control1d, metric='pearson_r', bootstrap=bootstrap
    )
    res_ax = plot_bootstrapped_skill_over_leadyear(res, 95)
    assert res_ax is not None
Example #19
def test_bootstrap_perfect_model_da1d_not_nan(PM_da_initialized_1d, PM_da_control_1d):
    """
    Checks that there are no NaNs on bootstrap perfect_model of 1D da.
    """
    actual = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric="rmse",
        comparison="e2c",
        dim="init",
        sig=50,
        iterations=ITERATIONS,
    )
    actual_init_skill = actual.sel(kind="initialized", results="skill").isnull().any()
    assert not actual_init_skill
    actual_uninit_p = actual.sel(kind="uninitialized", results="p").isnull().any()
    assert not actual_uninit_p
Example #20
def test_bootstrap_PM_lazy_results(
    PM_da_initialized_3d, PM_da_control_3d, chunk, comparison
):
    """Test bootstrap_perfect_model works lazily."""
    if chunk:
        PM_da_initialized_3d = PM_da_initialized_3d.chunk({'lead': 2}).persist()
        PM_da_control_3d = PM_da_control_3d.chunk({'time': -1}).persist()
    else:
        PM_da_initialized_3d = PM_da_initialized_3d.compute()
        PM_da_control_3d = PM_da_control_3d.compute()
    s = bootstrap_perfect_model(
        PM_da_initialized_3d,
        PM_da_control_3d,
        iterations=ITERATIONS,
        comparison=comparison,
        metric='mse',
    )
    assert dask.is_dask_collection(s) == chunk

def test_bootstrap_pm_dim(pm_da_ds1d, pm_da_control1d):
    """Test whether bootstrap_perfect_model calcs skill over member dim and
    returns init dim."""
    actual = bootstrap_perfect_model(
        pm_da_ds1d,
        pm_da_control1d,
        metric='rmse',
        dim='member',
        comparison='m2c',
        bootstrap=3,
    )
    assert 'init' in actual.dims
    for kind in ['init', 'uninit']:
        actualk = actual.sel(kind=kind, results='skill')
        if 'init' in actualk.coords:
            actualk = actualk.mean('init')
        actualk = actualk.isnull().any()
        assert not actualk

def test_bootstrap_pm_dim(PM_da_initialized_1d, PM_da_control_1d):
    """Test whether bootstrap_perfect_model calcs skill over member dim and
    returns init dim."""
    actual = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        metric='rmse',
        dim='member',
        comparison='m2c',
        iterations=ITERATIONS,
        resample_dim='member',
    )
    assert 'init' in actual.dims
    for kind in ['init', 'uninit']:
        actualk = actual.sel(kind=kind, results='skill')
        if 'init' in actualk.coords:
            actualk = actualk.mean('init')
        actualk = actualk.isnull().any()
        assert not actualk

def test_bootstrap_perfect_model_ds1d_not_nan(PM_ds_ds1d, PM_ds_control1d,
                                              metric, comparison):
    """
    Checks that there are no NaNs on bootstrap init skill or uninit p of 1D ds time series.
    """
    actual = bootstrap_perfect_model(PM_ds_ds1d,
                                     PM_ds_control1d,
                                     metric=metric,
                                     comparison=comparison,
                                     sig=50,
                                     bootstrap=2)
    for var in actual.data_vars:
        actual_init_skill = actual[var].sel(i='init',
                                            results='skill').isnull().any()
        assert not actual_init_skill
    for var in actual.data_vars:
        actual_uninit_p = actual[var].sel(i='uninit',
                                          results='p').isnull().any()
        assert not actual_uninit_p

def test_bootstrap_perfect_model_da1d_not_nan_probabilistic(
        PM_da_initialized_1d, PM_da_control_1d, metric, comparison):
    """
    Checks that there are no NaNs on perfect model probabilistic metrics of 1D
    time series.
    """
    kwargs = {
        "comparison": comparison,
        "metric": metric,
    }
    category_edges = np.array([0, 0.5, 1])
    if metric in probabilistic_metrics_requiring_logical:

        def f(x):
            return x > 0.5

        kwargs["logical"] = f
    elif metric == "threshold_brier_score":
        kwargs["threshold"] = 0.5
    elif metric == "contingency":
        kwargs["forecast_category_edges"] = category_edges
        kwargs["observation_category_edges"] = category_edges
        kwargs["score"] = "accuracy"
    elif metric == "rps":
        kwargs["category_edges"] = category_edges
    dim = (["member", "init"]
           if metric in probabilistic_metrics_requiring_more_than_member_dim
           else "member")
    kwargs["dim"] = dim

    assert (not compute_perfect_model(PM_da_initialized_1d, PM_da_control_1d,
                                      **kwargs).isnull().all())

    kwargs["iterations"] = ITERATIONS
    kwargs["resample_dim"] = "member"
    actual = bootstrap_perfect_model(PM_da_initialized_1d, PM_da_control_1d,
                                     **kwargs)
    for kind in ["initialized", "uninitialized"]:
        actualk = actual.sel(kind=kind, results="skill")
        actualk = actualk.isnull().all()
        assert not actualk
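`probabilistic_metrics_requiring_logical` and `probabilistic_metrics_requiring_more_than_member_dim` are module-level lists from the surrounding test suite; their exact membership is an assumption here, but a plausible definition looks like:

# assumed: metrics that need a `logical` callable to binarize the data
probabilistic_metrics_requiring_logical = [
    'brier_score',
    'discrimination',
    'reliability',
]

# assumed: metrics that must reduce over more than just the member dimension
probabilistic_metrics_requiring_more_than_member_dim = [
    'rank_histogram',
    'discrimination',
    'reliability',
]
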
def test_bootstrap_perfect_model_ds1d_not_nan(PM_ds_initialized_1d,
                                              PM_ds_control_1d):
    """
    Checks that there are no NaNs on bootstrap perfect_model of 1D ds.
    """
    actual = bootstrap_perfect_model(
        PM_ds_initialized_1d,
        PM_ds_control_1d,
        metric="rmse",
        comparison="e2c",
        dim="init",
        sig=50,
        iterations=ITERATIONS,
    )
    for var in actual.data_vars:
        actual_init_skill = (actual[var].sel(
            skill="initialized", results="verify skill").isnull().any())
        assert not actual_init_skill
    for var in actual.data_vars:
        actual_uninit_p = (actual[var].sel(skill="uninitialized",
                                           results="p").isnull().any())
        assert not actual_uninit_p
Example #26
def test_bootstrap_perfect_model_da1d_not_nan_probabilistic(
    PM_da_initialized_1d, PM_da_control_1d, metric, comparison
):
    """
    Checks that there are no NaNs on perfect model probabilistic metrics of 1D
    time series.
    """
    if 'threshold' in metric:
        threshold = 10.5
    else:
        threshold = None

    if metric == 'brier_score':

        def func(x):
            return x > 0

    else:
        func = None

    actual = bootstrap_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        comparison=comparison,
        metric=metric,
        threshold=threshold,
        gaussian=True,
        func=func,
        iterations=ITERATIONS,
        dim='member',
        resample_dim='member',
    )
    for kind in ['init', 'uninit']:
        actualk = actual.sel(kind=kind, results='skill')
        if 'init' in actualk.coords:
            actualk = actualk.mean('init')
        actualk = actualk.isnull().any()
        assert not actualk
Example #27
# notebook-style excerpt: `ds3d`, `control3d`, `metric` and the `anom_*_nino34_init`
# selections are assumed to be defined in earlier cells
s3d = compute_perfect_model(ds3d, control3d, metric=metric)
# s3d.plot(col='lead')

s3dapn = compute_perfect_model(
    ds3d.sel(init=anom_pos_nino34_init.values), control3d, metric=metric)
s3daneun = compute_perfect_model(
    ds3d.sel(init=anom_neu_nino34_init), control3d, metric=metric)
s3dann = compute_perfect_model(
    ds3d.sel(init=anom_neg_nino34_init.values), control3d, metric=metric)
sa3d = xr.concat([s3d, s3dapn, s3daneun, s3dann], 'IC')
sa3d['IC'] = ['all', 'pos', 'neutral', 'neg']

warnings.simplefilter("ignore")

b = 100
s3d = bootstrap_perfect_model(ds3d, control3d, metric=metric, bootstrap=b)
s3dapn = bootstrap_perfect_model(
    ds3d.sel(init=anom_pos_nino34_init.values), control3d, metric=metric, bootstrap=b)
s3daneun = bootstrap_perfect_model(
    ds3d.sel(init=anom_neu_nino34_init), control3d, metric=metric, bootstrap=b)
s3dann = bootstrap_perfect_model(
    ds3d.sel(init=anom_neg_nino34_init.values), control3d, metric=metric, bootstrap=b)

sa3d = xr.concat([s3d, s3dapn, s3daneun, s3dann], 'IC')
sa3d['IC'] = ['all', 'pos', 'neutral', 'neg']

# now sa3d carries the bootstrap dims (kind, results), so the p-value plot works
sa3d.sel(kind='uninit', results='p').plot(col='lead', row='IC', vmax=.2)
skill = sa3d.sel(kind='init', results='skill').where(
    sa3d.sel(results='p', kind='uninit') <= .05)
#sa3d.plot(col='lead', row='IC', robust=True)
#sa3d.name = 'CO$_2$ ACC'
# prototyping defaults; immediately overwritten by the loop below
metric = 'pearson_r'
v = 'co2_flux'
v = 'CO2'
metric = 'rmse'
# loop configuration (the original script defined these after the loop)
bootstrap = 1000
comparison = 'm2e'
psig = .05
sig = 95        # assumed value; `sig` is not defined anywhere in this excerpt
save_nc = True  # assumed flag; `save_nc` is not defined anywhere in this excerpt
# `data_path` and `comply_climpred` are assumed to come from earlier in the script

for metric in ['pearson_r', 'rmse']:
    for v in ['co2_flux', 'CO2']:
        varstring = copy(v)
        print(v, varstring)
        ds = xr.open_dataset(data_path + 'ds_' + v + '_ym.nc').load()
        control = xr.open_dataset(data_path + 'control_' + v + '_ym.nc').load()
        ds, control = comply_climpred(ds, control)
        # faster
        ds = ds.isel(lead=slice(None, 6)).isel(member=slice(1, None))

        bs = bootstrap_perfect_model(
            ds, control, metric=metric, comparison=comparison, bootstrap=bootstrap, sig=sig)
        if save_nc:
            bs.to_netcdf('_'.join(['results', varstring, 'ym', 'metric', metric, 'comparison',
                                   comparison, 'sig', str(sig), 'bootstrap', str(bootstrap)]) + '.nc')

skill = bs.sel(results='skill', kind='init').where(
    bs.sel(results='p', kind='uninit') <= psig)
skill[v].plot(col='lead', col_wrap=4, yincrease=False)

Example #29
comparison = 'm2e'
sig = 95
psig = (100 - sig) / 100
bootstrap = 5000

# `post_global` and `save_nc` are assumed to be defined earlier in the original script
control = xr.open_dataset(post_global + 'control_diagnosed_co2.nc')
ds = xr.open_dataset(post_global + 'ds_diagnosed_co2.nc')

# rename dims to climpred requirements
if 'ensemble' in ds.dims:
    ds = ds.rename({'ensemble': 'init', 'time': 'lead'})

p = 'data/results/'
compute = False
if compute:
    bs_acc = bootstrap_perfect_model(
        ds, control, metric='pearson_r', comparison=comparison,
        bootstrap=bootstrap, sig=sig)
    if save_nc:
        bs_acc.to_netcdf(p + '_'.join(
            ['results', 'global', 'ym', 'metric', 'pearson_r', 'comparison',
             comparison, 'sig', str(sig), 'bootstrap', str(bootstrap)]) + '.nc')
    bs_rmse = bootstrap_perfect_model(
        ds, control, metric='rmse', comparison=comparison,
        bootstrap=bootstrap, sig=sig)
    if save_nc:
        bs_rmse.to_netcdf(p + '_'.join(
            ['results', 'global', 'ym', 'metric', 'rmse', 'comparison',
             comparison, 'sig', str(sig), 'bootstrap', str(bootstrap)]) + '.nc')
else:
    bs_acc = xr.open_dataset(p + '_'.join(
        ['results', 'global', 'ym', 'metric', 'pearson_r', 'comparison',
         comparison, 'sig', str(sig), 'bootstrap', str(bootstrap)]) + '.nc')
    bs_rmse = xr.open_dataset(p + '_'.join(
        ['results', 'global', 'ym', 'metric', 'rmse', 'comparison',
         comparison, 'sig', str(sig), 'bootstrap', str(bootstrap)]) + '.nc')