Exemple #1
0
def compute_uninitialized(uninit,
                          reference,
                          metric='pearson_r',
                          comparison='e2r',
                          return_p=False,
                          dim='time'):
    """
    Compute a predictability skill score between an uninitialized ensemble
    and some reference (hindcast, assimilation, reconstruction, observations).

    Based on Decadal Prediction protocol, this should only be computed for the
    first lag and then projected out to any further lags being analyzed.

    Parameters
    ----------
    uninit (xarray object):
        uninitialized ensemble.
    reference (xarray object):
        reference output/data over same time period.
    metric (str):
        Metric used in comparing the decadal prediction ensemble with the
        reference.
        * pearson_r (Default)
        * rmse
        * mae
        * mse
    comparison (str):
        How to compare the decadal prediction ensemble to the reference.
        * e2r : ensemble mean to reference (Default)
        * m2r : each member to the reference
    return_p (bool): If True, return p values associated with pearson r.
    dim (str): Name of the dimension the metric (and p value) is computed
               over. Default: 'time'

    Returns:
        u (xarray object): Results from comparison at the first lag.
        p (xarray object): If `return_p`, p values associated with
                                 pearson r correlations.

    Raises:
        KeyError: If `comparison` does not resolve to 'e2r' or 'm2r', or if
                  `return_p` is requested for a metric other than
                  'pearson_r'.
    """
    check_xarray(uninit)
    check_xarray(reference)
    comparison = get_comparison_function(comparison)
    if comparison not in [_e2r, _m2r]:
        raise KeyError("""Please input either 'e2r' or 'm2r' for your
            comparison. This will be implemented for the perfect model setup
            in the future.""")
    metric = get_metric_function(metric)
    # Reject an impossible request up front, before the comparison and the
    # metric are evaluated. Logical `and` is used (not bitwise `&`) so that
    # any truthy `return_p` is honoured.
    if return_p and metric is not _pearson_r:
        raise KeyError("""You can only return p values if the metric is
            'pearson_r'.""")
    uninit, reference = comparison(uninit, reference)
    u = metric(uninit, reference, dim=dim)
    if return_p:
        p = pearson_r_p_value(uninit, reference, dim=dim)
        return u, p
    return u
Exemple #2
0
def _pearson_r_p_value(forecast, reference, dim='svd', comparison=None):
    """
    Calculate the probability associated with the ACC not being random.

    Thin wrapper around `pearson_r_p_value` that silences every warning
    raised while the p value is computed. `comparison` is accepted but not
    used by this function.
    """
    # NaNs (e.g. masked cells on a climate model grid) make the p-value
    # computation emit runtime warnings; mute them for the duration of the
    # call only.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        return pearson_r_p_value(forecast, reference, dim=dim)
Exemple #3
0
 def _lag_correlate(x, y, dim, lead, return_p):
     """Correlate `x` against `y` shifted by `lead` steps along `dim`.

     Returns the Pearson correlation coefficient, or a
     (coefficient, p value) tuple when `return_p` is truthy.
     """
     n_steps = x[dim].size
     # Drop the last `lead` steps of x and the first `lead` steps of y so
     # both slices have the same length.
     leading = x.isel({dim: slice(0, n_steps - lead)})
     lagging = y.isel({dim: slice(lead, n_steps)})
     # Give both slices the same coordinate so xarray lines them up
     # element by element.
     lagging[dim] = leading[dim]
     r = pearson_r(leading, lagging, dim)
     if not return_p:
         return r
     p = pearson_r_p_value(leading, lagging, dim)
     return r, p
Exemple #4
0
def xr_autocorr(ds, lag=1, dim='time', return_p=False):
    """Calculate the lagged correlation of time series.

    Args:
        ds (xarray object): Time series or grid of time series.
        lag (optional int): Number of time steps to lag correlate to.
        dim (optional str): Name of dimension to autocorrelate over.
        return_p (optional bool): If True, return correlation coefficients
                                  and p values.

    Returns:
        Pearson correlation coefficients.

        If return_p, also returns their associated p values.
    """
    check_xarray(ds)
    N = ds[dim].size
    normal = ds.isel({dim: slice(0, N - lag)})
    shifted = ds.isel({dim: slice(0 + lag, N)})
    # xskillscore pearson_r looks for the dimensions to be matching, but we
    # shifted them so they probably won't be. Copying the coordinate across
    # doesn't work if the user provides a dataset without a coordinate for
    # the main dimension, so we create a dummy coordinate in that case.
    if dim not in ds.coords:
        # The dummy coordinate must be as long as the sliced series
        # (N - lag entries). A fixed `np.arange(1, N)` only fits lag == 1.
        normal[dim] = np.arange(1, normal[dim].size + 1)
    shifted[dim] = normal[dim]
    r = pearson_r(normal, shifted, dim)
    if return_p:
        # NOTE: This assumes 2-tailed. Need to update xr_eff_pearsonr
        # to utilize xskillscore's metrics but then compute own effective
        # p-value with option for one-tailed.
        p = pearson_r_p_value(normal, shifted, dim)
        return r, p
    return r
Exemple #5
0
def _pearson_r_p_value(forecast, reference, dim='svd', comparison=None):
    """
    Calculate the probability associated with the ACC not being random.

    Delegates directly to `pearson_r_p_value` along `dim`. `comparison` is
    accepted but not used by this function.
    """
    p_value = pearson_r_p_value(forecast, reference, dim=dim)
    return p_value
Exemple #6
0
def r_p(a, b):
    """Return the Pearson-r p value of `a` against `b` over "time"."""
    p_value = xs.pearson_r_p_value(a, b, "time")
    return p_value
Exemple #7
0
def compute_reference(ds,
                      reference,
                      metric='pearson_r',
                      comparison='e2r',
                      nlags=None,
                      return_p=False):
    """
    Compute a predictability skill score against some reference (hindcast,
    assimilation, reconstruction, observations).

    Note that if reference is the reconstruction, the output correlation
    coefficients are for potential predictability. If the reference is
    observations, the output correlation coefficients are actual skill.

    Parameters
    ----------
    ds (xarray object):
        Expected to follow package conventions:
        `initialization` : dim of initialization dates
        `time` : dim of lead years from those initializations
        Additional dims can be lat, lon, depth.
    reference (xarray object):
        reference output/data over same time period.
    metric (str):
        Metric used in comparing the decadal prediction ensemble with the
        reference.
        * pearson_r (Default)
        * rmse
        * mae
        * mse
    comparison (str):
        How to compare the decadal prediction ensemble to the reference.
        * e2r : ensemble mean to reference (Default)
        * m2r : each member to the reference
    nlags (int): How many lags to compute skill/potential predictability out
                 to. Default: length of `time` dim
    return_p (bool): If True, return p values associated with pearson r.

    Returns:
        skill (xarray object): Predictability with main dimension `lag`.
        p_value (xarray object): If `return_p`, p values associated with
                                 pearson r correlations.

    Raises:
        ValueError: If `comparison` does not resolve to 'e2r' or 'm2r', if
                    `metric` is not one of the supported metrics, or if
                    `return_p` is requested for a metric other than
                    pearson_r.
    """
    _check_xarray(ds)
    _check_xarray(reference)
    comparison = _get_comparison_function(comparison)
    if comparison not in [_e2r, _m2r]:
        raise ValueError("""Please input either 'e2r' or 'm2r' for your
            comparison.""")
    metric = _get_metric_function(metric)
    if metric not in [_pearson_r, _rmse, _mse, _mae]:
        raise ValueError("""Please input 'pearson_r', 'rmse', 'mse', or
            'mae' for your metric.""")
    # Validate the whole request before doing any computation. Logical
    # `and` is used (not bitwise `&`) so any truthy `return_p` is honoured.
    if return_p and metric is not _pearson_r:
        raise ValueError("""You can only return p values if the metric is
            pearson_r.""")
    forecast, reference = comparison(ds, reference)
    if nlags is None:
        nlags = forecast.time.size
    plag = []
    p_value = []
    for i in range(nlags):
        # Shift once per lead and reuse the aligned pair for both the skill
        # and (optionally) its p value.
        # NOTE(review): assumes `metric` does not modify `a`/`b` in place.
        a, b = _shift(
            forecast.isel(time=i), reference, i, dim='initialization')
        plag.append(metric(a, b, dim='initialization'))
        if return_p:
            # NaN values throw warning for p-value comparison, so just
            # suppress that here.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                p_value.append(pearson_r_p_value(a, b, dim='initialization'))
    skill = xr.concat(plag, 'time')
    skill['time'] = np.arange(1, 1 + nlags)
    if not return_p:
        return skill
    p_value = xr.concat(p_value, 'time')
    p_value['time'] = np.arange(1, 1 + nlags)
    return skill, p_value