def xr_corr(x, y, dim='time', lag=0, return_p=False):
    """Compute the Pearson product-moment coefficient of linear correlation.

    When ``return_p`` is True the p value comes from ``_xr_eff_p_value``,
    which is intended to use the effective degrees of freedom, accounting
    for autocorrelation within each time series that could otherwise
    inflate the significance of the correlation.

    References:
        * Wilks, Daniel S. Statistical methods in the atmospheric sciences.
          Vol. 100. Academic press, 2011.
        * Lovenduski, Nicole S., and Nicolas Gruber. "Impact of the Southern
          Annular Mode on Southern Ocean circulation and biology."
          Geophysical Research Letters 32.11 (2005).

    Todo:
        * Test and adapt for xr.Datasets

    Args:
        x (xarray object): Independent variable time series or grid of time
            series.
        y (xarray object): Dependent variable time series or grid of time
            series.
        dim (optional str): Correlation dimension.
        lag (optional int): Lag to apply to the correlation, with x
            predicting y: ``x[0:N-lag]`` is paired with ``y[lag:N]``.
        return_p (optional bool): If True, return correlation coefficients
            as well as p values.

    Returns:
        Pearson correlation coefficients.
        If ``return_p`` is True, also the associated p values.
    """
    check_xarray(x)
    check_xarray(y)
    if lag != 0:
        N = x[dim].size
        normal = x.isel({dim: slice(0, N - lag)})
        shifted = y.isel({dim: slice(0 + lag, N)})
        if dim not in list(x.coords):
            # No coordinate on ``dim``: attach a dummy 1-based one. It must
            # be as long as the sliced series (N - lag), not N - 1, or the
            # assignment only works for lag == 1.
            normal[dim] = np.arange(1, normal[dim].size + 1)
        # Both slices need the same coordinate so they line up pairwise.
        shifted[dim] = normal[dim]
        r = pearson_r(normal, shifted, dim)
    else:
        r = pearson_r(x, y, dim)

    if not return_p:
        return r

    # NOTE(review): the p value is computed from the unshifted x and y even
    # when lag != 0 -- confirm this is what _xr_eff_p_value expects.
    p = _xr_eff_p_value(x, y, r, dim)
    # Return with proper dimension labeling: the p values come back with
    # generic 'dim_<i>' names, so rename them to match r. Would be easier
    # with apply_ufunc, but that has been troublesome here (probably an
    # issue with core dims).
    for i, name in enumerate(get_dims(r)):
        p = p.rename({'dim_' + str(i): name})
    return r, p
def _lag_corr(x, y, dim, lead):
    """Correlate ``x`` with ``y`` shifted by ``lead`` steps along ``dim``.

    The first ``N - lead`` entries of ``x`` are paired with the entries of
    ``y`` from index ``lead`` onward, where ``N`` is the size of ``dim``.

    Args:
        x (xarray object): Series supplying the un-shifted slice.
        y (xarray object): Series supplying the shifted slice.
        dim (str): Dimension to shift and correlate over.
        lead (int): Number of steps to shift ``y`` by.

    Returns:
        Result of ``pearson_r`` over ``dim`` for the two slices.
    """
    n_steps = x[dim].size
    head = slice(0, n_steps - lead)
    tail = slice(lead, n_steps)

    leading = x.isel({dim: head})
    lagging = y.isel({dim: tail})

    # Give both slices the same coordinate so the xarray operation pairs
    # them element by element.
    lagging[dim] = leading[dim]
    return pearson_r(leading, lagging, dim)
def _compute_autocorr(v, dim, n):
    """Correlate ``v`` with itself shifted by one step along ``dim``.

    Two slices of equal length are taken (indices ``1..n-1`` and
    ``0..n-2``) and given a common coordinate so that correlating them
    does not throw an error.

    Args:
        v (xarray object): Series to autocorrelate.
        dim (str): Dimension to shift and correlate over.
        n (int): Upper slice bound; presumably the size of ``dim``.

    Returns:
        Result of ``pearson_r`` over ``dim`` for the shifted and un-shifted
        slices.
    """
    later = v.isel({dim: slice(1, n)})
    earlier = v.isel({dim: slice(0, n - 1)})

    has_coord = dim in list(v.coords)
    if not has_coord:
        # Without a coordinate on ``dim`` there is nothing to copy across,
        # so attach a dummy one first.
        earlier[dim] = np.arange(1, n)
    later[dim] = earlier[dim]

    return pearson_r(later, earlier, dim)
def _lag_correlate(x, y, dim, lead, return_p):
    """Shift ``y`` by ``lead`` steps along ``dim`` and correlate with ``x``.

    Args:
        x (xarray object): Series supplying the un-shifted slice.
        y (xarray object): Series supplying the shifted slice.
        dim (str): Dimension to shift and correlate over.
        lead (int): Number of steps to shift ``y`` by.
        return_p (bool): If True, also return the p values.

    Returns:
        Correlation coefficients from ``pearson_r``; if ``return_p`` is
        True, a ``(coefficients, p values)`` tuple, with the p values
        taken from ``pearson_r_p_value`` on the same slices.
    """
    n_steps = x[dim].size
    leading = x.isel({dim: slice(0, n_steps - lead)})
    lagging = y.isel({dim: slice(lead, n_steps)})

    # Align dimensions for the xarray operation.
    lagging[dim] = leading[dim]

    corrcoef = pearson_r(leading, lagging, dim)
    if not return_p:
        return corrcoef
    pval = pearson_r_p_value(leading, lagging, dim)
    return corrcoef, pval
def _pearson_r(forecast, reference, dim='svd', comparison=None):
    """Anomaly Correlation Coefficient (ACC) of forecast vs. reference.

    .. math::
        ACC = \\frac{cov(f, o)}{\\sigma_{f}\\cdot\\sigma_{o}}

    .. note::
        Use metric ``pearson_r_p_value`` to get the corresponding pvalue.

    Args:
        forecast: Forecast passed as the first argument to ``pearson_r``.
        reference: Reference passed as the second argument to ``pearson_r``.
        dim (str): Dimension to correlate over.
        comparison: Accepted but not used by this metric.

    Range:
        * perfect: 1
        * min: -1

    See also:
        * xskillscore.pearson_r
        * xskillscore.pearson_r_p_value
    """
    acc = pearson_r(forecast, reference, dim=dim)
    return acc
def xr_autocorr(ds, lag=1, dim='time', return_p=False):
    """Calculate the lagged correlation of a time series with itself.

    Args:
        ds (xarray object): Time series or grid of time series.
        lag (optional int): Number of time steps to lag correlate to.
        dim (optional str): Name of dimension to autocorrelate over.
        return_p (optional bool): If True, return correlation coefficients
            and p values.

    Returns:
        Pearson correlation coefficients.
        If ``return_p`` is True, also their associated p values.
    """
    check_xarray(ds)
    N = ds[dim].size
    normal = ds.isel({dim: slice(0, N - lag)})
    shifted = ds.isel({dim: slice(0 + lag, N)})
    # xskillscore's pearson_r looks for the dimensions to be matching, but
    # we shifted them so they probably won't be. Copying the coordinate
    # across doesn't work if the user provides data without a coordinate
    # for the main dimension, so create a dummy one in that case. It must
    # be as long as the sliced series (N - lag), not N - 1, or the
    # assignment only works for lag == 1.
    if dim not in list(ds.coords):
        normal[dim] = np.arange(1, normal[dim].size + 1)
    shifted[dim] = normal[dim]

    r = pearson_r(normal, shifted, dim)
    if return_p:
        # NOTE: This assumes 2-tailed. Need to update xr_eff_pearsonr
        # to utilize xskillscore's metrics but then compute own effective
        # p-value with option for one-tailed.
        p = pearson_r_p_value(normal, shifted, dim)
        return r, p
    return r
# NOTE(review): the original indentation of this fragment was lost and its
# enclosing context is not visible. It presumably runs once per model, with
# the legend/savefig/close calls after all models are drawn -- confirm.

# Choose the legend label and line colour for the current model. Two model
# ids are shortened for display; the others keep their name.
# NOTE(review): _line_color is not assigned here when _model matches none
# of these ids -- confirm it is set elsewhere.
if _model == 'FIMr1p1':
    _model = 'FIM'
    _line_color = 'red'
if _model == 'GEFS':
    _line_color = 'green'
if _model == 'GEM':
    _line_color = 'blue'
# NOTE(review): exact duplicate of the check above; possibly a copy-paste
# slip where another model id was intended.
if _model == 'GEM':
    _line_color = 'blue'
if _model == 'GEOS_V2p1':
    _model = 'GEOS'
    _line_color = 'purple'
if _model == 'NESM':
    _line_color = 'orange'

# Skill of fct against obs, reduced over dimension 'S'
# (xs is presumably xskillscore -- confirm against the imports).
r = xs.pearson_r(obs, fct, 'S')
_r = r.values
rmse = xs.rmse(obs, fct, 'S')
_rmse = rmse.values

# One x position per entry of da.L, numbered from 1.
_x = np.arange(1, len(da.L) + 1)

# Correlation goes on ax1 (which carries the legend label), RMSE on ax2.
ax1.plot(_x, _r, label=_model, linewidth=2, color=_line_color)
ax2.plot(_x, _rmse, linewidth=2, color=_line_color)

ax1.legend(loc="upper right", fontsize=16)

# Write the figure as both PNG and EPS, then close it.
plt.savefig(fsavename + '.png', bbox_inches='tight')
plt.savefig(fsavename + '.eps', bbox_inches='tight', format='eps')
plt.close()
# Read the 'sea_ice_area_fraction@oman' variable, masking invalid (NaN/inf)
# entries. NOTE(review): "oman" presumably means observation-minus-analysis
# -- confirm.
oma = np.ma.masked_invalid(nc['sea_ice_area_fraction@oman'].data)

# Recover the background and analysis values from the observations.
# omg is defined above this fragment (presumably observation-minus-guess).
bkg = obs_in - omg
ana = obs_in - oma

# set obs and fct arrays -------------------------------------------------
obs = xr.DataArray(
    obs_in,
    coords=[record],
    dims=["nlocs"],
)
# fct gets the same coordinates as obs, with the background as its values.
fct = obs.copy()
fct.values = bkg

### Deterministic metrics
# Pearson's correlation coefficient
r = xs.pearson_r(obs, fct, "nlocs")
# 2-tailed p-value of Pearson's correlation coefficient (disabled)
#jkim r_p_value = xs.pearson_r_p_value(obs, fct, "nlocs")
# Spearman's correlation coefficient
rs = xs.spearman_r(obs, fct, "nlocs")
# 2-tailed p-value associated with Spearman's correlation coefficient
# (disabled)
#jkim rs_p_value = xs.spearman_r_p_value(obs, fct, "nlocs")
# Root Mean Squared Error
rmse = xs.rmse(obs, fct, "nlocs")
# Mean Squared Error
mse = xs.mse(obs, fct, "nlocs")