def calc_corr_signif(fcast, obs, corr=None):
    """
    Calculate local anomaly correlation along with 95% significance.
    """
    assert fcast.shape == obs.shape

    corr_neff = St.calc_n_eff(fcast, obs)
    if corr is None:
        corr = St.calc_lac(fcast, obs)

    signif = np.empty_like(corr, dtype=bool)

    if np.any(abs(corr) < 0.5):
        g_idx = np.where(abs(corr) < 0.5)
        gen_2std = 2. / np.sqrt(corr_neff[g_idx])
        signif[g_idx] = (abs(corr[g_idx]) - gen_2std) > 0

    if np.any(abs(corr) >= 0.5):
        # Fisher z-transform for correlations far from zero
        z_idx = np.where(abs(corr) >= 0.5)
        z = 0.5 * np.log((1 + corr[z_idx]) / (1 - corr[z_idx]))
        z_2std = 2. / np.sqrt(corr_neff[z_idx] - 3)
        signif[z_idx] = (abs(z) - z_2std) > 0

    # TODO: figure out how to implement a t-test for the low-n_eff,
    # high-correlation case, e.g.
    # trow = np.where((corr_neff <= 20) & (corr >= 0.5))

    # Too few effective samples to assess significance
    signif[corr_neff <= 3] = False

    return signif, corr
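# The second branch above follows a standard rule: for |r| >= 0.5 the
# Fisher z-transform z = arctanh(r) is approximately normal with standard
# deviation 1 / sqrt(n_eff - 3), so |z| > 2 std gives a rough 95% test.
# A minimal, self-contained illustration of that bound (hypothetical r
# and n_eff; calc_corr_signif obtains n_eff from St.calc_n_eff):
def _fisher_z_signif_demo(r=0.6, n_eff=30):
    z = np.arctanh(r)  # same as 0.5 * log((1 + r) / (1 - r))
    z_2std = 2. / np.sqrt(n_eff - 3)
    return abs(z) > z_2std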
def get_scalar_outputs(dobj, nelem_in_yr, var_fcast, verif_data_attr,
                       out_types, use_dask=False):
    if use_dask:
        truth_data = dobj.reset_data(verif_data_attr)
    else:
        truth_data = getattr(dobj, verif_data_attr)

    truth_1yr = truth_data[nelem_in_yr:]
    truth_init = truth_data[:-nelem_in_yr]

    curr_var_output = {}
    for out_type in out_types:
        fcast_factor, verif_factor = get_scalar_factor(dobj, out_type,
                                                       verif_data_attr)

        var_out = var_fcast @ fcast_factor
        truth_init_out = truth_init @ verif_factor
        truth_1yr_out = truth_1yr @ verif_factor

        # Standardize PDO Index relative to truth output
        if out_type == 'pdo':
            truth_1yr_out, std_dev = _standardize_series(truth_1yr_out)
            var_out, _ = _standardize_series(var_out, std_dev=std_dev)
            truth_init_out, _ = _standardize_series(truth_init_out,
                                                    std_dev=std_dev)

        if use_dask:
            t_truth_1yr_out = np.empty(truth_1yr_out.shape)
            t_truth_init_out = np.empty(truth_init_out.shape)

            dask_vars = [truth_1yr_out, truth_init_out]
            dask_outs = [t_truth_1yr_out, t_truth_init_out]

            if ST.is_dask_array(var_out):
                t_var_out = np.empty(var_out.shape)
                dask_vars.append(var_out)
                dask_outs.append(t_var_out)

            da.store(dask_vars, dask_outs)

            truth_1yr_out = t_truth_1yr_out
            truth_init_out = t_truth_init_out
            if ST.is_dask_array(var_out):
                var_out = t_var_out

        curr_var_output[out_type] = {'fcast': var_out,
                                     't0': truth_init_out,
                                     '1yr': truth_1yr_out}

    return curr_var_output
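# A minimal sketch of the behavior assumed for _standardize_series
# (defined elsewhere in this module): remove the mean and divide by a
# standard deviation, returning the std dev so the forecast and
# initial-condition series can be scaled by the truth series' std dev.
# Illustrative assumption only, not the module's implementation:
def _standardize_series_sketch(series, std_dev=None):
    series = series - series.mean(axis=0)
    if std_dev is None:
        std_dev = series.std(ddof=1, axis=0)
    return series / std_dev, std_dev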
def conf_bound95(fcast, reference, metric='r', use_sample_size=None):
    # NOTE: use_sample_size is currently unused
    n_samples = len(fcast)
    n_iters = 10000

    if metric == 'r':
        mean_target = np.corrcoef(fcast, reference)[0, 1]
    elif metric == 'ce':
        mean_target = ST.calc_ce(fcast, reference)
    else:
        raise KeyError('Unknown metric specified: {}'.format(metric))

    args = [(fcast, reference, n_samples, metric), ]
    seeds = np.random.choice(n_iters * 10, size=n_iters, replace=False)
    iter_args = product(seeds, args)

    with Pool(processes=10) as calc_pool:
        result = calc_pool.map(pool_func, iter_args)

    result = np.array(result)

    diff = abs(result.mean() - mean_target)
    print('Mean Target {} diff: {:1.3e}'.format(metric, diff))

    # Create bounds for plt.errorbar
    lower_bnd = np.percentile(result, 2.5)
    upper_bnd = np.percentile(result, 97.5)
    res_mean = result.mean()

    conf_bounds = (upper_bnd, lower_bnd)

    return res_mean, conf_bounds
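# Hypothetical usage sketch: bootstrap a 95% interval for the correlation
# between a forecast series and its verification target. Synthetic data
# is used here; in the module these series come from the scalar forecast
# outputs. Note the returned bounds tuple is ordered (upper, lower).
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    truth_demo = rng.randn(200)
    fcast_demo = 0.7 * truth_demo + 0.3 * rng.randn(200)
    mean_r, (upper, lower) = conf_bound95(fcast_demo, truth_demo,
                                          metric='r')
    print('r = {:.2f}, 95% interval: [{:.2f}, {:.2f}]'.format(mean_r,
                                                              lower,
                                                              upper))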
def _calc_limstate_eofs(data, num_eofs):
    var_stats = {}
    state_eofs, state_svals = plstat.calc_eofs(data, num_eofs,
                                               var_stats_dict=var_stats)
    return state_eofs, state_svals, var_stats
def calc_state_eofs(self, num_eofs):
    var_stats = {}
    state_eofs, state_svals = ST.calc_eofs(self.data, num_eofs,
                                           var_stats_dict=var_stats)
    self.eofs = state_eofs
    self.svals = state_svals

    return var_stats
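# A self-contained sketch of the EOF decomposition assumed behind
# ST.calc_eofs: an SVD of the (time, space) data matrix keeping the
# leading num_eofs spatial patterns and singular values. The real
# implementation also fills var_stats_dict with variance statistics;
# whether it demeans internally is an assumption here.
def calc_eofs_sketch(data, num_eofs):
    data = data - data.mean(axis=0)
    u, svals, vt = np.linalg.svd(data, full_matrices=False)
    eofs = vt[:num_eofs].T  # (space, num_eofs) spatial patterns
    return eofs, svals[:num_eofs]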
def get_pdo_factor(eofs, data, latgrid, longrid):
    npac_mask = ((latgrid >= 20) & (latgrid <= 70) &
                 (longrid >= 110) & (longrid <= 250))

    # Have to perform a full read here because fancy indexing doesn't
    # work on CArrays
    data = data[:][:, npac_mask]
    npac_eofs, npac_svals = ST.calc_eofs(data, 1)

    npac_eofs_full = np.zeros_like(latgrid)
    npac_eofs_full[npac_mask] = npac_eofs[:, 0]

    pdo_factor = eofs.T @ npac_eofs_full

    return pdo_factor, npac_eofs_full
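# Usage sketch for the factor returned above: since the LIM state is
# carried in EOF space, projecting forecast coefficients onto pdo_factor
# yields a PDO index series directly. Shapes shown are assumptions
# consistent with how the factor is used in get_scalar_outputs:
#
#   eofs             : (space, num_eofs)
#   pdo_factor       : (num_eofs,)
#   fcast_eof_space  : (ntimes, num_eofs)
#   pdo_index = fcast_eof_space @ pdo_factor   # (ntimes,)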
def pool_func(args):
    # TODO: Samples are correlated in time, need to do block resample
    # instead
    i, (fcast, ref, n_samples, metric) = args
    np.random.seed(i)
    sample_idx = np.random.choice(n_samples, size=n_samples, replace=True)
    test_fcast = fcast[sample_idx]
    test_ref = ref[sample_idx]

    if metric == 'r':
        out = np.corrcoef(test_fcast, test_ref)[0, 1]
    else:
        out = ST.calc_ce(test_fcast, test_ref)

    return out
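# The TODO above notes that i.i.d. resampling understates uncertainty for
# autocorrelated series. A minimal sketch of a moving-block bootstrap
# index generator that could replace np.random.choice here; block_len
# would need to be chosen from the series' decorrelation time. This is
# an illustrative assumption, not part of the module as written:
def block_resample_idx(n_samples, block_len, rng):
    # Draw enough block starting points to cover the series, then take
    # consecutive indices within each block and trim to length.
    n_blocks = int(np.ceil(n_samples / block_len))
    starts = rng.randint(0, n_samples - block_len + 1, size=n_blocks)
    idx = np.concatenate([np.arange(s, s + block_len) for s in starts])
    return idx[:n_samples]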
def _gen_nao_index_factor(prior_cfg, varname):
    print('Generating NAO EOF for index calculation.')

    prior_var = PriorVariable.load(prior_cfg, varname, anomaly=True,
                                   nens=None)
    var_dobj, latgrid, longrid = _load_prior_var_dat(prior_cfg, varname)

    mask = _gen_latlon_grid_mask(latgrid, longrid, 20, 80, 270, 40)
    valid_data = var_dobj.valid_data

    var_dobj.detrend_data()
    var_dobj.area_weight_data(use_sqrt=True)

    data = var_dobj.data[:][:, mask]
    natl_eofs, natl_svals = ST.calc_eofs(data, 1)
    nao_eof = natl_eofs[:, 0]

    return nao_eof, valid_data, mask
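# A sketch of the behavior assumed for _gen_latlon_grid_mask (defined
# elsewhere): a boolean mask over flattened lat/lon grids for a box.
# Note the NAO box above (lon 270 to 40) crosses the prime meridian, so
# the longitude test presumably wraps when lon_min > lon_max:
def _gen_latlon_grid_mask_sketch(latgrid, longrid, lat_min, lat_max,
                                 lon_min, lon_max):
    lat_mask = (latgrid >= lat_min) & (latgrid <= lat_max)
    if lon_min <= lon_max:
        lon_mask = (longrid >= lon_min) & (longrid <= lon_max)
    else:
        # Box wraps across the 0/360 longitude seam
        lon_mask = (longrid >= lon_min) | (longrid <= lon_max)
    return lat_mask & lon_mask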
def long_output_to_scalar(output, dobjs, output_map, state, verif_spec,
                          use_dask=False):
    output_orig = state.proj_data_into_orig_basis(output,
                                                  unstandardize=True)

    var_scalar_out = {}
    for var_key, dobj in dobjs.items():
        verif_data_attr = verif_spec.get(var_key, 'detrended')
        data = state.get_var_from_state(var_key, data=output_orig)

        if use_dask:
            truth_data = dobj.reset_data(verif_data_attr)
        else:
            truth_data = getattr(dobj, verif_data_attr)

        truth_data = truth_data[:]

        curr_var_output = {}
        for out_type in output_map[var_key]:
            fcast_factor, verif_factor = get_scalar_factor(dobj, out_type,
                                                           verif_data_attr)

            scalar_out = data @ fcast_factor
            compare_scalar_out = truth_data @ verif_factor

            if out_type == 'pdo':
                compare_scalar_out, std_dev = \
                    _standardize_series(compare_scalar_out)
                scalar_out, _ = _standardize_series(scalar_out,
                                                    std_dev=std_dev)

            if use_dask:
                tmp_compare = np.empty(compare_scalar_out.shape)
                da.store(compare_scalar_out, tmp_compare)
                compare_scalar_out = tmp_compare

                if ST.is_dask_array(scalar_out):
                    tmp_scalar = np.empty(scalar_out.shape)
                    da.store(scalar_out, tmp_scalar)
                    scalar_out = tmp_scalar

            curr_var_output[out_type] = {'fcast': scalar_out,
                                         'source': compare_scalar_out}

        var_scalar_out[var_key] = curr_var_output

    return var_scalar_out
def _gen_pdo_index_factor(prior_cfg, varname):
    print('Generating PDO EOF for index calculation.')

    var_dobj, latgrid, longrid = _load_prior_var_dat(prior_cfg, varname)

    # PDO region mask from the compressed grid
    mask = _gen_latlon_grid_mask(latgrid, longrid, 20, 70, 110, 250)
    # Valid mask from the full grid
    valid_data = var_dobj.valid_data

    # TODO: Change to removal of GMT regression signal
    var_dobj.detrend_data()
    var_dobj.area_weight_data(use_sqrt=True)

    data = var_dobj.data[:][:, mask]
    npac_eofs, npac_svals = ST.calc_eofs(data, 1)
    pdo_eof = npac_eofs[:, 0]

    # full_grid_pdo_eof = var_dobj.inflate_full_grid(data=compressed_pdo_eof)

    return pdo_eof, valid_data, mask
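# The TODO above suggests replacing linear detrending with removal of the
# global-mean-temperature (GMT) regression signal before computing the
# PDO EOF. A minimal sketch of that operation, assuming `data` is
# (time, space) and `gmt` is a (time,) global-mean series; both inputs
# are hypothetical, not produced by this module as written:
def remove_gmt_signal_sketch(data, gmt):
    gmt_anom = gmt - gmt.mean()
    # Least-squares regression coefficient of each grid point onto GMT
    beta = (gmt_anom @ (data - data.mean(axis=0))) / (gmt_anom @ gmt_anom)
    return data - np.outer(gmt_anom, beta)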
def fcast_corr(h5file, avg_trial=False):
    """
    Calculate the local anomaly correlation for a LIM forecast at every
    point.

    Parameters
    ----------
    h5file: tables.File
        PyTables HDF5 file containing LIM forecast data. All necessary
        variables are loaded from this file.

    Returns
    -------
    ndarray
        Local anomaly correlation for each forecast lead time at all
        points (compared against observations).
    """
    if avg_trial:
        corr_node_name = 'corr_trial_avg'
        signif_node_name = 'corr_tavg_signif'
    else:
        corr_node_name = 'corr'
        signif_node_name = 'corr_signif'
    parent = '/stats'

    assert h5file is not None and type(h5file) == tb.File

    # Load necessary data
    try:
        try:
            obs = h5file.root.data.anomaly[:]
        except tb.NoSuchNodeError:
            obs = h5file.root.data.detrended[:]
        test_start_idxs = h5file.root.data._v_attrs.test_start_idxs
        fcast_times = h5file.root.data._v_attrs.fcast_times
        fcasts = h5file.list_nodes(h5file.root.data.fcast_bin)
        eofs = h5file.root.data.eofs[:]
        yrsize = h5file.root.data._v_attrs.yrsize
        test_tdim = h5file.root.data._v_attrs.test_tdim
    except tb.NodeError as e:
        raise type(e)(str(e) + ' Returning without finishing operation...')

    # Create output location in h5file
    atom = tb.Atom.from_dtype(obs.dtype)
    corr_shp = [len(fcast_times), obs.shape[1]]
    signif = np.ones(corr_shp, dtype=bool)

    try:
        corr_out = Dt.empty_hdf5_carray(h5file, parent, corr_node_name,
                                        atom, corr_shp,
                                        title="Spatial Correlation",
                                        createparents=True)
        signif_out = Dt.var_to_hdf5_carray(h5file, parent,
                                           signif_node_name, signif)
    except tb.FileModeError:
        corr_out = np.zeros(corr_shp)
        signif_out = signif

    # Calculate LAC
    for i, lead in enumerate(fcast_times):
        print('Calculating Correlation: %i yr fcast' % lead)
        if avg_trial:
            # TODO: Significance is currently ignored for avg_trial
            corr_trials = np.zeros((len(fcasts[i]), eofs.shape[1]))
            for j, trial in enumerate(fcasts[i]):
                phys_fcast = np.dot(trial.T, eofs[j].T)
                compiled_obs = build_trial_obs(obs, [test_start_idxs[j]],
                                               lead * yrsize, test_tdim)
                corr_trials[j] = St.calc_lac(phys_fcast, compiled_obs)

            corr = corr_trials.mean(axis=0)
            # One-sample t-test of trial correlations against zero
            ttest, pval = ttest_1samp(corr_trials, 0, axis=0)
            sig = pval <= 0.05
        else:
            compiled_obs = build_trial_obs(obs, test_start_idxs,
                                           lead * yrsize, test_tdim)
            data = fcasts[i].read()
            phys_fcast = build_trial_fcast(data, eofs)

            corr = St.calc_lac(phys_fcast, compiled_obs)
            sig, _ = calc_corr_signif(phys_fcast, compiled_obs, corr=corr)

        corr_out[i] = corr
        signif_out[i] = sig

    return corr_out, signif_out
def spatial_perf_fcast_verification(incl_keys, field_factors, times,
                                    fcast_1yr, state_obj, latgrid, longrid,
                                    valid_data_masks, var_std_factors,
                                    fcast_against_src, perf_figdir):
    """
    Parameters
    ----------
    incl_keys
        State keys for the basic output variables specified in the
        configuration. Keys are of the form (var_name, avg_interval).
    field_factors
        Dict of factors by (var_name, avg_interval) to matrix multiply
        the LIM-space output by to get the full field.
    times
        Array of years corresponding to 1-year forecast times.
    fcast_1yr
        LIM forecast in LIM space.
    state_obj
        State used as initial conditions for the forecast.
    latgrid
        Flattened grid of latitude coordinates.
    longrid
        Flattened grid of longitude coordinates.
    valid_data_masks
        Dict of masks by (var_name, avg_interval) applied to fields to
        omit NaN information.
    var_std_factors
        Dict of standardization factors for the fields by variable key
        (var_name, avg_interval).
    fcast_against_src
        Name of the prior source used to forecast against.
    perf_figdir
        Figure output path.

    Returns
    -------
    perf_fcast_dfs
        List of dataframes of global-mean verification statistics.
    perf_fcast_spatial
        Dict of spatial LAC/CE fields by variable key.
    """
    perf_fcast_dfs = []
    perf_fcast_spatial = {}
    for var_key in incl_keys:
        var_name, avg_interval = var_key
        field_factor = field_factors[var_key]
        valid_data = valid_data_masks.get(var_key, None)
        var_std = var_std_factors.get(var_key, None)
        init_field = mutils.get_field_from_state(state_obj, var_key,
                                                 valid_data=valid_data,
                                                 var_std_factor=var_std)
        ar1_field_fcast = mutils.red_noise_forecast_ar1(init_field)

        target_field = init_field[1:]
        fcast_1yr_field = fcast_1yr @ field_factor

        lac = ST.calc_lac(fcast_1yr_field, target_field)

        # Check for invalid LAC as a check of valid inputs
        invalid_data = np.isnan(lac)
        nonzero_data = np.logical_not(invalid_data)

        ce = ST.calc_ce(fcast_1yr_field, target_field)

        perf_fcast_spatial[var_key] = {'lac': lac, 'ce': ce}
        if valid_data is not None:
            perf_fcast_spatial[var_key]['valid_data'] = valid_data

        anom_corr = ST.calc_lac(fcast_1yr_field.T, target_field.T)

        ar1_lac = ST.calc_lac(ar1_field_fcast, target_field)
        ar1_ce = ST.calc_ce(ar1_field_fcast, target_field)
        ar1_anom_corr = ST.calc_lac(ar1_field_fcast.T[nonzero_data],
                                    target_field.T[nonzero_data])

        if var_key in valid_data_masks:
            valid_data = valid_data_masks[var_key]
            lat = latgrid[valid_data]
        else:
            lat = latgrid

        if np.any(invalid_data):
            warnings.warn('Grid data resulted in invalid skill metric for '
                          'field: {}, removing for '
                          'average...'.format(var_name))
            lat = lat[nonzero_data]

        # Get global average weights for field
        _, gm_weights = \
            LMR_outputs.get_area_avg_mask_and_weights(lat, None, None)

        lac_gm = lac[nonzero_data] @ gm_weights
        ce_gm = ce[nonzero_data] @ gm_weights
        avg_anom_corr = anom_corr.mean()
        ar1_lac_gm = ar1_lac[nonzero_data] @ gm_weights
        ar1_ce_gm = ar1_ce[nonzero_data] @ gm_weights
        ar1_avg_anom_corr = ar1_anom_corr.mean()

        spatial_gm_df = \
            mutils.ce_r_results_to_dataframe(var_name, avg_interval,
                                             'spatial_verif_gm', lac_gm,
                                             ce_gm, ar1_lac_gm, ar1_ce_gm,
                                             anom_corr=avg_anom_corr,
                                             auto1_anom_corr=ar1_avg_anom_corr)
        perf_fcast_dfs.append(spatial_gm_df)

        if plot_spatial_verif:
            plot_maps = [lac, ce, ar1_lac, ar1_ce]
            plot_metrs = ['LIM LAC', 'LIM CE', 'AR(1) LAC', 'AR(1) CE']

            for field, metric in zip(plot_maps, plot_metrs):
                valid_mask = valid_data_masks.get(var_key, None)
                sptl_shp = state_obj.var_space_shp[var_name]
                vutils.plot_spatial_verif(field, valid_mask, sptl_shp,
                                          latgrid, longrid, metric,
                                          fcast_against_src, avg_interval,
                                          var_name, fig_dir=perf_figdir)

            acorr_file = 'spatial_anomaly_corr_{}_{}.png'.format(var_name,
                                                                 var_key)
            acorr_path = os.path.join(perf_figdir, acorr_file)

            ptools.plot_anomaly_correlation(times, anom_corr,
                                            ar1_anom_corr, var_name,
                                            avg_interval,
                                            savefile=acorr_path)

    return perf_fcast_dfs, perf_fcast_spatial
def perfect_fcast_verification(fcast_1yr, fcast_outputs, dobjs, state,
                               verif_spec, nelem_in_yr, experiment_name,
                               avg_key, var_name_map, out_name_map,
                               units_map, fig_dir, do_scalar_plot=True,
                               do_spatial_plot=True):
    output = {}
    scalar_verif = []
    for var_key, out_types in fcast_outputs.items():
        dobj = dobjs[var_key]
        var_fcast = state.get_var_from_state(var_key, data=fcast_1yr)
        verif_data_attr = verif_spec.get(var_key, 'detrended')

        curr_var_output = get_scalar_outputs(dobj, nelem_in_yr, var_fcast,
                                             verif_data_attr, out_types,
                                             use_dask=True)

        # Run scalar verification
        for out_type, scalar_output in curr_var_output.items():
            fcast = scalar_output['fcast']
            ref = scalar_output['1yr']
            init_t0 = scalar_output['t0']

            r_ce_results = calc_scalar_ce_r(fcast, ref, init_t0)
            verif_df = mutils.ce_r_results_to_dataframe(var_key, avg_key,
                                                        out_type,
                                                        *r_ce_results)
            scalar_verif.append(verif_df)

            title = '{}, {}'.format(var_name_map[var_key],
                                    out_name_map[out_type])
            label = experiment_name + ' ' + avg_key
            yrs = get_yrs_from_dobj(dobj, nelem_in_yr)

            filename = 'scalar_plot_{}_{}_{}_{}.png'.format(
                experiment_name, avg_key, var_key, out_type)
            filepath = os.path.join(fig_dir, filename)

            if out_type == 'enso':
                ylabel = 'ENSO 3.4 Index'
            elif out_type == 'pdo':
                ylabel = 'PDO Index'
            else:
                ylabel = 'Anomaly ({})'.format(units_map[var_key])

            if do_scalar_plot:
                ptools.plot_scalar_verification(yrs, fcast, ref,
                                                *r_ce_results, title,
                                                label, ylabel,
                                                savefile=filepath)

        # Run field verification
        if do_spatial_plot:
            fcast, ref_data, wgts = \
                _get_spatial_field_and_wgts(dobj, var_fcast, var_key,
                                            verif_spec, get_dask=True)
            ref = ref_data[nelem_in_yr:]
            ref_init = ref_data[:-nelem_in_yr]

            lac = ST.calc_lac(fcast, ref)
            ce = ST.calc_ce(fcast, ref)

            # Persistence fcast metrics
            auto1_lac = ST.calc_lac(ref_init, ref)
            auto1_ce = ST.calc_ce(ref_init, ref)

            lac_out = np.empty(lac.shape)
            ce_out = np.empty(ce.shape)
            auto1_lac_out = np.empty(auto1_lac.shape)
            auto1_ce_out = np.empty(auto1_ce.shape)

            da.store([lac, ce, auto1_lac, auto1_ce],
                     [lac_out, ce_out, auto1_lac_out, auto1_ce_out])

            # Spatial averages
            lac_gm = lac_out @ wgts
            ce_gm = ce_out @ wgts
            auto1_lac_gm = auto1_lac_out @ wgts
            auto1_ce_gm = auto1_ce_out @ wgts

            spatial_gm_df = mutils.ce_r_results_to_dataframe(
                var_key, avg_key, 'spatial_gm', lac_gm, None,
                auto1_lac_gm, None, ce_gm, None, auto1_ce_gm, None)
            scalar_verif.append(spatial_gm_df)

            curr_var_output['spatial_metr'] = {'lac': lac_out,
                                               'ce': ce_out,
                                               'auto1_lac': auto1_lac_out,
                                               'auto1_ce': auto1_ce_out}
            output[var_key] = curr_var_output

            _plot_spatial(lac_out, 'LAC', experiment_name, avg_key,
                          var_key, dobj, fig_dir)
            _plot_spatial(auto1_lac_out, 'Auto1_LAC', experiment_name,
                          avg_key, var_key, dobj, fig_dir)
            _plot_spatial(ce_out, 'CE', experiment_name, avg_key, var_key,
                          dobj, fig_dir)
            _plot_spatial(auto1_ce_out, 'Auto1_CE', experiment_name,
                          avg_key, var_key, dobj, fig_dir)

    scalar_verif = pd.concat(scalar_verif)

    return output, scalar_verif
def fcast_corr_old(h5file):
    """
    Calculate the local anomaly correlation for a LIM forecast at every
    point.

    Parameters
    ----------
    h5file: tables.File
        PyTables HDF5 file containing LIM forecast data. All necessary
        variables are loaded from this file.

    Returns
    -------
    ndarray
        Local anomaly correlation for each forecast lead time at all
        points (compared against observations).
    """
    node_name = 'corr'
    parent = '/stats'

    assert h5file is not None and type(h5file) == tb.File

    # Load necessary data
    try:
        obs = h5file.root.data.anomaly_srs[:]
        test_start_idxs = h5file.root.data.test_start_idxs[:]
        fcast_times = h5file.root.data.fcast_times[:]
        fcasts = h5file.list_nodes(h5file.root.data.fcast_bin)
        eofs = h5file.root.data.eofs[:]
        yrsize = h5file.root.data._v_attrs.yrsize
        test_tdim = h5file.root.data._v_attrs.test_tdim
    except tb.NodeError as e:
        raise type(e)(str(e) + ' Returning without finishing operation...')

    # Create output location in h5file
    atom = tb.Atom.from_dtype(obs.dtype)
    corr_shp = [len(fcast_times), obs.shape[1]]

    try:
        corr_out = Dt.empty_hdf5_carray(h5file, parent, node_name, atom,
                                        corr_shp,
                                        title="Spatial Correlation",
                                        createparents=True)
    except tb.FileModeError:
        corr_out = np.zeros(corr_shp)

    # Calculate LAC
    for i, lead in enumerate(fcast_times):
        print('Calculating Correlation: %i yr fcast' % lead)
        compiled_obs = build_trial_obs(obs, test_start_idxs, lead * yrsize,
                                       test_tdim)
        data = fcasts[i].read()
        phys_fcast = build_trial_fcast(data, eofs)
        corr_out[i] = St.calc_lac(phys_fcast, compiled_obs)

    return corr_out
def red_noise_fit_ar1(data, lead=1):
    lag1_autocorr = ST.calc_lac(data[:-lead], data[lead:])
    white_noise_var = (1 - lag1_autocorr**2) * data.var(ddof=1, axis=0)

    return lag1_autocorr, white_noise_var
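# A minimal sketch of how the AR(1) fit above supports the red-noise
# benchmark forecast used in spatial_perf_fcast_verification. The actual
# mutils.red_noise_forecast_ar1 lives elsewhere; this assumes the
# standard AR(1) conditional-mean (damped persistence) forecast
# x_{t+lead} = phi * x_t:
def red_noise_forecast_ar1_sketch(data, lead=1):
    phi, _ = red_noise_fit_ar1(data, lead=lead)
    # Forecast series aligned with the verification targets data[lead:]
    return phi * data[:-lead]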