Example #1
def calc_corr_signif(fcast, obs, corr=None):
    """
    Calculate local anomaly correlation along with 95% significance.
    """
    assert fcast.shape == obs.shape

    corr_neff = St.calc_n_eff(fcast, obs)
    if corr is None:
        corr = St.calc_lac(fcast, obs)

    signif = np.empty_like(corr, dtype=bool)  # np.bool was removed in NumPy 1.24

    if np.any(np.abs(corr) < 0.5):
        g_idx = np.where(np.abs(corr) < 0.5)
        gen_2std = 2. / np.sqrt(corr_neff[g_idx])
        signif[g_idx] = (np.abs(corr[g_idx]) - gen_2std) > 0

    if np.any(np.abs(corr) >= 0.5):
        z_idx = np.where(np.abs(corr) >= 0.5)
        # Fisher z-transform stabilizes the sampling distribution for |r| >= 0.5
        z = 1. / 2 * np.log((1 + corr[z_idx]) / (1 - corr[z_idx]))
        z_2std = 2. / np.sqrt(corr_neff[z_idx] - 3)
        signif[z_idx] = (np.abs(z) - z_2std) > 0

    # if True in ((corr_neff <= 3) & (abs(corr) >= 0.5)):
    #     assert(False) # I have to figure out how to implement T_Test
    #     trow = np.where((corr_neff <= 20) & (corr >= 0.5))
    signif[corr_neff <= 3] = False

    return signif, corr
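
The two significance regimes above can be exercised on synthetic data. A minimal sketch, assuming the effective sample size simply equals the nominal sample size (St.calc_n_eff and St.calc_lac are project helpers not shown in these examples):

import numpy as np

rng = np.random.default_rng(0)
corr = rng.uniform(-0.9, 0.9, size=50)   # hypothetical local correlations
n_eff = np.full(corr.shape, 100.0)       # stand-in for St.calc_n_eff output

signif = np.empty_like(corr, dtype=bool)
low = np.abs(corr) < 0.5
signif[low] = np.abs(corr[low]) > 2.0 / np.sqrt(n_eff[low])
# np.arctanh is the Fisher z-transform, 0.5 * log((1 + r) / (1 - r))
z = np.arctanh(corr[~low])
signif[~low] = np.abs(z) > 2.0 / np.sqrt(n_eff[~low] - 3)
signif[n_eff <= 3] = False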
Example #2
def get_scalar_outputs(dobj, nelem_in_yr, var_fcast, verif_data_attr,
                       out_types, use_dask=False):

    if use_dask:
        truth_data = dobj.reset_data(verif_data_attr)
    else:
        truth_data = getattr(dobj, verif_data_attr)
        
    # Verification target (one year after init) and the initial-condition series
    truth_1yr = truth_data[nelem_in_yr:]
    truth_init = truth_data[:-nelem_in_yr]

    curr_var_output = {}

    for out_type in out_types:

        fcast_factor, verif_factor = get_scalar_factor(dobj, out_type,
                                                       verif_data_attr)

        var_out = var_fcast @ fcast_factor
        truth_init_out = truth_init @ verif_factor
        truth_1yr_out = truth_1yr @ verif_factor

        # Standardize PDO Index relative to truth output
        if out_type == 'pdo':
            truth_1yr_out, std_dev = _standardize_series(truth_1yr_out)
            var_out, _ = _standardize_series(var_out, std_dev=std_dev)
            truth_init_out, _ = _standardize_series(truth_init_out,
                                                    std_dev=std_dev)
            
        if use_dask:
            # Preallocate NumPy buffers and evaluate the lazy dask arrays
            # into them in a single da.store pass
            t_truth_1yr_out = np.empty(truth_1yr_out.shape)
            t_truth_init_out = np.empty(truth_init_out.shape)

            dask_vars = [truth_1yr_out, truth_init_out]
            dask_outs = [t_truth_1yr_out, t_truth_init_out]

            if ST.is_dask_array(var_out):
                t_var_out = np.empty(var_out.shape)
                dask_vars.append(var_out)
                dask_outs.append(t_var_out)

            da.store(dask_vars, dask_outs)

            truth_1yr_out = t_truth_1yr_out
            truth_init_out = t_truth_init_out

            if ST.is_dask_array(var_out):
                var_out = t_var_out

        curr_var_output[out_type] = {'fcast': var_out,
                                     't0': truth_init_out,
                                     '1yr': truth_1yr_out}
    return curr_var_output
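
The use_dask branch above relies on dask's store pattern. A minimal self-contained illustration (independent of the surrounding project code): da.store evaluates several lazy arrays in one pass, writing each into a preallocated NumPy buffer.

import numpy as np
import dask.array as da

lazy_a = da.ones((4, 4), chunks=2)    # lazy dask arrays (not yet computed)
lazy_b = da.zeros((4, 4), chunks=2)
buf_a = np.empty((4, 4))              # preallocated output buffers
buf_b = np.empty((4, 4))

# Compute both graphs in a single pass, storing results into the buffers
da.store([lazy_a, lazy_b], [buf_a, buf_b])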
Example #3
def conf_bound95(fcast, reference, metric='r', use_sample_size=None):

    n_samples = len(fcast)
    n_iters = 10000
    threshold = 1e-5

    if metric == 'r':
        mean_target = np.corrcoef(fcast, reference)[0, 1]
    elif metric == 'ce':
        mean_target = ST.calc_ce(fcast, reference)
    else:
        raise ValueError('Unknown metric specified: {}'.format(metric))

    args = [
        (fcast, reference, n_samples, metric),
    ]
    # Use a distinct RNG seed for each bootstrap iteration and fan the
    # resampling work out to a process pool
    seeds = np.random.choice(n_iters * 10, size=n_iters, replace=False)
    iter_args = product(seeds, args)
    with Pool(processes=10) as calc_pool:
        result = calc_pool.map(pool_func, iter_args)

    result = np.array(result)
    diff = abs(result.mean() - mean_target)
    print('Mean Target {} diff: {:1.3e}'.format(metric, diff))

    # Create bounds for plt.errorbars
    lower_bnd = np.percentile(result, 2.5)
    upper_bnd = np.percentile(result, 97.5)
    res_mean = result.mean()
    conf_bounds = (upper_bnd, lower_bnd)

    return res_mean, conf_bounds
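
For reference, the same percentile bootstrap can be written serially without the process pool. A minimal sketch for the correlation metric only, assuming fcast and reference are equal-length 1-D NumPy arrays (the CE metric would substitute ST.calc_ce):

import numpy as np

def bootstrap_corr_bounds(fcast, reference, n_iters=10000, seed=0):
    # Resample paired points with replacement, recompute the correlation,
    # and take the 2.5th/97.5th percentiles of the bootstrap distribution
    rng = np.random.default_rng(seed)
    n = len(fcast)
    stats = np.empty(n_iters)
    for k in range(n_iters):
        idx = rng.integers(0, n, size=n)
        stats[k] = np.corrcoef(fcast[idx], reference[idx])[0, 1]
    return stats.mean(), (np.percentile(stats, 97.5),
                          np.percentile(stats, 2.5))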
Example #4
def _calc_limstate_eofs(data, num_eofs):
    var_stats = {}
    state_eofs, state_svals = plstat.calc_eofs(data,
                                               num_eofs,
                                               var_stats_dict=var_stats)

    return state_eofs, state_svals, var_stats
Example #5
def fcast_corr_old(h5file):
    """
    Calculate the local anomaly correlation for a LIM forecast at every point.

    Parameters
    ----------
    h5file: tables.File
        PyTables HDF5 file containing LIM forecast data.  All necessary
        variables are loaded from this file.

    Returns
    -------
    ndarray
        Local anomaly correlation for each forecast lead time at all points.
        (compared against observations)
    """
    node_name = 'corr'
    parent = '/stats'

    assert h5file is not None and isinstance(h5file, tb.File)

    # Load necessary data
    try:
        obs = h5file.root.data.anomaly_srs[:]
        test_start_idxs = h5file.root.data.test_start_idxs[:]
        fcast_times = h5file.root.data.fcast_times[:]
        fcasts = h5file.list_nodes(h5file.root.data.fcast_bin)
        eofs = h5file.root.data.eofs[:]
        yrsize = h5file.root.data._v_attrs.yrsize
        test_tdim = h5file.root.data._v_attrs.test_tdim
    except tb.NodeError as e:
        raise type(e)(str(e) + ' Returning without finishing operation...')

    # Create output location in h5file
    atom = tb.Atom.from_dtype(obs.dtype)
    corr_shp = [len(fcast_times), obs.shape[1]]

    try:
        corr_out = Dt.empty_hdf5_carray(h5file, parent, node_name, atom,
                                        corr_shp,
                                        title="Spatial Correlation",
                                        createparents=True)
    except tb.FileModeError:
        corr_out = np.zeros(corr_shp)

    # Calculate LAC
    for i, lead in enumerate(fcast_times):
        print('Calculating Correlation: %i yr fcast' % lead)
        compiled_obs = build_trial_obs(obs, test_start_idxs, lead * yrsize,
                                       test_tdim)
        data = fcasts[i].read()
        phys_fcast = build_trial_fcast(data, eofs)

        # for j, trial in enumerate(data):
        #     phys_fcast = np.dot(trial.T, eofs[j].T)
        #     corr_out[i] += St.calc_ce(phys_fcast, compiled_obs[j], obs)

        corr_out[i] = St.calc_lac(phys_fcast, compiled_obs)

    return corr_out
Example #6
def calc_state_eofs(self, num_eofs):

    var_stats = {}
    state_eofs, state_svals = ST.calc_eofs(self.data, num_eofs,
                                           var_stats_dict=var_stats)
    self.eofs = state_eofs
    self.svals = state_svals

    return var_stats
Example #7
def get_pdo_factor(eofs, data, latgrid, longrid):
    npac_mask = ((latgrid >= 20) & (latgrid <= 70) & (longrid >= 110) &
                 (longrid <= 250))
    # Read into memory first; fancy indexing is not supported on PyTables CArrays
    data = data[:][:, npac_mask]
    npac_eofs, npac_svals = ST.calc_eofs(data, 1)
    npac_eofs_full = np.zeros_like(latgrid)
    npac_eofs_full[npac_mask] = npac_eofs[:, 0]
    pdo_factor = eofs.T @ npac_eofs_full

    return pdo_factor, npac_eofs_full
Example #8
def pool_func(args):
    # TODO: Samples are correlated in time, need to do block resample instead
    i, (fcast, ref, n_samples, metric) = args

    np.random.seed(i)
    sample_idx = np.random.choice(n_samples, size=n_samples, replace=True)
    test_fcast = fcast[sample_idx]
    test_ref = ref[sample_idx]
    if metric == 'r':
        out = np.corrcoef(test_fcast, test_ref)[0, 1]
    else:
        out = ST.calc_ce(test_fcast, test_ref)

    return out
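
One way to address the TODO above is a moving-block bootstrap, which resamples contiguous blocks so that short-range temporal correlation is preserved within each block. A sketch of such an index generator (block_resample_idx and the block_len choice are illustrative, not taken from these examples):

import numpy as np

def block_resample_idx(n_samples, block_len, rng):
    # Draw random block start positions, concatenate contiguous blocks,
    # then trim to the original sample length
    n_blocks = int(np.ceil(n_samples / block_len))
    starts = rng.integers(0, n_samples - block_len + 1, size=n_blocks)
    idx = np.concatenate([np.arange(s, s + block_len) for s in starts])
    return idx[:n_samples]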
Example #9
def _gen_nao_index_factor(prior_cfg, varname):
    print('Generating NAO EOF for index calculation.')
    prior_var = PriorVariable.load(prior_cfg, varname, anomaly=True, nens=None)
    var_dobj, latgrid, longrid = _load_prior_var_dat(prior_cfg, varname)

    mask = _gen_latlon_grid_mask(latgrid, longrid, 20, 80, 270, 40)

    valid_data = var_dobj.valid_data
    var_dobj.detrend_data()
    var_dobj.area_weight_data(use_sqrt=True)
    data = var_dobj.data[:][:, mask]
    natl_eofs, natl_svals = ST.calc_eofs(data, 1)
    nao_eof = natl_eofs[:, 0]

    return nao_eof, valid_data, mask
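
_gen_latlon_grid_mask itself is not shown in these examples. Note that the NAO call spans the prime meridian (longitude 270 to 40), so the helper presumably handles longitude wrap-around; a hypothetical equivalent:

import numpy as np

def gen_latlon_grid_mask(latgrid, longrid, lat0, lat1, lon0, lon1):
    # Hypothetical reconstruction: boolean mask for a lat/lon box on
    # flattened coordinate grids, allowing boxes that cross 0 deg longitude
    lat_ok = (latgrid >= lat0) & (latgrid <= lat1)
    if lon0 <= lon1:
        lon_ok = (longrid >= lon0) & (longrid <= lon1)
    else:
        lon_ok = (longrid >= lon0) | (longrid <= lon1)
    return lat_ok & lon_ok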
Example #10
def long_output_to_scalar(output, dobjs, output_map, state, verif_spec,
                          use_dask=False):
    output_orig = state.proj_data_into_orig_basis(output, unstandardize=True)

    var_scalar_out = {}
    for var_key, dobj in dobjs.items():
        verif_data_attr = verif_spec.get(var_key, 'detrended')
        data = state.get_var_from_state(var_key, data=output_orig)

        if use_dask:
            truth_data = dobj.reset_data(verif_data_attr)
        else:
            truth_data = getattr(dobj, verif_data_attr)
            truth_data = truth_data[:]

        curr_var_output = {}
        for out_type in output_map[var_key]:

            fcast_factor, verif_factor = get_scalar_factor(dobj,
                                                           out_type,
                                                           verif_data_attr)
            scalar_out = data @ fcast_factor
            compare_scalar_out = truth_data @ verif_factor

            if out_type == 'pdo':
                compare_scalar_out, std_dev = \
                    _standardize_series(compare_scalar_out)
                scalar_out, _ = _standardize_series(scalar_out, std_dev=std_dev)

            if use_dask:
                tmp_compare = np.empty(compare_scalar_out.shape)
                da.store(compare_scalar_out, tmp_compare)
                compare_scalar_out = tmp_compare

                if ST.is_dask_array(scalar_out):
                    tmp_scalar = np.empty(scalar_out.shape)
                    da.store(scalar_out, tmp_scalar)
                    scalar_out = tmp_scalar

            curr_var_output[out_type] = {'fcast': scalar_out,
                                         'source': compare_scalar_out}

        var_scalar_out[var_key] = curr_var_output

    return var_scalar_out
Example #11
def _gen_pdo_index_factor(prior_cfg, varname):
    print('Generating PDO EOF for index calculation.')
    var_dobj, latgrid, longrid = _load_prior_var_dat(prior_cfg, varname)

    # PDO region mask from the compressed grid
    mask = _gen_latlon_grid_mask(latgrid, longrid, 20, 70, 110, 250)

    # Valid mask from the full grid
    valid_data = var_dobj.valid_data

    # TODO: Change to removal of GMT regression signal
    var_dobj.detrend_data()
    var_dobj.area_weight_data(use_sqrt=True)
    data = var_dobj.data[:][:, mask]
    npac_eofs, npac_svals = ST.calc_eofs(data, 1)
    pdo_eof = npac_eofs[:, 0]
    # full_grid_pdo_eof = var_dobj.inflate_full_grid(data=compressed_pdo_eof)

    return pdo_eof, valid_data, mask
Example #12
def fcast_corr(h5file, avg_trial=False):
    """
    Calculate the local anomaly correlation for a LIM forecast at every point.

    Parameters
    ----------
    h5file: tables.File
        PyTables HDF5 file containing LIM forecast data.  All necessary
        variables are loaded from this file.
    avg_trial: bool, optional
        If True, average correlations over trials and assess significance
        with a one-sample t-test across trials.

    Returns
    -------
    ndarray
        Local anomaly correlation for each forecast lead time at all points
        (compared against observations).
    """
    if avg_trial:
        corr_node_name = 'corr_trial_avg'
        signif_node_name = 'corr_tavg_signif'
    else:
        corr_node_name = 'corr'
        signif_node_name = 'corr_signif'
    parent = '/stats'

    assert h5file is not None and isinstance(h5file, tb.File)

    # Load necessary data
    try:
        try:
            obs = h5file.root.data.anomaly[:]
        except tb.NoSuchNodeError:
            obs = h5file.root.data.detrended[:]
        test_start_idxs = h5file.root.data._v_attrs.test_start_idxs
        fcast_times = h5file.root.data._v_attrs.fcast_times
        fcasts = h5file.list_nodes(h5file.root.data.fcast_bin)
        eofs = h5file.root.data.eofs[:]
        yrsize = h5file.root.data._v_attrs.yrsize
        test_tdim = h5file.root.data._v_attrs.test_tdim
    except tb.NodeError as e:
        raise type(e)(str(e) + ' Returning without finishing operation...')

    # Create output location in h5file
    atom = tb.Atom.from_dtype(obs.dtype)
    corr_shp = [len(fcast_times), obs.shape[1]]
    signif = np.ones(corr_shp, dtype=bool)

    try:
        corr_out = Dt.empty_hdf5_carray(h5file,
                                        parent,
                                        corr_node_name,
                                        atom,
                                        corr_shp,
                                        title="Spatial Correlation",
                                        createparents=True)
        signif_out = Dt.var_to_hdf5_carray(h5file, parent, signif_node_name,
                                           signif)
    except tb.FileModeError:
        corr_out = np.zeros(corr_shp)
        signif_out = signif

    # Calculate LAC
    for i, lead in enumerate(fcast_times):
        print('Calculating Correlation: %i yr fcast' % lead)
        if avg_trial:
            # TODO: Significance is currently ignored for avg_trial
            corr_trials = np.zeros((len(fcasts[i]), eofs.shape[1]))
            for j, trial in enumerate(fcasts[i]):
                phys_fcast = np.dot(trial.T, eofs[j].T)
                compiled_obs = build_trial_obs(obs, [test_start_idxs[j]],
                                               lead * yrsize, test_tdim)

                corr_trials[j] = St.calc_lac(phys_fcast, compiled_obs)

                # if j == 0:
                #     corr = St.calc_lac(phys_fcast, compiled_obs)
                # else:
                #     corr += St.calc_lac(phys_fcast, compiled_obs)

            corr = corr_trials.mean(axis=0)
            ttest, pval = ttest_1samp(corr_trials, 0, axis=0)
            sig = pval <= 0.05
        else:
            compiled_obs = build_trial_obs(obs, test_start_idxs, lead * yrsize,
                                           test_tdim)
            data = fcasts[i].read()
            phys_fcast = build_trial_fcast(data, eofs)
            corr = St.calc_lac(phys_fcast, compiled_obs)
            sig, _ = calc_corr_signif(phys_fcast, compiled_obs, corr=corr)

        corr_out[i] = corr
        signif_out[i] = sig

    return corr_out, signif_out
Example #13
def spatial_perf_fcast_verification(incl_keys, field_factors, times, fcast_1yr,
                                    state_obj, latgrid, longrid,
                                    valid_data_masks, var_std_factors,
                                    fcast_against_src, perf_figdir):
    """

    Parameters
    ----------
    incl_keys
        State keys for the basic output variables specified in the
        configuration. Keys are of the form (var_name, avg_interval)
    field_factors
        dict of factors, keyed by (var_name, avg_interval), that project the
        LIM-space output onto the full field via matrix multiplication
    times
        array of years corresponding to 1-year forecast times
    fcast_1yr
        LIM forecast in LIM space
    state_obj
        state used as initial conditions for the forecast
    latgrid
        flattened grid of latitude coordinates
    longrid
        flattened grid of longitude coordinates
    valid_data_masks
        dict of masks, keyed by (var_name, avg_interval), applied to fields
        to omit NaN values
    var_std_factors
        dict of standardization factors for the fields, keyed by (var_name,
        avg_interval)
    fcast_against_src
        name of the prior source the forecast is verified against
    perf_figdir
        figure output path

    Returns
    -------
    perf_fcast_dfs
        list of DataFrames of global-mean LAC/CE verification results
    perf_fcast_spatial
        dict of spatial LAC/CE fields keyed by (var_name, avg_interval)
    """
    perf_fcast_dfs = []
    perf_fcast_spatial = {}
    for var_key in incl_keys:
        var_name, avg_interval = var_key
        field_factor = field_factors[var_key]

        valid_data = valid_data_masks.get(var_key, None)
        var_std = var_std_factors.get(var_key, None)
        init_field = mutils.get_field_from_state(state_obj,
                                                 var_key,
                                                 valid_data=valid_data,
                                                 var_std_factor=var_std)

        ar1_field_fcast = mutils.red_noise_forecast_ar1(init_field)
        target_field = init_field[1:]

        fcast_1yr_field = fcast_1yr @ field_factor

        lac = ST.calc_lac(fcast_1yr_field, target_field)

        # check for invalid LAC as a check of valid inputs
        invalid_data = np.isnan(lac)
        nonzero_data = np.logical_not(invalid_data)

        ce = ST.calc_ce(fcast_1yr_field, target_field)
        perf_fcast_spatial[var_key] = {'lac': lac, 'ce': ce}
        if valid_data is not None:
            perf_fcast_spatial[var_key]['valid_data'] = valid_data

        anom_corr = ST.calc_lac(fcast_1yr_field.T, target_field.T)

        ar1_lac = ST.calc_lac(ar1_field_fcast, target_field)
        ar1_ce = ST.calc_ce(ar1_field_fcast, target_field)
        ar1_anom_corr = ST.calc_lac(ar1_field_fcast.T[nonzero_data],
                                    target_field.T[nonzero_data])

        if var_key in valid_data_masks:
            valid_data = valid_data_masks[var_key]
            lat = latgrid[valid_data]
        else:
            lat = latgrid

        if np.any(invalid_data):
            warnings.warn(
                'Grid data resulted in invalid skill metric for '
                'field: {}, removing for average...'.format(var_name))
            lat = lat[nonzero_data]

        # Get global average weights for field
        _, gm_weights = \
            LMR_outputs.get_area_avg_mask_and_weights(lat, None, None)

        lac_gm = lac[nonzero_data] @ gm_weights
        ce_gm = ce[nonzero_data] @ gm_weights
        avg_anom_corr = anom_corr.mean()

        ar1_lac_gm = ar1_lac[nonzero_data] @ gm_weights
        ar1_ce_gm = ar1_ce[nonzero_data] @ gm_weights
        ar1_avg_anom_corr = ar1_anom_corr.mean()

        spatial_gm_df = \
            mutils.ce_r_results_to_dataframe(var_name, avg_interval,
                                             'spatial_verif_gm', lac_gm, ce_gm,
                                             ar1_lac_gm, ar1_ce_gm,
                                             anom_corr=avg_anom_corr,
                                             auto1_anom_corr=ar1_avg_anom_corr)

        perf_fcast_dfs.append(spatial_gm_df)

        if plot_spatial_verif:

            plot_maps = [lac, ce, ar1_lac, ar1_ce]
            plot_metrs = ['LIM LAC', 'LIM CE', 'AR(1) LAC', 'AR(1) CE']

            for field, metric in zip(plot_maps, plot_metrs):
                valid_mask = valid_data_masks.get(var_key, None)
                sptl_shp = state_obj.var_space_shp[var_name]
                vutils.plot_spatial_verif(field,
                                          valid_mask,
                                          sptl_shp,
                                          latgrid,
                                          longrid,
                                          metric,
                                          fcast_against_src,
                                          avg_interval,
                                          var_name,
                                          fig_dir=perf_figdir)

            acorr_file = 'spatial_anomaly_corr_{}_{}.png'.format(
                var_name, var_key)
            acorr_path = os.path.join(perf_figdir, acorr_file)

            ptools.plot_anomaly_correlation(times,
                                            anom_corr,
                                            ar1_anom_corr,
                                            var_name,
                                            avg_interval,
                                            savefile=acorr_path)

    return perf_fcast_dfs, perf_fcast_spatial
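
The global-mean skill scores above use area weights from LMR_outputs.get_area_avg_mask_and_weights, which is not shown in these examples. For flattened grids like these, such weights are typically proportional to the cosine of latitude and normalized to sum to one; a hypothetical stand-in:

import numpy as np

def cos_lat_weights(lat_degrees):
    # Hypothetical stand-in for the area-average weights: cosine-of-latitude
    # weighting over the (possibly masked) grid points, normalized to one
    w = np.cos(np.deg2rad(lat_degrees))
    return w / w.sum()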
Example #14
def perfect_fcast_verification(fcast_1yr, fcast_outputs, dobjs, state,
                               verif_spec, nelem_in_yr, experiment_name,
                               avg_key,
                               var_name_map, out_name_map, units_map,
                               fig_dir, do_scalar_plot=True,
                               do_spatial_plot=True):

    output = {}
    scalar_verif = []

    for var_key, out_types in fcast_outputs.items():
        dobj = dobjs[var_key]
        var_fcast = state.get_var_from_state(var_key, data=fcast_1yr)
        verif_data_attr = verif_spec.get(var_key, 'detrended')

        curr_var_output = get_scalar_outputs(dobj, nelem_in_yr, var_fcast,
                                             verif_data_attr, out_types,
                                             use_dask=True)

        # Run scalar verification
        for out_type, scalar_output in curr_var_output.items():
            fcast = scalar_output['fcast']
            ref = scalar_output['1yr']
            init_t0 = scalar_output['t0']

            r_ce_results = calc_scalar_ce_r(fcast, ref, init_t0)
            verif_df = mutils.ce_r_results_to_dataframe(var_key, avg_key,
                                                        out_type,
                                                        *r_ce_results)
            scalar_verif.append(verif_df)

            title = '{}, {}'.format(var_name_map[var_key],
                                    out_name_map[out_type])
            label = experiment_name + ' ' + avg_key
            yrs = get_yrs_from_dobj(dobj, nelem_in_yr)
            filename = 'scalar_plot_{}_{}_{}_{}.png'.format(experiment_name,
                                                            avg_key,
                                                            var_key,
                                                            out_type)
            filepath = os.path.join(fig_dir, filename)

            if out_type == 'enso':
                ylabel = 'ENSO 3.4 Index'
            elif out_type == 'pdo':
                ylabel = 'PDO Index'
            else:
                ylabel = 'Anomaly ({})'.format(units_map[var_key])

            if do_scalar_plot:
                ptools.plot_scalar_verification(yrs, fcast, ref, *r_ce_results,
                                                title,
                                                label,
                                                ylabel,
                                                savefile=filepath)

        # Run Field verification

        if do_spatial_plot:
            fcast, ref_data, wgts = _get_spatial_field_and_wgts(dobj,
                                                                var_fcast,
                                                                var_key,
                                                                verif_spec,
                                                                get_dask=True)

            ref = ref_data[nelem_in_yr:]
            ref_init = ref_data[:-nelem_in_yr]

            lac = ST.calc_lac(fcast, ref)
            ce = ST.calc_ce(fcast, ref)

            # Persistence fcast metrics
            auto1_lac = ST.calc_lac(ref_init, ref)
            auto1_ce = ST.calc_ce(ref_init, ref)

            lac_out = np.empty(lac.shape)
            ce_out = np.empty(ce.shape)
            auto1_lac_out = np.empty(auto1_lac.shape)
            auto1_ce_out = np.empty(auto1_ce.shape)

            da.store([lac, ce, auto1_lac, auto1_ce],
                     [lac_out, ce_out, auto1_lac_out, auto1_ce_out])

            # spatial averages
            lac_gm = lac_out @ wgts
            ce_gm = ce_out @ wgts
            auto1_lac_gm = auto1_lac_out @ wgts
            auto1_ce_gm = auto1_ce_out @ wgts

            spatial_gm_df = mutils.ce_r_results_to_dataframe(var_key, avg_key,
                                                             'spatial_gm',
                                                             lac_gm,
                                                             None,
                                                             auto1_lac_gm,
                                                             None,
                                                             ce_gm,
                                                             None,
                                                             auto1_ce_gm,
                                                             None)

            scalar_verif.append(spatial_gm_df)

            curr_var_output['spatial_metr'] = {'lac': lac_out,
                                               'ce': ce_out,
                                               'auto1_lac': auto1_lac_out,
                                               'auto1_ce': auto1_ce_out}
            output[var_key] = curr_var_output

            _plot_spatial(lac_out, 'LAC', experiment_name, avg_key, var_key,
                          dobj, fig_dir)
            _plot_spatial(auto1_lac_out, 'Auto1_LAC', experiment_name, avg_key,
                          var_key, dobj, fig_dir)
            _plot_spatial(ce_out, 'CE', experiment_name, avg_key, var_key, dobj,
                          fig_dir)
            _plot_spatial(auto1_ce_out, 'Auto1_CE', experiment_name, avg_key,
                          var_key, dobj, fig_dir)

    scalar_verif = pd.concat(scalar_verif)

    return output, scalar_verif
Example #15
def red_noise_fit_ar1(data, lead=1):
    # Fit an AR(1) (red noise) model: the lag-1 autocorrelation together with
    # the white-noise variance (1 - r**2) * Var(x) implied by stationarity
    lag1_autocorr = ST.calc_lac(data[:-lead], data[lead:])

    white_noise_var = (1 - lag1_autocorr**2) * data.var(ddof=1, axis=0)

    return lag1_autocorr, white_noise_var
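
mutils.red_noise_forecast_ar1, used in Example #13, is not shown in these examples. A self-contained sketch of a damped-persistence forecast consistent with the fit above, written for a single time series (the project version presumably operates per grid point):

import numpy as np

def ar1_persistence_forecast(data, lead=1):
    # Damped persistence: x_hat(t + lead) = r * x(t), with r estimated as
    # the lag-`lead` autocorrelation of the series
    x0, x1 = data[:-lead], data[lead:]
    r = np.corrcoef(x0, x1)[0, 1]
    return r * x0  # aligned with the verification series x1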