Example 1
def test_averaging_period_negative_indices():
    res = Utils.get_averaging_period((-9, -11, 0, 2, 4), nelem_in_yr=12,
                                     is_zero_based=True)
    assert res == (9, 11, 12, 14, 16)

    res = Utils.get_averaging_period((-10, -11, -12, 1, 2, 3), nelem_in_yr=12)
    assert res == (9, 10, 11, 12, 13, 14)
Example 2
def test_generate_latlon_include_lat_endpts():
    lats, lons, clats, clons = Utils.generate_latlon(3, 5, include_endpts=True)
    np.testing.assert_equal(lats[:, 0], [-90, 0, 90])
    assert clats[0] == -90
    assert clats[-1] == 90

    lats, lons, clats, clons = Utils.generate_latlon(4, 5, include_endpts=True)
    np.testing.assert_equal(lats[:, 0], [-90, -30, 30, 90])
Example 3
def test_averaging_period_zero_vs_nonzero_indexed():

    res = Utils.get_averaging_period([0, 1, 2, 3], nelem_in_yr=12,
                                     is_zero_based=True)
    assert res == (0, 1, 2, 3)

    res = Utils.get_averaging_period([1, 2, 3, 4], nelem_in_yr=12)
    assert res == (0, 1, 2, 3)

    with pytest.raises(ValueError):
        Utils.get_averaging_period([0, 1, 2, 3], 12, is_zero_based=False)
Example 4
def test_calc_latlon_bnd_regular_grid():
    irregular_data = np.array([1, 2, 3, 5, 8, 13, 21], dtype=np.float32)
    regular_data = np.arange(10)
    irreg_bnds = [0.5, 1.5, 2.5, 4, 6.5, 10.5, 17, 25]
    reg_bnds = np.arange(11) - 0.5

    lat_bnds, lon_bnds = Utils.calculate_latlon_bnds(regular_data, irregular_data)
    np.testing.assert_equal(lat_bnds, reg_bnds)
    np.testing.assert_equal(lon_bnds, irreg_bnds)

    lat_bnds, lon_bnds = Utils.calculate_latlon_bnds(irregular_data, regular_data)
    np.testing.assert_equal(lat_bnds, irreg_bnds)
    np.testing.assert_equal(lon_bnds, reg_bnds)
Example 5
def ce_r_ens_avg(gmt_ens,
                 times,
                 analysis_gmt,
                 analysis_times,
                 trange=(1880, 2000),
                 center_trange=(1900, 2000)):

    start, end = trange
    # Center each ensemble member relative to the reference period

    gmt_ens = center_to_time_range(times,
                                   gmt_ens,
                                   time_axis=-1,
                                   trange=center_trange)

    # Get a mask for overlapping times
    analysis_tidx = (analysis_times >= start) & (analysis_times <= end)
    lmr_tidx = (times >= start) & (times <= end)

    ens_ce = \
        np.array([utils2.coefficient_efficiency(analysis_gmt[analysis_tidx],
                                                a_ens_gmt[lmr_tidx])
                  for a_ens_gmt in gmt_ens])
    ens_ce[ens_ce == 1] = np.nan

    ens_r = np.array([
        np.corrcoef(analysis_gmt[analysis_tidx], a_ens_gmt[lmr_tidx])[0, 1]
        for a_ens_gmt in gmt_ens
    ])

    return ens_ce, ens_r
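
Both this helper and find_ce_corr in Example 15 lean on utils2.coefficient_efficiency, whose source is not shown here. It is assumed to be the standard (Nash-Sutcliffe style) coefficient of efficiency; a minimal sketch under that assumption:

import numpy as np

def coefficient_efficiency_sketch(ref, test):
    # CE = 1 - SSE(test, ref) / sum of squared anomalies of ref;
    # CE equals exactly 1 only for a perfect match, which is why
    # ce_r_ens_avg above masks values of exactly 1 as suspect.
    ref = np.asarray(ref, dtype=float)
    test = np.asarray(test, dtype=float)
    error = np.sum((test - ref) ** 2)
    spread = np.sum((ref - ref.mean()) ** 2)
    return 1.0 - error / spread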
Example 6
def test_calc_latlon_bnd_bounds():
    lat_data = np.array([-33.75, -11.25, 11.25, 33.75])
    lon_data = np.array([18, 54, 90, 126, 162])

    lat_bnds, lon_bnds = Utils.calculate_latlon_bnds(lat_data, lon_data)

    np.testing.assert_equal(lat_bnds, [-45, -22.5, 0, 22.5, 45])
    np.testing.assert_equal(lon_bnds, [0, 36, 72, 108, 144, 180])
Example 7
def test_calc_latlon_bnd_bounds_half_shift():
    lat_data = np.array([-90, -60, -30, 0, 30, 60, 90])
    lon_data = np.array([0, 90, 180, 270])

    lat_bnds, lon_bnds = Utils.calculate_latlon_bnds(lat_data, lon_data)

    np.testing.assert_equal(lat_bnds, [-90, -75, -45, -15, 15, 45, 75, 90])
    np.testing.assert_equal(lon_bnds, [-45, 45, 135, 225, 315])
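
Taken together, the calculate_latlon_bnds assertions in Examples 4, 6 and 7 pin the behavior down: bounds sit at the midpoints between adjacent 1D grid centers, are extrapolated by half a cell at each end, and latitude bounds are clipped at the poles. A minimal sketch consistent with those tests (hypothetical, and omitting the input validation exercised in Example 19):

import numpy as np

def calculate_latlon_bnds_sketch(lats, lons):
    def bnds(x):
        x = np.asarray(x, dtype=float)
        mid = (x[:-1] + x[1:]) / 2            # midpoints between centers
        first = x[0] - (x[1] - x[0]) / 2      # extrapolate half a cell down
        last = x[-1] + (x[-1] - x[-2]) / 2    # extrapolate half a cell up
        return np.concatenate(([first], mid, [last]))

    lat_bnds = np.clip(bnds(lats), -90, 90)   # latitudes cannot pass the poles
    lon_bnds = bnds(lons)                     # longitudes are left unclipped
    return lat_bnds, lon_bnds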
Example 8
def test_generate_latlon_output_shp():
    nlats = 4
    nlons = 5

    lats, lons, clats, clons = Utils.generate_latlon(nlats, nlons)
    assert lats.shape == (4, 5)
    assert lons.shape == (4, 5)
    assert clats.shape == (5,)
    assert clons.shape == (6,)
Example 9
def test_generate_latlon_center_corner():
    lats, lons, clats, clons = Utils.generate_latlon(4, 5,
                                                     lat_bnd=(-45, 45),
                                                     lon_bnd=(0, 180))

    np.testing.assert_equal(lats[:, 0], [-33.75, -11.25, 11.25, 33.75])
    np.testing.assert_equal(lons[0], [0, 36, 72, 108, 144])
    np.testing.assert_equal(clats, [-45, -22.5, 0, 22.5, 45])
    np.testing.assert_equal(clons, [-18, 18, 54, 90, 126, 162])
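
Similarly, the generate_latlon assertions in Examples 2, 8 and 9 determine the grid layout: 2D center grids plus 1D corner arrays with one extra element per axis. The sketch below reproduces exactly what those assertions check; everything beyond that (default bounds, longitude handling under include_endpts, the bounds validation of Example 18) is an assumption:

import numpy as np

def generate_latlon_sketch(nlats, nlons, lat_bnd=(-90, 90),
                           lon_bnd=(0, 360), include_endpts=False):
    lat_space = (lat_bnd[1] - lat_bnd[0]) / nlats
    lon_space = (lon_bnd[1] - lon_bnd[0]) / nlons

    if include_endpts:
        # latitude centers run bound-to-bound; corners clamp at the bounds
        lat_center = np.linspace(lat_bnd[0], lat_bnd[1], nlats)
        corner_lat = np.concatenate(([lat_bnd[0]],
                                     (lat_center[:-1] + lat_center[1:]) / 2,
                                     [lat_bnd[1]]))
    else:
        # latitude centers sit at cell midpoints inside the bounds
        lat_center = lat_bnd[0] + lat_space / 2 + lat_space * np.arange(nlats)
        corner_lat = lat_bnd[0] + lat_space * np.arange(nlats + 1)

    # longitude centers start on the lower bound; corners shift back half a cell
    lon_center = lon_bnd[0] + lon_space * np.arange(nlons)
    corner_lon = lon_bnd[0] - lon_space / 2 + lon_space * np.arange(nlons + 1)

    lon_grid, lat_grid = np.meshgrid(lon_center, lat_center)
    return lat_grid, lon_grid, corner_lat, corner_lon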
Example 10
def test_global_mean2(ncf_data):

    dat = ncf_data['air'][0:4]
    lat = ncf_data['lat'][:]
    lon = ncf_data['lon'][:]

    longrid, latgrid = np.meshgrid(lon, lat)

    gm_time, _, _ = Utils.global_hemispheric_means(dat, lat)
    gm0, _, _ = Utils.global_hemispheric_means(dat[0], lat)

    # with time
    gm_test = Utils.global_mean2(dat, lat)
    np.testing.assert_allclose(gm_test, gm_time)

    # flattened lat w/ time
    flat_dat = dat.reshape(4, 94*192)
    gm_test = Utils.global_mean2(flat_dat, latgrid.flatten())
    np.testing.assert_allclose(gm_test, gm_time)

    # no time
    gm_test = Utils.global_mean2(dat[0], lat)
    np.testing.assert_allclose(gm_test, gm0)

    # no time flattened spatial
    gm_test = Utils.global_mean2(dat[0].flatten(), latgrid.flatten())
    np.testing.assert_allclose(gm_test, gm0)

    # NaN values
    dat[:, 0, :] = np.nan
    gm_nan_time, _, _ = Utils.global_hemispheric_means(dat, lat)
    gm_nan_test = Utils.global_mean2(dat, lat)
    np.testing.assert_allclose(gm_nan_test, gm_nan_time)

    # Test hemispheric
    gm_time, nhm_time, shm_time = Utils.global_hemispheric_means(dat, lat)
    gm_test, nhm_test, shm_test = Utils.global_mean2(dat, lat,
                                                     output_hemispheric=True)
    np.testing.assert_allclose(gm_test, gm_time)
    np.testing.assert_allclose(nhm_test, nhm_time)
    np.testing.assert_allclose(shm_test, shm_time)
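
The test requires global_mean2 to accept gridded fields with a 1D latitude axis as well as flattened fields with one latitude per point, each with or without a leading time dimension, and to skip NaNs. A hedged sketch of a cosine-latitude weighted mean with that behavior (hemispheric outputs omitted; not the LMR implementation):

import numpy as np

def global_mean2_sketch(data, lats):
    data = np.asarray(data, dtype=float)
    w = np.cos(np.deg2rad(np.asarray(lats, dtype=float)))
    if w.size != data.shape[-1]:
        # gridded field(s) with a 1D latitude axis: tile the weights
        # across longitudes and flatten the spatial dimensions
        w = np.broadcast_to(w[:, None], data.shape[-2:]).ravel()
        data = data.reshape(data.shape[:-2] + (-1,))
    valid = ~np.isnan(data)
    wsum = np.where(valid, w, 0.0).sum(axis=-1)   # weights of valid points only
    dsum = np.nansum(data * w, axis=-1)           # weighted sum, NaNs skipped
    return dsum / wsum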
Example 11
def calc_extent(SIC, GRID, CUTOFF):
    """Threshold a sea-ice concentration field at CUTOFF and return the
    NH mean of the resulting 0/100 extent field along with the field."""
    sie_lalo = SIC.copy()  # copy so the caller's array is not modified in place

    sie_lalo[sie_lalo <= CUTOFF] = 0.0
    sie_lalo[sie_lalo > CUTOFF] = 100.0

    _, sie_nhtot, _ = lmr.global_hemispheric_means(sie_lalo, GRID.lat[:, 0])

    return sie_nhtot, sie_lalo
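
A hypothetical call, with sic a (ntimes, nlat, nlon) concentration field in percent and 15% the conventional extent cutoff (the variable names and the cutoff here are illustrative, not from the source):

sie_nh, sie_field = calc_extent(sic, grid, 15.0)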
Example 12
def test_averaging_period_span_greater_than_nelem_in_yr():

    with pytest.raises(ValueError):
        Utils.get_averaging_period([-1, 0, 11], nelem_in_yr=12,
                                   is_zero_based=True)

    with pytest.raises(ValueError):
        Utils.get_averaging_period([0, 12], nelem_in_yr=12, is_zero_based=True)

    with pytest.raises(ValueError):
        Utils.get_averaging_period([-6, 7], 12)

    with pytest.raises(ValueError):
        Utils.get_averaging_period([4, 16], 12)
Example 13
def calc_analysis_gmt(analysis_var_obj,
                      trange=(1880, 2000),
                      center_trange=(1900, 2000),
                      detrend=False):
    gm_analysis = utils2.global_mean2(analysis_var_obj.data,
                                      analysis_var_obj.lat)
    gm_analysis = center_to_time_range(analysis_var_obj.time,
                                       gm_analysis,
                                       0,
                                       trange=center_trange)
    analysis_tidx = ((analysis_var_obj.time >= trange[0]) &
                     (analysis_var_obj.time <= trange[1]))
    gm_analysis = gm_analysis[analysis_tidx]

    if detrend:
        linfit_line, coef = detrend_data(
            analysis_var_obj.time[analysis_tidx][:, None],
            gm_analysis[:, None],
            ret_coef=True)
        gm_analysis -= linfit_line.squeeze()

    return gm_analysis
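
center_to_time_range (also used in Example 5) is assumed to subtract the mean over a reference window along a chosen time axis, turning the series into anomalies relative to that window; a minimal sketch under that assumption:

import numpy as np

def center_to_time_range_sketch(times, data, time_axis=0, trange=(1900, 2000)):
    # mean over the reference window, kept with a singleton axis so the
    # subtraction broadcasts for both 1D series and (nens, ntime) arrays
    mask = (times >= trange[0]) & (times <= trange[1])
    ref = np.take(data, np.flatnonzero(mask), axis=time_axis)
    return data - ref.mean(axis=time_axis, keepdims=True)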
Example 14
def make_obs(ob_lat, ob_lon, dat_lat, dat_lon, dat, verbose=False):
    """
    make observations from a gridded dataset given lat and lon locations
    
    Inputs:
    ob_lat, ob_lon: vector lat,lon coordinates of observations. 
    dat_lat,dat_lon: vector lat,lon coordinates of input data
    dat: array of input data from which observations are drawn. (ntimes,nlat,nlon)

    Output:
    obs: the observations [nobs,nyears]
    """

    nyears = dat.shape[0]
    if verbose: print('nyears: ' + str(nyears))

    nobs = len(ob_lat) * len(ob_lon)
    if verbose: print('nobs: ' + str(nobs))

    # initialize
    obs = np.zeros([nobs, nyears])
    obs_ind_lat = np.zeros(nobs)
    obs_ind_lon = np.zeros(nobs)

    k = -1
    # make the obs
    for lon in ob_lon:
        for lat in ob_lat:
            k = k + 1
            dist = LMR_utils.get_distance(lon, lat, dat_lon, dat_lat)
            jind, kind = np.unravel_index(dist.argmin(), dist.shape)
            obs[k, :] = dat[:, jind, kind]
            obs_ind_lat[k] = jind
            obs_ind_lon[k] = kind

    return obs, obs_ind_lat, obs_ind_lon
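
LMR_utils.get_distance is assumed to return great-circle distances from the site to every grid point as an (nlat, nlon) array, which is what the unravel_index call above requires. A haversine sketch under that assumption (hypothetical stand-in; mean Earth radius assumed):

import numpy as np

EARTH_RADIUS_KM = 6371.0

def get_distance_sketch(site_lon, site_lat, grid_lon, grid_lat):
    # meshgrid(lon, lat) yields (nlat, nlon) arrays, so argmin over the
    # result unravels to (lat index, lon index) as used in make_obs
    lon2d, lat2d = np.meshgrid(np.deg2rad(grid_lon), np.deg2rad(grid_lat))
    lat0, lon0 = np.deg2rad(site_lat), np.deg2rad(site_lon)
    a = (np.sin((lat2d - lat0) / 2) ** 2 +
         np.cos(lat0) * np.cos(lat2d) * np.sin((lon2d - lon0) / 2) ** 2)
    return 2.0 * EARTH_RADIUS_KM * np.arcsin(np.sqrt(a))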
Example 15
def find_ce_corr(VAR,
                 REF,
                 REF_TIME,
                 VAR_TIME,
                 START_TIME,
                 END_TIME,
                 detrend=False):
    """Finds the correlation coefficient and coefficient of efficiency between 
       REF and VAR between START_TIME and END_TIME.
       inputs: 
           VAR = test data (1D in time)
           REF = reference data (1D in time) 
           REF_time = reference data time (1D time)
           VAR_TIME = test data time (1D years)
           START_TIME = comparison start year to be included (float)
           END_TIME = last year included in comparison (float)
       
    """
    yr_range_var = np.where((VAR_TIME >= START_TIME)
                            & (VAR_TIME < END_TIME + 1))
    yr_range_ref = np.where((REF_TIME >= START_TIME)
                            & (REF_TIME < END_TIME + 1))

    if detrend is False:
        ref = REF[yr_range_ref[0]]
        var = VAR[yr_range_var[0]]
    else:
        ref = spy.detrend(REF[yr_range_ref])
        var = spy.detrend(VAR[yr_range_var])

    ce = lmr.coefficient_efficiency(ref, var)
    corr = np.corrcoef(ref, var)[0, 1]
    var_ref = np.var(ref)
    var_var = np.var(var)

    return ce, corr, var_ref, var_var
Example 16
def test_averaging_period_nelem_greater_than_nelem_in_yr():

    with pytest.raises(ValueError):
        Utils.get_averaging_period((1, 2, 3, 4, 5), nelem_in_yr=3)
Example 17
def load_config(yaml_file, verbose=False):
    begin_time = time()

    if not LMR_config.LEGACY_CONFIG:

        try:
            if verbose: print('Loading configuration: {}'.format(yaml_file))
            with open(yaml_file, 'r') as f:
                yml_dict = yaml.safe_load(f)
            update_result = LMR_config.update_config_class_yaml(
                yml_dict, LMR_config)

            # Check that all yml params match value in LMR_config
            if update_result:
                raise SystemExit(
                    'Extra or mismatching values found in the configuration yaml'
                    ' file.  Please fix or remove them.\n  Residual parameters:\n '
                    '{}'.format(update_result))

        except IOError:
            raise SystemExit(
                ('Could not locate {}.  If use of the legacy LMR_config is '
                 'desired then please change LEGACY_CONFIG to True '
                 'in LMR_wrapper.py.').format(yaml_file))

    # Define main experiment output directory
    iter_range = LMR_config.wrapper.iter_range
    expdir = os.path.join(LMR_config.core.datadir_output, LMR_config.core.nexp)
    arc_dir = os.path.join(LMR_config.core.archive_dir, LMR_config.core.nexp)

    # Check if it exists, if not, create it
    if not os.path.isdir(expdir):
        os.makedirs(expdir)

    # Monte-Carlo approach: loop over iterations (range of iterations defined in
    # namelist)
    MCiters = range(iter_range[0], iter_range[1] + 1)
    param_iterables = [MCiters]

    # get other parameters to sweep over in the reconstruction
    param_search = LMR_config.wrapper.param_search
    if param_search is not None:
        # sort them by parameter name and combine into a list of iterables
        sort_params = list(param_search.keys())
        sort_params.sort(key=lambda x: x.split('.')[-1])
        param_values = [param_search[key] for key in sort_params]
        param_iterables = param_values + [MCiters]

    for iter_and_params in itertools.product(*param_iterables):

        iter_num = iter_and_params[-1]
        cfg_dict = Utils.param_cfg_update('core.curr_iter', iter_num)

        if LMR_config.wrapper.multi_seed is not None:
            curr_seed = LMR_config.wrapper.multi_seed[iter_num]
            cfg_dict = Utils.param_cfg_update('core.seed',
                                              curr_seed,
                                              cfg_dict=cfg_dict)
            #print ('Setting current iteration seed: {}'.format(curr_seed))

        itr_str = 'r{:d}'.format(iter_num)
        # If parameter space search is being performed then set the current
        # search space values and create a special sub-directory
        if param_search is not None:
            curr_param_values = iter_and_params[:-1]
            cfg_dict, psearch_dir = Utils.psearch_list_cfg_update(
                sort_params, curr_param_values, cfg_dict=cfg_dict)

            working_dir = os.path.join(expdir, psearch_dir, itr_str)
            mc_arc_dir = os.path.join(arc_dir, psearch_dir, itr_str)
        else:
            working_dir = os.path.join(expdir, itr_str)
            mc_arc_dir = os.path.join(arc_dir, itr_str)

        cfg_params = Utils.param_cfg_update('core.datadir_output',
                                            working_dir,
                                            cfg_dict=cfg_dict)

        cfg = LMR_config.Config(**cfg_params)

        proceed = validate_config(cfg)
        if not proceed:
            raise SystemExit()
        else:
            print('OK!')
            pass

    if verbose:
        elapsed_time = time() - begin_time
        print('-----------------------------------------------------')
        print('completed in ' + str(elapsed_time) + ' seconds')
        print('-----------------------------------------------------')

    return cfg
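
The parameter-sweep expansion is easiest to see in isolation. A self-contained illustration ('core.loc_rad' is a config attribute that appears elsewhere in this codebase; the candidate values and iteration range are made up):

import itertools

param_search = {'core.loc_rad': [None, 10000.0]}  # hypothetical sweep values
MCiters = range(0, 2)                             # e.g. iter_range = (0, 1)

sort_params = sorted(param_search.keys(), key=lambda x: x.split('.')[-1])
param_iterables = [param_search[key] for key in sort_params] + [MCiters]
for combo in itertools.product(*param_iterables):
    # every parameter value paired with every Monte-Carlo iteration:
    # (None, 0), (None, 1), (10000.0, 0), (10000.0, 1)
    print(combo)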
Example 18
def test_generate_latlon_bnd_limits():
    # TODO: could be parametrized input
    # Defaults
    Utils.generate_latlon(5, 5)

    # Bad lat bounds
    with pytest.raises(ValueError):
        Utils.generate_latlon(5, 5, lat_bnd=(-100, 45))
    with pytest.raises(ValueError):
        Utils.generate_latlon(5, 5, lat_bnd=(-45, 91))

    # Bad lon bounds
    Utils.generate_latlon(5, 5, lon_bnd=(-90, 270))

    with pytest.raises(ValueError):
        Utils.generate_latlon(5, 5, lon_bnd=(-180, 181))
    with pytest.raises(ValueError):
        Utils.generate_latlon(5, 5, lon_bnd=(-181, 40))
    with pytest.raises(ValueError):
        Utils.generate_latlon(5, 5, lon_bnd=(14, 361))
Example 19
def test_calc_latlon_bnd_1d_input():
    test_data = np.linspace(10, 50, 5)
    with pytest.raises(ValueError):
        _ = Utils.calculate_latlon_bnds(test_data[:, None], test_data)
    with pytest.raises(ValueError):
        _ = Utils.calculate_latlon_bnds(test_data, test_data[:, None])
Example 20
cfg = LMRlite.load_config(yaml_file)

print('loading proxies...')
prox_manager = LMRlite.load_proxies(cfg)
print('loading prior...')
X, Xb_one = LMRlite.load_prior(cfg)
# check if config was set to regrid the prior
if cfg.prior.regrid_method:
    print('regridding prior...')
    # this function over-writes X, even if return is given a different name
    [X, Xb_one] = LMRlite.prior_regrid(cfg, X, Xb_one, verbose=True)
else:
    X.trunc_state_info = X.full_state_info

print('loading Ye...')
Ye_assim, Ye_assim_coords = LMR_utils.load_precalculated_ye_vals_psm_per_proxy(
    cfg, prox_manager, 'assim', X.prior_sample_indices)

#-----------------------------------------------------------------
# example reconstruction for one year
#-----------------------------------------------------------------
target_year = cfg.core.recon_period[0]
print('performing a test reconstruction for year: ' + str(target_year))
vY, vR, vP, vYe, vT, vYe_coords = LMRlite.get_valid_proxies(
    cfg, prox_manager, target_year, Ye_assim, Ye_assim_coords)
xam, Xap, _ = LMRlite.Kalman_optimal(vY, vR, vYe, Xb_one, verbose=False)
xam2, Xap2 = LMRlite.Kalman_ESRF(cfg, vY, vR, vYe, Xb_one, verbose=False)
print('ens mean max difference from different solvers...',
      str(np.max(np.abs((xam2 - xam) / xam))))

#-----------------------------------------------------------------
# reconstruction over recon_period, computing GMT on the way
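
The snippet is truncated at this header. A hedged sketch of how such a loop could continue, reusing only calls already shown above (the GMT bookkeeping is left as a comment since that code is not shown):

for target_year in range(cfg.core.recon_period[0],
                         cfg.core.recon_period[1] + 1):
    vY, vR, vP, vYe, vT, vYe_coords = LMRlite.get_valid_proxies(
        cfg, prox_manager, target_year, Ye_assim, Ye_assim_coords)
    xam, Xap, _ = LMRlite.Kalman_optimal(vY, vR, vYe, Xb_one, verbose=False)
    # ...store xam (and a global mean computed from it) for target_year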
Example 21
def cov_localization(locRad, Y, X, X_coords):
    """

    Originator: R. Tardif, 
                Dept. Atmos. Sciences, Univ. of Washington
    -----------------------------------------------------------------
     Inputs:
        locRad : Localization radius (distance in km beyond which cov are forced to zero)
             Y : Proxy object, needed to get ob site lat/lon (to calculate distances w.r.t. grid pts)
             X : Prior object, needed to get state vector info. 
      X_coords : Array containing geographic location information of state vector elements

     Output:
        covLoc : Localization vector (weights) applied to ensemble covariance estimates.
                 Dims = (Nx x 1), with Nx the dimension of the state vector.

     Note: Uses the Gaspari-Cohn localization function.

    """

    # declare the localization array, filled with ones to start with (as in no localization)
    stateVectDim, nbdimcoord = X_coords.shape
    covLoc = np.ones(shape=[stateVectDim], dtype=np.float64)

    # Mask to identify elements of state vector that are "localizeable"
    # i.e. fields with (lat,lon)
    localizeable = covLoc == 1.  # Initialize as True

    for var in X.trunc_state_info.keys():
        [var_state_pos_begin,
         var_state_pos_end] = X.trunc_state_info[var]['pos']
        # if variable is not a field with lats & lons, tag localizeable as False
        if X.trunc_state_info[var]['spacecoords'] != ('lat', 'lon'):
            localizeable[var_state_pos_begin:var_state_pos_end + 1] = False

    # array of distances between state vector elements & proxy site
    # initialized as zeros: this is important!
    dists = np.zeros(shape=[stateVectDim])

    # geographic location of proxy site
    site_lat = Y.lat
    site_lon = Y.lon
    # geographic locations of elements of state vector
    X_lon = X_coords[:, 1]
    X_lat = X_coords[:, 0]

    # calculate distances for elements tagged as "localizeable".
    dists[localizeable] = np.array(LMR_utils.haversine(site_lon, site_lat,
                                                       X_lon[localizeable],
                                                       X_lat[localizeable]),
                                   dtype=np.float64)

    # those not "localizeable" are assigned a distance of NaN
    # so these elements will not be included in the indexing
    # according to distances (see below)
    dists[~localizeable] = np.nan

    # Some transformations of variables used in calculating localization weights:
    # work with half the localization radius
    hlr = 0.5 * locRad
    r = dists / hlr

    # indexing w.r.t. distances
    ind_inner = np.where(dists <= hlr)  # closest
    ind_outer = np.where(dists > hlr)  # close
    ind_out = np.where(dists > 2. * hlr)  # out

    # Gaspari-Cohn function
    # for pts within 1/2 of localization radius
    covLoc[ind_inner] = (((-0.25*r[ind_inner]+0.5)*r[ind_inner]+0.625)* \
                         r[ind_inner]-(5.0/3.0))*(r[ind_inner]**2)+1.0
    # for pts between 1/2 and one localization radius
    covLoc[ind_outer] = ((((r[ind_outer]/12. - 0.5) * r[ind_outer] + 0.625) * \
                          r[ind_outer] + 5.0/3.0) * r[ind_outer] - 5.0) * \
                          r[ind_outer] + 4.0 - 2.0/(3.0*r[ind_outer])
    # Impose zero for pts outside of localization radius
    covLoc[ind_out] = 0.0

    # prevent negative values: calc. above may produce tiny negative
    # values for distances very near the localization radius
    # TODO: revisit calculations to minimize round-off errors
    covLoc[covLoc < 0.0] = 0.0

    return covLoc
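
The piecewise polynomials are easier to verify in compact form. The sketch below restates the same Gaspari-Cohn taper as a standalone function of distance, with identical coefficients; the only difference is that points beyond twice the half-radius are excluded up front rather than zeroed afterwards:

import numpy as np

def gaspari_cohn_sketch(dists_km, loc_rad_km):
    # r is distance in units of half the localization radius; weights fall
    # from 1 at r = 0 to exactly 0 at r = 2 (i.e. at loc_rad_km)
    r = np.asarray(dists_km, dtype=np.float64) / (0.5 * loc_rad_km)
    w = np.zeros_like(r)
    inner = r <= 1.0
    outer = (r > 1.0) & (r < 2.0)
    ri, ro = r[inner], r[outer]
    w[inner] = ((((-0.25 * ri + 0.5) * ri + 0.625) * ri - 5.0 / 3.0) * ri ** 2
                + 1.0)
    w[outer] = (((((ro / 12.0 - 0.5) * ro + 0.625) * ro + 5.0 / 3.0) * ro
                 - 5.0) * ro + 4.0 - 2.0 / (3.0 * ro))
    return np.clip(w, 0.0, None)  # guard against round-off going negative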
Example 22
def LMR_driver_callable(cfg=None):

    if cfg is None:
        cfg = BaseCfg.Config()  # Use base configuration from LMR_config

    # Temporary fix for old 'state usage'
    core = cfg.core
    prior = cfg.prior

    # verbose controls print comments (0 = none; 1 = most important;
    #  2 = many; 3 = a lot; >=4 = all)
    verbose = cfg.LOG_LEVEL

    nexp = core.nexp
    workdir = core.datadir_output
    recon_period = core.recon_period
    recon_timescale = core.recon_timescale
    online = core.online_reconstruction
    nens = core.nens
    loc_rad = core.loc_rad
    inflation_fact = core.inflation_fact
    prior_source = prior.prior_source
    datadir_prior = prior.datadir_prior
    datafile_prior = prior.datafile_prior
    state_variables = prior.state_variables
    state_variables_info = prior.state_variables_info
    regrid_method = prior.regrid_method
    regrid_resolution = prior.regrid_resolution

    # ==========================================================================
    # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< MAIN CODE >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
    # ==========================================================================
    # TODO: AP Logging instead of print statements
    if verbose > 0:
        print('')
        print('=====================================================')
        print('Running LMR reconstruction...')
        print('=====================================================')
        print('Name of experiment: ', nexp)
        print(' Monte Carlo iter : ', core.curr_iter)
        print('')

    begin_time = time()

    # Define the number of years of the reconstruction
    # (nb of assimilation times)
    recon_times = np.arange(recon_period[0], recon_period[1] + 1,
                            recon_timescale)
    ntimes, = recon_times.shape

    # ==========================================================================
    # Load prior data ----------------------------------------------------------
    # ==========================================================================
    if verbose > 0:
        print('-------------------------------------------')
        print('Uploading gridded (model) data as prior ...')
        print('-------------------------------------------')
        print('Source for prior: ', prior_source)

    # Assign prior object according to "prior_source" (from namelist)
    X = LMR_prior.prior_assignment(prior_source)

    # TODO: AP explicit requirements
    # add namelist attributes to the prior object
    X.prior_datadir = datadir_prior
    X.prior_datafile = datafile_prior
    X.statevars = state_variables
    X.statevars_info = state_variables_info
    X.Nens = nens
    # Use a specified reference period for state variable anomalies
    X.anom_reference = prior.anom_reference
    # new option: detrending the prior
    X.detrend = prior.detrend
    print('detrend:', X.detrend)
    X.avgInterval = prior.avgInterval

    # Read data file & populate initial prior ensemble
    X.populate_ensemble(prior_source, prior)
    Xb_one_full = X.ens

    # Prepare to check for files in the prior (work) directory (this object just
    # points to a directory)
    prior_check = np.DataSource(workdir)

    load_time = time() - begin_time
    if verbose > 2:
        print('-----------------------------------------------------')
        print('Loading completed in ' + str(load_time) + ' seconds')
        print('-----------------------------------------------------')

    # check covariance inflation from config
    inflate = None
    if inflation_fact is not None:
        inflate = inflation_fact
        if verbose > 2:
            print(('\nUsing covariance inflation factor: %8.2f' % inflate))

    # ==========================================================================
    # Get information on proxies to assimilate ---------------------------------
    # ==========================================================================

    begin_time_proxy_load = time()
    if verbose > 0:
        print('')
        print('-----------------------------------')
        print('Uploading proxy data & PSM info ...')
        print('-----------------------------------')

    # Build dictionaries of proxy sites to assimilate and those set aside for
    # verification
    prox_manager = LMR_proxy_pandas_rework.ProxyManager(cfg, recon_period)
    type_site_assim = prox_manager.assim_ids_by_group

    if verbose > 3:
        print('Assimilating proxy types/sites:', type_site_assim)

    # count the total number of proxies (needed below even when not printing)
    assim_proxy_count = len(prox_manager.ind_assim)

    if verbose > 0:
        print(
            '--------------------------------------------------------------------'
        )
        print('Proxy counts for experiment:')
        for pkey, plist in sorted(type_site_assim.items()):
            print(('%45s : %5d' % (pkey, len(plist))))
        print(('%45s : %5d' % ('TOTAL', assim_proxy_count)))
        print(
            '--------------------------------------------------------------------'
        )

    if verbose > 2:
        proxy_load_time = time() - begin_time_proxy_load
        print('-----------------------------------------------------')
        print('Loading completed in ' + str(proxy_load_time) + ' seconds')
        print('-----------------------------------------------------')

    # ==========================================================================
    # Calculate truncated state from prior, if option chosen -------------------
    # ==========================================================================
    if regrid_method:

        # Declare dictionary w/ info on content of truncated state vector
        new_state_info = {}

        # Transform every 2D state variable, one at a time
        Nx = 0
        for var in list(X.full_state_info.keys()):
            dct = {}

            dct['vartype'] = X.full_state_info[var]['vartype']

            # variable indices in full state vector
            ibeg_full = X.full_state_info[var]['pos'][0]
            iend_full = X.full_state_info[var]['pos'][1]
            # extract array corresponding to state variable "var"
            var_array_full = Xb_one_full[ibeg_full:iend_full + 1, :]
            # corresponding spatial coordinates
            coords_array_full = X.coords[ibeg_full:iend_full + 1, :]

            # Are we truncating this variable? (i.e. is it a 2D lat/lon variable?)

            if X.full_state_info[var]['vartype'] == '2D:horizontal':
                print(var, ' : 2D lat/lon variable, truncating this variable')
                # lat/lon column indices in X.coords
                ind_lon = X.full_state_info[var]['spacecoords'].index('lon')
                ind_lat = X.full_state_info[var]['spacecoords'].index('lat')
                nlat = X.full_state_info[var]['spacedims'][ind_lat]
                nlon = X.full_state_info[var]['spacedims'][ind_lon]

                # calculate the truncated field
                if regrid_method == 'simple':
                    [var_array_new, lat_new, lon_new] = \
                        LMR_utils.regrid_simple(nens, var_array_full, coords_array_full, \
                                                ind_lat, ind_lon, regrid_resolution)
                elif regrid_method == 'spherical_harmonics':
                    [var_array_new, lat_new, lon_new] = \
                        LMR_utils.regrid_sphere(nlat, nlon, nens, var_array_full, regrid_resolution)
                elif regrid_method == 'esmpy':
                    target_grid = prior.esmpy_grid_def

                    lat_2d = coords_array_full[:, ind_lat].reshape(nlat, nlon)
                    lon_2d = coords_array_full[:, ind_lon].reshape(nlat, nlon)

                    [var_array_new, lat_new, lon_new] = LMR_utils.regrid_esmpy(
                        target_grid['nlat'],
                        target_grid['nlon'],
                        nens,
                        var_array_full,
                        lat_2d,
                        lon_2d,
                        nlat,
                        nlon,
                        include_poles=target_grid['include_poles'],
                        method=prior.esmpy_interp_method)
                else:
                    print('Exiting! Unrecognized regridding method.')
                    raise SystemExit

                nlat_new = np.shape(lat_new)[0]
                nlon_new = np.shape(lat_new)[1]

                print(('=> Full array:      ' + str(np.min(var_array_full)) +
                       ' ' + str(np.max(var_array_full)) + ' ' +
                       str(np.mean(var_array_full)) + ' ' +
                       str(np.std(var_array_full))))
                print(('=> Truncated array: ' + str(np.min(var_array_new)) +
                       ' ' + str(np.max(var_array_new)) + ' ' +
                       str(np.mean(var_array_new)) + ' ' +
                       str(np.std(var_array_new))))

                # corresponding indices in truncated state vector
                ibeg_new = Nx
                iend_new = Nx + (nlat_new * nlon_new) - 1
                # for new state info dictionary
                dct['pos'] = (ibeg_new, iend_new)
                dct['spacecoords'] = X.full_state_info[var]['spacecoords']
                dct['spacedims'] = (nlat_new, nlon_new)
                # updated dimension
                new_dims = (nlat_new * nlon_new)

                # array with new spatial coords
                coords_array_new = np.zeros(shape=[new_dims, 2])
                coords_array_new[:, 0] = lat_new.flatten()
                coords_array_new[:, 1] = lon_new.flatten()

            else:
                print(var,
                      ' : not truncating this variable: no changes from full state')

                var_array_new = var_array_full
                coords_array_new = coords_array_full
                # updated dimension
                new_dims = var_array_new.shape[0]
                ibeg_new = Nx
                iend_new = Nx + new_dims - 1
                dct['pos'] = (ibeg_new, iend_new)
                dct['spacecoords'] = X.full_state_info[var]['spacecoords']
                dct['spacedims'] = X.full_state_info[var]['spacedims']

            # fill in new state info dictionary
            new_state_info[var] = dct

            # if 1st time in loop over state variables, create Xb_one array as copy
            # of var_array_new
            if Nx == 0:
                Xb_one = np.copy(var_array_new)
                Xb_one_coords = np.copy(coords_array_new)
            else:  # if not 1st time, append to existing array
                Xb_one = np.append(Xb_one, var_array_new, axis=0)
                Xb_one_coords = np.append(Xb_one_coords,
                                          coords_array_new,
                                          axis=0)

            # making sure Xb_one has proper mask, if it contains
            # at least one invalid value
            if np.isnan(Xb_one).any():
                Xb_one = np.ma.masked_invalid(Xb_one)
                np.ma.set_fill_value(Xb_one, np.nan)

            # updating dimension of new state vector
            Nx = Nx + new_dims

        X.trunc_state_info = new_state_info

    else:  # no truncation: carry over full state to working array
        X.trunc_state_info = X.full_state_info
        Xb_one = Xb_one_full
        Xb_one_coords = X.coords

        [Nx, _] = Xb_one.shape

    # Keep dimension of pre-augmented version of state vector
    [state_dim, _] = Xb_one.shape

    # ==========================================================================
    # Calculate all Ye's (for all sites in sites_assim) ------------------------
    # ==========================================================================

    # Load or generate Ye Values for assimilation
    if not online:
        # Load pre calculated ye values if desired or possible
        try:
            if not cfg.core.use_precalc_ye:
                raise FlagError(
                    'use_precalc_ye=False: forego loading precalcul'
                    'ated ye values.')

            print(
                'Loading precalculated Ye values for proxies to be assimilated.'
            )
            [Ye_assim, Ye_assim_coords
             ] = LMR_utils.load_precalculated_ye_vals_psm_per_proxy(
                 cfg, prox_manager, 'assim', X.prior_sample_indices)

            eval_proxy_count = 0
            if prox_manager.ind_eval:
                print('Loading precalculated Ye values for withheld proxies.')
                [Ye_eval, Ye_eval_coords
                 ] = LMR_utils.load_precalculated_ye_vals_psm_per_proxy(
                     cfg, prox_manager, 'eval', X.prior_sample_indices)
                [eval_proxy_count, _] = Ye_eval.shape

        except (IOError, FlagError) as e:
            print(e)

            # Manually calculate ye_values from state vector
            print('Calculating ye_values from the prior...')
            Ye_assim = np.empty(shape=[assim_proxy_count, nens])
            Ye_assim_coords = np.empty(shape=[assim_proxy_count, 2])
            for k, proxy in enumerate(prox_manager.sites_assim_proxy_objs()):
                Ye_assim[k, :] = proxy.psm(Xb_one_full, X.full_state_info,
                                           X.coords)
                Ye_assim_coords[k, :] = np.asarray([proxy.lat, proxy.lon],
                                                   dtype=np.float64)

            eval_proxy_count = 0
            if prox_manager.ind_eval:
                eval_proxy_count = len(prox_manager.ind_eval)
                Ye_eval = np.empty(shape=[eval_proxy_count, nens])
                Ye_eval_coords = np.empty(shape=[eval_proxy_count, 2])
                for k, proxy in enumerate(
                        prox_manager.sites_eval_proxy_objs()):
                    Ye_eval[k, :] = proxy.psm(Xb_one_full, X.full_state_info,
                                              X.coords)
                    Ye_eval_coords[k, :] = np.asarray([proxy.lat, proxy.lon],
                                                      dtype=np.float64)

        # ----------------------------------
        # Augment state vector with the Ye's
        # ----------------------------------
        # Append ensemble of Ye's of assimilated proxies to prior state vector
        Xb_one_aug = np.append(Xb_one, Ye_assim, axis=0)
        Xb_one_coords = np.append(Xb_one_coords, Ye_assim_coords, axis=0)

        if prox_manager.ind_eval:
            # Append ensemble of Ye's of withheld proxies to prior state vector
            Xb_one_aug = np.append(Xb_one_aug, Ye_eval, axis=0)
            Xb_one_coords = np.append(Xb_one_coords, Ye_eval_coords, axis=0)

    else:
        Xb_one_aug = Xb_one

    # Dump entire prior state vector (Xb_one) to file
    filen = workdir + '/' + 'Xb_one'
    try:
        out_Xb_one = Xb_one.filled()
        out_Xb_one_aug = Xb_one_aug.filled()
    except AttributeError as e:
        out_Xb_one = Xb_one
        out_Xb_one_aug = Xb_one_aug

    np.savez(filen,
             Xb_one=out_Xb_one,
             Xb_one_aug=out_Xb_one_aug,
             stateDim=state_dim,
             Xb_one_coords=Xb_one_coords,
             state_info=X.trunc_state_info)

    # NEW: write out (to prior_sampling_info.txt file) the info on prior sampling
    # i.e. the list of indices (i.e. years for annual recons) randomly chosen
    # from available model states
    with open(workdir + '/prior_sampling_info.txt', 'w') as prior_samples:
        # include a header
        prior_samples.write(
            '# List of indices (i.e. years if annual recon) randomly'
            ' sampled from model output to form the prior (ensemble):\n')
        if core.seed is not None:
            prior_samples.write('# with seed=%d \n' % (core.seed))
        else:
            prior_samples.write('# with seed=None \n')
        # write out the list
        prior_samples.write(str(X.prior_sample_indices) + '\n')

    # NEW: Dump prior state vector (Xb_one) to file, one file per state variable
    print(
        '\n ---------- saving Xb_one for each variable to separate file -----------\n'
    )
    for var in list(X.trunc_state_info.keys()):
        print(var)
        # now need to pluck off the index region that goes with var
        ibeg = X.trunc_state_info[var]['pos'][0]
        iend = X.trunc_state_info[var]['pos'][1]

        if X.trunc_state_info[var]['vartype'] == '2D:horizontal':
            # if no truncation: lat_new and lon_new are not defined...rather get actual lats/lons info from state vector
            ind_lon = X.trunc_state_info[var]['spacecoords'].index('lon')
            ind_lat = X.trunc_state_info[var]['spacecoords'].index('lat')

            nlon_new = X.trunc_state_info[var]['spacedims'][ind_lon]
            nlat_new = X.trunc_state_info[var]['spacedims'][ind_lat]

            lat_sv = Xb_one_coords[ibeg:iend + 1, ind_lat]
            lon_sv = Xb_one_coords[ibeg:iend + 1, ind_lon]

            lat_new = np.unique(lat_sv)
            lon_new = np.unique(lon_sv)

            Xb_var = np.reshape(out_Xb_one[ibeg:iend + 1, :],
                                (nlat_new, nlon_new, nens))

            filen = workdir + '/' + 'Xb_one' + '_' + var
            np.savez(filen,
                     Xb_var=Xb_var,
                     nlat=nlat_new,
                     nlon=nlon_new,
                     nens=nens,
                     lat=lat_new,
                     lon=lon_new)

        else:
            print((
                'Warning: Only saving 2D:horizontal variable. Variable (%s) is of another type'
                % (var)))
            # TODO: Code mods above are a quick fix. Should allow saving other types of variables here!
    # END new file save

    # ==========================================================================
    # Loop over all years & proxies and perform assimilation -------------------
    # ==========================================================================

    # Array containing the global and hemispheric-mean state
    # (for diagnostic purposes)
    # Now doing surface air temperature only (var = tas_sfc_Amon)!

    # TODO: AP temporary fix for no TAS in state
    tas_var = [
        item for item in cfg.prior.state_variables.keys() if 'tas_sfc_' in item
    ]
    if tas_var:
        gmt_save = np.zeros([assim_proxy_count + 1, ntimes])
        nhmt_save = np.zeros([assim_proxy_count + 1, ntimes])
        shmt_save = np.zeros([assim_proxy_count + 1, ntimes])
        # get state vector indices where to find surface air temperature
        ibeg_tas = X.trunc_state_info[tas_var[0]]['pos'][0]
        iend_tas = X.trunc_state_info[tas_var[0]]['pos'][1]
        xbm = np.mean(Xb_one[ibeg_tas:iend_tas + 1, :],
                      axis=1)  # ensemble-mean

        nlat_new = X.trunc_state_info[tas_var[0]]['spacedims'][0]
        nlon_new = X.trunc_state_info[tas_var[0]]['spacedims'][1]
        xbm_lalo = xbm.reshape(nlat_new, nlon_new)
        lat_coords = Xb_one_coords[ibeg_tas:iend_tas + 1, 0]
        lat_lalo = lat_coords.reshape(nlat_new, nlon_new)

        [gmt, nhmt,
         shmt] = LMR_utils.global_hemispheric_means(xbm_lalo, lat_lalo[:, 0])

        # First row is prior GMT
        gmt_save[0, :] = gmt
        nhmt_save[0, :] = nhmt
        shmt_save[0, :] = shmt
        # Prior for first proxy assimilated
        gmt_save[1, :] = gmt
        nhmt_save[1, :] = nhmt
        shmt_save[1, :] = shmt

    # -------------------------------------
    # Loop over years of the reconstruction
    # -------------------------------------
    lasttime = time()
    for yr_idx, t in enumerate(
            range(recon_period[0], recon_period[1] + 1, recon_timescale)):

        start_yr = int(t - recon_timescale // 2)
        end_yr = int(t + recon_timescale // 2)

        if verbose > 0:
            if start_yr == end_yr:
                time_str = 'year: ' + str(t)
            else:
                time_str = 'time period (yrs): [' + str(start_yr) + ',' + str(
                    end_yr) + ']'
            print('\n==== Working on ' + time_str)

        ypad = '{:07d}'.format(t)
        filen = join(workdir, 'year' + ypad + '.npy')
        if prior_check.exists(filen) and not core.clean_start:
            if verbose > 2:
                print('prior file exists: ' + filen)
            Xb = np.load(filen)
        else:
            if verbose > 2:
                print('Prior file ', filen, ' does not exist...')
            Xb = Xb_one_aug.copy()

        # -----------------
        # Loop over proxies
        # -----------------
        for proxy_idx, Y in enumerate(prox_manager.sites_assim_proxy_objs()):
            # Check if we have proxy ob for current time interval
            try:
                if recon_timescale > 1:
                    # exclude lower bound to not include same obs in adjacent time intervals
                    Yvals = Y.values[(Y.values.index > start_yr)
                                     & (Y.values.index <= end_yr)]
                else:
                    Yvals = Y.values[(Y.values.index >= start_yr)
                                     & (Y.values.index <= end_yr)]
                if Yvals.empty: raise KeyError()
                nYobs = len(Yvals)
                Yobs = Yvals.mean()

            except KeyError:
                # Make sure GMT spot filled from previous proxy
                # TODO: AP temporary fix for no TAS in state
                if tas_var:
                    gmt_save[proxy_idx + 1, yr_idx] = gmt_save[proxy_idx,
                                                               yr_idx]
                continue  # skip to next loop iteration (proxy record)

            if verbose > 1:
                print('--------------- Processing proxy: ' + Y.id)
            if verbose > 2:
                print('Site:', Y.id, ':', Y.type)
                print(' latitude, longitude: ' + str(Y.lat), str(Y.lon))

            loc = None
            if loc_rad is not None:
                if verbose > 2:
                    print('...computing localization...')
                loc = cov_localization(loc_rad, Y, X, Xb_one_coords)

            # Get Ye values for current proxy
            if online:
                # Calculate from latest updated prior
                Ye = Y.psm(Xb)
            else:
                # Extract latest updated Ye from appended state vector
                Ye = Xb[proxy_idx - (assim_proxy_count + eval_proxy_count)]

            # Define the ob error variance
            ob_err = Y.psm_obj.R

            # if ob is an average of several values, adjust its ob error variance
            if nYobs > 1: ob_err = ob_err / float(nYobs)

            # ------------------------------------------------------------------
            # Do the update (assimilation) -------------------------------------
            # ------------------------------------------------------------------
            if verbose > 2:
                print(('updating time: ' + str(t) + ' proxy value : ' +
                       str(Yobs) + ' (nobs=' + str(nYobs) +
                       ') | mean prior proxy estimate: ' + str(Ye.mean())))

            # Update the state
            Xa = enkf_update_array(Xb, Yobs, Ye, ob_err, loc, inflate)

            # TODO: AP Temporary fix for no TAS in state
            if tas_var:
                xam = Xa.mean(axis=1)
                xam_lalo = xam[ibeg_tas:(iend_tas + 1)].reshape(
                    nlat_new, nlon_new)
                [gmt, nhmt, shmt] = \
                    LMR_utils.global_hemispheric_means(xam_lalo, lat_lalo[:, 0])
                gmt_save[proxy_idx + 1, yr_idx] = gmt
                nhmt_save[proxy_idx + 1, yr_idx] = nhmt
                shmt_save[proxy_idx + 1, yr_idx] = shmt

            # add check to detect whether recon has blown-up (and stop it if it has)
            xbvar = Xb.var(axis=1, ddof=1)
            xavar = Xa.var(ddof=1, axis=1)
            vardiff = xavar - xbvar
            if (not np.isfinite(np.min(vardiff))) or (not np.isfinite(
                    np.max(vardiff))):
                print('ERROR: Reconstruction has blown-up. Exiting!')
                raise SystemExit(1)

            # check the variance change for sign
            thistime = time()
            if verbose > 2:
                #xbvar = Xb.var(axis=1, ddof=1)
                #xavar = Xa.var(ddof=1, axis=1)
                #vardiff = xavar - xbvar
                print('min/max change in variance: (' + str(np.min(vardiff)) +
                      ',' + str(np.max(vardiff)) + ')')
                print('update took ' + str(thistime - lasttime) + 'seconds')
            lasttime = thistime

            # Put analysis Xa in Xb for next assimilation
            Xb = Xa

            # End of loop on proxies

        # Dump Xa to file (use Xb in case no proxies assimilated for
        # current year)
        try:
            np.save(filen, Xb.filled())
        except AttributeError as e:
            np.save(filen, Xb)

    end_time = time() - begin_time

    # End of loop on years
    if verbose > 0:
        print('')
        print('=====================================================')
        print('Reconstruction completed in ' + str(end_time / 60.0) + ' mins')
        print('=====================================================')

    # 3 July 2015: compute and save the GMT,NHMT,SHMT for the full ensemble
    # need to fix this so that every year is counted
    # TODO: AP temporary fix for no TAS
    if tas_var:
        gmt_ensemble = np.zeros([ntimes, nens])
        nhmt_ensemble = np.zeros([ntimes, nens])
        shmt_ensemble = np.zeros([ntimes, nens])
        for iyr, yr in enumerate(
                range(recon_period[0], recon_period[1] + 1, recon_timescale)):
            filen = join(workdir, 'year{:07d}'.format(yr))
            Xa = np.load(filen + '.npy')
            for k in range(nens):
                xam_lalo = Xa[ibeg_tas:iend_tas + 1,
                              k].reshape(nlat_new, nlon_new)
                [gmt, nhmt, shmt] = \
                    LMR_utils.global_hemispheric_means(xam_lalo, lat_lalo[:, 0])
                gmt_ensemble[iyr, k] = gmt
                nhmt_ensemble[iyr, k] = nhmt
                shmt_ensemble[iyr, k] = shmt

        filen = join(workdir, 'gmt_ensemble')
        np.savez(filen,
                 gmt_ensemble=gmt_ensemble,
                 nhmt_ensemble=nhmt_ensemble,
                 shmt_ensemble=shmt_ensemble,
                 recon_times=recon_times)

        # save global mean temperature history and the proxies assimilated
        print('saving global mean temperature update history and '
              'assimilated proxies...')
        filen = join(workdir, 'gmt')
        np.savez(filen,
                 gmt_save=gmt_save,
                 nhmt_save=nhmt_save,
                 shmt_save=shmt_save,
                 recon_times=recon_times,
                 apcount=assim_proxy_count,
                 tpcount=assim_proxy_count)

    # TODO: (AP) The assim/eval lists of lists instead of lists of 1-item dicts
    assimilated_proxies = [{
        p.type: [p.id, p.lat, p.lon, p.time, p.psm_obj.sensitivity]
    } for p in prox_manager.sites_assim_proxy_objs()]
    filen = join(workdir, 'assimilated_proxies')
    np.save(filen, assimilated_proxies)

    # collecting info on non-assimilated proxies and save to file
    nonassimilated_proxies = [{
        p.type: [p.id, p.lat, p.lon, p.time, p.psm_obj.sensitivity]
    } for p in prox_manager.sites_eval_proxy_objs()]
    if nonassimilated_proxies:
        filen = join(workdir, 'nonassimilated_proxies')
        np.save(filen, nonassimilated_proxies)

    exp_end_time = time() - begin_time
    if verbose > 0:
        print('')
        print('=====================================================')
        print('Experiment completed in ' + str(exp_end_time / 60.0) + ' mins')
        print('=====================================================')

    # TODO: best method for Ye saving?
    return (prox_manager.sites_assim_proxy_objs(),
            prox_manager.sites_eval_proxy_objs())
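
The core update call above, enkf_update_array(Xb, Yobs, Ye, ob_err, loc, inflate), is defined elsewhere in LMR. A sketch of a standard serial ensemble square-root (EnSRF) update consistent with that call signature, assuming Whitaker-Hamill style perturbation scaling (the actual LMR routine may differ in details):

import numpy as np

def enkf_update_array_sketch(Xb, obvalue, Ye, ob_err, loc=None, inflate=None):
    nens = Xb.shape[1]
    xbm = Xb.mean(axis=1)
    Xbp = Xb - xbm[:, None]             # ensemble perturbations
    ye_m = Ye.mean()
    ye_p = Ye - ye_m

    if inflate is not None:             # multiplicative covariance inflation
        Xbp = inflate * Xbp

    var_ye = ye_p @ ye_p / (nens - 1)   # prior variance of the ob estimate
    cov_xy = Xbp @ ye_p / (nens - 1)    # state / ob-estimate covariance
    if loc is not None:
        cov_xy = cov_xy * loc           # apply localization weights

    kdenom = var_ye + ob_err
    K = cov_xy / kdenom                 # Kalman gain
    xam = xbm + K * (obvalue - ye_m)    # update the ensemble mean

    # square-root factor so analysis spread is not systematically too small
    beta = 1.0 / (1.0 + np.sqrt(ob_err / kdenom))
    Xap = Xbp - beta * np.outer(K, ye_p)
    return xam[:, None] + Xap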
Example 23
def load_analyses(cfg,
                  full_field=False,
                  lmr_gm=None,
                  lmr_time=None,
                  satime=1900,
                  eatime=1999,
                  svtime=1880,
                  evtime=1999):
    """Need to revise to do two things: 1) GMT for a verification interval
    and 2) send back the full data from the analyses. Add a flag and switches"""

    # full_field: flag for sending back full fields instead of global means
    # --- define a reference time period for anomalies (e.g., 20th century)
    # satime: starting year of the common reference time period
    # eatime: ending year of the common reference time period
    # --- define the time period for verification
    # svtime: starting year of the verification time period
    # evtime: ending year of the verification time period

    # check if a global-mean file has been written previously, and if yes, use it
    load = False
    if not full_field:
        try:
            filen = 'analyses' + '_' + str(satime) + '_' + str(
                eatime) + '_' + str(svtime) + '_' + str(evtime) + '.npz'
            npzfile = np.load(filen)
            print(filen + ' exists...loading it')
            load = True
            analyses = npzfile['analyses']
            analysis_data = analyses[0]
            analysis_time = analyses[1]
            analysis_lat = {}
            analysis_lon = {}
        except Exception:
            if load:
                print('analyses.npz exists, but error reading the file!!!')
            load = False

    if not load:

        # ==========================================
        # load GISTEMP, HadCRU, BerkeleyEarth, MLOST
        # ==========================================
        from load_gridded_data import read_gridded_data_GISTEMP
        from load_gridded_data import read_gridded_data_HadCRUT
        from load_gridded_data import read_gridded_data_BerkeleyEarth
        from load_gridded_data import read_gridded_data_MLOST
        import csv

        analysis_data = {}
        analysis_time = {}
        analysis_lat = {}
        analysis_lon = {}

        # location of the datasets from the configuration file
        datadir_calib = cfg.psm.linear.datadir_calib

        # load GISTEMP
        print('loading GISTEMP...')
        datafile_calib = 'gistemp1200_ERSSTv4.nc'
        calib_vars = ['Tsfc']
        [gtime, GIS_lat, GIS_lon,
         GIS_anomaly] = read_gridded_data_GISTEMP(datadir_calib,
                                                  datafile_calib, calib_vars,
                                                  'annual', [satime, eatime])
        GIS_time = np.array([d.year for d in gtime])
        # fix longitude shift
        nlon_GIS = len(GIS_lon)
        nlat_GIS = len(GIS_lat)
        GIS_lon = np.roll(GIS_lon, shift=nlon_GIS // 2, axis=0)
        GIS_anomaly = np.roll(GIS_anomaly, shift=nlon_GIS // 2, axis=2)
        analysis_data['GIS'] = GIS_anomaly
        analysis_time['GIS'] = GIS_time
        analysis_lat['GIS'] = GIS_lat
        analysis_lon['GIS'] = GIS_lon

        # load HadCRUT
        print('loading HadCRUT...')
        datafile_calib = 'HadCRUT.4.3.0.0.median.nc'
        calib_vars = ['Tsfc']
        [ctime, CRU_lat, CRU_lon,
         CRU_anomaly] = read_gridded_data_HadCRUT(datadir_calib,
                                                  datafile_calib, calib_vars,
                                                  'annual', [satime, eatime])
        CRU_time = np.array([d.year for d in ctime])
        # fix longitude shift
        nlon_CRU = len(CRU_lon)
        nlat_CRU = len(CRU_lat)
        CRU_lon = np.roll(CRU_lon, shift=nlon_CRU // 2, axis=0)
        CRU_anomaly = np.roll(CRU_anomaly, shift=nlon_CRU // 2, axis=2)
        analysis_data['CRU'] = CRU_anomaly
        analysis_time['CRU'] = CRU_time
        analysis_lat['CRU'] = CRU_lat
        analysis_lon['CRU'] = CRU_lon

        # load BerkeleyEarth
        print('loading BEST...')
        datafile_calib = 'Land_and_Ocean_LatLong1.nc'
        calib_vars = ['Tsfc']
        [btime, BE_lat, BE_lon, BE_anomaly
         ] = read_gridded_data_BerkeleyEarth(datadir_calib,
                                             datafile_calib,
                                             calib_vars,
                                             'annual',
                                             ref_period=[satime, eatime])
        BE_time = np.array([d.year for d in btime])
        # fix longitude shift
        nlon_BE = BE_lon.shape[0]
        BE_lon = np.roll(BE_lon, shift=nlon_BE // 2, axis=0)
        BE_anomaly = np.roll(BE_anomaly, shift=nlon_BE // 2, axis=2)
        analysis_data['BE'] = BE_anomaly
        analysis_time['BE'] = BE_time
        analysis_lat['BE'] = BE_lat
        analysis_lon['BE'] = BE_lon

        # load NOAA MLOST
        # Note: Product is anomalies w.r.t. 1961-1990 mean
        print('loading MLOST...')
        #path = datadir_calib + '/NOAA/'
        datafile_calib = 'MLOST_air.mon.anom_V3.5.4.nc'
        calib_vars = ['Tsfc']
        [mtime, MLOST_lat, MLOST_lon,
         MLOST_anomaly] = read_gridded_data_MLOST(datadir_calib,
                                                  datafile_calib,
                                                  calib_vars,
                                                  outfreq='annual',
                                                  ref_period=[satime, eatime])
        MLOST_time = np.array([d.year for d in mtime])
        nlat_MLOST = len(MLOST_lat)
        nlon_MLOST = len(MLOST_lon)
        analysis_data['MLOST'] = MLOST_anomaly
        analysis_time['MLOST'] = MLOST_time
        analysis_lat['MLOST'] = MLOST_lat
        analysis_lon['MLOST'] = MLOST_lon

    if full_field:
        print('returning spatial fields...')
        return analysis_data, analysis_time, analysis_lat, analysis_lon

    else:

        if not load:
            [gis_gm, _,
             _] = LMR_utils.global_hemispheric_means(GIS_anomaly, GIS_lat)
            [cru_gm, _,
             _] = LMR_utils.global_hemispheric_means(CRU_anomaly, CRU_lat)
            [be_gm, _,
             _] = LMR_utils.global_hemispheric_means(BE_anomaly, BE_lat)
            [mlost_gm, _,
             _] = LMR_utils.global_hemispheric_means(MLOST_anomaly, MLOST_lat)

            # set common reference period to define anomalies
            smatch, ematch = LMR_utils.find_date_indices(
                GIS_time, satime, eatime)
            gis_gm = gis_gm - np.mean(gis_gm[smatch:ematch])
            smatch, ematch = LMR_utils.find_date_indices(
                CRU_time, satime, eatime)
            cru_gm = cru_gm - np.mean(cru_gm[smatch:ematch])
            smatch, ematch = LMR_utils.find_date_indices(
                BE_time, satime, eatime)
            be_gm = be_gm - np.mean(be_gm[smatch:ematch])
            smatch, ematch = LMR_utils.find_date_indices(
                MLOST_time, satime, eatime)
            mlost_gm = mlost_gm - np.mean(mlost_gm[smatch:ematch])

            # now pull out the time window for the verification time period
            gis_smatch, gis_ematch = LMR_utils.find_date_indices(
                GIS_time, svtime, evtime)
            cru_smatch, cru_ematch = LMR_utils.find_date_indices(
                CRU_time, svtime, evtime)
            be_smatch, be_ematch = LMR_utils.find_date_indices(
                BE_time, svtime, evtime)
            mlost_smatch, mlost_ematch = LMR_utils.find_date_indices(
                MLOST_time, svtime, evtime)
            # "consensus" global mean: average all non-LMR (obs-based) values
            consensus_gmt = np.array([
                gis_gm[gis_smatch:gis_ematch], cru_gm[cru_smatch:cru_ematch],
                be_gm[be_smatch:be_ematch], mlost_gm[mlost_smatch:mlost_ematch]
            ])
            con_gm = np.mean(consensus_gmt, axis=0)
            CON_time = np.arange(svtime, evtime)

            analysis_data['GIS'] = gis_gm[gis_smatch:gis_ematch]
            analysis_data['CRU'] = cru_gm[cru_smatch:cru_ematch]
            analysis_data['BE'] = be_gm[be_smatch:be_ematch]
            analysis_data['MLOST'] = mlost_gm[mlost_smatch:mlost_ematch]
            analysis_data['CON'] = con_gm
            # for global means there is a single common time series and no lat/lon
            analysis_time = {'time': CON_time}
            analysis_lat = {}
            analysis_lon = {}
            # save file for use next time
            analyses = [analysis_data, analysis_time]
            readme = ('this file contains gmt for analysis products with '
                      'anomalies relative to a reference time period')
            filen = 'analyses_{}_{}_{}_{}.npz'.format(satime, eatime,
                                                      svtime, evtime)
            print('writing to: ' + filen)
            np.savez(filen, analyses=analyses, readme=readme)

        # LMR GMT was passed to this routine for inclusion in the dictionary
        if lmr_gm is not None and np.any(lmr_gm):
            lmr_smatch, lmr_ematch = LMR_utils.find_date_indices(
                lmr_time, svtime, evtime)
            analysis_data['LMR'] = lmr_gm[lmr_smatch:lmr_ematch]

    # lat and lon carry no information for global means, but are returned
    # so the signature matches the full-field case
    print('returning global means...')
    return analysis_data, analysis_time, analysis_lat, analysis_lon
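The routine above centers each product on a common reference period and then averages the products into the consensus series. A minimal standalone sketch of that two-step construction, using boolean masks instead of the source's find_date_indices helper (names here are illustrative, not the source's API):

import numpy as np

def center_and_average(series_list, times_list, ref_range, verif_range):
    # Center each global-mean series on the reference period, then slice the
    # verification window and average across products.
    sref, eref = ref_range
    sver, ever = verif_range
    centered = []
    for gm, t in zip(series_list, times_list):
        ref_mask = (t >= sref) & (t <= eref)
        ver_mask = (t >= sver) & (t <= ever)
        centered.append(gm[ver_mask] - np.mean(gm[ref_mask]))
    return np.mean(np.array(centered), axis=0)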
Esempio n. 24
0
def test_averaging_period_unique_warning():

    with pytest.warns(UserWarning):
        res = Utils.get_averaging_period([1, 8, 3, 1, 8], 12)
        assert res == (0, 2, 7)
Esempio n. 25
0
import glob
import os
from os.path import exists, join

import numpy as np

# note: utils2 is a project-local helper module; its import is omitted in
# the scraped source


def compile_ens_var(parent_dir,
                    out_dir,
                    out_fname,
                    a_d_vals=None,
                    r_iters=None,
                    ignore_npz=False):

    if exists(join(out_dir, out_fname)) and not ignore_npz:
        print('Loading pre-compiled ensemble variance metrics.')
        return np.load(join(out_dir, out_fname))

    # Get reconstruction iteration directory
    if r_iters is not None:
        print('Joining on r iters ', r_iters)
        parent_iters = [join(parent_dir, 'r{:d}'.format(r)) for r in r_iters]
    else:
        parent_iters = glob.glob(join(parent_dir, 'r*'))

    ens_var = None
    gm_ens_var = None
    pri_ens_var = None
    pri_gm_ens_var = None
    lats = None
    lons = None
    times = None

    for i, parent in enumerate(parent_iters):

        print('Compiling iteration {:d}/{:d}'.format(i + 1, len(parent_iters)))
        # Directories for each parameter value
        if a_d_vals is not None:
            ad_dir = 'a{:1.2g}_d{:1.2f}'
            ad_dir2 = 'a{:1.1f}_d{:1.2f}'
            param_iters = []
            for a, d in a_d_vals:
                curr_ad = ad_dir.format(a, d)
                if a == 1.0 or a == 0.0:
                    if not exists(join(parent, curr_ad)):
                        curr_ad = ad_dir2.format(a, d)
                param_iters.append(join(parent, curr_ad))
        else:
            param_iters = [parent]

        for j, f in enumerate(param_iters):
            try:
                # Load analysis ensemble variance
                analy_var = np.load(
                    join(f, 'ensemble_variance_tas_sfc_Amon.npz'))
                if times is None:
                    times = analy_var['years']
                    lats = analy_var['lat']
                    lons = analy_var['lon']
                    var_shape = [len(parent_iters), len(param_iters)] + \
                                list(analy_var['xav'].shape)
                    ens_var = np.full(var_shape, np.nan)
                    gm_ens_var = np.full(var_shape[:3], np.nan)
                    pri_ens_var = np.full_like(ens_var, np.nan)
                    pri_gm_ens_var = np.full_like(gm_ens_var, np.nan)

                ens_var[i, j] = analy_var['xav']
                gm_ens_var[i, j] = utils2.global_mean2(ens_var[i, j], lats)

                prior_var = np.load(join(f, 'prior_ensvar_tas_sfc_Amon.npz'))
                pri_ens_var[i, j] = prior_var['xbv']
                pri_gm_ens_var[i, j] = utils2.global_mean2(
                    pri_ens_var[i, j], lats)

            except IOError as e:
                print(e)

    # average over iterations with nanmean so runs that failed to load
    # (left as NaN above) are skipped rather than poisoning the mean
    ens_var = np.nanmean(ens_var, axis=0).astype(np.float32)
    gm_ens_var = np.nanmean(gm_ens_var, axis=0)
    pri_ens_var = np.nanmean(pri_ens_var, axis=0).astype(np.float32)
    pri_gm_ens_var = np.nanmean(pri_gm_ens_var, axis=0)

    res_dict = {
        'times': times,
        'lats': lats,
        'lons': lons,
        'ens_var': ens_var.squeeze(),
        'gm_ens_var': gm_ens_var.squeeze(),
        'pri_ens_var': pri_ens_var.squeeze(),
        'pri_gm_ens_var': pri_gm_ens_var.squeeze()
    }

    if not exists(out_dir):
        os.makedirs(out_dir)
    np.savez(join(out_dir, out_fname), **res_dict)

    return res_dict
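A hypothetical call to the function above; every path and parameter value here is illustrative only:

res = compile_ens_var(parent_dir='/data/lmr/recon_exp1',
                      out_dir='/data/lmr/compiled',
                      out_fname='ens_var_tas.npz',
                      a_d_vals=[(0.5, 0.90), (1.0, 0.90)],
                      r_iters=[0, 1, 2])
# after averaging over the r* iterations, gm_ens_var is roughly
# (n_param_sets, n_times), squeezed of singleton axes
print(res['gm_ens_var'].shape)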
Esempio n. 26
0
def test_calc_latlon_bnd_monotonic():
    test_data = np.linspace(0, 10, 11)
    with pytest.raises(ValueError):
        _ = Utils.calculate_latlon_bnds(test_data[::-1], test_data)
    with pytest.raises(ValueError):
        _ = Utils.calculate_latlon_bnds(test_data, test_data[::-1])
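A minimal sketch of the monotonicity guard this test expects; the actual check inside Utils.calculate_latlon_bnds may differ:

import numpy as np

def check_strictly_increasing(coord, name='coordinate'):
    # Raise, as the test requires, when a lat/lon axis is not increasing.
    if np.any(np.diff(coord) <= 0):
        raise ValueError('{} values must be strictly increasing'.format(name))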
Esempio n. 27
0
def test_averaging_period_sorting():
    res = Utils.get_averaging_period((-12, -11, -10, 3, 2, 1), 12)
    assert res == (9, 10, 11, 12, 13, 14)
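Taken together, the averaging-period tests pin down a small contract: one-based input is shifted to zero-based, negative indices select elements of the previous year (shifting the current year's elements after it), duplicates are dropped with a warning, and the result is sorted. A sketch that satisfies these tests, not the library's actual implementation:

import warnings

def normalize_averaging_period(indices, nelem_in_yr=12, is_zero_based=False):
    # Sketch only: one reading of the behavior the tests above pin down.
    has_negative = any(i < 0 for i in indices)
    positions = []
    for i in indices:
        if not is_zero_based:
            if i == 0:
                raise ValueError('0 is invalid for one-based indexing')
            month = abs(i) - 1
        else:
            month = abs(i)
        if has_negative and i >= 0:
            # previous year occupies 0..nelem_in_yr-1; current year follows
            month += nelem_in_yr
        positions.append(month)
    unique = sorted(set(positions))
    if len(unique) != len(positions):
        warnings.warn('Duplicate averaging-period elements were removed.',
                      UserWarning)
    return tuple(unique)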
Esempio n. 28
0
# (snippet begins mid-call: these are the trailing keyword arguments of a
# file-writing call, truncated by the excerpt)
         stateDim=stateDim,
         lat=lat_new,
         lon=lon_new,
         nlat=nlat_new,
         nlon=nlon_new)
gmt = np.zeros([ntims])
# enumerate replaces the manual k = -1 / k = k + 1 counter
for k, t in enumerate(range(startim, startim + ntims)):
    # make up some data with the right shape as in the LMR code (Ndof,Nens)
    Xa = np.random.randn(Ndof, Nens)
    xam = np.mean(Xa, axis=1)
    print('Xa shape: ' + str(np.shape(Xa)))

    # Dump Xa to file (to be used as prior for next assimilation)
    ypad = LMR_utils.year_fix(t)
    filen = workdir + '/' + 'year' + ypad
    np.save(filen, Xa)

    # compute global mean for check later
    xam_lalo = np.reshape(xam[0:stateDim], (nlat_new, nlon_new))
    [gmt[k], _,
     _] = LMR_utils.global_hemispheric_means(xam_lalo, lat_new[:, 0])

# generate the ensemble-mean files as in LMR_wrapper.py
LMR_utils.ensemble_mean(workdir)

#
# now "post-process" the file just written as in verify_grid_testing.py
#
Esempio n. 29
0
MCiters = range(iter_range[0], iter_range[1]+1)
param_iterables = [MCiters]

# get other parameters to sweep over in the reconstruction
param_search = LMR_config.wrapper.param_search
if param_search is not None:
    # sort them by parameter name and combine into a list of iterables
    sort_params = list(param_search.keys())
    sort_params.sort(key=lambda x: x.split('.')[-1])
    param_values = [param_search[key] for key in sort_params]
    param_iterables = param_values + [MCiters]

for iter_and_params in itertools.product(*param_iterables):

    iter_num = iter_and_params[-1]
    cfg_dict = Utils.param_cfg_update('core.curr_iter', iter_num)

    if LMR_config.wrapper.multi_seed is not None:
        try:
            curr_seed = LMR_config.wrapper.multi_seed[iter_num]
            cfg_dict = Utils.param_cfg_update('core.seed', curr_seed,
                                              cfg_dict=cfg_dict)
            print('Setting current iteration seed: {}'.format(curr_seed))
        except IndexError:
            print('ERROR: multi_seed activated but current MC iteration out of'
                  ' range for list of seed values provided in config.')
            raise SystemExit(1)
        
    itr_str = 'r{:d}'.format(iter_num)
    # If parameter space search is being performed then set the current
    # search space values and create a special sub-directory
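For reference, the itertools.product call above expands a parameter search into every (value..., iter_num) combination with the MC iteration number last; a toy expansion (parameter names and values are hypothetical):

import itertools

param_iterables = [[0.5, 0.8],    # e.g. values for 'core.hybrid_a'
                   range(0, 2)]   # MCiters for iter_range = (0, 1)
for iter_and_params in itertools.product(*param_iterables):
    print(iter_and_params)
# -> (0.5, 0), (0.5, 1), (0.8, 0), (0.8, 1)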
Esempio n. 30
0
def prior_regrid(cfg, X, Xb_one, verbose=False):

    # scraped from LMR_utils.py on 20 April 2018 and modified for local use

    # this block sets variables for compatibility with the original code
    regrid_method = cfg.prior.regrid_method
    # regrid_resolution is used below but was undefined in the scraped code;
    # the attribute name is assumed from its usage in LMR configs
    regrid_resolution = getattr(cfg.prior, 'regrid_resolution', None)
    prior = cfg.prior
    nens = cfg.core.nens
    Xb_one_full = X.ens

    # Declare dictionary w/ info on content of truncated state vector
    new_state_info = {}

    # Transform every 2D state variable, one at a time
    Nx = 0
    for var in list(X.full_state_info.keys()):
        dct = {}

        dct['vartype'] = X.full_state_info[var]['vartype']

        # variable indices in full state vector
        ibeg_full = X.full_state_info[var]['pos'][0]
        iend_full = X.full_state_info[var]['pos'][1]
        # extract array corresponding to state variable "var"
        var_array_full = Xb_one_full[ibeg_full:iend_full + 1, :]
        # corresponding spatial coordinates
        coords_array_full = X.coords[ibeg_full:iend_full + 1, :]

        # Are we truncating this variable? (i.e. is it a 2D lat/lon variable?)

        if X.full_state_info[var]['vartype'] == '2D:horizontal':
            print(var, ' : 2D lat/lon variable, truncating this variable')
            # lat/lon column indices in X.coords
            ind_lon = X.full_state_info[var]['spacecoords'].index('lon')
            ind_lat = X.full_state_info[var]['spacecoords'].index('lat')
            nlat = X.full_state_info[var]['spacedims'][ind_lat]
            nlon = X.full_state_info[var]['spacedims'][ind_lon]

            # calculate the truncated field
            if regrid_method == 'simple':
                [var_array_new, lat_new, lon_new] = \
                    LMR_utils.regrid_simple(nens, var_array_full,
                                            coords_array_full, ind_lat,
                                            ind_lon, regrid_resolution)
            elif regrid_method == 'spherical_harmonics':
                [var_array_new, lat_new, lon_new] = \
                    LMR_utils.regrid_sphere(nlat, nlon, nens, var_array_full,
                                            regrid_resolution)
            elif regrid_method == 'esmpy':
                target_grid = prior.esmpy_grid_def

                lat_2d = coords_array_full[:, ind_lat].reshape(nlat, nlon)
                lon_2d = coords_array_full[:, ind_lon].reshape(nlat, nlon)

                [var_array_new, lat_new, lon_new
                 ] = LMR_utils.regrid_esmpy(target_grid['nlat'],
                                            target_grid['nlon'],
                                            nens,
                                            var_array_full,
                                            lat_2d,
                                            lon_2d,
                                            nlat,
                                            nlon,
                                            method=prior.esmpy_interp_method)
            else:
                raise SystemExit('Exiting! Unrecognized regridding method: '
                                 + str(regrid_method))

            nlat_new = np.shape(lat_new)[0]
            nlon_new = np.shape(lat_new)[1]

            # gate diagnostics on the otherwise-unused verbose flag
            if verbose:
                print(('=> Full array:      ' + str(np.min(var_array_full)) +
                       ' ' + str(np.max(var_array_full)) + ' ' +
                       str(np.mean(var_array_full)) + ' ' +
                       str(np.std(var_array_full))))
                print(('=> Truncated array: ' + str(np.min(var_array_new)) +
                       ' ' + str(np.max(var_array_new)) + ' ' +
                       str(np.mean(var_array_new)) + ' ' +
                       str(np.std(var_array_new))))

            # corresponding indices in truncated state vector
            ibeg_new = Nx
            iend_new = Nx + (nlat_new * nlon_new) - 1
            # for new state info dictionary
            dct['pos'] = (ibeg_new, iend_new)
            dct['spacecoords'] = X.full_state_info[var]['spacecoords']
            dct['spacedims'] = (nlat_new, nlon_new)
            # updated dimension
            new_dims = (nlat_new * nlon_new)

            # array with new spatial coords
            coords_array_new = np.zeros(shape=[new_dims, 2])
            coords_array_new[:, 0] = lat_new.flatten()
            coords_array_new[:, 1] = lon_new.flatten()

        else:
            print(var,
                  ': not truncating this variable: no changes from full state')

            var_array_new = var_array_full
            coords_array_new = coords_array_full
            # updated dimension
            new_dims = var_array_new.shape[0]
            ibeg_new = Nx
            iend_new = Nx + new_dims - 1
            dct['pos'] = (ibeg_new, iend_new)
            dct['spacecoords'] = X.full_state_info[var]['spacecoords']
            dct['spacedims'] = X.full_state_info[var]['spacedims']

        # fill in new state info dictionary
        new_state_info[var] = dct

        # if 1st time in loop over state variables, create Xb_one array as copy
        # of var_array_new
        if Nx == 0:
            Xb_one = np.copy(var_array_new)
            Xb_one_coords = np.copy(coords_array_new)
        else:  # if not 1st time, append to existing array
            Xb_one = np.append(Xb_one, var_array_new, axis=0)
            Xb_one_coords = np.append(Xb_one_coords, coords_array_new, axis=0)

        # making sure Xb_one has proper mask, if it contains
        # at least one invalid value
        if np.isnan(Xb_one).any():
            Xb_one = np.ma.masked_invalid(Xb_one)
            np.ma.set_fill_value(Xb_one, np.nan)

        # updating dimension of new state vector
        Nx = Nx + new_dims

        # LMR_lite specific mod: update lat,lon information in X for later use
        X.prior_dict[var]['lat'] = lat_new
        X.prior_dict[var]['lon'] = lon_new

        # end loop over vars

    X.trunc_state_info = new_state_info

    return X, Xb_one
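Hypothetical use of the bookkeeping that prior_regrid records: each variable's (start, end) position and spatial dims let a gridded field be pulled back out of the flat state vector. The variable name below is illustrative only:

info = X.trunc_state_info['tas_sfc_Amon']   # illustrative variable name
ibeg, iend = info['pos']
nlat_new, nlon_new = info['spacedims']
# ensemble member 0 of this variable, reshaped to the regridded lat/lon grid
field = Xb_one[ibeg:iend + 1, 0].reshape(nlat_new, nlon_new)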