def test_averaging_period_negative_indices():
    """Check get_averaging_period results for inputs containing negative indices."""
    # Explicitly zero-based element indices.
    zero_based_period = Utils.get_averaging_period(
        (-9, -11, 0, 2, 4), nelem_in_yr=12, is_zero_based=True)
    assert zero_based_period == (9, 11, 12, 14, 16)

    # Same kind of input, relying on the default indexing base.
    default_base_period = Utils.get_averaging_period(
        (-10, -11, -12, 1, 2, 3), nelem_in_yr=12)
    assert default_base_period == (9, 10, 11, 12, 13, 14)
def test_generate_latlon_include_lat_endpts():
    """With include_endpts=True the latitude column runs from -90 to 90."""
    # Odd number of latitudes: the middle point is expected at 0.
    grid_lats, _, corner_lats, _ = Utils.generate_latlon(
        3, 5, include_endpts=True)
    np.testing.assert_equal(grid_lats[:, 0], [-90, 0, 90])
    assert corner_lats[0] == -90
    assert corner_lats[-1] == 90

    # Even number of latitudes.
    grid_lats, _, corner_lats, _ = Utils.generate_latlon(
        4, 5, include_endpts=True)
    np.testing.assert_equal(grid_lats[:, 0], [-90, -30, 30, 90])
def test_averaging_period_zero_vs_nonzero_indexed():
    """Zero-based and default-based inputs yield the same averaging period."""
    expected_period = (0, 1, 2, 3)

    from_zero_based = Utils.get_averaging_period(
        [0, 1, 2, 3], nelem_in_yr=12, is_zero_based=True)
    assert from_zero_based == expected_period

    from_default_base = Utils.get_averaging_period(
        [1, 2, 3, 4], nelem_in_yr=12)
    assert from_default_base == expected_period

    # An index of 0 is rejected when the input is declared not zero-based.
    with pytest.raises(ValueError):
        Utils.get_averaging_period([0, 1, 2, 3], 12, is_zero_based=False)
def test_calc_latlon_bnd_regular_grid():
    """Bounds are computed for regularly and irregularly spaced inputs in either slot."""
    irregular_data = np.array([1, 2, 3, 5, 8, 13, 21], dtype=np.float32)
    regular_data = np.arange(10)
    irreg_bnds = [0.5, 1.5, 2.5, 4, 6.5, 10.5, 17, 25]
    reg_bnds = np.arange(11) - 0.5

    # (lat input, lon input, expected lat bounds, expected lon bounds)
    scenarios = (
        (regular_data, irregular_data, reg_bnds, irreg_bnds),
        (irregular_data, regular_data, irreg_bnds, reg_bnds),
    )
    for lat_in, lon_in, want_lat_bnds, want_lon_bnds in scenarios:
        got_lat_bnds, got_lon_bnds = Utils.calculate_latlon_bnds(lat_in, lon_in)
        np.testing.assert_equal(got_lat_bnds, want_lat_bnds)
        np.testing.assert_equal(got_lon_bnds, want_lon_bnds)
def ce_r_ens_avg(gmt_ens, times, analysis_gmt, analysis_times,
                 trange=(1880, 2000), center_trange=(1900, 2000)):
    """Score every series in ``gmt_ens`` against ``analysis_gmt``.

    Each element iterated from ``gmt_ens`` is compared with the analysis
    series over the window ``trange`` (both endpoints included), using
    ``utils2.coefficient_efficiency`` and ``np.corrcoef``.

    Parameters
    ----------
    gmt_ens : iterable of series, time on the last axis
        Passed through ``center_to_time_range`` (with ``time_axis=-1`` and
        ``trange=center_trange``) before scoring.
        NOTE(review): presumably this re-references the series to the
        ``center_trange`` period -- confirm against that helper.
    times : array
        Time coordinate matching the series in ``gmt_ens``.
    analysis_gmt, analysis_times : array
        Reference series and its time coordinate.
    trange : (start, end)
        Comparison window.
    center_trange : (start, end)
        Period handed to ``center_to_time_range``.

    Returns
    -------
    (ens_ce, ens_r) : tuple of arrays
        One coefficient-of-efficiency and one correlation value per series.
        CE values exactly equal to 1 are replaced by NaN.
    """
    first_yr, last_yr = trange

    gmt_ens = center_to_time_range(times, gmt_ens, time_axis=-1,
                                   trange=center_trange)

    # Boolean masks selecting the comparison window on each time axis.
    analysis_tidx = (analysis_times >= first_yr) & (analysis_times <= last_yr)
    lmr_tidx = (times >= first_yr) & (times <= last_yr)

    ce_values = []
    for member in gmt_ens:
        ce_values.append(
            utils2.coefficient_efficiency(analysis_gmt[analysis_tidx],
                                          member[lmr_tidx]))
    ens_ce = np.array(ce_values)
    ens_ce[ens_ce == 1] = np.nan

    r_values = []
    for member in gmt_ens:
        corr_matrix = np.corrcoef(analysis_gmt[analysis_tidx],
                                  member[lmr_tidx])
        # Off-diagonal entry is the correlation between the two series.
        r_values.append(corr_matrix[0, 1])
    ens_r = np.array(r_values)

    return ens_ce, ens_r
def test_calc_latlon_bnd_bounds():
    """Bounds of evenly spaced cell centres match the expected cell edges."""
    lat_centers = np.array([-33.75, -11.25, 11.25, 33.75])
    lon_centers = np.array([18, 54, 90, 126, 162])
    expected_lat_edges = [-45, -22.5, 0, 22.5, 45]
    expected_lon_edges = [0, 36, 72, 108, 144, 180]

    lat_edges, lon_edges = Utils.calculate_latlon_bnds(lat_centers, lon_centers)

    np.testing.assert_equal(lat_edges, expected_lat_edges)
    np.testing.assert_equal(lon_edges, expected_lon_edges)
def test_calc_latlon_bnd_bounds_half_shift():
    """Check bounds for a pole-to-pole latitude axis and a longitude axis starting at 0."""
    lat_points = np.array([-90, -60, -30, 0, 30, 60, 90])
    lon_points = np.array([0, 90, 180, 270])
    # Expected latitude bounds stop at +/-90; longitude bounds straddle each point.
    expected_lat_edges = [-90, -75, -45, -15, 15, 45, 75, 90]
    expected_lon_edges = [-45, 45, 135, 225, 315]

    lat_edges, lon_edges = Utils.calculate_latlon_bnds(lat_points, lon_points)

    np.testing.assert_equal(lat_edges, expected_lat_edges)
    np.testing.assert_equal(lon_edges, expected_lon_edges)
def test_generate_latlon_output_shp():
    """Centre grids are (nlat, nlon); corner vectors have one extra element each."""
    nlats, nlons = 4, 5

    center_lats, center_lons, corner_lats, corner_lons = \
        Utils.generate_latlon(nlats, nlons)

    for center_grid in (center_lats, center_lons):
        assert center_grid.shape == (4, 5)
    assert corner_lats.shape == (5,)
    assert corner_lons.shape == (6,)
def test_generate_latlon_center_corner():
    """Check centre and corner coordinates for a grid with explicit lat/lon bounds."""
    center_lats, center_lons, corner_lats, corner_lons = Utils.generate_latlon(
        4, 5, lat_bnd=(-45, 45), lon_bnd=(0, 180))

    # Cell centres: first column holds the latitudes, first row the longitudes.
    np.testing.assert_equal(center_lats[:, 0], [-33.75, -11.25, 11.25, 33.75])
    np.testing.assert_equal(center_lons[0], [0, 36, 72, 108, 144])

    # Cell corners.
    np.testing.assert_equal(corner_lats, [-45, -22.5, 0, 22.5, 45])
    np.testing.assert_equal(corner_lons, [-18, 18, 54, 90, 126, 162])
def test_global_mean2(ncf_data):
    """Compare Utils.global_mean2 with Utils.global_hemispheric_means on the same data."""
    air = ncf_data['air'][0:4]
    lat = ncf_data['lat'][:]
    lon = ncf_data['lon'][:]
    _, latgrid = np.meshgrid(lon, lat)

    # Reference values from the hemispheric-means routine.
    ref_with_time, _, _ = Utils.global_hemispheric_means(air, lat)
    ref_no_time, _, _ = Utils.global_hemispheric_means(air[0], lat)

    # with time
    np.testing.assert_allclose(Utils.global_mean2(air, lat), ref_with_time)

    # flattened lat w/ time
    air_flat = air.reshape(4, 94*192)
    np.testing.assert_allclose(
        Utils.global_mean2(air_flat, latgrid.flatten()), ref_with_time)

    # no time
    np.testing.assert_allclose(Utils.global_mean2(air[0], lat), ref_no_time)

    # no time flattened spatial
    np.testing.assert_allclose(
        Utils.global_mean2(air[0].flatten(), latgrid.flatten()), ref_no_time)

    # NaN values: blank out the first latitude row for every time
    air[:, 0, :] = np.nan
    ref_nan, _, _ = Utils.global_hemispheric_means(air, lat)
    np.testing.assert_allclose(Utils.global_mean2(air, lat), ref_nan)

    # Test hemispheric
    ref_gm, ref_nhm, ref_shm = Utils.global_hemispheric_means(air, lat)
    got_gm, got_nhm, got_shm = Utils.global_mean2(air, lat,
                                                  output_hemispheric=True)
    np.testing.assert_allclose(got_gm, ref_gm)
    np.testing.assert_allclose(got_nhm, ref_nhm)
    np.testing.assert_allclose(got_shm, ref_shm)
def calc_extent(SIC, GRID, CUTOFF):
    """Threshold a field at CUTOFF and average the resulting 0/100 mask.

    Values less than or equal to ``CUTOFF`` become 0.0 and values greater
    than ``CUTOFF`` become 100.0.  Elements for which neither comparison is
    true (e.g. NaN) are left as they are.

    Parameters
    ----------
    SIC : array
        Input field.  It is no longer modified: the thresholding is applied
        to a copy.
    GRID : object
        Must expose a 2D ``lat`` array; its first column is passed to
        ``lmr.global_hemispheric_means`` as the latitude vector.
    CUTOFF : float
        Threshold value.

    Returns
    -------
    (sie_nhtot, sie_lalo)
        ``sie_nhtot`` is the second value returned by
        ``lmr.global_hemispheric_means`` for the thresholded field and
        ``sie_lalo`` is the thresholded field itself.
    """
    # Copy first: assigning through an alias of SIC would overwrite the
    # caller's data with the 0/100 mask.
    sie_lalo = SIC.copy()

    # Evaluate both conditions on the untouched values.  Computing the second
    # mask after the first assignment would re-classify the freshly written
    # zeros as "above cutoff" whenever CUTOFF is negative.
    below_or_equal = sie_lalo <= CUTOFF
    above = sie_lalo > CUTOFF
    sie_lalo[below_or_equal] = 0.0
    sie_lalo[above] = 100.0

    _, sie_nhtot, _ = lmr.global_hemispheric_means(sie_lalo, GRID.lat[:, 0])
    return sie_nhtot, sie_lalo
def test_averaging_period_span_greater_than_nelem_in_yr():
    """Index sets spanning more than nelem_in_yr elements raise ValueError."""
    # (positional args, keyword args) for each call expected to fail.
    rejected_calls = (
        (([-1, 0, 11],), {'nelem_in_yr': 12, 'is_zero_based': True}),
        (([0, 12],), {'nelem_in_yr': 12, 'is_zero_based': True}),
        (([-6, 7], 12), {}),
        (([4, 16], 12), {}),
    )
    for call_args, call_kwargs in rejected_calls:
        with pytest.raises(ValueError):
            Utils.get_averaging_period(*call_args, **call_kwargs)
def calc_analysis_gmt(analysis_var_obj, trange=(1880, 2000),
                      center_trange=(1900, 2000), detrend=False):
    """Return the global-mean series of an analysis object over ``trange``.

    Parameters
    ----------
    analysis_var_obj : object
        Must provide ``data``, ``lat`` and ``time`` attributes.
    trange : (start, end)
        Time window to keep; both endpoints are included.
    center_trange : (start, end)
        Period passed to ``center_to_time_range``.
        NOTE(review): presumably the reference period for centering --
        confirm against that helper.
    detrend : bool
        When true, the line returned by ``detrend_data`` is subtracted from
        the windowed series.

    Returns
    -------
    array
        Global mean restricted to ``trange`` (optionally detrended).
    """
    global_mean = utils2.global_mean2(analysis_var_obj.data,
                                      analysis_var_obj.lat)
    global_mean = center_to_time_range(analysis_var_obj.time, global_mean, 0,
                                       trange=center_trange)

    in_window = ((analysis_var_obj.time >= trange[0]) &
                 (analysis_var_obj.time <= trange[1]))
    global_mean = global_mean[in_window]

    if not detrend:
        return global_mean

    # detrend_data is handed column vectors; the fit coefficients are unused.
    fitted_line, _ = detrend_data(analysis_var_obj.time[in_window][:, None],
                                  global_mean[:, None],
                                  ret_coef=True)
    global_mean -= fitted_line.squeeze()
    return global_mean
def make_obs(ob_lat, ob_lon, dat_lat, dat_lon, dat, verbose=False):
    """Sample a gridded dataset at the grid points nearest to given locations.

    One observation is produced for every (lon, lat) combination of
    ``ob_lon`` and ``ob_lat``, with longitude as the outer loop.

    Inputs:
        ob_lat, ob_lon: vector lat,lon coordinates of observations.
        dat_lat, dat_lon: lat,lon coordinates of the input data, passed to
                          LMR_utils.get_distance.
        dat: array of input data indexed as (ntimes, nlat, nlon).
        verbose: print the number of times and observations when true.

    Outputs:
        obs: the observations, shape [nobs, ntimes].
        obs_ind_lat, obs_ind_lon: for each observation, the first and second
                                  grid index of the closest data point
                                  (stored as floats).
    """
    n_times = dat.shape[0]
    if verbose:
        print('nyears: ' + str(n_times))

    n_obs = len(ob_lat) * len(ob_lon)
    if verbose:
        print('nobs: ' + str(n_obs))

    # Output containers
    obs = np.zeros([n_obs, n_times])
    obs_ind_lat = np.zeros(n_obs)
    obs_ind_lon = np.zeros(n_obs)

    row = 0
    for lon in ob_lon:
        for lat in ob_lat:
            dist = LMR_utils.get_distance(lon, lat, dat_lon, dat_lat)
            # Grid indices of the smallest distance.
            nearest_j, nearest_k = np.unravel_index(dist.argmin(), dist.shape)
            obs[row, :] = dat[:, nearest_j, nearest_k]
            obs_ind_lat[row] = nearest_j
            obs_ind_lon[row] = nearest_k
            row += 1

    return obs, obs_ind_lat, obs_ind_lon
def find_ce_corr(VAR, REF, REF_TIME, VAR_TIME, START_TIME, END_TIME,
                 detrend=False):
    """Finds the correlation coefficient and coefficient of efficiency between
    REF and VAR between START_TIME and END_TIME.

    inputs:
        VAR = test data (1D in time)
        REF = reference data (1D in time)
        REF_TIME = reference data time (1D time)
        VAR_TIME = test data time (1D years)
        START_TIME = comparison start year to be included (float)
        END_TIME = last year included in comparison (float)
        detrend = when true, both series are passed through spy.detrend
                  before the statistics are computed (default False)

    outputs:
        ce = value returned by lmr.coefficient_efficiency(ref, var)
        corr = correlation coefficient between the two windowed series
        var_ref = variance (np.var) of the windowed reference series
        var_var = variance (np.var) of the windowed test series
    """
    # Indices inside [START_TIME, END_TIME + 1) on each time axis.
    var_idx = np.where((VAR_TIME >= START_TIME) & (VAR_TIME < END_TIME + 1))[0]
    ref_idx = np.where((REF_TIME >= START_TIME) & (REF_TIME < END_TIME + 1))[0]

    ref = REF[ref_idx]
    var = VAR[var_idx]

    # Truthiness test rather than `detrend is False`: the identity check
    # treated falsy values such as 0 or numpy.False_ as a request to detrend.
    if detrend:
        ref = spy.detrend(ref)
        var = spy.detrend(var)

    ce = lmr.coefficient_efficiency(ref, var)
    corr = np.corrcoef(ref, var)[0, 1]
    var_ref = np.var(ref)
    var_var = np.var(var)

    return ce, corr, var_ref, var_var
def test_averaging_period_nelem_greater_than_nelem_in_yr():
    """More requested elements than nelem_in_yr raises ValueError."""
    five_elements = (1, 2, 3, 4, 5)
    with pytest.raises(ValueError):
        Utils.get_averaging_period(five_elements, nelem_in_yr=3)
def load_config(yaml_file, verbose=False):
    """Build and validate an ``LMR_config.Config`` from a YAML file.

    Unless ``LMR_config.LEGACY_CONFIG`` is set, ``yaml_file`` is read and its
    content applied to ``LMR_config``.  The function then walks over every
    combination of Monte-Carlo iteration and (optional) parameter-search
    values, builds a configuration for each one and validates it with
    ``validate_config``.

    Parameters
    ----------
    yaml_file : str
        Path of the YAML configuration file.
    verbose : bool
        Print the file being loaded and the elapsed time.

    Returns
    -------
    LMR_config.Config
        The configuration built for the last combination visited.

    Raises
    ------
    SystemExit
        If the YAML file cannot be read, if it contains parameters that
        ``LMR_config.update_config_class_yaml`` reports as residual, if a
        configuration fails validation, or if the iteration range is empty.
    """
    begin_time = time()

    if not LMR_config.LEGACY_CONFIG:
        if verbose:
            print('Loading configuration: {}'.format(yaml_file))
        try:
            # Context manager so the file handle is always released.
            with open(yaml_file, 'r') as f:
                # NOTE(review): yaml.load without an explicit Loader can
                # construct arbitrary Python objects, and recent PyYAML
                # releases require the Loader argument.  Left unchanged
                # because existing configuration files may rely on
                # Python-specific tags that yaml.safe_load rejects -- confirm
                # before switching.
                yml_dict = yaml.load(f)
        except IOError:
            raise SystemExit(
                ('Could not locate {}. If use of legacy LMR_config usage is '
                 'desired then please change LEGACY_CONFIG to True '
                 'in LMR_wrapper.py.').format(yaml_file))

        update_result = LMR_config.update_config_class_yaml(yml_dict,
                                                            LMR_config)
        # Check that all yml params match value in LMR_config
        if update_result:
            raise SystemExit(
                'Extra or mismatching values found in the configuration yaml'
                ' file.  Please fix or remove them.\n  Residual parameters:\n '
                '{}'.format(update_result))

    # Define main experiment output directory
    iter_range = LMR_config.wrapper.iter_range
    expdir = os.path.join(LMR_config.core.datadir_output, LMR_config.core.nexp)

    # Create the directory if it does not exist yet.  os.makedirs avoids
    # building a shell command from the path and reports failures.
    os.makedirs(expdir, exist_ok=True)

    # Monte-Carlo approach: loop over iterations (range of iterations defined
    # in namelist); the upper bound of iter_range is included.
    mc_iters = range(iter_range[0], iter_range[1] + 1)
    param_iterables = [mc_iters]

    # get other parameters to sweep over in the reconstruction
    param_search = LMR_config.wrapper.param_search
    if param_search is not None:
        # sort them by parameter name (text after the last '.') and combine
        # into a list of iterables, iteration number last
        sort_params = sorted(param_search.keys(),
                             key=lambda x: x.split('.')[-1])
        param_values = [param_search[key] for key in sort_params]
        param_iterables = param_values + [mc_iters]

    cfg = None
    for iter_and_params in itertools.product(*param_iterables):
        iter_num = iter_and_params[-1]
        cfg_dict = Utils.param_cfg_update('core.curr_iter', iter_num)

        if LMR_config.wrapper.multi_seed is not None:
            curr_seed = LMR_config.wrapper.multi_seed[iter_num]
            cfg_dict = Utils.param_cfg_update('core.seed', curr_seed,
                                              cfg_dict=cfg_dict)

        itr_str = 'r{:d}'.format(iter_num)

        # If parameter space search is being performed then set the current
        # search space values and create a special sub-directory
        if param_search is not None:
            curr_param_values = iter_and_params[:-1]
            cfg_dict, psearch_dir = Utils.psearch_list_cfg_update(
                sort_params, curr_param_values, cfg_dict=cfg_dict)
            working_dir = os.path.join(expdir, psearch_dir, itr_str)
        else:
            working_dir = os.path.join(expdir, itr_str)

        cfg_params = Utils.param_cfg_update('core.datadir_output', working_dir,
                                            cfg_dict=cfg_dict)
        cfg = LMR_config.Config(**cfg_params)

        if not validate_config(cfg):
            raise SystemExit()
        print('OK!')

    if cfg is None:
        # Without this check the return below would fail with an unbound
        # local variable when iter_range yields no iterations.
        raise SystemExit(
            'No configuration was built: iter_range {} defines no '
            'iterations.'.format(iter_range))

    if verbose:
        elapsed_time = time() - begin_time
        print('-----------------------------------------------------')
        print('completed in ' + str(elapsed_time) + ' seconds')
        print('-----------------------------------------------------')

    return cfg
def test_generate_latlon_bnd_limits():
    """Out-of-range latitude/longitude bounds raise ValueError."""
    # TODO: could be parametrized input

    # Defaults are accepted
    Utils.generate_latlon(5, 5)

    # Bad lat bounds
    for bad_lat_bnd in ((-100, 45), (-45, 91)):
        with pytest.raises(ValueError):
            Utils.generate_latlon(5, 5, lat_bnd=bad_lat_bnd)

    # Lon bounds: this pair is accepted ...
    Utils.generate_latlon(5, 5, lon_bnd=(-90, 270))

    # ... while these are rejected
    for bad_lon_bnd in ((-180, 181), (-181, 40), (14, 361)):
        with pytest.raises(ValueError):
            Utils.generate_latlon(5, 5, lon_bnd=bad_lon_bnd)
def test_calc_latlon_bnd_1d_input():
    """A 2D array in either argument position raises ValueError."""
    vector = np.linspace(10, 50, 5)
    column = vector[:, None]

    # 2D latitude input
    with pytest.raises(ValueError):
        Utils.calculate_latlon_bnds(column, vector)

    # 2D longitude input
    with pytest.raises(ValueError):
        Utils.calculate_latlon_bnds(vector, column)
# Script fragment: load configuration, proxies, prior and precalculated Ye
# values, then run a single-year test reconstruction with two solvers and
# print how much their ensemble means differ.
# `yaml_file` is defined outside the lines visible here.
cfg = LMRlite.load_config(yaml_file)

print('loading proxies...')
prox_manager = LMRlite.load_proxies(cfg)

print('loading prior...')
X, Xb_one = LMRlite.load_prior(cfg)

# check if config was set to regrid the prior
if cfg.prior.regrid_method:
    print('regridding prior...')
    # this function over-writes X, even if return is given a different name
    [X, Xb_one] = LMRlite.prior_regrid(cfg, X, Xb_one, verbose=True)
else:
    # no regridding: the truncated-state description is the full-state one
    X.trunc_state_info = X.full_state_info

print('loading Ye...')
Ye_assim, Ye_assim_coords = LMR_utils.load_precalculated_ye_vals_psm_per_proxy(
    cfg, prox_manager, 'assim', X.prior_sample_indices)

#-----------------------------------------------------------------
# example reconstruction for one year
#-----------------------------------------------------------------
# use the first entry of the reconstruction period as the test year
target_year = cfg.core.recon_period[0]
print('performing a test reconstruction for year:' + str(target_year))
vY, vR, vP, vYe, vT, vYe_coords = LMRlite.get_valid_proxies(
    cfg, prox_manager, target_year, Ye_assim, Ye_assim_coords)
xam, Xap, _ = LMRlite.Kalman_optimal(vY, vR, vYe, Xb_one, verbose=False)
xam2, Xap2 = LMRlite.Kalman_ESRF(cfg, vY, vR, vYe, Xb_one, verbose=False)
# largest absolute relative difference between the two ensemble means
# NOTE(review): the difference is divided by xam, so zero entries in xam
# would give inf/nan here -- confirm this cannot occur.
print('ens mean max difference from different solvers...',
      str(np.max(np.abs((xam2 - xam) / xam))))

#-----------------------------------------------------------------
# reconstruction over recon_period, computing GMT on the way
def cov_localization(locRad, Y, X, X_coords):
    """Compute covariance-localization weights for one proxy site.

    Originator: R. Tardif, Dept. Atmos. Sciences, Univ. of Washington

    Uses the Gaspari-Cohn localization function.

    Inputs:
        locRad   : Localization radius (distance in km beyond which
                   covariances are forced to zero).
        Y        : Proxy object; its ``lat`` and ``lon`` attributes give the
                   site location.
        X        : Prior object; ``X.trunc_state_info`` maps every state
                   variable to its 'pos' (first, last index, inclusive) and
                   'spacecoords'.
        X_coords : 2D array of state-vector element locations, latitude in
                   column 0 and longitude in column 1.

    Output:
        covLoc : Localization weights, one per state-vector element
                 (dims = Nx).  Elements of variables whose 'spacecoords' is
                 not ('lat', 'lon') keep a weight of 1.
    """
    n_state, _ = X_coords.shape

    # Start from "no localization": every weight equal to one.
    covLoc = np.ones(shape=[n_state], dtype=np.float64)

    # Flag state elements that belong to (lat, lon) fields; only those can
    # be localized.
    has_latlon = np.ones(n_state, dtype=bool)
    for var_info in X.trunc_state_info.values():
        first, last = var_info['pos']
        if var_info['spacecoords'] != ('lat', 'lon'):
            has_latlon[first:last + 1] = False

    # Distances between the proxy site and the state elements.  Elements
    # that cannot be localized get NaN so that every comparison below is
    # False for them and their weight stays at one.
    dists = np.zeros(shape=[n_state])
    dists[has_latlon] = np.array(
        LMR_utils.haversine(Y.lon, Y.lat,
                            X_coords[:, 1][has_latlon],
                            X_coords[:, 0][has_latlon]),
        dtype=np.float64)
    dists[~has_latlon] = np.nan

    # The function is expressed in units of half the localization radius.
    half_rad = 0.5 * locRad
    r = dists / half_rad

    within_half = dists <= half_rad          # closest
    past_half = dists > half_rad             # close
    past_full = dists > 2. * half_rad        # out

    # Gaspari-Cohn function
    # points within half of the localization radius
    r_in = r[within_half]
    covLoc[within_half] = \
        (((-0.25*r_in+0.5)*r_in+0.625)*r_in-(5.0/3.0))*(r_in**2)+1.0

    # points between half and one localization radius (also evaluated for
    # points farther out, which are reset just below)
    r_out = r[past_half]
    covLoc[past_half] = \
        ((((r_out/12. - 0.5) * r_out + 0.625) * r_out + 5.0/3.0)
         * r_out - 5.0) * r_out + 4.0 - 2.0/(3.0*r_out)

    # zero weight outside of the localization radius
    covLoc[past_full] = 0.0

    # Clip tiny negative values that round-off may produce for distances
    # very near the localization radius.
    # TODO: revisit calculations to minimize round-off errors
    covLoc[covLoc < 0.0] = 0.0

    return covLoc
def LMR_driver_callable(cfg=None): if cfg is None: cfg = BaseCfg.Config() # Use base configuration from LMR_config # Temporary fix for old 'state usage' core = cfg.core prior = cfg.prior # verbose controls print comments (0 = none; 1 = most important; # 2 = many; 3 = a lot; >=4 = all) verbose = cfg.LOG_LEVEL nexp = core.nexp workdir = core.datadir_output recon_period = core.recon_period recon_timescale = core.recon_timescale online = core.online_reconstruction nens = core.nens loc_rad = core.loc_rad inflation_fact = core.inflation_fact prior_source = prior.prior_source datadir_prior = prior.datadir_prior datafile_prior = prior.datafile_prior state_variables = prior.state_variables state_variables_info = prior.state_variables_info regrid_method = prior.regrid_method regrid_resolution = prior.regrid_resolution # ========================================================================== # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< MAIN CODE >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> # ========================================================================== # TODO: AP Logging instead of print statements if verbose > 0: print('') print('=====================================================') print('Running LMR reconstruction...') print('=====================================================') print('Name of experiment: ', nexp) print(' Monte Carlo iter : ', core.curr_iter) print('') begin_time = time() # Define the number of years of the reconstruction # (nb of assimilation times) recon_times = np.arange(recon_period[0], recon_period[1] + 1, recon_timescale) ntimes, = recon_times.shape # ========================================================================== # Load prior data ---------------------------------------------------------- # ========================================================================== if verbose > 0: print('-------------------------------------------') print('Uploading gridded (model) data as prior ...') print('-------------------------------------------') 
print('Source for prior: ', prior_source) # Assign prior object according to "prior_source" (from namelist) X = LMR_prior.prior_assignment(prior_source) # TODO: AP explicit requirements # add namelist attributes to the prior object X.prior_datadir = datadir_prior X.prior_datafile = datafile_prior X.statevars = state_variables X.statevars_info = state_variables_info X.Nens = nens # Use a specified reference period for state variable anomalies X.anom_reference = prior.anom_reference # new option: detrending the prior X.detrend = prior.detrend print('detrend:', X.detrend) X.avgInterval = prior.avgInterval # Read data file & populate initial prior ensemble X.populate_ensemble(prior_source, prior) Xb_one_full = X.ens # Prepare to check for files in the prior (work) directory (this object just # points to a directory) prior_check = np.DataSource(workdir) load_time = time() - begin_time if verbose > 2: print('-----------------------------------------------------') print('Loading completed in ' + str(load_time) + ' seconds') print('-----------------------------------------------------') # check covariance inflation from config inflate = None if inflation_fact is not None: inflate = inflation_fact if verbose > 2: print(('\nUsing covariance inflation factor: %8.2f' % inflate)) # ========================================================================== # Get information on proxies to assimilate --------------------------------- # ========================================================================== begin_time_proxy_load = time() if verbose > 0: print('') print('-----------------------------------') print('Uploading proxy data & PSM info ...') print('-----------------------------------') # Build dictionaries of proxy sites to assimilate and those set aside for # verification prox_manager = LMR_proxy_pandas_rework.ProxyManager(cfg, recon_period) type_site_assim = prox_manager.assim_ids_by_group if verbose > 3: print('Assimilating proxy types/sites:', type_site_assim) if 
verbose > 0: print( '--------------------------------------------------------------------' ) print('Proxy counts for experiment:') # count the total number of proxies assim_proxy_count = len(prox_manager.ind_assim) for pkey, plist in sorted(type_site_assim.items()): print(('%45s : %5d' % (pkey, len(plist)))) print(('%45s : %5d' % ('TOTAL', assim_proxy_count))) print( '--------------------------------------------------------------------' ) if verbose > 2: proxy_load_time = time() - begin_time_proxy_load print('-----------------------------------------------------') print('Loading completed in ' + str(proxy_load_time) + ' seconds') print('-----------------------------------------------------') # ========================================================================== # Calculate truncated state from prior, if option chosen ------------------- # ========================================================================== if regrid_method: # Declare dictionary w/ info on content of truncated state vector new_state_info = {} # Transform every 2D state variable, one at a time Nx = 0 for var in list(X.full_state_info.keys()): dct = {} dct['vartype'] = X.full_state_info[var]['vartype'] # variable indices in full state vector ibeg_full = X.full_state_info[var]['pos'][0] iend_full = X.full_state_info[var]['pos'][1] # extract array corresponding to state variable "var" var_array_full = Xb_one_full[ibeg_full:iend_full + 1, :] # corresponding spatial coordinates coords_array_full = X.coords[ibeg_full:iend_full + 1, :] # Are we truncating this variable? (i.e. is it a 2D lat/lon variable?) 
if X.full_state_info[var]['vartype'] == '2D:horizontal': print(var, ' : 2D lat/lon variable, truncating this variable') # lat/lon column indices in X.coords ind_lon = X.full_state_info[var]['spacecoords'].index('lon') ind_lat = X.full_state_info[var]['spacecoords'].index('lat') nlat = X.full_state_info[var]['spacedims'][ind_lat] nlon = X.full_state_info[var]['spacedims'][ind_lon] # calculate the truncated fieldNtimes if regrid_method == 'simple': [var_array_new, lat_new, lon_new] = \ LMR_utils.regrid_simple(nens, var_array_full, coords_array_full, \ ind_lat, ind_lon, regrid_resolution) elif regrid_method == 'spherical_harmonics': [var_array_new, lat_new, lon_new] = \ LMR_utils.regrid_sphere(nlat, nlon, nens, var_array_full, regrid_resolution) elif regrid_method == 'esmpy': target_grid = prior.esmpy_grid_def lat_2d = coords_array_full[:, ind_lat].reshape(nlat, nlon) lon_2d = coords_array_full[:, ind_lon].reshape(nlat, nlon) [var_array_new, lat_new, lon_new] = LMR_utils.regrid_esmpy( target_grid['nlat'], target_grid['nlon'], nens, var_array_full, lat_2d, lon_2d, nlat, nlon, include_poles=target_grid['include_poles'], method=prior.esmpy_interp_method) else: print('Exiting! 
Unrecognized regridding method.') raise SystemExit nlat_new = np.shape(lat_new)[0] nlon_new = np.shape(lat_new)[1] print(('=> Full array: ' + str(np.min(var_array_full)) + ' ' + str(np.max(var_array_full)) + ' ' + str(np.mean(var_array_full)) + ' ' + str(np.std(var_array_full)))) print(('=> Truncated array: ' + str(np.min(var_array_new)) + ' ' + str(np.max(var_array_new)) + ' ' + str(np.mean(var_array_new)) + ' ' + str(np.std(var_array_new)))) # corresponding indices in truncated state vector ibeg_new = Nx iend_new = Nx + (nlat_new * nlon_new) - 1 # for new state info dictionary dct['pos'] = (ibeg_new, iend_new) dct['spacecoords'] = X.full_state_info[var]['spacecoords'] dct['spacedims'] = (nlat_new, nlon_new) # updated dimension new_dims = (nlat_new * nlon_new) # array with new spatial coords coords_array_new = np.zeros(shape=[new_dims, 2]) coords_array_new[:, 0] = lat_new.flatten() coords_array_new[:, 1] = lon_new.flatten() else: print(var,\ ' : not truncating this variable: no changes from full state') var_array_new = var_array_full coords_array_new = coords_array_full # updated dimension new_dims = var_array_new.shape[0] ibeg_new = Nx iend_new = Nx + new_dims - 1 dct['pos'] = (ibeg_new, iend_new) dct['spacecoords'] = X.full_state_info[var]['spacecoords'] dct['spacedims'] = X.full_state_info[var]['spacedims'] # fill in new state info dictionary new_state_info[var] = dct # if 1st time in loop over state variables, create Xb_one array as copy # of var_array_new if Nx == 0: Xb_one = np.copy(var_array_new) Xb_one_coords = np.copy(coords_array_new) else: # if not 1st time, append to existing array Xb_one = np.append(Xb_one, var_array_new, axis=0) Xb_one_coords = np.append(Xb_one_coords, coords_array_new, axis=0) # making sure Xb_one has proper mask, if it contains # at least one invalid value if np.isnan(Xb_one).any(): Xb_one = np.ma.masked_invalid(Xb_one) np.ma.set_fill_value(Xb_one, np.nan) # updating dimension of new state vector Nx = Nx + new_dims 
X.trunc_state_info = new_state_info else: # no truncation: carry over full state to working array X.trunc_state_info = X.full_state_info Xb_one = Xb_one_full Xb_one_coords = X.coords [Nx, _] = Xb_one.shape # Keep dimension of pre-augmented version of state vector [state_dim, _] = Xb_one.shape # ========================================================================== # Calculate all Ye's (for all sites in sites_assim) ------------------------ # ========================================================================== # Load or generate Ye Values for assimilation if not online: # Load pre calculated ye values if desired or possible try: if not cfg.core.use_precalc_ye: raise FlagError( 'use_precalc_ye=False: forego loading precalcul' 'ated ye values.') print( 'Loading precalculated Ye values for proxies to be assimilated.' ) [Ye_assim, Ye_assim_coords ] = LMR_utils.load_precalculated_ye_vals_psm_per_proxy( cfg, prox_manager, 'assim', X.prior_sample_indices) eval_proxy_count = 0 if prox_manager.ind_eval: print('Loading precalculated Ye values for withheld proxies.') [Ye_eval, Ye_eval_coords ] = LMR_utils.load_precalculated_ye_vals_psm_per_proxy( cfg, prox_manager, 'eval', X.prior_sample_indices) [eval_proxy_count, _] = Ye_eval.shape except (IOError, FlagError) as e: print(e) # Manually calculate ye_values from state vector print('Calculating ye_values from the prior...') Ye_assim = np.empty(shape=[assim_proxy_count, nens]) Ye_assim_coords = np.empty(shape=[assim_proxy_count, 2]) for k, proxy in enumerate(prox_manager.sites_assim_proxy_objs()): Ye_assim[k, :] = proxy.psm(Xb_one_full, X.full_state_info, X.coords) Ye_assim_coords[k, :] = np.asarray([proxy.lat, proxy.lon], dtype=np.float64) eval_proxy_count = 0 if prox_manager.ind_eval: eval_proxy_count = len(prox_manager.ind_eval) Ye_eval = np.empty(shape=[eval_proxy_count, nens]) Ye_eval_coords = np.empty(shape=[eval_proxy_count, 2]) for k, proxy in enumerate( prox_manager.sites_eval_proxy_objs()): Ye_eval[k, :] = 
proxy.psm(Xb_one_full, X.full_state_info, X.coords) Ye_eval_coords[k, :] = np.asarray([proxy.lat, proxy.lon], dtype=np.float64) # ---------------------------------- # Augment state vector with the Ye's # ---------------------------------- # Append ensemble of Ye's of assimilated proxies to prior state vector Xb_one_aug = np.append(Xb_one, Ye_assim, axis=0) Xb_one_coords = np.append(Xb_one_coords, Ye_assim_coords, axis=0) if prox_manager.ind_eval: # Append ensemble of Ye's of withheld proxies to prior state vector Xb_one_aug = np.append(Xb_one_aug, Ye_eval, axis=0) Xb_one_coords = np.append(Xb_one_coords, Ye_eval_coords, axis=0) else: Xb_one_aug = Xb_one # Dump entire prior state vector (Xb_one) to file filen = workdir + '/' + 'Xb_one' try: out_Xb_one = Xb_one.filled() out_Xb_one_aug = Xb_one_aug.filled() except AttributeError as e: out_Xb_one = Xb_one out_Xb_one_aug = Xb_one_aug np.savez(filen, Xb_one=out_Xb_one, Xb_one_aug=out_Xb_one_aug, stateDim=state_dim, Xb_one_coords=Xb_one_coords, state_info=X.trunc_state_info) # NEW: write out (to prior_sampling_info.txt file) the info on prior sampling # i.e. the list of indices (i.e. years for annual recons) randomly chosen # from available model states prior_samples = open(workdir + '/prior_sampling_info.txt', 'w') # include a header prior_samples.write( '# List of indices (i.e. 
years if annual recon) randomly' ' sampled from model output to form the prior(ensemble):\n') if core.seed is not None: prior_samples.write('# with seed=%d \n' % (core.seed)) else: prior_samples.write('# with seed=None \n') # write out the list prior_samples.write(str(X.prior_sample_indices) + '\n') prior_samples.close() # NEW: Dump prior state vector (Xb_one) to file, one file per state variable print( '\n ---------- saving Xb_one for each variable to separate file -----------\n' ) for var in list(X.trunc_state_info.keys()): print(var) # now need to pluck off the index region that goes with var ibeg = X.trunc_state_info[var]['pos'][0] iend = X.trunc_state_info[var]['pos'][1] if X.trunc_state_info[var]['vartype'] == '2D:horizontal': # if no truncation: lat_new and lon_new are not defined...rather get actual lats/lons info from state vector ind_lon = X.trunc_state_info[var]['spacecoords'].index('lon') ind_lat = X.trunc_state_info[var]['spacecoords'].index('lat') nlon_new = X.trunc_state_info[var]['spacedims'][ind_lon] nlat_new = X.trunc_state_info[var]['spacedims'][ind_lat] lat_sv = Xb_one_coords[ibeg:iend + 1, ind_lat] lon_sv = Xb_one_coords[ibeg:iend + 1, ind_lon] lat_new = np.unique(lat_sv) lon_new = np.unique(lon_sv) Xb_var = np.reshape(out_Xb_one[ibeg:iend + 1, :], (nlat_new, nlon_new, nens)) filen = workdir + '/' + 'Xb_one' + '_' + var np.savez(filen, Xb_var=Xb_var, nlat=nlat_new, nlon=nlon_new, nens=nens, lat=lat_new, lon=lon_new) else: print(( 'Warning: Only saving 2D:horizontal variable. Variable (%s) is of another type' % (var))) # TODO: Code mods above are a quick fix. Should allow saving other types of variables here! 
# END new file save # ========================================================================== # Loop over all years & proxies and perform assimilation ------------------- # ========================================================================== # Array containing the global and hemispheric-mean state # (for diagnostic purposes) # Now doing surface air temperature only (var = tas_sfc_Amon)! # TODO: AP temporary fix for no TAS in state tas_var = [ item for item in cfg.prior.state_variables.keys() if 'tas_sfc_' in item ] if tas_var: gmt_save = np.zeros([assim_proxy_count + 1, ntimes]) nhmt_save = np.zeros([assim_proxy_count + 1, ntimes]) shmt_save = np.zeros([assim_proxy_count + 1, ntimes]) # get state vector indices where to find surface air temperature ibeg_tas = X.trunc_state_info[tas_var[0]]['pos'][0] iend_tas = X.trunc_state_info[tas_var[0]]['pos'][1] xbm = np.mean(Xb_one[ibeg_tas:iend_tas + 1, :], axis=1) # ensemble-mean nlat_new = X.trunc_state_info[tas_var[0]]['spacedims'][0] nlon_new = X.trunc_state_info[tas_var[0]]['spacedims'][1] xbm_lalo = xbm.reshape(nlat_new, nlon_new) lat_coords = Xb_one_coords[ibeg_tas:iend_tas + 1, 0] lat_lalo = lat_coords.reshape(nlat_new, nlon_new) [gmt, nhmt, shmt] = LMR_utils.global_hemispheric_means(xbm_lalo, lat_lalo[:, 0]) # First row is prior GMT gmt_save[0, :] = gmt nhmt_save[0, :] = nhmt shmt_save[0, :] = shmt # Prior for first proxy assimilated gmt_save[1, :] = gmt nhmt_save[1, :] = nhmt shmt_save[1, :] = shmt # ------------------------------------- # Loop over years of the reconstruction # ------------------------------------- lasttime = time() for yr_idx, t in enumerate( range(recon_period[0], recon_period[1] + 1, recon_timescale)): start_yr = int(t - recon_timescale // 2) end_yr = int(t + recon_timescale // 2) if verbose > 0: if start_yr == end_yr: time_str = 'year: ' + str(t) else: time_str = 'time period (yrs): [' + str(start_yr) + ',' + str( end_yr) + ']' print('\n==== Working on ' + time_str) ypad = 
'{:07d}'.format(t) filen = join(workdir, 'year' + ypad + '.npy') if prior_check.exists(filen) and not core.clean_start: if verbose > 2: print('prior file exists: ' + filen) Xb = np.load(filen) else: if verbose > 2: print('Prior file ', filen, ' does not exist...') Xb = Xb_one_aug.copy() # ----------------- # Loop over proxies # ----------------- for proxy_idx, Y in enumerate(prox_manager.sites_assim_proxy_objs()): # Check if we have proxy ob for current time interval try: if recon_timescale > 1: # exclude lower bound to not include same obs in adjacent time intervals Yvals = Y.values[(Y.values.index > start_yr) & (Y.values.index <= end_yr)] else: Yvals = Y.values[(Y.values.index >= start_yr) & (Y.values.index <= end_yr)] if Yvals.empty: raise KeyError() nYobs = len(Yvals) Yobs = Yvals.mean() except KeyError: # Make sure GMT spot filled from previous proxy # TODO: AP temporary fix for no TAS in state if tas_var: gmt_save[proxy_idx + 1, yr_idx] = gmt_save[proxy_idx, yr_idx] continue # skip to next loop iteration (proxy record) if verbose > 1: print('--------------- Processing proxy: ' + Y.id) if verbose > 2: print('Site:', Y.id, ':', Y.type) print(' latitude, longitude: ' + str(Y.lat), str(Y.lon)) loc = None if loc_rad is not None: if verbose > 2: print('...computing localization...') loc = cov_localization(loc_rad, Y, X, Xb_one_coords) # Get Ye values for current proxy if online: # Calculate from latest updated prior Ye = Y.psm(Xb) else: # Extract latest updated Ye from appended state vector Ye = Xb[proxy_idx - (assim_proxy_count + eval_proxy_count)] # Define the ob error variance ob_err = Y.psm_obj.R # if ob is an average of several values, adjust its ob error variance if nYobs > 1: ob_err = ob_err / float(nYobs) # ------------------------------------------------------------------ # Do the update (assimilation) ------------------------------------- # ------------------------------------------------------------------ if verbose > 2: print(('updating time: ' + str(t) 
+ ' proxy value : ' + str(Yobs) + ' (nobs=' + str(nYobs) + ') | mean prior proxy estimate: ' + str(Ye.mean()))) # Update the state Xa = enkf_update_array(Xb, Yobs, Ye, ob_err, loc, inflate) # TODO: AP Temporary fix for no TAS in state if tas_var: xam = Xa.mean(axis=1) xam_lalo = xam[ibeg_tas:(iend_tas + 1)].reshape( nlat_new, nlon_new) [gmt, nhmt, shmt] = \ LMR_utils.global_hemispheric_means(xam_lalo, lat_lalo[:, 0]) gmt_save[proxy_idx + 1, yr_idx] = gmt nhmt_save[proxy_idx + 1, yr_idx] = nhmt shmt_save[proxy_idx + 1, yr_idx] = shmt # add check to detect whether recon has blown-up (and stop it if it has) xbvar = Xb.var(axis=1, ddof=1) xavar = Xa.var(ddof=1, axis=1) vardiff = xavar - xbvar if (not np.isfinite(np.min(vardiff))) or (not np.isfinite( np.max(vardiff))): print('ERROR: Reconstruction has blown-up. Exiting!') raise SystemExit(1) # check the variance change for sign thistime = time() if verbose > 2: #xbvar = Xb.var(axis=1, ddof=1) #xavar = Xa.var(ddof=1, axis=1) #vardiff = xavar - xbvar print('min/max change in variance: (' + str(np.min(vardiff)) + ',' + str(np.max(vardiff)) + ')') print('update took ' + str(thistime - lasttime) + 'seconds') lasttime = thistime # Put analysis Xa in Xb for next assimilation Xb = Xa # End of loop on proxies # Dump Xa to file (use Xb in case no proxies assimilated for # current year) try: np.save(filen, Xb.filled()) except AttributeError as e: np.save(filen, Xb) end_time = time() - begin_time # End of loop on years if verbose > 0: print('') print('=====================================================') print('Reconstruction completed in ' + str(end_time / 60.0) + ' mins') print('=====================================================') # 3 July 2015: compute and save the GMT,NHMT,SHMT for the full ensemble # need to fix this so that every year is counted # TODO: AP temporary fix for no TAS if tas_var: gmt_ensemble = np.zeros([ntimes, nens]) nhmt_ensemble = np.zeros([ntimes, nens]) shmt_ensemble = np.zeros([ntimes, nens]) for 
iyr, yr in enumerate( range(recon_period[0], recon_period[1] + 1, recon_timescale)): filen = join(workdir, 'year{:07d}'.format(yr)) Xa = np.load(filen + '.npy') for k in range(nens): xam_lalo = Xa[ibeg_tas:iend_tas + 1, k].reshape(nlat_new, nlon_new) [gmt, nhmt, shmt] = \ LMR_utils.global_hemispheric_means(xam_lalo, lat_lalo[:, 0]) gmt_ensemble[iyr, k] = gmt nhmt_ensemble[iyr, k] = nhmt shmt_ensemble[iyr, k] = shmt filen = join(workdir, 'gmt_ensemble') np.savez(filen, gmt_ensemble=gmt_ensemble, nhmt_ensemble=nhmt_ensemble, shmt_ensemble=shmt_ensemble, recon_times=recon_times) # save global mean temperature history and the proxies assimilated print(('saving global mean temperature update history and ', 'assimilated proxies...')) filen = join(workdir, 'gmt') np.savez(filen, gmt_save=gmt_save, nhmt_save=nhmt_save, shmt_save=shmt_save, recon_times=recon_times, apcount=assim_proxy_count, tpcount=assim_proxy_count) # TODO: (AP) The assim/eval lists of lists instead of lists of 1-item dicts assimilated_proxies = [{ p.type: [p.id, p.lat, p.lon, p.time, p.psm_obj.sensitivity] } for p in prox_manager.sites_assim_proxy_objs()] filen = join(workdir, 'assimilated_proxies') np.save(filen, assimilated_proxies) # collecting info on non-assimilated proxies and save to file nonassimilated_proxies = [{ p.type: [p.id, p.lat, p.lon, p.time, p.psm_obj.sensitivity] } for p in prox_manager.sites_eval_proxy_objs()] if nonassimilated_proxies: filen = join(workdir, 'nonassimilated_proxies') np.save(filen, nonassimilated_proxies) exp_end_time = time() - begin_time if verbose > 0: print('') print('=====================================================') print('Experiment completed in ' + str(exp_end_time / 60.0) + ' mins') print('=====================================================') # TODO: best method for Ye saving? return prox_manager.sites_assim_proxy_objs( ), prox_manager.sites_eval_proxy_objs()
def load_analyses(cfg,
                  full_field=False,
                  lmr_gm=None,
                  lmr_time=None,
                  satime=1900,
                  eatime=1999,
                  svtime=1880,
                  evtime=1999):
    """Load instrumental temperature analyses as full fields or global means.

    The GISTEMP, HadCRUT, BerkeleyEarth and NOAA MLOST products are read
    through the ``load_gridded_data`` readers.  In global-mean mode the
    result is cached in ``analyses_<satime>_<eatime>_<svtime>_<evtime>.npz``
    in the current working directory and re-used on later calls.

    Parameters
    ----------
    cfg : object
        Configuration object.  Only ``cfg.psm.linear.datadir_calib`` is read,
        and only when the datasets have to be loaded from disk.
    full_field : bool
        If True, return the gridded anomaly fields (the cache is neither
        read nor written).  If False, return global-mean series.
    lmr_gm, lmr_time : array-like or None
        Optional LMR global-mean series and its time axis.  When ``lmr_gm``
        has any non-zero element it is cut to the verification window and
        added to the returned data under the key ``'LMR'``.
    satime, eatime : int
        Start and end year of the common reference period for anomalies.
    svtime, evtime : int
        Start and end year of the verification period.

    Returns
    -------
    tuple of four dicts
        ``(analysis_data, analysis_time, analysis_lat, analysis_lon)``.
        With ``full_field=True`` every dict is keyed by product name
        (``'GIS'``, ``'CRU'``, ``'BE'``, ``'MLOST'``).  Otherwise
        ``analysis_data`` holds the global means per product plus the
        consensus ``'CON'`` (and ``'LMR'`` if supplied), ``analysis_time``
        holds a single ``'time'`` entry, and the lat/lon dicts are empty
        (returned only for a consistent signature).
    """
    cache_file = ('analyses_'
                  + '_'.join(str(yr) for yr in (satime, eatime, svtime, evtime))
                  + '.npz')

    # check if a global-mean file has been written previously, and if yes, use it
    load = False
    if not full_field:
        try:
            # The cache stores a list of dicts, i.e. an object array, which
            # NumPy only reads back with allow_pickle=True.
            # SECURITY NOTE: unpickling executes code; this is acceptable
            # only because the file is a cache written by this function.
            # Do not point this at untrusted files.
            with np.load(cache_file, allow_pickle=True) as npzfile:
                print(cache_file + ' exists...loading it')
                load = True
                analyses = npzfile['analyses']
            analysis_data = analyses[0]
            analysis_time = analyses[1]
            analysis_lat = {}
            analysis_lon = {}
        except Exception:
            # Missing or unreadable cache: fall back to recomputing.
            # (Exception, not a bare except, so Ctrl-C still interrupts.)
            if load:
                print('analyses.npz exists, but error reading the file!!!')
            load = False

    if not load:
        # ==========================================
        # load GISTEMP, HadCRU, BerkeleyEarth, MLOST
        # ==========================================
        from load_gridded_data import read_gridded_data_GISTEMP
        from load_gridded_data import read_gridded_data_HadCRUT
        from load_gridded_data import read_gridded_data_BerkeleyEarth
        from load_gridded_data import read_gridded_data_MLOST

        analysis_data = {}
        analysis_time = {}
        analysis_lat = {}
        analysis_lon = {}

        # location of the datasets from the configuration file
        datadir_calib = cfg.psm.linear.datadir_calib

        # load GISTEMP
        print('loading GISTEMP...')
        [gtime, GIS_lat, GIS_lon,
         GIS_anomaly] = read_gridded_data_GISTEMP(datadir_calib,
                                                  'gistemp1200_ERSSTv4.nc',
                                                  ['Tsfc'], 'annual',
                                                  [satime, eatime])
        GIS_time = np.array([d.year for d in gtime])
        # fix longitude shift
        nlon_GIS = len(GIS_lon)
        GIS_lon = np.roll(GIS_lon, shift=nlon_GIS // 2, axis=0)
        GIS_anomaly = np.roll(GIS_anomaly, shift=nlon_GIS // 2, axis=2)
        analysis_data['GIS'] = GIS_anomaly
        analysis_time['GIS'] = GIS_time
        analysis_lat['GIS'] = GIS_lat
        analysis_lon['GIS'] = GIS_lon

        # load HadCRUT
        print('loading HadCRUT...')
        [ctime, CRU_lat, CRU_lon,
         CRU_anomaly] = read_gridded_data_HadCRUT(datadir_calib,
                                                  'HadCRUT.4.3.0.0.median.nc',
                                                  ['Tsfc'], 'annual',
                                                  [satime, eatime])
        CRU_time = np.array([d.year for d in ctime])
        # fix longitude shift
        nlon_CRU = len(CRU_lon)
        CRU_lon = np.roll(CRU_lon, shift=nlon_CRU // 2, axis=0)
        CRU_anomaly = np.roll(CRU_anomaly, shift=nlon_CRU // 2, axis=2)
        analysis_data['CRU'] = CRU_anomaly
        analysis_time['CRU'] = CRU_time
        analysis_lat['CRU'] = CRU_lat
        analysis_lon['CRU'] = CRU_lon

        # load BerkeleyEarth
        print('loading BEST...')
        [btime, BE_lat, BE_lon, BE_anomaly] = read_gridded_data_BerkeleyEarth(
            datadir_calib,
            'Land_and_Ocean_LatLong1.nc', ['Tsfc'],
            'annual',
            ref_period=[satime, eatime])
        BE_time = np.array([d.year for d in btime])
        # fix longitude shift
        nlon_BE = BE_lon.shape[0]
        BE_lon = np.roll(BE_lon, shift=nlon_BE // 2, axis=0)
        BE_anomaly = np.roll(BE_anomaly, shift=nlon_BE // 2, axis=2)
        analysis_data['BE'] = BE_anomaly
        analysis_time['BE'] = BE_time
        analysis_lat['BE'] = BE_lat
        analysis_lon['BE'] = BE_lon

        # load NOAA MLOST
        # Note: Product is anomalies w.r.t. 1961-1990 mean
        # (no longitude shift is applied to this product)
        print('loading MLOST...')
        [mtime, MLOST_lat, MLOST_lon,
         MLOST_anomaly] = read_gridded_data_MLOST(
             datadir_calib,
             'MLOST_air.mon.anom_V3.5.4.nc', ['Tsfc'],
             outfreq='annual',
             ref_period=[satime, eatime])
        MLOST_time = np.array([d.year for d in mtime])
        analysis_data['MLOST'] = MLOST_anomaly
        analysis_time['MLOST'] = MLOST_time
        analysis_lat['MLOST'] = MLOST_lat
        analysis_lon['MLOST'] = MLOST_lon

    if full_field:
        print('returning spatial fields...')
        return analysis_data, analysis_time, analysis_lat, analysis_lon

    if not load:
        products = (
            ('GIS', GIS_anomaly, GIS_lat, GIS_time),
            ('CRU', CRU_anomaly, CRU_lat, CRU_time),
            ('BE', BE_anomaly, BE_lat, BE_time),
            ('MLOST', MLOST_anomaly, MLOST_lat, MLOST_time),
        )
        verif_series = []
        for key, anomaly, lat, years in products:
            [gm, _, _] = LMR_utils.global_hemispheric_means(anomaly, lat)

            # set common reference period to define anomalies
            smatch, ematch = LMR_utils.find_date_indices(
                years, satime, eatime)
            gm = gm - np.mean(gm[smatch:ematch])

            # now pull out the time window for the verification time period
            smatch, ematch = LMR_utils.find_date_indices(
                years, svtime, evtime)
            analysis_data[key] = gm[smatch:ematch]
            verif_series.append(analysis_data[key])

        # "consensus" global mean: average all non-LMR (obs-based) values
        analysis_data['CON'] = np.mean(np.array(verif_series), axis=0)

        # for global mean, there is only one common time series and no lat,lon
        analysis_time = {'time': np.arange(svtime, evtime)}
        analysis_lat = {}
        analysis_lon = {}

        # save file for use next time
        analyses = [analysis_data, analysis_time]
        readme = 'this files contains gmt for analysis products with anomalies relative to a reference time period'
        print('writing to:' + cache_file)
        np.savez(cache_file, analyses=analyses, readme=readme)

    # LMR GMT was passed to this routine for inclusion in the dictionary
    if np.any(lmr_gm):
        lmr_smatch, lmr_ematch = LMR_utils.find_date_indices(
            lmr_time, svtime, evtime)
        analysis_data['LMR'] = lmr_gm[lmr_smatch:lmr_ematch]

    # lat and lon don't inform on global means, but consistent return with full field
    print('returning global means...')
    return analysis_data, analysis_time, analysis_lat, analysis_lon
def test_averaging_period_unique_warning():
    """Repeated elements must raise a UserWarning and yield (0, 2, 7)."""
    elements_with_repeats = [1, 8, 3, 1, 8]

    with pytest.warns(UserWarning):
        period = Utils.get_averaging_period(elements_with_repeats, 12)

    assert period == (0, 2, 7)
def compile_ens_var(parent_dir,
                    out_dir,
                    out_fname,
                    a_d_vals=None,
                    r_iters=None,
                    ignore_npz=False):
    """Compile analysis and prior ensemble variance across iterations.

    For every iteration directory (``r<N>`` under ``parent_dir``) and every
    optional ``(a, d)`` parameter sub-directory, the files
    ``ensemble_variance_tas_sfc_Amon.npz`` (key ``xav``) and
    ``prior_ensvar_tas_sfc_Amon.npz`` (key ``xbv``) are read, their global
    means are computed with ``utils2.global_mean2``, and everything is
    averaged over the iteration axis.  The result is written to
    ``join(out_dir, out_fname)`` with ``np.savez``.

    Parameters
    ----------
    parent_dir : str
        Directory holding the ``r*`` iteration directories.
    out_dir, out_fname : str
        Location of the compiled output.  If that file already exists and
        ``ignore_npz`` is False it is loaded and returned instead.
        NOTE(review): ``np.savez`` appends ``.npz`` when the name lacks it,
        so pass ``out_fname`` with the extension or the cache check will
        never match.
    a_d_vals : iterable of (a, d) pairs or None
        Parameter values selecting sub-directories named ``a<a>_d<d>``.
        None means the files live directly in each iteration directory.
    r_iters : iterable of int or None
        Iteration numbers to use; None means every ``r*`` match.
    ignore_npz : bool
        If True, recompute even when the output file exists.

    Returns
    -------
    dict or numpy NpzFile
        Mapping with keys ``times``, ``lats``, ``lons``, ``ens_var``,
        ``gm_ens_var``, ``pri_ens_var`` and ``pri_gm_ens_var``.  The cached
        path returns the object produced by ``np.load``.

    Raises
    ------
    IOError
        If not a single analysis variance file could be read.

    Notes
    -----
    Unreadable files are reported and skipped.  Their slots stay NaN, and
    because a plain mean is taken over iterations, those NaNs propagate
    into the averaged result.
    """
    out_path = join(out_dir, out_fname)
    if exists(out_path) and not ignore_npz:
        print('Loading pre-compiled ensemble variance metrics.')
        return np.load(out_path)

    # Get reconstruction iteration directory
    if r_iters is not None:
        print('Joining on r iters ', r_iters)
        parent_iters = [join(parent_dir, 'r{:d}'.format(r)) for r in r_iters]
    else:
        parent_iters = glob.glob(join(parent_dir, 'r*'))

    ens_var = None
    gm_ens_var = None
    pri_ens_var = None
    pri_gm_ens_var = None
    lats = None
    lons = None
    times = None

    for i, parent in enumerate(parent_iters):
        print('Compiling iteration {:d}/{:d}'.format(i + 1, len(parent_iters)))

        # Directories for each parameter value
        if a_d_vals is not None:
            ad_dir = 'a{:1.2g}_d{:1.2f}'
            ad_dir2 = 'a{:1.1f}_d{:1.2f}'
            param_iters = []
            for a, d in a_d_vals:
                curr_ad = ad_dir.format(a, d)
                # a of 0 or 1 may have been written as '0.0'/'1.0' instead
                # of '0'/'1'; fall back to that spelling when needed.
                if a == 1.0 or a == 0.0:
                    if not exists(join(parent, curr_ad)):
                        curr_ad = ad_dir2.format(a, d)
                param_iters.append(join(parent, curr_ad))
        else:
            param_iters = [parent]

        for j, f in enumerate(param_iters):
            try:
                # Load analysis ensemble variance (context manager closes
                # the underlying zip file once the arrays are copied out)
                with np.load(join(
                        f, 'ensemble_variance_tas_sfc_Amon.npz')) as analy_var:
                    xav = analy_var['xav']
                    if times is None:
                        # First readable file fixes the grid and allocates
                        # NaN-filled output arrays.
                        times = analy_var['years']
                        lats = analy_var['lat']
                        lons = analy_var['lon']
                        var_shape = [len(parent_iters), len(param_iters)] + \
                            list(xav.shape)
                        ens_var = np.full(var_shape, np.nan)
                        gm_ens_var = np.full(var_shape[:3], np.nan)
                        pri_ens_var = np.full_like(ens_var, np.nan)
                        pri_gm_ens_var = np.full_like(gm_ens_var, np.nan)

                ens_var[i, j] = xav
                gm_ens_var[i, j] = utils2.global_mean2(ens_var[i, j], lats)

                with np.load(join(f,
                                  'prior_ensvar_tas_sfc_Amon.npz')) as prior_var:
                    pri_ens_var[i, j] = prior_var['xbv']
                pri_gm_ens_var[i, j] = utils2.global_mean2(
                    pri_ens_var[i, j], lats)
            except IOError as e:
                print(e)

    if ens_var is None:
        # Nothing was readable; fail with a clear message instead of an
        # AttributeError on None further down.
        raise IOError('No ensemble variance files could be loaded from ' +
                      str(parent_dir))

    ens_var = ens_var.mean(axis=0).astype(np.float32)
    gm_ens_var = gm_ens_var.mean(axis=0)
    pri_ens_var = pri_ens_var.mean(axis=0).astype(np.float32)
    pri_gm_ens_var = pri_gm_ens_var.mean(axis=0)

    res_dict = {
        'times': times,
        'lats': lats,
        'lons': lons,
        'ens_var': ens_var.squeeze(),
        'gm_ens_var': gm_ens_var.squeeze(),
        'pri_ens_var': pri_ens_var.squeeze(),
        'pri_gm_ens_var': pri_gm_ens_var.squeeze()
    }

    os.makedirs(out_dir, exist_ok=True)
    np.savez(out_path, **res_dict)

    return res_dict
def test_calc_latlon_bnd_monotonic():
    """A reversed array in either argument position must raise ValueError."""
    increasing = np.linspace(0, 10, 11)
    decreasing = increasing[::-1]

    bad_argument_pairs = (
        (decreasing, increasing),
        (increasing, decreasing),
    )
    for first, second in bad_argument_pairs:
        with pytest.raises(ValueError):
            Utils.calculate_latlon_bnds(first, second)
def test_averaging_period_sorting():
    """Out-of-order elements must come back as an ascending period."""
    unsorted_elements = (-12, -11, -10, 3, 2, 1)
    expected_period = (9, 10, 11, 12, 13, 14)

    period = Utils.get_averaging_period(unsorted_elements, 12)

    assert period == expected_period
stateDim=stateDim, lat=lat_new, lon=lon_new, nlat=nlat_new, nlon=nlon_new) gmt = np.zeros([ntims]) k = -1 for t in range(startim, startim + ntims): k = k + 1 # make up some data with the right shape as in the LMR code (Ndof,Nens) Xa = np.random.randn(Ndof, Nens) xam = np.mean(Xa, axis=1) print('Xa shape: ' + str(np.shape(Xa))) # Dump Xa to file (to be used as prior for next assimilation) ypad = LMR_utils.year_fix(t) filen = workdir + '/' + 'year' + ypad np.save(filen, Xa) # compute global mean for check later xam_lalo = np.reshape(xam[0:stateDim], (nlat_new, nlon_new)) [gmt[k], _, _] = LMR_utils.global_hemispheric_means(xam_lalo, lat_new[:, 0]) # generate the ensemble-mean files as in LMR_wrapper.py LMR_utils.ensemble_mean(workdir) # # now "post-process" the file just written as in verify_grid_testing.py #
MCiters = range(iter_range[0], iter_range[1]+1) param_iterables = [MCiters] # get other parameters to sweep over in the reconstruction param_search = LMR_config.wrapper.param_search if param_search is not None: # sort them by parameter name and combine into a list of iterables sort_params = list(param_search.keys()) sort_params.sort(key=lambda x: x.split('.')[-1]) param_values = [param_search[key] for key in sort_params] param_iterables = param_values + [MCiters] for iter_and_params in itertools.product(*param_iterables): iter_num = iter_and_params[-1] cfg_dict = Utils.param_cfg_update('core.curr_iter', iter_num) if LMR_config.wrapper.multi_seed is not None: try: curr_seed = LMR_config.wrapper.multi_seed[iter_num] cfg_dict = Utils.param_cfg_update('core.seed', curr_seed, cfg_dict=cfg_dict) print('Setting current iteration seed: {}'.format(curr_seed)) except IndexError: print('ERROR: multi_seed activated but current MC iteration out of' ' range for list of seed values provided in config.') raise SystemExit(1) itr_str = 'r{:d}'.format(iter_num) # If parameter space search is being performed then set the current # search space values and create a special sub-directory
def prior_regrid(cfg, X, Xb_one, verbose=False):
    """Regrid every 2D lat/lon state variable of the prior ensemble.

    Each variable listed in ``X.full_state_info`` is cut out of ``X.ens``.
    Variables of type ``'2D:horizontal'`` are regridded with the method named
    by ``cfg.prior.regrid_method``; all others are passed through unchanged.
    The pieces are stacked, in order, into a new state array.

    Parameters
    ----------
    cfg : object
        Configuration.  Reads ``cfg.prior.regrid_method``, ``cfg.core.nens``
        and, depending on the method, ``cfg.prior.regrid_resolution``
        ('simple', 'spherical_harmonics') or ``cfg.prior.esmpy_grid_def`` and
        ``cfg.prior.esmpy_interp_method`` ('esmpy').
    X : object
        Prior object.  Reads ``X.ens``, ``X.coords`` and
        ``X.full_state_info``; writes ``X.trunc_state_info`` and, for
        regridded variables, ``X.prior_dict[var]['lat'/'lon']``.
    Xb_one : array-like
        Returned untouched only when the state has no variables; otherwise
        it is replaced by the newly built array.
    verbose : bool
        Currently unused; progress messages are always printed.

    Returns
    -------
    X : object
        The same object that was passed in, updated in place.
    Xb_one : numpy.ndarray or numpy.ma.MaskedArray
        The stacked state.  It is masked (fill value NaN) when it contains
        at least one NaN.

    Raises
    ------
    SystemExit
        If ``cfg.prior.regrid_method`` is not a recognized method.
    """
    # scraped from LMR_utils.py on 20 April 2018 and modified for local use

    # this block sets variables for compatability with original code
    prior = cfg.prior
    regrid_method = prior.regrid_method
    nens = cfg.core.nens
    Xb_one_full = X.ens

    # Declare dictionary w/ info on content of truncated state vector
    new_state_info = {}

    # Transform every 2D state variable, one at a time
    Nx = 0
    for var in list(X.full_state_info.keys()):
        full_info = X.full_state_info[var]
        dct = {}
        dct['vartype'] = full_info['vartype']

        # variable indices in full state vector
        ibeg_full = full_info['pos'][0]
        iend_full = full_info['pos'][1]
        # extract array corresponding to state variable "var"
        var_array_full = Xb_one_full[ibeg_full:iend_full + 1, :]

        # Are we truncating this variable? (i.e. is it a 2D lat/lon variable?)
        if full_info['vartype'] == '2D:horizontal':
            print(var, ' : 2D lat/lon variable, truncating this variable')
            # corresponding spatial coordinates
            coords_array_full = X.coords[ibeg_full:iend_full + 1, :]
            # lat/lon column indices in X.coords
            ind_lon = full_info['spacecoords'].index('lon')
            ind_lat = full_info['spacecoords'].index('lat')
            nlat = full_info['spacedims'][ind_lat]
            nlon = full_info['spacedims'][ind_lon]

            # calculate the truncated field
            if regrid_method == 'simple':
                # The resolution is taken from the prior configuration; it
                # was previously an undefined name in this function.
                # NOTE(review): assumes cfg.prior.regrid_resolution is the
                # intended source - confirm against the configuration.
                [var_array_new, lat_new, lon_new] = \
                    LMR_utils.regrid_simple(nens, var_array_full,
                                            coords_array_full, ind_lat,
                                            ind_lon, prior.regrid_resolution)
            elif regrid_method == 'spherical_harmonics':
                [var_array_new, lat_new, lon_new] = \
                    LMR_utils.regrid_sphere(nlat, nlon, nens, var_array_full,
                                            prior.regrid_resolution)
            elif regrid_method == 'esmpy':
                target_grid = prior.esmpy_grid_def
                lat_2d = coords_array_full[:, ind_lat].reshape(nlat, nlon)
                lon_2d = coords_array_full[:, ind_lon].reshape(nlat, nlon)
                [var_array_new, lat_new, lon_new] = LMR_utils.regrid_esmpy(
                    target_grid['nlat'],
                    target_grid['nlon'],
                    nens,
                    var_array_full,
                    lat_2d,
                    lon_2d,
                    nlat,
                    nlon,
                    method=prior.esmpy_interp_method)
            else:
                print('Exiting! Unrecognized regridding method.')
                raise SystemExit

            nlat_new = np.shape(lat_new)[0]
            nlon_new = np.shape(lat_new)[1]

            print(('=> Full array: ' + str(np.min(var_array_full)) + ' ' +
                   str(np.max(var_array_full)) + ' ' +
                   str(np.mean(var_array_full)) + ' ' +
                   str(np.std(var_array_full))))
            print(('=> Truncated array: ' + str(np.min(var_array_new)) + ' ' +
                   str(np.max(var_array_new)) + ' ' +
                   str(np.mean(var_array_new)) + ' ' +
                   str(np.std(var_array_new))))

            # updated dimension
            new_dims = (nlat_new * nlon_new)

            # corresponding indices in truncated state vector
            ibeg_new = Nx
            iend_new = Nx + new_dims - 1
            # for new state info dictionary
            dct['pos'] = (ibeg_new, iend_new)
            dct['spacecoords'] = full_info['spacecoords']
            dct['spacedims'] = (nlat_new, nlon_new)

            # LMR_lite specific mod: update lat,lon information in X for
            # later use.  Done only here because a new grid exists only for
            # regridded variables.
            X.prior_dict[var]['lat'] = lat_new
            X.prior_dict[var]['lon'] = lon_new
        else:
            print(var,
                  ' : not truncating this variable: no changes from full state')
            var_array_new = var_array_full
            # updated dimension
            new_dims = var_array_new.shape[0]
            ibeg_new = Nx
            iend_new = Nx + new_dims - 1
            dct['pos'] = (ibeg_new, iend_new)
            dct['spacecoords'] = full_info['spacecoords']
            dct['spacedims'] = full_info['spacedims']

        # fill in new state info dictionary
        new_state_info[var] = dct

        # if 1st time in loop over state variables, create Xb_one array as copy
        # of var_array_new
        if Nx == 0:
            Xb_one = np.copy(var_array_new)
        else:
            # if not 1st time, append to existing array
            Xb_one = np.append(Xb_one, var_array_new, axis=0)

        # making sure Xb_one has proper mask, if it contains
        # at least one invalid value
        if np.isnan(Xb_one).any():
            Xb_one = np.ma.masked_invalid(Xb_one)
            np.ma.set_fill_value(Xb_one, np.nan)

        # updating dimension of new state vector
        Nx = Nx + new_dims

    # end loop over vars
    X.trunc_state_info = new_state_info

    return X, Xb_one