def _get_anomaly_data(input_data, year_idx=None):
    """Build one anomaly entry per variable and dataset.

    The metadata is grouped by ``short_name`` and then by ``dataset``. For
    every group the 4xCO2 and piControl entries are selected, the anomaly
    cube is obtained from ``calculate_anomaly``, checked with
    ``_check_cube_dimensions`` and finally indexed with ``year_idx``.

    Parameters
    ----------
    input_data : list of dict
        Metadata dictionaries; the first element supplies ``project``.
    year_idx : optional
        Index applied to the anomaly cube (``cube[year_idx]``).

    Returns
    -------
    list of dict
        The first 4xCO2 metadata entry of every group, extended by the keys
        ``ancestors`` (both input filenames) and ``cube``.

    Raises
    ------
    ValueError
        If the 4xCO2 or the piControl experiment is missing for a group.

    """
    logger.info("Calculating anomaly data")
    project = input_data[0]['project']
    anomaly_entries = []
    by_variable = group_metadata(input_data, 'short_name')
    for var, var_data in by_variable.items():
        by_dataset = group_metadata(var_data, 'dataset')
        for dataset_name, datasets in by_dataset.items():
            logger.debug("Calculating '%s' anomaly for dataset '%s'", var,
                         dataset_name)
            exp_4x = EXP_4XCO2[project]
            data_4x = select_metadata(datasets, exp=exp_4x)
            data_pic = select_metadata(datasets, exp='piControl')

            # Both experiments are required to form the difference
            if not data_4x:
                raise ValueError(
                    f"No '{exp_4x}' data available for '{var}' of "
                    f"'{dataset_name}'")
            if not data_pic:
                raise ValueError(
                    f"No 'piControl' data available for '{var}' of "
                    f"'{dataset_name}'")

            # Anomaly -> dimension check -> selection of requested index
            anomaly = calculate_anomaly(data_4x, data_pic)
            _check_cube_dimensions(anomaly)
            anomaly = anomaly[year_idx]

            entry = dict(data_4x[0])
            entry['ancestors'] = [
                data_4x[0]['filename'],
                data_pic[0]['filename'],
            ]
            entry['cube'] = anomaly
            anomaly_entries.append(entry)

    if COORDS:
        msg = f" with additional coordinates {COORDS['rad']}"
    else:
        msg = ''
    logger.info("Found %iD 'tas' data and %iD radiation data%s",
                NDIMS.get('tas'), NDIMS.get('rad'), msg)
    return anomaly_entries
def _get_anomaly_data(input_data):
    """Build one 1D anomaly entry per variable and dataset.

    The metadata is grouped by ``short_name`` and then by ``dataset``; for
    each group the anomaly cube is obtained from ``_calculate_anomaly``
    using the 4xCO2 and the piControl entries.

    Parameters
    ----------
    input_data : list of dict
        Metadata dictionaries; the first element supplies ``project``.

    Returns
    -------
    list of dict
        The first 4xCO2 metadata entry of every group, extended by the keys
        ``ancestors`` (both input filenames) and ``cube``.

    Raises
    ------
    ValueError
        If one of the experiments is missing or the anomaly cube is not 1D.

    """
    logger.info("Calculating anomaly data")
    project = input_data[0]['project']
    anomaly_entries = []
    by_variable = group_metadata(input_data, 'short_name')
    for var, var_data in by_variable.items():
        by_dataset = group_metadata(var_data, 'dataset')
        for dataset_name, datasets in by_dataset.items():
            logger.debug("Calculating '%s' anomaly for dataset '%s'", var,
                         dataset_name)
            exp_4x = EXP_4XCO2[project]
            data_4x = select_metadata(datasets, exp=exp_4x)
            data_pic = select_metadata(datasets, exp='piControl')

            # Both experiments are required to form the difference
            if not data_4x:
                raise ValueError(
                    f"No '{exp_4x}' data available for '{var}' of "
                    f"'{dataset_name}'")
            if not data_pic:
                raise ValueError(
                    f"No 'piControl' data available for '{var}' of "
                    f"'{dataset_name}'")

            anomaly = _calculate_anomaly(data_4x, data_pic)
            n_dims = anomaly.ndim
            if n_dims != 1:
                raise ValueError(
                    "This diagnostic supports only 1D (time), input data, "
                    f"got {n_dims}D data")

            entry = dict(data_4x[0])
            entry['ancestors'] = [
                data_4x[0]['filename'],
                data_pic[0]['filename'],
            ]
            entry['cube'] = anomaly
            anomaly_entries.append(entry)
    return anomaly_entries
def main(cfg):
    """Run ``compute_diagnostic`` and ``plot_diagnostic`` on every input file.

    Besides the actual processing, the function logs examples of the
    metadata convenience functions (select, sort, group).

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration; ``cfg['input_data']`` maps to the metadata
        dictionaries of the preprocessed files.

    """
    # Metadata describing the preprocessed input files
    input_data = cfg['input_data'].values()

    # Showcase: selecting, sorting and grouping metadata
    cmip5_tas = select_metadata(input_data, short_name='tas', project='CMIP5')
    logger.info("Example of how to select only CMIP5 temperature data:\n%s",
                pformat(cmip5_tas))

    cmip5_tas = sorted_metadata(cmip5_tas, sort='dataset')
    logger.info("Example of how to sort this selection by dataset:\n%s",
                pformat(cmip5_tas))

    grouped_input_data = group_metadata(input_data,
                                        'variable_group',
                                        sort='dataset')
    logger.info(
        "Example of how to group and sort input data by variable groups from "
        "the recipe:\n%s", pformat(grouped_input_data))

    # Walk through variable groups and their datasets in sorted order
    groups = group_metadata(input_data, 'variable_group', sort='dataset')
    for group_name, members in groups.items():
        logger.info("Processing variable %s", group_name)
        for attributes in members:
            logger.info("Processing dataset %s", attributes['dataset'])
            input_file = attributes['filename']
            cube = compute_diagnostic(input_file)

            # Prefix the group name only if it differs from the short name
            stem = Path(input_file).stem
            if group_name == attributes['short_name']:
                output_basename = stem
            else:
                output_basename = group_name + '_' + stem

            provenance_record = get_provenance_record(
                attributes, ancestor_files=[input_file])
            plot_diagnostic(cube, output_basename, provenance_record, cfg)
def main(cfg):
    """Print the set of distinct models found for every project.

    For the project ``CORDEX`` a model is identified by the combination of
    ``driver`` and ``dataset``; for all other projects by ``dataset`` alone.

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration holding the ``input_data`` metadata.

    """
    # cfg carries all metadata handed over by the preprocessor
    logger.debug(cfg)

    by_project = group_metadata(cfg["input_data"].values(), "project")
    for project, entries in by_project.items():
        model_names = {
            (f"{ds['driver']} - {ds['dataset']}"
             if project == "CORDEX" else ds["dataset"])
            for ds in entries
        }
        print(f"{project} - {len(model_names)} models:")
        print(model_names)
def prepare_data(config):
    """Compute errors against the reference for both ``tos`` groups.

    Parameters
    ----------
    config : dict
        Diagnostic configuration; its ``input_data`` entries must contain
        the variable groups ``tos_zm`` and ``tos_eq`` with a ``cube`` key.

    Returns
    -------
    dict
        Keys ``zonal_mean_errors``, ``equatorials``, ``equatorial_ref`` and
        ``equatorial_errors``.

    """
    groups = group_metadata(config['input_data'].values(), 'variable_group')

    # Zonal-mean group: error of every dataset w.r.t. the reference cube
    zonal_group = groups["tos_zm"]
    zonal_ref = prepare_reference(zonal_group)['cube']
    zonal_errors = [
        calc_error(entry['cube'], zonal_ref) for entry in zonal_group
    ]

    # Equatorial group: everything is passed through mask_equatorial first
    equatorial_group = groups["tos_eq"]
    equatorial_ref = mask_equatorial(
        prepare_reference(equatorial_group)['cube'])
    equatorials = [
        mask_equatorial(entry['cube']) for entry in equatorial_group
    ]
    equatorial_errors = [
        calc_error(cube, equatorial_ref) for cube in equatorials
    ]

    return {
        'zonal_mean_errors': zonal_errors,
        'equatorials': equatorials,
        'equatorial_ref': equatorial_ref,
        'equatorial_errors': equatorial_errors,
    }
def do_preamble(cfg):
    """Create the per-season plot directories and group the input data.

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration; ``plot_dir`` and ``input_data`` are read.

    Returns
    -------
    tuple
        ``(input_data, grouped_input_data)`` where ``input_data`` is the
        view of all metadata dictionaries and ``grouped_input_data`` is the
        same metadata grouped by ``short_name`` and sorted by ``dataset``.

    """
    # One output directory per time chunk. ``exist_ok=True`` avoids the
    # check-then-create race of a separate ``os.path.exists`` test.
    time_chunks = ['alltime', 'DJF', 'MAM', 'JJA', 'SON']
    for t_dir in time_chunks:
        os.makedirs(os.path.join(cfg['plot_dir'], t_dir), exist_ok=True)

    # get data
    input_data = cfg['input_data'].values()
    grouped_input_data = group_metadata(input_data,
                                        'short_name',
                                        sort='dataset')

    return input_data, grouped_input_data
def main(cfg):
    """Plot annual area-average time series for every variable.

    The input metadata is grouped by dataset, logged, regrouped by variable
    via ``common_emip_funcs.group_meta_by_var`` and each variable's list of
    area means is passed to ``common_emip_funcs.plot_timeseries``.

    Parameters
    ----------
    cfg : dict
        Nested configuration dictionary; ``plot_dir`` and ``input_data``
        are read.

    Returns
    -------
    None

    """
    # Settings forwarded unchanged to the plotting helper
    plt_config = {
        'ggplot': True,
        'out_dir': cfg['plot_dir'],
        'plt_name': 'time_series-initial_analysis-giss-{}.pdf',
        'time_interval': 'annual',
        'title': 'Annual Area Average - {}'
    }

    # Metadata keyed by dataset name
    file_dict = group_metadata(cfg['input_data'].values(), 'dataset')
    common_emip_funcs.log_meta_dict(file_dict, main_log)

    # Metadata keyed by variable name; each value lists the metadata
    # dictionaries of that variable
    var_groups = common_emip_funcs.group_meta_by_var(file_dict)

    # Process the variables one at a time
    for dict_list in var_groups.values():
        area_means = []
        for var_dict in dict_list:
            area_means.append(ESMVariable(var_dict).get_area_statistic('mean'))
        common_emip_funcs.plot_timeseries(area_means, plt_config)
def main(cfg):
    """Correlate the time mean of every dataset with a reference dataset.

    The input is grouped by ``standard_name``. Within each group the
    reference is the entry whose ``reference_dataset`` equals its own
    ``dataset``; all other entries are time-averaged, correlated against it
    with ``pearsonr`` and plotted.

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration; reads ``input_data``, ``plot_type`` and
        the optional ``pearsonr`` keyword arguments.

    Raises
    ------
    ValueError
        If a group contains no reference dataset.

    """
    input_data = group_metadata(cfg['input_data'].values(),
                                'standard_name',
                                sort='dataset')

    for standard_name, group in input_data.items():
        logger.info("Processing variable %s", standard_name)

        # Locate and load the reference dataset of this group
        for attributes in group:
            if attributes['reference_dataset'] != attributes['dataset']:
                continue
            reference_name = attributes['dataset']
            logger.info("Using %s as a reference dataset", reference_name)
            reference_filename = attributes['filename']
            reference = iris.load_cube(reference_filename)
            reference = reference.collapsed('time', MEAN)
            logger.info("Reference cube:\n%s\n%s", reference_filename,
                        reference)
            break
        else:
            raise ValueError("No reference_dataset defined in recipe.")

        # Correlate every non-reference dataset with the reference
        for attributes in group:
            if attributes['dataset'] == reference_name:
                continue
            logger.info("Processing dataset %s", attributes['dataset'])

            filename = attributes['filename']
            dataset = iris.load_cube(filename)
            kwargs = cfg.get('pearsonr', {})
            logger.info(
                "Computing correlation with settings %s between "
                "reference and cube:\n%s\n%s", kwargs, filename, dataset)
            dataset = dataset.collapsed('time', MEAN)
            cube = pearsonr(dataset, reference, **kwargs)

            file_stem = os.path.splitext(os.path.basename(filename))[0]
            name = f'{file_stem}_correlation_with_{reference_name}'
            provenance_record = get_provenance_record(
                attributes,
                ancestor_files=[reference_filename, filename],
                plot_type=cfg['plot_type'])
            plot_diagnostic(cube, name, provenance_record, cfg)
def main(cfg):
    """Convert the input of every dataset into LISFLOOD forcing files.

    Per dataset: load the cubes, shift the ERA5 time coordinate where
    applicable, derive vapour pressure and wind speed, then reshape every
    variable into an xarray dataset, save it and log its provenance.

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration holding the ``input_data`` metadata.

    """
    input_metadata = cfg['input_data'].values()
    logger.info(input_metadata)

    by_dataset = group_metadata(input_metadata, 'dataset')
    for dataset, metadata in by_dataset.items():
        cubes, ancestors = get_input_cubes(metadata)

        if dataset == 'ERA5':
            for short_name in ('tas', 'tdps', 'uas', 'vas'):
                shift_era5_time_coordinate(cubes[short_name])

        # Derived inputs for lisvap; the source cubes leave the collection
        tdps, uas, vas = (cubes.pop(key) for key in ('tdps', 'uas', 'vas'))
        cubes['e'] = compute_vapour_pressure(tdps)
        ancestors['e'] = ancestors['tdps']
        cubes['sfcWind'] = compute_windspeed(uas, vas)
        ancestors['sfcWind'] = ancestors['uas'] + ancestors['vas']

        cubes['pr'].units = 'mm d-1'

        for var_name, cube in cubes.items():
            # Western hemisphere longitudes should be negative
            lon_coord = cube.coord('longitude')
            points = lon_coord.points
            cube.coord('longitude').points = (points + 180) % 360 - 180

            # Reverse the second dimension so that latitudes decrease
            flipped = cube[:, ::-1, ...]

            # Convert to an xarray dataset, drop every coordinate other
            # than lon/lat/time and put the variables in a fixed order
            xrds = xr.DataArray.from_iris(flipped).to_dataset()
            ordered_coords = ['lon', 'lat', 'time']
            extra_coords = np.setdiff1d(xrds.coords, ordered_coords)
            selection = ordered_coords + [var_name]
            xrds = xrds.drop(extra_coords)[selection]

            output_file = save(xrds, var_name, dataset, cfg)

            # Store provenance
            provenance_record = get_provenance_record(ancestors[var_name])
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(output_file, provenance_record)
def setup_namelist(cfg):
    """Link the input files and write the ``namelist`` file for cvdp.

    The input metadata is grouped by ``alias``. Every file is linked with
    ``create_link`` and one namelist entry (alias, link path, start year,
    end year) is written per alias, taken from the first entry of the group.

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration; reads ``input_data``, ``lnk_dir`` and
        ``run_dir``.

    """
    input_data = cfg['input_data'].values()
    grouped_selection = group_metadata(input_data, 'alias')

    content = []
    for attributes in grouped_selection.values():
        for item in attributes:
            create_link(cfg, item["filename"], item['alias'])

        ppath = f"{cfg['lnk_dir']}/"
        first = attributes[0]
        alias = first["alias"]
        content.append(
            f"{alias} | {ppath}{alias} | "
            f"{first['start_year']} | {first['end_year']}\n")

    namelist = os.path.join(cfg['run_dir'], "namelist")
    with open(namelist, 'w') as namelist_file:
        namelist_file.write("\n".join(content))
def main(cfg):
    """Plot every variable group with the function chosen by ``plot_type``.

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration; reads ``plot_type`` and ``input_data``.

    Raises
    ------
    ValueError
        If ``plot_type`` is neither ``climatology`` nor ``timeseries``
        (raised while processing the first variable group).

    """
    plot_type = cfg['plot_type']
    variable_groups = group_metadata(cfg['input_data'].values(),
                                     'variable_group')

    # Supported plot types and the functions implementing them
    plotters = {
        'climatology': plot_climatology,
        'timeseries': plot_timeseries,
    }

    for group_metadata_list in variable_groups.values():
        try:
            plotter = plotters[plot_type]
        except KeyError as err:
            raise ValueError(f'Unknown plot_type: {plot_type!r}') from err
        else:
            plotter(cfg, metadata=group_metadata_list)
def main(cfg):
    """Convert the input of every dataset into LISFLOOD forcing files.

    Per dataset: load the cubes, shift the ERA5 time coordinate where
    applicable, derive vapour pressure and wind speed, then adjust the
    coordinates of every variable, save it and log its provenance.

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration holding the ``input_data`` metadata.

    """
    input_metadata = cfg['input_data'].values()
    logger.info(input_metadata)

    by_dataset = group_metadata(input_metadata, 'dataset')
    for dataset, metadata in by_dataset.items():
        cubes, ancestors = get_input_cubes(metadata)

        if dataset == 'ERA5':
            for short_name in ('tas', 'tdps', 'uas', 'vas'):
                shift_era5_time_coordinate(cubes[short_name])

        # Derived inputs for lisvap; the source cubes leave the collection
        tdps, uas, vas = (cubes.pop(key) for key in ('tdps', 'uas', 'vas'))
        cubes['e'] = compute_vapour_pressure(tdps)
        ancestors['e'] = ancestors['tdps']
        cubes['sfcWind'] = compute_windspeed(uas, vas)
        ancestors['sfcWind'] = ancestors['uas'] + ancestors['vas']

        cubes['pr'].units = 'mm d-1'

        for var_name, cube in cubes.items():
            cube.remove_coord('shape_id')

            # Western hemisphere longitudes should be negative
            lon_coord = cube.coord('longitude')
            points = lon_coord.points
            cube.coord('longitude').points = (points + 180) % 360 - 180

            # Reverse the second dimension so that latitudes decrease
            flipped = cube[:, ::-1, ...]

            output_file = save(flipped, var_name, dataset, cfg)

            # Store provenance
            provenance_record = get_provenance_record(ancestors[var_name])
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(output_file, provenance_record)
def _get_multi_model_mean(input_data):
    """Append a multi-model mean entry for every variable to ``input_data``.

    The metadata is grouped by ``short_name``. For each variable the data
    of all 1D cubes is stacked and averaged along the dataset axis; the
    result is stored in a copy of the last cube of the group.

    Parameters
    ----------
    input_data : list of dict
        Metadata dictionaries with the keys ``project``, ``dataset``,
        ``ancestors`` and ``cube``. The list is extended in place.

    Returns
    -------
    list of dict
        The same ``input_data`` list, now including one ``MultiModelMean``
        entry per variable.

    Raises
    ------
    KeyError
        If an entry has no ``cube`` key (chained to the original error).
    ValueError
        If a cube has more than one dimension.

    """
    logger.info("Calculating multi-model means")
    project = input_data[0]['project']
    mmm_data = []
    for (var, datasets) in group_metadata(input_data, 'short_name').items():
        logger.debug("Calculating multi-model mean for variable '%s'", var)
        ancestors = []
        dataset_names = []
        mmm = []
        for dataset in datasets:
            try:
                cube = dataset['cube']
            except KeyError as exc:
                # Chain the original error so the traceback shows the cause
                raise KeyError(
                    f"No data for '{var}' of dataset '{dataset['dataset']}' "
                    f"for multi-model mean calculation") from exc
            if cube.ndim > 1:
                raise ValueError(
                    f"Calculation of multi-model mean not supported for input "
                    f"data with more than one dimension (which should be "
                    f"time), got {cube.ndim:d}-dimensional cube")
            ancestors.extend(dataset['ancestors'])
            dataset_names.append(dataset['dataset'])
            mmm.append(cube.data)

        # Average over the dataset axis; the last cube of the group serves
        # as template for coordinates and metadata
        mmm = np.ma.array(mmm)
        mmm_cube = cube.copy(data=np.ma.mean(mmm, axis=0))
        attributes = {
            'ancestors': ancestors,
            'dataset': 'MultiModelMean',
            'datasets': '|'.join(dataset_names),
            'project': project,
            'short_name': var,
        }
        mmm_cube.attributes = attributes
        mmm_data.append({**attributes, 'cube': mmm_cube})

    # Extend only after grouping so the means never feed back into it
    input_data.extend(mmm_data)
    return input_data
def get_anomalies(ds_list, relative=False):
    """Return the difference between the latest and earliest period.

    The entries of ``ds_list`` are distinguished by ``start_year``: the
    smallest start year marks the baseline, the largest the future period.
    For each, the file of the first matching entry is loaded.

    Parameters
    ----------
    ds_list : list of dict
        Metadata dictionaries with ``start_year`` and ``filename``.
    relative : bool, optional
        If True, return the change in percent of the baseline (units
        ``%``); otherwise the absolute difference.

    Returns
    -------
    Cube
        Future cube minus baseline cube, optionally relative.

    """
    # Baseline = earliest start year, future = latest start year
    start_years = list(group_metadata(ds_list, "start_year"))
    base_clim_start, fut_clim_start = min(start_years), max(start_years)

    base_entries = select_metadata(ds_list, start_year=base_clim_start)
    base_cube = iris.load_cube(base_entries[0]["filename"])

    fut_entries = select_metadata(ds_list, start_year=fut_clim_start)
    fut_cube = iris.load_cube(fut_entries[0]["filename"])

    if not relative:
        return fut_cube - base_cube

    difference = fut_cube - base_cube
    anomaly = (difference / base_cube) * 100.0
    anomaly.units = "%"
    return anomaly
def _compute_dataset(self, alias, dataset):
    """Compute the negative sea-ice feedback for one dataset.

    Loads ``areacello`` and ``sit``, builds a mask of points with latitude
    above 80.0, computes the volume via ``self.compute_volume`` and passes
    it to ``self.negative_seaice_feedback``.

    Parameters
    ----------
    alias : str
        Name used in the log message.
    dataset : list of dict
        Metadata entries of the dataset; must contain ``areacello`` and
        ``sit`` variables.

    Returns
    -------
    tuple
        ``(neg_feedback, stats[1])``; the second value is logged as the
        p-value.

    """
    var_info = group_metadata(dataset, 'short_name')
    logger.info('Computing %s', alias)

    areacello_file = var_info['areacello'][0]['filename']
    area_cello = iris.load_cube(areacello_file)
    cellarea = area_cello.data
    sit_file = var_info['sit'][0]['filename']
    sit = iris.load_cube(sit_file)

    latitudes = sit.coord('latitude').points
    mask = np.asarray(latitudes > 80.0, dtype=np.int8)

    # Bring the mask to the shape of the cell area: first as is, then with
    # a trailing axis, finally with a leading axis
    target_shape = cellarea.shape
    try:
        mask = np.broadcast_to(mask, target_shape)
    except ValueError:
        try:
            mask = np.broadcast_to(np.expand_dims(mask, -1), target_shape)
        except ValueError:
            mask = np.broadcast_to(np.expand_dims(mask, 0), target_shape)

    volume = self.compute_volume(sit, cellarea, mask=mask)
    # Drop references to large arrays that are no longer needed
    del cellarea, sit

    neg_feedback, stats, _ = self.negative_seaice_feedback(
        var_info['sit'][0], volume, period=12, order=2)
    del volume

    p_value = stats[1]
    logger.info("Negative feedback: %10.4f", neg_feedback)
    logger.info("P-Value: %10.4f", p_value)
    return (neg_feedback, p_value)
def main(cfg):
    """Calculate linear regression between albedo and xxfrac.

    The input is grouped by dataset. The dataset ``Duveiller2018`` is only
    plotted; for every other dataset the albedo, snow cover and land cover
    fraction cubes are loaded, masked, used to reconstruct land-cover
    specific albedos, written to disk and plotted.

    Arguments:
    ---------
        cfg - nested dictionary of metadata; ``cfg['params']['lc3_class']``
              is overwritten for every model dataset with the requested
              classes that this dataset actually provides.

    """
    # Assemble the data dictionary keyed by dataset name
    my_files_dict = group_metadata(cfg['input_data'].values(), 'dataset')
    all_short_names = [
        'alb', 'snc', 'cropFrac', 'treeFrac', 'grassFrac', 'shrubFrac',
        'pastureFrac'
    ]

    # lc3 classes as requested in the recipe. They are captured once (on
    # the first model dataset) because cfg['params']['lc3_class'] is
    # replaced per dataset below; filtering the already filtered value
    # would narrow the list cumulatively from one dataset to the next.
    requested_lc3_class = None

    # Loop over all datasets
    for dataset_name in my_files_dict:
        dataset_dict = my_files_dict[dataset_name]
        if dataset_name == 'Duveiller2018':
            logger.info("Only do plotting for dataset %s", dataset_name)
            cube = iris.load_cube(dataset_dict[0]['filename'])
            # Set plot title and plot suptitle
            cube.attributes['plottitle'] = cube.coord('time').units.num2date(
                cube.coord('time').points)[0].strftime('%b') + '-'\
                + 'Duveiller2018'
            cube.attributes['model_id'] = 'Duveiller2018'
            _plot_cube(cube, cfg)
            continue
        logger.info("Starting diagnostic for dataset %s", dataset_name)

        # Make the metadata accessible by short name
        datadict = {}
        for file_dict in dataset_dict:
            if file_dict['short_name'] in all_short_names:
                datadict[file_dict['short_name']] = file_dict

        # Define the different lc classes
        this_models_xxfracs = [key for key in datadict if 'Frac' in key]

        # Note that lc3 class depends on the classes available for this
        # model; always start from the originally requested classes
        if requested_lc3_class is None:
            requested_lc3_class = cfg['params']['lc3_class']
        cfg['params']['lc3_class'] = [
            key for key in this_models_xxfracs if key in requested_lc3_class
        ]

        # Load all data
        model_data = {
            frac_key: iris.load_cube(datadict[frac_key]['filename'])
            for frac_key in this_models_xxfracs
        }

        # Load albedo and snow cover
        model_data['alb'] = iris.load_cube(datadict['alb']['filename'])
        model_data['snc'] = iris.load_cube(datadict['snc']['filename'])

        # Make sure that for each cube the dimension equals 2
        assert {c.ndim for _, c in model_data.items()} == {2}

        # Add the appropriate masks to model_data
        model_data = _add_masks_albedolandcover(model_data,
                                                this_models_xxfracs, cfg)

        # Now get albedo change due to landcover change
        alb_lc = _get_reconstructed_albedos(model_data, cfg)

        # Now mask where albedo values are physically impossible
        alb_lc[alb_lc < 0] = np.nan
        alb_lc[alb_lc > 1] = np.nan

        # Calculate differences between them and save
        _write_albedochanges_to_disk(alb_lc, model_data['snc'], datadict, cfg)

        # Loop through all nc files and plot them
        for ncfile in glob.glob(os.path.join(cfg['work_dir'], '*.nc')):
            transition_cube = iris.load_cube(ncfile)
            _plot_cube(transition_cube, cfg)
def compute(self):
    """Compute per-gridpoint ``tas`` trends and climatologies per dataset.

    The input metadata is grouped by ``alias``. For every alias the ``tas``
    file is loaded, converted to degC, a linear fit over time is computed
    at every (latitude, longitude) point and a climatology is obtained via
    ``climate_statistics``. Results are collected in local dictionaries
    keyed by alias, separately for the experiments ``historical`` and
    ``ssp585``. Nothing is returned.
    """
    print('----------- COMPUTE ----------')
    # ---------------------------------------------------------------------
    # Every dataset in the recipe is associated with an alias. We are going
    # to use the alias and the group_metadata shared function to loop over
    # the datasets.
    # ---------------------------------------------------------------------
    data = group_metadata(self.cfg['input_data'].values(), 'alias')
    ssp_trend = {}
    ssp_clim = {}
    hist_trend = {}
    hist_clim = {}
    # Loop over the datasets.
    for alias in data:
        # The experiment is taken from the first entry of the alias group.
        exp = data[alias][0]['exp']
        variables = group_metadata(data[alias], 'short_name')
        # Returns the path to the preprocessed files.
        tas_file = variables['tas'][0]['filename']
        tas = iris.load(tas_file)[0]
        tas.convert_units('degC')

        # Calculate Trends
        nlat = tas.coord('latitude').shape[0]
        nlon = tas.coord('longitude').shape[0]
        lat = tas.coord('latitude').points
        lon = tas.coord('longitude').points
        # Time axis for the fit: 1, 2, ..., number of time steps
        time_array = np.arange(1, tas.coord('time').shape[0] + 1, 1)
        regr = np.zeros([nlat, nlon])
        # assumes tas is ordered (time, latitude, longitude) — TODO confirm
        for j in range(nlat):
            for k in range(nlon):
                # First-degree fit; p[0] is the slope per time step
                p = np.polyfit(time_array, tas[:, j, k].data, 1)
                regr[j, k] = p[0] * 10  # the 10 is to convert to decadal

        # Wrap the slope field in a new cube with lat/lon coordinates
        latitude = DimCoord(lat, standard_name='latitude', units='degrees')
        longitude = DimCoord(lon, standard_name='longitude', units='degrees')
        regr_cube = Cube(regr,
                         dim_coords_and_dims=[(latitude, 0), (longitude, 1)])

        # ---------- remask ----------
        # The trend field is a plain array, so any mask of the input is
        # lost; the mask is therefore applied again here.
        # NOTE(review): the fx path below looks like a placeholder —
        # confirm the real location before running.
        output_trend = mask_landsea(regr_cube,
                                    ['/blablabla/where/the/fx/at/'], 'sea',
                                    True)

        # Save the output trends in the cube dict
        output_trend.standard_name = None
        output_trend.long_name = 'tas_trend_med'
        output_trend.short_name = 'tastrend'

        # Calculate Climatology
        output_clim = climate_statistics(tas)
        output_clim.standard_name = None
        output_clim.long_name = 'tas_clim_med'
        output_clim.short_name = 'tasclim'

        # Save diagnosed dataset to dict.
        # TODO: what about averaging first?
        if exp == 'historical':
            hist_trend[alias] = output_trend
            hist_clim[alias] = output_clim
        if exp == 'ssp585':
            ssp_trend[alias] = output_trend
            ssp_clim[alias] = output_clim

        # Save the outputs for each dataset.
        #self.save(output, alias, data)
        # Plot the results.
        #self.plot_2D(total, data)

    # NOTE(review): ssp_trend is the plain dict created above; a dict has
    # no ``variables`` attribute, so the next line raises AttributeError —
    # confirm what was meant to be printed.
    print(ssp_trend.variables)
    print(len(ssp_trend))
def main(cfg):
    """Compute anomalies per project/model and produce the comparison plots.

    The input metadata is grouped by project and dataset (plus driver for
    CORDEX-like projects and ensemble for UKCP18). For every group the
    anomaly is computed with ``get_anomalies`` (relative change when the
    processed variable is ``pr``). The results are reorganised per season,
    plotted, written to text files and, for JJA, pickled to
    ``sample_plotting_data.pkl`` in ``cfg['work_dir']``.

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration; reads ``input_data`` and ``work_dir``.

    """
    # The config object is a dict of all the metadata from the pre-processor

    # set global plotting settings
    plt.rcParams.update({'font.size': 18})

    # get variable processed; precipitation is shown as relative change
    var = get_var(cfg)
    rel_change = var == "pr"

    # first group datasets by project..
    # this creates a dict of datasets keyed by project (CMIP5, CMIP6 etc.)
    projects = group_metadata(cfg["input_data"].values(), "project")
    # how to uniquely define a dataset varies by project, for CMIP it's
    # simple, just dataset...
    # for CORDEX, combo of dataset and driver (and possibly also domain if
    # we start adding those)
    # also gets more complex if we start adding in different ensembles..

    # This section of the code loads and organises the data to be ready for
    # plotting
    logger.info("Loading data")
    # empty dict to store results
    projections = {}
    model_lists = {}
    cordex_drivers = []
    cordex_rcms = []
    # loop over projects
    for proj in projects:
        # we now have a list of all the data entries..
        # for CMIPs we can just group metadata again by dataset then work
        # with that..
        models = group_metadata(projects[proj], "dataset")

        # map the auxiliary project names onto the main categories
        if proj == 'non-cordex-rcm':
            proj = 'CORDEX'

        if proj == 'non-cmip5-gcm':
            proj = 'CMIP5'

        # empty dict for results
        if proj not in projections.keys():
            projections[proj] = {}

        # loop over the models
        for m in models:
            if "CORDEX" in proj.upper():
                # then we need to go one deeper in the dictionary to deal
                # with driving models
                drivers = group_metadata(models[m], "driver")
                projections[proj][m] = dict.fromkeys(drivers.keys())
                for d in drivers:
                    logging.info(f"Calculating anomalies for {proj} {m} {d}")
                    anoms = get_anomalies(drivers[d], rel_change)
                    if anoms is None:
                        continue
                    projections[proj][m][d] = anoms
                    if proj not in model_lists:
                        model_lists[proj] = []
                    model_lists[proj].append(f"{m} {d}")
                    cordex_drivers.append(d)
                    cordex_rcms.append(m)
            elif proj == "UKCP18":
                # go deeper to deal with ensembles and datasets
                # split UKCP into separate GCM and RCM
                proj_key = f"UKCP18 {m}"
                ensembles = group_metadata(models[m], "ensemble")
                projections[proj_key] = dict.fromkeys(ensembles.keys())
                for ens in ensembles:
                    logging.info(f"Calculating anomalies for {proj_key} {ens}")
                    anoms = get_anomalies(ensembles[ens], rel_change)
                    if anoms is None:
                        continue
                    projections[proj_key][ens] = anoms
                    if proj_key not in model_lists:
                        model_lists[proj_key] = []
                    model_lists[proj_key].append(f"{proj_key} {ens}")
            else:
                logging.info(f"Calculating anomalies for {proj} {m}")
                anoms = get_anomalies(models[m], rel_change)
                if anoms is None:
                    continue
                projections[proj][m] = anoms
                if proj not in model_lists:
                    model_lists[proj] = []
                model_lists[proj].append(f"{m}")

        # remove any empty categories (i.e. UKCP18 which has been split
        # into rcm and gcm)
        if projections[proj] == {}:
            del projections[proj]

    cordex_drivers = set(cordex_drivers)
    cordex_rcms = set(cordex_rcms)

    # reorganise and extract data for plotting
    # NOTE(review): ``anoms`` is whatever the last get_anomalies call
    # returned; it is unbound if no anomaly was computed — confirm.
    n_seasons = len(anoms.coord('season_number').points)
    plotting_dict = proj_dict_to_season_dict(projections, n_seasons)

    for season in plotting_dict.keys():
        # this section of the code does all the plotting..
        # mega scatter plot
        # need to prepare subsets of projects
        gcm_sc, rcm_sc1, labels1 = prepare_scatter_data(
            plotting_dict[season]['CMIP5'], plotting_dict[season]['CORDEX'],
            'CORDEX')
        rcm_sc2, cpm_sc, labels2 = prepare_scatter_data(
            plotting_dict[season]['CORDEX'],
            plotting_dict[season]['cordex-cpm'], 'CPM')
        mega_scatter(gcm_sc, rcm_sc1, rcm_sc2, cpm_sc,
                     list(plotting_dict[season]['CMIP5'].values()),
                     list(plotting_dict[season]['CORDEX'].values()), labels1,
                     labels2, f'{season}')

        # simpler scatter for UKCP
        if 'UKCP18 land-gcm' in plotting_dict[season].keys():
            UKCP_g, UKCP_r, UKCP_labels = prepare_scatter_data(
                plotting_dict[season]['UKCP18 land-gcm'],
                plotting_dict[season]['UKCP18 land-rcm'], "UKCP18")
            simpler_scatter(UKCP_g, UKCP_r, UKCP_labels, f'UKCP_{season}')

        # side by side plots / dots for all models plus Glen's method...
        # NOTE(review): the CMIP6 branch uses UKCP_g/UKCP_r, which are only
        # bound when 'UKCP18 land-gcm' is present — confirm both are
        # always supplied together.
        if 'CMIP6' in plotting_dict[season].keys():
            data_for_plotting = [
                plotting_dict[season]['CMIP6'].values(),
                plotting_dict[season]['CMIP5'].values(), rcm_sc1, cpm_sc,
                UKCP_g, UKCP_r
            ]
            labels_for_plotting = [
                'CMIP6', 'CMIP5', 'CORDEX', 'CPM', 'UKCP_g', 'UKCP_r'
            ]
        else:
            data_for_plotting = [
                plotting_dict[season]['CMIP5'].values(), rcm_sc1, cpm_sc
            ]
            labels_for_plotting = [
                'CMIP5',
                'CORDEX',
                'CPM',
            ]
        plot_datasets(data_for_plotting, labels_for_plotting, season)

        # save some plotting data for notebook experiments
        # create dictionary of all the required data for one particular
        # season
        # NOTE(review): the 'CMIP6' and 'UKCP18 ...' keys are read
        # unconditionally here and in the text export below — confirm they
        # are always present.
        if season == 'JJA':
            pickle_dict = {}
            pickle_dict['CMIP5_sc'] = gcm_sc
            pickle_dict['RCM_sc1'] = rcm_sc1
            pickle_dict['RCM_sc2'] = rcm_sc2
            pickle_dict['labels1'] = labels1
            pickle_dict['labels2'] = labels2
            pickle_dict['cpm'] = cpm_sc
            pickle_dict['CMIP6'] = list(
                plotting_dict[season]['CMIP6'].values())
            pickle_dict['CMIP5'] = list(
                plotting_dict[season]['CMIP5'].values())
            pickle_dict['CORDEX'] = list(
                plotting_dict[season]['CORDEX'].values())
            pickle_dict['UKCP18 land-gcm'] = plotting_dict[season][
                'UKCP18 land-gcm']
            pickle_dict['UKCP18 land-rcm'] = plotting_dict[season][
                'UKCP18 land-rcm']
            # Context manager guarantees the file is flushed and closed
            pickle_path = f'{cfg["work_dir"]}/sample_plotting_data.pkl'
            with open(pickle_path, 'wb') as pickle_file:
                pickle.dump(pickle_dict, pickle_file)

        # save details of values used for plotting the boxplots
        save_anoms_txt(plotting_dict[season]['CMIP6'],
                       f'{cfg["work_dir"]}/CMIP6_{season}.txt')
        save_anoms_txt(plotting_dict[season]['CMIP5'],
                       f'{cfg["work_dir"]}/CMIP5_{season}.txt')
        save_anoms_txt(plotting_dict[season]['CORDEX'],
                       f'{cfg["work_dir"]}/CORDEX_{season}.txt')
        save_anoms_txt(plotting_dict[season]['cordex-cpm'],
                       f'{cfg["work_dir"]}/CPM_{season}.txt')
        save_anoms_txt(plotting_dict[season]['UKCP18 land-gcm'],
                       f'{cfg["work_dir"]}/UKCP_gcm_{season}.txt')
        save_anoms_txt(plotting_dict[season]['UKCP18 land-rcm'],
                       f'{cfg["work_dir"]}/UKCP_rcm_{season}.txt')

    # print all datasets used
    print("Input models for plots:")
    for p in model_lists.keys():
        print(f"{p}: {len(model_lists[p])} models")
        print(model_lists[p])
        print("")
def main(cfg):
    """Run the diagnostic.

    Time means of all datasets are accumulated per experiment and variable,
    divided by the number of paths of the first variable/experiment and
    passed to ``plot_bar_deangelis``.

    Parameters
    ----------
    cfg : dict
        Configuration dictionary of the recipe.

    Raises
    ------
    ValueError
        If more than 6 different experiments are present.

    """
    # ---------------------------------------------------------------------
    # Read recipe data
    # ---------------------------------------------------------------------

    # Dataset data containers
    data = e.Datasets(cfg)
    logging.debug("Found datasets in recipe:\n%s", data)

    # Variables and experiments present in the recipe
    recipe_metadata = cfg['input_data'].values()
    available_vars = list(group_metadata(recipe_metadata, 'short_name'))
    logging.debug("Found variables in recipe:\n%s", available_vars)

    available_exp = list(group_metadata(cfg['input_data'].values(), 'exp'))

    if len(available_exp) > 6:
        raise ValueError("The diagnostic can only plot up to 6 different "
                         "model experiments.")

    # ---------------------------------------------------------------------
    # Read data
    # ---------------------------------------------------------------------

    # Store the time mean of the first cube of every file
    for dataset_path in data:
        first_cube = iris.load(dataset_path)[0]
        time_mean = first_cube.collapsed('time', iris.analysis.MEAN)
        data.set_data(time_mean.data, dataset_path)

    # ---------------------------------------------------------------------
    # Process data
    # ---------------------------------------------------------------------

    # Accumulators initialised with zero for every experiment/variable
    data_var = OrderedDict(
        (iexp, OrderedDict((jvar, 0.0) for jvar in available_vars))
        for iexp in available_exp)

    pathlist = data.get_path_list(short_name=available_vars[0],
                                  exp=available_exp[0])

    for dataset_path in pathlist:
        dataset = data.get_info(n.DATASET, dataset_path)
        for jvar in available_vars:
            for iexp in available_exp:
                per_exp = data_var[iexp]
                print(per_exp)
                print((per_exp.values()))
                per_exp[jvar] = per_exp[jvar] + data.get_data(
                    short_name=jvar, exp=iexp, dataset=dataset)

    # Sum -> mean over the number of paths
    n_paths = float(len(pathlist))
    data_var_sum = {
        iexp: np.fromiter(data_var[iexp].values(), dtype=float) / n_paths
        for iexp in available_exp
    }

    # Plot ECS regression if desired
    plot_bar_deangelis(cfg, data_var_sum, available_exp, available_vars)
def main(cfg):
    """Create wflow forcing (pr, tas, pet) on the DEM grid per dataset.

    Per dataset: load the cubes, shift the ERA5 time coordinate where
    applicable, regrid precipitation and temperature onto the DEM, take
    potential evapotranspiration from ``evspsblpot`` or derive it with
    ``debruin_pet``, convert units, wrap longitudes and save.

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration; reads ``input_data``,
        ``auxiliary_data_dir``, ``dem_file`` and ``region``.

    """
    input_metadata = cfg['input_data'].values()
    by_dataset = group_metadata(input_metadata, 'dataset')

    for dataset, metadata in by_dataset.items():
        all_vars, provenance = get_input_cubes(metadata)

        if dataset == 'ERA5':
            for short_name in ('tas', 'psl'):
                shift_era5_time_coordinate(all_vars[short_name])

        # Target cube: provides the target grid and the target elevation
        dem_path = Path(cfg['auxiliary_data_dir']) / cfg['dem_file']
        dem = load_dem(dem_path)
        check_dem(dem, cfg['region'])
        dem = extract_region(dem, **cfg['region'])

        logger.info("Processing variable precipitation_flux")
        pr_dem = regrid(all_vars['pr'], target_grid=dem, scheme='linear')

        logger.info("Processing variable temperature")
        tas_dem = regrid_temperature(all_vars['tas'], all_vars['orog'], dem)

        logger.info("Processing variable potential evapotranspiration")
        if 'evspsblpot' not in all_vars:
            logger.info("Potential evapotransporation not available, deriving")
            # Regrid the inputs of the derivation in a fixed order
            regridded = {
                short_name: regrid(all_vars[short_name],
                                   target_grid=dem,
                                   scheme='linear')
                for short_name in ('psl', 'rsds', 'rsdt')
            }
            pet_dem = debruin_pet(tas=tas_dem, **regridded)
        else:
            pet_dem = regrid(all_vars['evspsblpot'],
                             target_grid=dem,
                             scheme='linear')
        pet_dem.var_name = 'pet'

        logger.info("Converting units")
        # Fluxes: divide units by 'kg m-3' and data by 1000, then convert
        for flux in (pet_dem, pr_dem):
            flux.units = flux.units / 'kg m-3'
            flux.data = flux.core_data() / 1000.
            flux.convert_units('mm day-1')

        tas_dem.convert_units('degC')

        # Adjust longitude coordinate to wflow convention
        for cube in (tas_dem, pet_dem, pr_dem):
            lon_points = cube.coord('longitude').points
            cube.coord('longitude').points = (lon_points + 180) % 360 - 180

        cubes = iris.cube.CubeList([pr_dem, tas_dem, pet_dem])
        save(cubes, dataset, provenance, cfg)
def main(cfg):
    """Create MARRMoT forcing files, one MATLAB structure per dataset.

    The following variables must be present for every dataset:
        tas (air_temperature)
        pr (precipitation_flux)
        psl (air_pressure_at_mean_sea_level)
        rsds (surface_downwelling_shortwave_flux_in_air)
        rsdt (toa_incoming_shortwave_flux)

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration; ``input_data`` and ``basin`` are read here.
    """

    def _area_mean(cube, units):
        """Average ``cube`` over the area and convert it to ``units``."""
        averaged = preproc.area_statistics(cube, operator='mean')
        averaged.convert_units(units)
        return averaged

    grouped = group_metadata(cfg['input_data'].values(), 'dataset')
    for dataset, metadata in grouped.items():
        all_vars, provenance = get_input_cubes(metadata)

        # ERA5 instantaneous variables need their time coordinate fixed
        if dataset == 'ERA5':
            for short_name in ('psl', 'tas'):
                _shift_era5_time_coordinate(all_vars[short_name])

        # MARRMoT expects fluxes in kg m-2 day-1, which equals mm/day
        logger.info("Processing variable PET")
        pet = _area_mean(
            debruin_pet(
                psl=all_vars['psl'],
                rsds=all_vars['rsds'],
                rsdt=all_vars['rsdt'],
                tas=all_vars['tas'],
            ),
            'kg m-2 day-1',
        )

        logger.info("Processing variable tas")
        temp = _area_mean(all_vars['tas'], 'celsius')

        logger.info("Processing variable pr")
        precip = _area_mean(all_vars['pr'], 'kg m-2 day-1')

        # Start/end times and the latitude/longitude of the data
        time_start_end, lat_lon = _get_extra_info(temp)
        time_start = time_start_end[0]
        time_end = time_start_end[1]

        # delta_t_days could also be extracted from the cube
        forcing = {
            'precip': precip.data,
            'temp': temp.data,
            'pet': pet.data,
            'delta_t_days': 1.0,
            'time_unit': 'day',
        }
        output_data = {
            'forcing': forcing,
            'time_start': time_start,
            'time_end': time_end,
            'data_origin': lat_lon,
        }

        # Save to matlab structure
        name_parts = [
            'marrmot',
            dataset,
            cfg['basin'],
            str(int(time_start[0])),
            str(int(time_end[0])),
        ]
        output_name = get_diagnostic_filename('_'.join(name_parts),
                                              cfg,
                                              extension='mat')
        sio.savemat(output_name, output_data)

        # Store provenance
        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(output_name, provenance)
def retrieve_data(self):
    """Load the input cubes, sort them by experiment, then compute and plot.

    (1) Every alias in ``self.cfg['input_data']`` is loaded (``pr`` and/or
        ``tas``) and stored in one of three groups: scenario ("ssp"),
        historical ("hist") or reanalysis/observations ("rean").
    (2) ``self.compute`` is called per variable to derive the output data.
    (3) The map and time series plotting methods are called.

    Aliases containing ``'OBS'`` are treated as reanalysis data; they carry
    no ``exp`` facet. Model aliases whose activity/experiment pair is not
    one of the four known combinations are ignored.

    The reference period of a group is taken from the first dataset that
    falls into it. It is read from the ``tas`` metadata when available and
    from the alias' first metadata entry otherwise, so that datasets which
    only provide ``pr`` no longer fail with a ``KeyError``.
    """
    print('----------- DATA RETREIVAL ----------')
    data = group_metadata(self.cfg['input_data'].values(), 'alias')

    # (activity, experiment) -> group the dataset belongs to
    experiment_groups = {
        ('highresmip', 'highres-future'): 'ssp',
        ('scenariomip', 'ssp585'): 'ssp',
        ('highresmip', 'hist-1950'): 'hist',
        ('cmip', 'historical'): 'hist',
    }
    groups = ('ssp', 'hist', 'rean')
    cubes = {group: {'tas': [], 'pr': []} for group in groups}
    # (start_year, end_year) per group; None until the first dataset is seen
    periods = {group: None for group in groups}

    obs_names = ' '
    w_density = iris.coords.AuxCoord(1000,
                                     long_name='water_density',
                                     units='kg m-3')

    # Sort the cubes of every alias into the list of its experiment group
    for alias, alias_metadata in data.items():
        print(alias)
        variables = group_metadata(alias_metadata, 'short_name')

        loaded = {}
        if 'pr' in variables:
            pr = iris.load(variables['pr'][0]['filename'])[0]
            # Mass flux -> depth rate
            pr = pr / w_density
            pr.convert_units('mm month-1')
            loaded['pr'] = pr
        if 'tas' in variables:
            tas = iris.load(variables['tas'][0]['filename'])[0]
            tas.convert_units('degC')
            loaded['tas'] = tas

        first_entry = alias_metadata[0]
        if 'OBS' in alias:
            # Reanalysis data do not have the attribute 'exp'
            group = 'rean'
            obs_names = obs_names + first_entry['dataset'] + ', '
        else:
            exp = first_entry['exp']
            activity = first_entry['activity']
            model_names = first_entry['project']
            group = experiment_groups.get((activity.lower(), exp))
        if group is None:
            continue

        if periods[group] is None:
            if 'tas' in variables:
                reference = variables['tas'][0]
            else:
                reference = first_entry
            periods[group] = (reference['start_year'],
                              reference['end_year'])
        start_year, end_year = periods[group]

        for short_name in ('tas', 'pr'):
            if short_name in loaded:
                cubes[group][short_name].append(
                    self.regrid_time(loaded[short_name], start_year,
                                     end_year))

    # Groups without any dataset keep the placeholder year 0
    start_year_ls = [(periods[group] or (0, 0))[0] for group in groups]
    end_year_ls = [(periods[group] or (0, 0))[1] for group in groups]
    start_year_rean = start_year_ls[2]

    # COMPUTE EVERYTHING & PLOT
    plot_settings = (
        ('tas', 'No temperature to diagnose', '($^o$C)', 'Temperature'),
        ('pr', 'No precipitation to diagnose', '(mm month$^{-1}$)',
         'Precipitation'),
    )
    for short_name, missing_msg, unit_label, title in plot_settings:
        rean_ls = cubes['rean'][short_name]
        ssp_ls = cubes['ssp'][short_name]
        hist_ls = cubes['hist'][short_name]
        if not (rean_ls and ssp_ls and hist_ls):
            print(missing_msg)
            continue

        cube_ls, ts_ls = self.compute(rean_ls, ssp_ls, hist_ls, short_name)
        # Dispatches to self.tas_plot_caller / self.pr_plot_caller
        plot_caller = getattr(self, f'{short_name}_plot_caller')
        plot_caller(cube_ls, obs_names.upper(), model_names,
                    str(start_year_rean))

        # Compute standard deviation of the ensembles & plot
        stds = self.timeseries_std([ssp_ls, hist_ls, rean_ls])
        self.timeseries_plot(ts_ls, start_year_ls, end_year_ls,
                             f'_{short_name}', stds, unit_label, title)
def plot_regressions(input_data, cfg, description=None):
    """Create regression plots, netcdf files and the summary table.

    Every non-``tas`` variable (radiation quantity, y axis) of every dataset
    is regressed against the matching ``tas`` data. The slope is stored in
    the summary table; for ``rtnt`` the ``ECS`` and ``F`` entries are derived
    from the regression as well.

    Parameters
    ----------
    input_data : list of dict
        Metadata dictionaries; each needs ``dataset``, ``short_name``,
        ``cube`` and ``ancestors``.
    cfg : dict
        Diagnostic configuration, passed on to the helpers.
    description : str, optional
        Extra text inserted into captions and passed to the helpers.

    Raises
    ------
    ValueError
        If no ``tas`` data exist for a dataset or a radiation cube has more
        than one dimension.
    """
    table = OrderedDict()
    description_text = '' if description is None else f' for {description}'

    # Outer loop: radiation quantities; inner loop: datasets
    for (var, datasets) in group_metadata(input_data, 'short_name').items():
        if 'tas' in var:
            continue
        logger.info("Creating regression plots for variable '%s'", var)

        for dataset in datasets:
            dataset_name = dataset['dataset']
            table.setdefault(dataset_name, {})

            tas_var = _get_tas_var(dataset_name, var)
            tas_data = select_metadata(input_data,
                                       short_name=tas_var,
                                       dataset=dataset_name)
            if not tas_data:
                raise ValueError(
                    f"No 'tas' data for '{dataset_name}' available")
            tas_cube = tas_data[0]['cube']
            rad_cube = dataset['cube']
            if rad_cube.ndim > 1:
                raise ValueError(
                    "Regression plots are not supported for input data with "
                    "more than one dimension (which should be time)")

            # Save plot and netcdf file
            (plot_path, reg) = _create_regression_plot(
                tas_cube,
                rad_cube,
                dataset_name,
                cfg,
                description=description)
            netcdf_path = _create_regression_file(tas_cube,
                                                  rad_cube,
                                                  dataset_name,
                                                  cfg,
                                                  description=description)

            # Expand table
            row = table[dataset_name]
            row[var] = reg.slope
            if var == 'rtnt':
                row['ECS'] = (-reg.intercept / 2.0 / reg.slope)
                row['F'] = reg.intercept

            # Provenance
            caption = (
                f'Scatterplot between {FEEDBACK_PARAMETERS.get(var, var)} '
                f'TOA radiance and global mean surface '
                f'temperature anomaly{description_text} of the abrupt 4x '
                f'CO2 experiment '
                f'including linear regression for {dataset_name} (following '
                f'Andrews et '
                f'al., Geophys. Res. Lett., 39, 2012).')
            ancestors = dataset['ancestors'] + tas_data[0]['ancestors']
            _write_provenance(netcdf_path,
                              plot_path,
                              caption,
                              ancestors,
                              cfg,
                              plot_types=['scatter'])

    # Create summary table
    _create_table(table, cfg, description=description)
def main(cfg):
    """Plot seasonal maps of the projected change for every model.

    The input data are grouped by project, then by dataset and, depending on
    the project, by driving model (CORDEX) or ensemble member (UKCP18).
    Anomalies between the earliest and the latest ``start_year`` found in the
    input are computed with ``get_anomalies`` and one map per model and
    season is written below ``cfg['plot_dir']/<season>/``.

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration (metadata from the pre-processor). Keys
        read here: ``input_data``, ``domain`` and ``plot_dir``.

    Raises
    ------
    ValueError
        If the recipe does not provide exactly one variable.
    """
    # get variable processed
    variables = list(extract_variables(cfg))
    if len(variables) != 1:
        # Explicit check instead of ``assert``, which is stripped under -O
        raise ValueError(
            f"Expected exactly one variable, got {len(variables)}: "
            f"{variables}")
    var = variables[0]

    # precipitation is shown as relative change
    rel_change = var == "pr"

    # establish the time periods of our datasets
    start_years = list(group_metadata(cfg["input_data"].values(),
                                      "start_year"))
    base_start = min(start_years)
    fut_start = max(start_years)

    # first group datasets by project..
    # this creates a dict of datasets keyed by project (CMIP5, CMIP6 etc.)
    projects = group_metadata(cfg["input_data"].values(), "project")
    # how to uniquely define a dataset varies by project, for CMIP it's
    # simple, just dataset... for CORDEX, combo of dataset and driver (and
    # possibly also domain if we start adding those). It also gets more
    # complex if we start adding in different ensembles..

    # This section of the code loads and organises the data to be ready for
    # plotting
    logger.info("Loading data")
    projections = {}
    model_lists = {}
    cordex_drivers = []

    for proj in projects:
        # for CMIPs we can just group metadata again by dataset
        models = group_metadata(projects[proj], "dataset")
        projections[proj] = {}

        for m in models:
            if proj[:6].upper() == "CORDEX":
                # go one level deeper to deal with driving models
                drivers = group_metadata(models[m], "driver")
                projections[proj][m] = dict.fromkeys(drivers)
                for d in drivers:
                    logger.info("Calculating anomalies for %s %s %s", proj,
                                m, d)
                    anoms = get_anomalies(drivers[d], base_start, fut_start,
                                          rel_change)
                    if anoms is None:
                        continue
                    projections[proj][m][d] = anoms
                    model_lists.setdefault(proj, []).append(f"{m} {d}")
                    cordex_drivers.append(d)
            elif proj == "UKCP18":
                # go deeper to deal with ensembles and datasets
                # split UKCP into separate GCM and RCM
                proj_key = f"UKCP18 {m}"
                ensembles = group_metadata(models[m], "ensemble")
                projections[proj_key] = dict.fromkeys(ensembles)
                for ens in ensembles:
                    logger.info("Calculating anomalies for %s %s", proj_key,
                                ens)
                    anoms = get_anomalies(ensembles[ens], base_start,
                                          fut_start, rel_change)
                    if anoms is None:
                        continue
                    projections[proj_key][ens] = anoms
                    model_lists.setdefault(proj_key,
                                           []).append(f"{proj_key} {ens}")
            else:
                logger.info("Calculating anomalies for %s %s", proj, m)
                anoms = get_anomalies(models[m], base_start, fut_start,
                                      rel_change)
                if anoms is None:
                    continue
                projections[proj][m] = anoms
                model_lists.setdefault(proj, []).append(f"{m}")

        # remove any empty categories (i.e. UKCP18 which has been split into
        # rcm and gcm)
        if projections[proj] == {}:
            del projections[proj]
    cordex_drivers = set(cordex_drivers)

    # this section of the code does the plotting..
    # NOTE(review): "OND" is kept unchanged because it ends up in directory
    # and file names; the conventional fourth season label is "SON" -
    # confirm against process_projections_dict.
    seasons = {0: "DJF", 1: "MAM", 2: "JJA", 3: "OND"}
    logger.info("Plotting")
    extent = (
        cfg["domain"]["start_longitude"] - 2,
        cfg["domain"]["end_longitude"] + 2,
        cfg["domain"]["start_latitude"] - 2,
        cfg["domain"]["end_latitude"] + 2,
    )

    # colour scale only depends on the variable, so set it once
    if var == "pr":
        vmn, vmx, cmap = -50, 50, "brewer_RdYlBu_11"
    else:
        vmn, vmx, cmap = 0, 5, "brewer_YlOrRd_09"

    for s, season in seasons.items():
        season_dir = f"{cfg['plot_dir']}/{season}"
        os.makedirs(season_dir, exist_ok=True)
        for p in projections:
            pdata = process_projections_dict(projections[p], s)

            for m in pdata:
                title = f"{p} {m} {season} {var} change"
                plt.figure(figsize=(12.8, 9.6))
                ax = plt.axes(projection=ccrs.PlateCarree())
                ax.set_extent(extent)
                qplt.pcolormesh(pdata[m], vmin=vmn, vmax=vmx, cmap=cmap)
                plt.title(title)
                ax.coastlines()
                ax.add_feature(cartopy.feature.BORDERS, linestyle=":")
                plt.savefig(f"{season_dir}/{p}_{m}_map_{season}.png")
                plt.close()

    # print all datasets used
    print("Input models for plots:")
    for p, used_models in model_lists.items():
        print(f"{p}: {len(used_models)} models")
        print(used_models)
        print("")
def _plot_comparison(self, data, datasets, p_values=False):
    """Scatter one value per dataset and save the figure with provenance.

    Parameters
    ----------
    data : sequence of float
        One value per dataset (IFE values, or p-values if ``p_values``).
    datasets : sequence of str
        Labels used for the x axis ticks, in the same order as ``data``.
    p_values : bool, optional
        If true, the plot is saved as ``feedback_p_values``, a red line is
        drawn at 0.05 and the y axis is labelled as p-value.
    """
    filename = 'feedback_p_values' if p_values else 'feedback'
    plot_dir = self.cfg[n.PLOT_DIR]
    extension = self.cfg[n.OUTPUT_FILE_TYPE]
    path = os.path.join(plot_dir, f'{filename}.{extension}')
    plot_options = self.cfg.get('plot', {})

    fig = plt.figure()
    index = np.arange(len(data))
    point_size = plot_options.get('point_size', 8)
    point_color = plot_options.get('point_color', 'black')
    plt.scatter(index, data, point_size, color=point_color)
    if p_values:
        # significance threshold
        plt.hlines(0.05, -1, index[-1] + 1, colors='red')
    axes = plt.gca()

    logger.debug(data)
    # y range: integer bounds around the data, upper bound never below 0
    max_limit = max(math.ceil(max(data)), 0)
    min_limit = math.floor(min(data))
    separation = max_limit - min_limit

    if plot_options.get('show_values', False):
        # Labels of points in the upper quarter go below the point
        threshold = min_limit + separation * 0.75
        for position, value in zip(index, data):
            if value > threshold:
                label_y = value - separation * 0.05
            else:
                label_y = value + separation * 0.10
            axes.annotate(
                f'{value:.2f}',
                xy=(position, value),
                xycoords='data',
                textcoords='data',
                xytext=(position, label_y),
                rotation=90,
            )

    # axes and labels
    axes.set_ylim(min_limit, max_limit)
    if not p_values:
        axes.set_ylabel('IFE')
    else:
        axes.set_ylabel('P-value [log]')
        plt.ylim(0, max(0.25, max(data)))
    axes.set_title('IFE comparison')
    _, xtick_names = plt.xticks(index, datasets)
    plt.xlim(index[0] - 0.5, index[-1] + 0.5)
    plt.setp(xtick_names, rotation=90, fontsize=10)
    plt.grid(True, 'both', 'y')
    plt.tight_layout()
    fig.savefig(path)
    plt.close(fig)

    ancestors = group_metadata(self.cfg['input_data'].values(), n.ALIAS)
    self._create_prov_record(
        path, f'IFE {filename} comparison for all datasets', ancestors)
def main(diag_config):
    """Evaluate global distribution of ecosystem carbon turnover time.

    For every model the turnover time is computed on the grid
    (``_calc_turnover``) and globally (ratio of the area sums of ``ctotal``
    and ``gpp``, converted to years). Depending on ``write_plots`` and
    ``write_netcdf`` single-model maps, netcdf files, a multimodel agreement
    plot, the observation map and a matrix of maps are produced, each with a
    provenance record.

    Argument:
    --------
        diag_config - nested dictionary of metadata. Keys read here:
            ``input_data``, ``obs_info`` (``source_label``, ``grid_label``),
            ``write_plots`` and ``write_netcdf``.
    """
    model_data_dict = group_metadata(diag_config['input_data'].values(),
                                     'dataset')

    # get the data from the observation
    global_tau_obs = _get_obs_data(diag_config)
    obs_info = diag_config['obs_info']
    obs_title = global_tau_obs['grid']['tau_ctotal'].long_name
    base_name = ('{title}_{source_label}_'
                 '{grid_label}'.format(
                     title=obs_title,
                     source_label=obs_info['source_label'],
                     grid_label=obs_info['grid_label']))

    global_tau_mod = {}
    global_tau_mod['grid'] = {}
    global_tau_mod['global'] = {}

    provenance_record_matrix = _get_provenance_record(
        "Matrix Comparison of global distributions of turnover time of carbon",
        ['mean', 'perc'], ['global'],
        _get_ancestor_files(diag_config, 'tau_ctotal'))

    # A space was missing between the two literals ("turnovertime")
    provenance_record_multimodel = _get_provenance_record(
        "Multimodel bias and agreements of global distributions of turnover "
        "time of carbon. Reproduces figure 3 in Carvalhais et al. (2014).",
        ['mean', 'perc'], ['global'],
        _get_ancestor_files(diag_config, 'tau_ctotal'))

    for model_name, model_dataset in model_data_dict.items():
        global_tau_mod[model_name] = {}

        # load the data
        ctotal = _load_variable(model_dataset, 'ctotal')
        gpp = _load_variable(model_dataset, 'gpp')
        tau_ctotal = _calc_turnover(ctotal, gpp, model_name)
        global_tau_mod['grid'][model_name] = tau_ctotal

        # global turnover time from the area sums of ctotal and gpp
        gpp_global = gpp.collapsed(['latitude', 'longitude'],
                                   iris.analysis.SUM)
        ctotal_global = ctotal.collapsed(['latitude', 'longitude'],
                                         iris.analysis.SUM)
        tau_global = ctotal_global / gpp_global
        tau_global.convert_units('yr')

        # ``np.float`` was only an alias of the builtin and has been removed
        # from NumPy, so use ``float`` directly
        global_tau_mod['global'][model_name] = float(tau_global.core_data())

        if diag_config['write_plots']:
            base_name_mod = (
                'global_{title}_{source_label}_'
                '{grid_label}'.format(
                    title=obs_title,
                    source_label=model_name,
                    grid_label=obs_info['grid_label']))
            plot_path_mod = get_plot_filename(base_name_mod, diag_config)
            provenance_record_mod = _get_provenance_record(
                "Map of global distribution of turnover time of carbon",
                ['mean', 'perc'], ['global'], {model_name: model_dataset})
            _plot_single_map(plot_path_mod, tau_ctotal,
                             global_tau_mod['global'][model_name], model_name,
                             provenance_record_mod, diag_config)

            # The netcdf output reuses base_name_mod and the provenance
            # record created above, hence it lives inside this branch
            if diag_config['write_netcdf']:
                model_cubes = [
                    c for c in global_tau_mod['grid'].values()
                    if isinstance(c, iris.cube.Cube)
                ]
                obs_cubes = [
                    c for c in global_tau_obs['grid'].values()
                    if isinstance(c, iris.cube.Cube)
                ]
                netcdf_path = get_diagnostic_filename(base_name_mod,
                                                      diag_config)
                save_cubes = iris.cube.CubeList(model_cubes + obs_cubes)
                iris.save(save_cubes, netcdf_path)
            else:
                netcdf_path = None

            with ProvenanceLogger(diag_config) as provenance_logger:
                provenance_logger.log(netcdf_path, provenance_record_mod)

    if diag_config['write_plots']:
        # multimodel agreement
        base_name_multimodel = '{prefix}_{base_name}'.format(
            prefix='global_multimodelAgreement', base_name=base_name)
        plot_path_multimodel = get_plot_filename(base_name_multimodel,
                                                 diag_config)
        # Use the function argument; the former ``config`` name only exists
        # if the calling script happens to define such a global
        _plot_multimodel_agreement(plot_path_multimodel, global_tau_mod,
                                   global_tau_obs, diag_config)
        with ProvenanceLogger(diag_config) as provenance_logger:
            provenance_logger.log(plot_path_multimodel,
                                  provenance_record_multimodel)

        # map of observation
        base_name_obs = '{prefix}_{base_name}'.format(prefix='global',
                                                      base_name=base_name)
        plot_path_obs = get_plot_filename(base_name_obs, diag_config)
        provenance_record_obs = _get_provenance_record(
            "Map of observed global distribution of turnover time of carbon",
            ['mean', 'perc'], ['global'],
            global_tau_obs['input_files'].tolist())
        _plot_single_map(plot_path_obs,
                         global_tau_obs['grid']['tau_ctotal'],
                         global_tau_obs['global']['tau_ctotal'],
                         obs_info['source_label'],
                         provenance_record_obs, diag_config)

        # matrix of maps
        base_name_matrix = '{prefix}_{base_name}'.format(
            prefix='global_matrix_map', base_name=base_name)
        plot_path_matrix = get_plot_filename(base_name_matrix, diag_config)
        _plot_matrix_map(plot_path_matrix, global_tau_mod, global_tau_obs,
                         diag_config)
        with ProvenanceLogger(diag_config) as provenance_logger:
            provenance_logger.log(plot_path_matrix, provenance_record_matrix)
def main(cfg):
    """Run the diagnostic.

    Reads all datasets of the recipe, reduces each to its May-September
    (MJJAS, monsoon season) time mean, and then runs the processing and
    plotting helpers on the results.

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration; ``input_data`` is read here directly.

    Raises
    ------
    ValueError
        If one of the variables 'pr', 'ua', 'va', 'ts' is missing, if the
        experiments are not exactly 'historical' plus one other, or if the
        recipe provides no datasets at all.
    """
    ###########################################################################
    # Read recipe data
    ###########################################################################

    # Dataset data containers
    data = e.Datasets(cfg)
    logging.debug("Found datasets in recipe:\n%s", data)

    # Variables
    var = e.Variables(cfg)
    logging.debug("Found variables in recipe:\n%s", var)

    # Check that all required variables are available
    if not var.vars_available('pr', 'ua', 'va', 'ts'):
        raise ValueError("This diagnostic needs 'pr', 'ua', " +
                         " 'va', and 'ts'")

    available_exp = list(group_metadata(cfg['input_data'].values(), 'exp'))

    if 'historical' not in available_exp:
        raise ValueError("The diagnostic needs an historical experiment " +
                         " and one other experiment.")

    if len(available_exp) != 2:
        raise ValueError("The diagnostic needs two model experiments: "
                         "one historical and one other one.")

    available_exp.remove('historical')
    future_exp = available_exp[0]

    ###########################################################################
    # Read data
    ###########################################################################

    # Grid of the first dataset; all data must be interpolated to the same
    # grid, so it is valid for every dataset.
    lats = None
    lons = None

    # Create iris cube for each dataset and save the seasonal means
    for dataset_path in data:
        cube = iris.load(dataset_path)[0]
        cat.add_month_number(cube, 'time', name='month_number')
        # MJJAS mean (monsoon season): months within 2 of July
        cube = cube[np.where(
            np.absolute(cube.coord('month_number').points - 7) <= 2)]
        cube = cube.collapsed('time', iris.analysis.MEAN)

        short_name = data.get_info(n.SHORT_NAME, dataset_path)
        if short_name == 'pr':
            # convert from kg m-2 s-1 to mm d-1
            # cube.convert_units('mm d-1') doesn't work.
            cube.data = cube.data * (60.0 * 60.0 * 24.0)
            cube.units = 'mm d-1'

        if lats is None:
            lats = cube.coord('latitude').points
            lons = cube.coord('longitude').points

        data.set_data(cube.data, dataset_path)

    if lats is None:
        raise ValueError("No input datasets found, cannot determine the "
                         "latitude/longitude grid.")

    ###########################################################################
    # Process data
    ###########################################################################

    data_ar = substract_li(cfg, data, lats, lons, future_exp)

    # data_ar {"datasets": datasets, "ar_diff_rain": ar_diff_rain,
    #          "ar_diff_ua": ar_diff_ua, "ar_diff_va": ar_diff_va,
    #          "ar_hist_rain": ar_hist_rain, "mism_diff_rain": mism_diff_rain,
    #          "mwp_hist_rain": mwp_hist_rain}

    plot_rain_and_wind(cfg, 'Multi-model_mean',
                       {'ar_diff_rain': data_ar["ar_diff_rain"],
                        'ar_diff_ua': data_ar["ar_diff_ua"],
                        'ar_diff_va': data_ar["ar_diff_va"],
                        'lats': lats, 'lons': lons}, future_exp)

    # Regression between mean ISM rain difference and historical rain
    reg2d = get_reg_2d_li(data_ar["mism_diff_rain"], data_ar["ar_hist_rain"],
                          lats, lons)

    plot_2dcorrelation_li(cfg, reg2d, lats, lons)

    plot_reg_li(cfg, data_ar, future_exp)

    # Regression between mean WP rain and rain difference for each location
    reg2d_wp = get_reg_2d_li(data_ar["mwp_hist_rain"],
                             data_ar["ar_diff_rain"], lats, lons)

    data_ar2 = correct_li(data_ar, lats, lons, reg2d_wp)
    # return {"datasets": data["datasets"], "ar_diff_cor": ar_diff_cor,
    #         "proj_err": proj_err, "mism_diff_cor": mism_diff_cor,
    #         "mism_hist_rain": mism_hist_rain, "mwp_hist_cor": mwp_hist_cor}

    plot_reg_li2(cfg, data_ar["datasets"], data_ar["mism_diff_rain"],
                 data_ar2["mism_diff_cor"], data_ar2["mism_hist_rain"])

    plot_rain(cfg, 'Multi-model mean rainfall change due to model error',
              np.mean(data_ar2["proj_err"], axis=2), lats, lons)
    plot_rain(cfg, 'Corrected multi-model mean rainfall change',
              np.mean(data_ar2["ar_diff_cor"], axis=2), lats, lons)
def main(diag_config):
    """Evaluate the zonal correlation of turnover time with climate.

    For each model the turnover time (``ctotal / gpp`` in years) is
    correlated with precipitation and temperature per latitude via
    ``_calc_zonal_correlation``. The results are compared with the
    observation-based values and, depending on the configuration, written to
    netcdf and plotted.

    Argument:
    --------
        diag_config - nested dictionary of metadata. Keys read here:
            ``input_data``, ``obs_info``, ``write_netcdf``, ``write_plots``.
    """
    datasets_by_model = group_metadata(diag_config['input_data'].values(),
                                       'dataset')
    fig_config = _get_fig_config(diag_config)

    zonal_correlation_mod = {}
    for model_name, model_dataset in datasets_by_model.items():
        zonal_correlation_mod[model_name] = {}

        ctotal, gpp, precip, tas = (
            _load_variable(model_dataset, short_name)
            for short_name in ('ctotal', 'gpp', 'pr', 'tas'))

        tau_ctotal = (ctotal / gpp)
        tau_ctotal.convert_units('yr')
        # set the attributes
        tau_ctotal.var_name = 'tau_ctotal'

        # coordinates are taken from the gpp cube, keyed by name
        mod_coords = {coord.name(): coord for coord in gpp.coords()}

        cleaned = [
            _remove_invalid(cube.data, fill_value=np.nan)
            for cube in (tau_ctotal, precip, tas)
        ]
        latitude = mod_coords['latitude']
        zon_corr = _calc_zonal_correlation(cleaned[0], cleaned[1], cleaned[2],
                                           latitude.points, fig_config)

        model_result = zonal_correlation_mod[model_name]
        model_result['data'] = zon_corr
        model_result['latitude'] = latitude

    zonal_correlation_obs = _get_obs_data_zonal(diag_config)

    obs_info = diag_config['obs_info']
    base_name = (f"r_tau_ctotal_climate_{fig_config['correlation_method']}"
                 f"_{obs_info['source_label']}_{obs_info['grid_label']}z")

    caption = ("Comparison of latitudinal (zonal) variations of pearson"
               " correlation between turnover time and climate: turnover"
               " time and precipitation, controlled for temperature"
               " (left) and vice-versa (right). Reproduces figures 2c"
               " and 2d in Carvalhais et al. (2014).")
    provenance_record = _get_provenance_record(
        caption, ['corr', 'perc'], ['zonal'],
        _get_ancestor_files(diag_config, 'tau_ctotal'))

    if diag_config['write_netcdf']:
        # only iris cubes among the stored values are written
        save_cubes = iris.cube.CubeList(
            [
                value for value in zonal_correlation_mod.values()
                if isinstance(value, iris.cube.Cube)
            ] + [
                value for value in zonal_correlation_obs.values()
                if isinstance(value, iris.cube.Cube)
            ])
        netcdf_path = get_diagnostic_filename(base_name, diag_config)
        iris.save(save_cubes, netcdf_path)

        with ProvenanceLogger(diag_config) as provenance_logger:
            provenance_logger.log(netcdf_path, provenance_record)

    if diag_config['write_plots']:
        plot_path = get_plot_filename(base_name, diag_config)
        _plot_zonal_correlation(plot_path, zonal_correlation_mod,
                                zonal_correlation_obs, diag_config)
        provenance_record['plot_file'] = plot_path

        with ProvenanceLogger(diag_config) as provenance_logger:
            provenance_logger.log(plot_path, provenance_record)
def main(cfg):
    """Compute projected anomalies per model and plot their distribution.

    The input data are grouped by project, then by dataset and, depending on
    the project, by driving model (CORDEX) or ensemble member (UKCP18).
    Anomalies between the earliest and the latest ``start_year`` found in the
    input are computed with ``get_anomalies`` and passed to the box plot and
    dots plot helpers. Finally the models that contributed are printed.

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration (metadata from the pre-processor); the
        ``input_data`` key is read here.
    """
    # get variable processed
    var = get_var(cfg)

    # precipitation is shown as relative change
    rel_change = var == "pr"

    # establish the time periods of our datasets
    start_years = list(group_metadata(cfg["input_data"].values(),
                                      "start_year"))
    base_start = min(start_years)
    fut_start = max(start_years)

    # first group datasets by project..
    # this creates a dict of datasets keyed by project (CMIP5, CMIP6 etc.)
    projects = group_metadata(cfg["input_data"].values(), "project")
    # how to uniquely define a dataset varies by project, for CMIP it's
    # simple, just dataset... for CORDEX, combo of dataset and driver (and
    # possibly also domain if we start adding those). It also gets more
    # complex if we start adding in different ensembles..

    # This section of the code loads and organises the data to be ready for
    # plotting
    logger.info("Loading data")
    projections = {}
    model_lists = {}
    cordex_drivers = []

    for proj in projects:
        # for CMIPs we can just group metadata again by dataset
        models = group_metadata(projects[proj], "dataset")
        projections[proj] = {}

        for m in models:
            if proj[:6].upper() == "CORDEX":
                # go one level deeper to deal with driving models
                drivers = group_metadata(models[m], "driver")
                projections[proj][m] = dict.fromkeys(drivers)
                for d in drivers:
                    # module logger with lazy %-args, as used above
                    logger.info("Calculating anomalies for %s %s %s", proj,
                                m, d)
                    anoms = get_anomalies(drivers[d], base_start, fut_start,
                                          rel_change)
                    if anoms is None:
                        continue
                    projections[proj][m][d] = anoms
                    model_lists.setdefault(proj, []).append(f"{m} {d}")
                    cordex_drivers.append(d)
            elif proj == "UKCP18":
                # go deeper to deal with ensembles and datasets
                # split UKCP into separate GCM and RCM
                proj_key = f"UKCP18 {m}"
                ensembles = group_metadata(models[m], "ensemble")
                projections[proj_key] = dict.fromkeys(ensembles)
                for ens in ensembles:
                    logger.info("Calculating anomalies for %s %s", proj_key,
                                ens)
                    anoms = get_anomalies(ensembles[ens], base_start,
                                          fut_start, rel_change)
                    if anoms is None:
                        continue
                    projections[proj_key][ens] = anoms
                    model_lists.setdefault(proj_key,
                                           []).append(f"{proj_key} {ens}")
            else:
                logger.info("Calculating anomalies for %s %s", proj, m)
                anoms = get_anomalies(models[m], base_start, fut_start,
                                      rel_change)
                if anoms is None:
                    continue
                projections[proj][m] = anoms
                model_lists.setdefault(proj, []).append(f"{m}")

        # remove any empty categories (i.e. UKCP18 which has been split into
        # rcm and gcm)
        if projections[proj] == {}:
            del projections[proj]
    cordex_drivers = set(cordex_drivers)

    # this section of the code does all the plotting..
    plot_boxplots(projections, cordex_drivers)
    simple_dots_plot(projections, cordex_drivers)

    # print all datasets used
    print("Input models for plots:")
    for p, used_models in model_lists.items():
        print(f"{p}: {len(used_models)} models")
        print(used_models)
        print("")
def compute(self): print('----------- COMPUTE ----------') # --------------------------------------------------------------------- # Every dataset in the recipe is associated with an alias. We are going # to use th:We alias and the group_metadata shared function to loop over # the datasets. #---------------------------------------------------------------------- data = group_metadata(self.cfg['input_data'].values(), 'alias') ssp_ts = {} hist_ts = {} rean_ts = {} hist = 0 ssp = 0 rean = 0 # Loop over the datasets. for i, alias in enumerate(data): exp = data[alias][0]['exp'] variables = group_metadata(data[alias], 'short_name') # Returns the path to the preprocessed files. tas_file = variables['tas'][0]['filename'] tas = iris.load(tas_file)[0] tas.convert_units('degC') if i == 0: climatology = self.ref_clim(tas, 1960, 1962) #anomaly = tas - climatology #timeseries = anomaly.collapsed(['longitude', 'latitude'], iris.analysis.MEAN) timeseries = tas.collapsed(['longitude', 'latitude'], iris.analysis.MEAN) #timeseries.long_name = 'med_r_timeseries_tas' # Calculate Trends nlat = tas.coord('latitude').shape[0] nlon = tas.coord('longitude').shape[0] lat = tas.coord('latitude').points lon = tas.coord('longitude').points time_array = np.arange(1,tas.coord('time').shape[0]+1,1) regr = np.zeros([nlat, nlon]) for j in range(nlat): for k in range(nlon): p = np.polyfit(time_array, tas[:,j,k].data, 1) regr[j, k] = p[0]*10 # the 10 is to convert to decadal latitude = DimCoord(lat, standard_name='latitude', units='degrees') longitude = DimCoord(lon, standard_name='longitude', units='degrees') regr_cube = Cube(regr, dim_coords_and_dims=[(latitude, 0), (longitude, 1)]) ### ---------- remask -------------- ### # finding the trends turns the remask usseless as pyplot doesn't care about masked arrays # Ergo another remask is needed. 
output_trend = mask_landsea(regr_cube, ['/blablabla/where/the/fx/at/'] ,'sea', True) # Save the output trends in the cube dict output_trend.standard_name = None output_trend.long_name = 'tas_trend_med' output_trend.short_name = 'tastrend' # Calculate Climatology output_clim = climate_statistics(tas) output_clim.standard_name = None output_clim.long_name = 'tas_clim_med' output_clim.short_name = 'tasclim' # Save diagnosed dataset to dict. TODO: what about averaging first? if exp == 'historical': hist_ts[alias] = timeseries if hist == 0: mean_hist_trend = output_trend mean_hist_clim = output_clim hist += 1 else: mean_hist_trend = (mean_hist_trend + output_trend) mean_hist_clim = (mean_hist_clim + output_clim) hist += 1 if exp == 'ssp585': ssp_ts[alias] = timeseries if ssp == 0: mean_ssp_trend = output_trend mean_ssp_clim = output_clim ssp += 1 else: mean_ssp_trend = (mean_ssp_trend + output_trend) mean_ssp_clim = (mean_ssp_clim + output_clim) ssp += 1 if exp == 'reanaly': rean_ts[alias] = timeseries if rean == 0: mean_rean_trend = output_trend mean_rean_clim = output_clim rean += 1 else: mean_rean_trend = (mean_rean_trend + output_trend) mean_rean_clim = (mean_rean_clim + output_clim) rean += 1 mean_hist_trend = mean_hist_trend/hist mean_hist_clim = mean_hist_clim/hist mean_ssp_trend = mean_ssp_trend/ssp mean_ssp_clim = mean_ssp_clim/ssp #mean_rean_trend = mean_rean_trend/ssp #mean_rean_clim = mean_rean_clim/ssp mean_ssp_trend.long_name = 'ssp_trend_Med' mean_ssp_clim.long_name = 'ssp_clim_Med' mean_hist_trend.long_name = 'hist_trend_Med' mean_hist_clim.long_name = 'hist_clim_Med' # mean_rean_trend.long_name = 'rean_trend_Med' # mean_rean_clim.long_name = 'rean_clim_Med' ##### Biases ##### #trend_bias = mean_hist_trend - mean_rean_trend #clim_bias = mean_hist_clim - mean_rean_clim # Save the outputs for each dataset. #self.save(output, alias, data) # Plot the results. self.plot_2D(mean_ssp_trend) self.plot_1D(timeseries) print(mean_ssp_trend)