def get_grouped_data(cfg, input_data=None):
    """Get input files."""
    if input_data is None:
        logger.debug("Loading input data from 'cfg' argument")
        input_data = mlr.get_input_data(cfg,
                                        pattern=cfg.get('pattern'),
                                        ignore=cfg.get('ignore'))
    else:
        logger.debug("Loading input data from 'input_data' argument")
        if not mlr.datasets_have_mlr_attributes(input_data,
                                                log_level='error'):
            raise ValueError("At least one input dataset does not have "
                             "valid MLR attributes")
    if not input_data:
        raise ValueError("No input data found")
    paths = [d['filename'] for d in input_data]
    logger.debug("Found files")
    logger.debug(pformat(paths))

    # Extract necessary data
    label_data = select_metadata(input_data, var_type='label')
    if not label_data:
        raise ValueError("No data with var_type 'label' found")
    prediction_reference_data = select_metadata(
        input_data, var_type='prediction_reference')
    extracted_data = label_data + prediction_reference_data
    logger.debug("Found 'label' data")
    logger.debug(pformat([d['filename'] for d in label_data]))
    logger.debug("Found 'prediction_reference' data")
    logger.debug(pformat([d['filename'] for d in prediction_reference_data]))

    # Return grouped data
    return group_metadata(extracted_data, 'tag')
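
# A minimal sketch of the metadata helpers this function (and most of the
# functions below) leans on. The toy metadata list is hypothetical;
# select_metadata() and group_metadata() are the real helpers from
# esmvaltool.diag_scripts.shared.
from esmvaltool.diag_scripts.shared import group_metadata, select_metadata

toy_data = [
    {'filename': 'a.nc', 'var_type': 'label', 'tag': 'GPP'},
    {'filename': 'b.nc', 'var_type': 'prediction_reference', 'tag': 'GPP'},
    {'filename': 'c.nc', 'var_type': 'feature', 'tag': 'LAI'},
]

# select_metadata() keeps only the dicts matching every given facet
assert select_metadata(toy_data, var_type='label') == [toy_data[0]]

# group_metadata() maps each value of a facet to the matching dicts
assert list(group_metadata(toy_data, 'tag')) == ['GPP', 'LAI']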
def wfluxes(model, wdir, input_data):
    """Compute auxiliary fields and perform time averaging of existing fields.

    Arguments:
    - model: the model name;
    - wdir: the working directory where the outputs are stored;
    - input_data: a list of metadata dictionaries describing the input fields;

    Author: Valerio Lembo, University of Hamburg (2019).
    """
    cdo = Cdo()
    hfls_file = e.select_metadata(input_data, short_name='hfls',
                                  dataset=model)[0]['filename']
    pr_file = e.select_metadata(input_data, short_name='pr',
                                dataset=model)[0]['filename']
    prsn_file = e.select_metadata(input_data, short_name='prsn',
                                  dataset=model)[0]['filename']
    aux_file = wdir + '/aux.nc'

    # Evaporation from latent heat flux
    evspsbl_file = wdir + '/{}_evspsbl.nc'.format(model)
    cdo.divc(str(L_C), input=hfls_file, output=evspsbl_file)

    # Rainfall precipitation
    prr_file = wdir + '/{}_prr.nc'.format(model)
    cdo.sub(input="{} {}".format(pr_file, prsn_file), output=aux_file)
    cdo.chname('pr,prr', input=aux_file, output=prr_file)
    return evspsbl_file, prr_file
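
# The divc call above is just the latent-heat conversion E = LE / L_c.
# A minimal numpy sketch of the same step; the value of L_C used here is
# an assumption (the module defines its own constant).
import numpy as np

L_C_ASSUMED = 2.5008e6  # J kg-1, latent heat of vaporization (assumed)

def hfls_to_evspsbl(hfls):
    """Convert latent heat flux (W m-2) to evaporation (kg m-2 s-1)."""
    return np.asarray(hfls) / L_C_ASSUMED

# 80 W m-2 of latent heat flux is roughly 3.2e-5 kg m-2 s-1 of water
print(hfls_to_evspsbl(80.0))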
def make_diag_tci(cfg, dataset, input_data, sm_name="mrlsl",
                  hf_name="hfls", tci_name="tci"):
    """Shim routine between ESMValTool and the generic make_tci()."""
    sm_meta = select_metadata(input_data, short_name=sm_name)[0]
    hf_meta = select_metadata(input_data, short_name=hf_name)[0]
    sm_meta["standard_name"] = "depth_integrated_moisture_content_of_soil_layer"  # noqa
    tci_meta = sm_meta.copy()
    tci_meta["short_name"] = tci_name
    tci_meta["standard_name"] = "terrestrial_coupling_index"
    filename_tci = _get_filename(tci_meta, cfg, extension="nc")
    alpha = 0.05  # p-value rejection threshold
    tci = make_tci(sm_meta["filename"],
                   hf_meta["filename"],
                   filename_tci,
                   standard_name_sm=sm_meta["standard_name"],
                   standard_name_hf=hf_meta["standard_name"],
                   standard_name_tci=tci_meta["standard_name"],
                   alpha=alpha)
    return tci
def get_anomalies(ds_list, relative=False):
    """Calculate the anomaly between the baseline and future periods."""
    # determine historic and future periods
    start_years = list(group_metadata(ds_list, "start_year"))
    base_clim_start = min(start_years)
    fut_clim_start = max(start_years)

    # construct baseline
    base_metadata = select_metadata(ds_list, start_year=base_clim_start)
    base_file = base_metadata[0]["filename"]
    base_cube = iris.load_cube(base_file)

    # get future
    fut_metadata = select_metadata(ds_list, start_year=fut_clim_start)
    fut_file = fut_metadata[0]["filename"]
    fut_cube = iris.load_cube(fut_file)

    if relative:
        diff = fut_cube - base_cube
        anomaly = (diff / base_cube) * 100
        anomaly.units = "%"
    else:
        anomaly = fut_cube - base_cube

    # ensure longitude coord is on -180 to 180 range
    try:
        anomaly = anomaly.intersection(longitude=(-180.0, 180.0))
    except ValueError:
        # remove and re-add bounds to attempt to fix
        anomaly.coord('longitude').bounds = None
        anomaly.coord('longitude').guess_bounds()
        anomaly = anomaly.intersection(longitude=(-180.0, 180.0))

    return anomaly
def get_anomalies(ds_list, base_clim_start, fut_clim_start, relative=False):
    """Calculate the anomaly between the given baseline and future periods."""
    # construct baseline
    base_metadata = select_metadata(ds_list, start_year=base_clim_start)
    if not base_metadata:
        logging.warning(
            f"Base climatology (start {base_clim_start}) not found")
        return None
    base_file = base_metadata[0]["filename"]
    base_cube = iris.load_cube(base_file)

    # get future
    fut_metadata = select_metadata(ds_list, start_year=fut_clim_start)
    if not fut_metadata:
        logging.warning(
            f"Future climatology (start {fut_clim_start}) not found")
        return None
    fut_file = fut_metadata[0]["filename"]
    fut_cube = iris.load_cube(fut_file)

    if relative:
        diff = fut_cube - base_cube
        anomaly = (diff / base_cube) * 100
        anomaly.units = "%"
    else:
        anomaly = fut_cube - base_cube

    return anomaly
def _get_anomaly_cubes(cfg):
    """Get all anomaly cubes."""
    logger.info("Calculating anomalies")
    cubes = {}
    ancestors = {}
    input_data = cfg['input_data'].values()
    onepct_data = select_metadata(input_data, short_name='tas',
                                  exp='1pctCO2')

    # Process data
    for dataset in onepct_data:
        dataset_name = dataset['dataset']
        pi_data = select_metadata(input_data,
                                  short_name='tas',
                                  exp='piControl',
                                  dataset=dataset_name)
        if not pi_data:
            raise ValueError(f"No 'piControl' data available for dataset "
                             f"'{dataset_name}'")
        onepct_cube = iris.load_cube(dataset['filename'])
        pi_cube = iris.load_cube(pi_data[0]['filename'])
        anomaly_cube = _get_anomaly_cube(onepct_cube, pi_cube)
        cubes[dataset_name] = anomaly_cube
        ancestors[dataset_name] = [dataset['filename'],
                                   pi_data[0]['filename']]

    # Calculate multi-model mean if desired
    if cfg.get('calculate_mmm', True):
        (mmm_cube, mmm_ancestors) = _get_mmm_anomaly(cubes, ancestors, cfg)
        cubes['MultiModelMean'] = mmm_cube
        ancestors['MultiModelMean'] = mmm_ancestors

    return (cubes, ancestors)
def preprocess_data(cfg):
    """Extract input data."""
    input_data = deepcopy(list(cfg['input_data'].values()))
    if not input_data:
        return ([], [])

    # Use 'rtmt' data in place of 'rtnt' if necessary
    for dataset in input_data:
        if dataset['short_name'] == 'rtmt':
            RTMT_DATASETS.add(dataset['dataset'])
            dataset['short_name'] = 'rtnt'
    if RTMT_DATASETS:
        logger.info("Using 'rtmt' instead of 'rtnt' for datasets '%s'",
                    RTMT_DATASETS)

    # Calculate anomalies for every dataset
    input_data = _get_anomaly_data(input_data)

    # Calculate multi-model mean
    if cfg.get('calculate_mmm', True):
        input_data = _get_multi_model_mean(input_data)

    # Group data in terms of dataset
    tas_data = select_metadata(input_data, short_name='tas')
    rtnt_data = select_metadata(input_data, short_name='rtnt')
    tas_data = group_metadata(tas_data, 'dataset')
    rtnt_data = group_metadata(rtnt_data, 'dataset')
    return (tas_data, rtnt_data)
def make_plot(metadata, scenarios, cfg, provenance):
    """Make figure 3, left graph.

    Multimodel values as line, reference value in black square,
    steering variables in dark dots.
    """
    fig, axes = plt.subplots()
    for member in select_metadata(metadata, variable_group='tas_cmip'):
        filename = member['filename']
        dataset = xr.open_dataset(filename)
        if 'MultiModel' not in filename:
            axes.plot(dataset.time.dt.year,
                      dataset.tas.values,
                      c='grey',
                      alpha=0.3,
                      lw=.5,
                      label='CMIP members')
        else:
            # Only display stats for the future period:
            dataset = dataset.sel(time=slice('2010', None, None))
            axes.plot(dataset.time.dt.year,
                      dataset.tas.values,
                      color='k',
                      linewidth=2,
                      label='CMIP ' + Path(filename).stem.split('_')[0][10:])

    for member in select_metadata(metadata, variable_group='tas_target'):
        filename = member['filename']
        dataset = xr.open_dataset(filename)
        if 'MultiModel' not in filename:
            axes.plot(dataset.time.dt.year,
                      dataset.tas.values,
                      color='blue',
                      linewidth=1,
                      label=member['dataset'])

    # Add the scenarios with dots at the CMIP dT and bars for the periods
    for i, scenario in enumerate(scenarios):
        axes.scatter(scenario['year'],
                     scenario['cmip_dt'],
                     s=50,
                     zorder=10,
                     color='r',
                     label=r"Scenarios' steering $\Delta T_{CMIP}$")
        _timeline(axes, i, scenario['period_bounds'])

    handles, labels = plt.gca().get_legend_handles_labels()
    by_label = dict(zip(labels, handles))  # dict removes duplicate labels
    axes.legend(by_label.values(), by_label.keys())
    axes.set_xlabel('Year')
    axes.set_ylabel(r'Global mean $\Delta T$ (K) w.r.t. reference period')

    # Save figure
    filename = get_plot_filename('global_matching', cfg)
    fig.savefig(filename, bbox_inches='tight', dpi=300)
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(filename, provenance)
def reform_data_iris_deangelis3b4(input_data):
    """Extract data from IRIS cubes and average or reformat them."""
    # Model data for 'tas', 'rsnstcs'
    cubes = {}
    for my_short_name in ['tas', 'rsnstcs']:
        # my_data: list of dictionaries
        my_data = select_metadata(input_data, short_name=my_short_name)
        # subdata: dictionary
        for subdata in my_data:
            cube = iris.load(subdata['filename'])[0]
            cat.add_year(cube, 'time', name='year')
            cube = cube.aggregated_by('year', iris.analysis.MEAN)
            experiment = subdata['exp']
            if experiment == 'abrupt-4xCO2':
                experiment = 'abrupt4xCO2'
            dataset = subdata['dataset']
            cubetuple = (dataset, my_short_name, experiment)
            if experiment == 'piControl':
                # DeAngelis use a 21 month running mean on piControl but
                # the full extent of 150 years abrupt4xCO2. I could not
                # find out how they treat the edges; currently I just skip
                # the mean for the edges. This is not exactly the same as
                # done in the paper (small differences remain in extended
                # data Fig. 1), but it is closer than other methods I
                # tried, e.g. skipping the edges.
                # For most data sets it would also be possible to extend
                # the piControl by 20 years, but then it would no longer
                # be centered means of piControl for each year of
                # abrupt4xCO2.
                # cube_new = cube.rolling_window('time',
                #                                iris.analysis.MEAN, 21)
                # endm10 = len(cube.coord('time').points) - 10
                # cube.data[10:endm10] = cube_new.data
                cube.data = scisi.savgol_filter(cube.data, 21, 1, axis=0)
            cubes[cubetuple] = cube.data

    # Model data and observations for 'rsnstcsnorm' and 'prw'
    for my_short_name in ['rsnstcsnorm', 'prw']:
        # my_data: list of dictionaries
        my_data = select_metadata(input_data, short_name=my_short_name)
        # subdata: dictionary
        for subdata in my_data:
            if 'exp' in subdata.keys():
                experiment = subdata['exp']
            else:
                experiment = 'nomodel'
            dataset = subdata['dataset']
            cubetuple = (dataset, my_short_name, experiment)
            if experiment in ['piControl', 'nomodel']:
                cube = iris.load(subdata['filename'])[0]
                total_len = (len(cube.coord('time').points) *
                             len(cube.coord('latitude').points) *
                             len(cube.coord('longitude').points))
                data_new = np.reshape(cube.data, total_len)
                cubes[cubetuple] = data_new

    return cubes
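
# Why savgol_filter can stand in for the commented-out rolling_window
# approach: with polyorder=1 and a symmetric window, the Savitzky-Golay
# value at each interior point equals the centered window mean; only the
# edge handling differs (scipy fits a polynomial there instead of
# truncating). A quick self-contained check on toy data:
import numpy as np
from scipy.signal import savgol_filter

rng = np.random.default_rng(0)
series = rng.normal(size=200)

running_mean = np.convolve(series, np.ones(21) / 21, mode='valid')
smoothed = savgol_filter(series, 21, 1)

# Away from the 10-point edges both methods agree to rounding error
assert np.allclose(smoothed[10:-10], running_mean)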
def main(cfg):
    """Run the diagnostic."""
    input_data = (
        select_metadata(cfg['input_data'].values(), short_name='tas') +
        select_metadata(cfg['input_data'].values(), short_name='tasa'))
    if not input_data:
        raise ValueError("This diagnostic needs the 'tas' or 'tasa' "
                         "variable")

    # Calculate psi for every dataset
    psis = {}
    psi_attrs = {
        'short_name': 'psi',
        'long_name': 'Temperature variability metric',
        'units': 'K',
    }
    grouped_data = group_metadata(input_data, 'dataset')
    for (dataset, [data]) in grouped_data.items():
        logger.info("Processing %s", dataset)
        cube = iris.load_cube(data['filename'])
        iris.coord_categorisation.add_year(cube, 'time')
        cube = cube.aggregated_by('year', iris.analysis.MEAN)
        psi_cube = calculate_psi(cube, cfg)
        data.update(psi_attrs)
        data.pop('standard_name', '')

        # Provenance
        caption = ("Temporal evolution of temperature variability metric "
                   "psi between {start_year} and {end_year} for "
                   "{dataset}.".format(**data))
        provenance_record = get_provenance_record(caption,
                                                  [data['filename']])
        out_path = get_diagnostic_filename('psi_' + dataset, cfg)
        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(out_path, provenance_record)

        # Save psi for every dataset
        data['filename'] = out_path
        io.metadata_to_netcdf(psi_cube, data)

        # Save averaged psi
        psis[dataset] = np.mean(psi_cube.data)

    # Save averaged psis for every dataset in one file
    out_path = get_diagnostic_filename('psi', cfg)
    io.save_scalar_data(psis, out_path, psi_attrs,
                        attributes=psi_cube.attributes)

    # Provenance
    caption = "{long_name} for multiple climate models.".format(**psi_attrs)
    ancestor_files = [d['filename'] for d in input_data]
    provenance_record = get_provenance_record(caption, ancestor_files)
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(out_path, provenance_record)
def main(cfg):
    """Process data for use as input to the PCR-GLOBWB hydrological model."""
    for dataset, metadata in group_metadata(cfg['input_data'].values(),
                                            'dataset').items():
        for short_name in "pr", "tas":
            logger.info("Processing variable %s for dataset %s", short_name,
                        dataset)

            # Load preprocessed cubes for normal data and climatology
            var = select_metadata(metadata, variable_group=short_name)[0]
            cube = iris.load_cube(var['filename'])
            var_climatology = select_metadata(
                metadata,
                variable_group=short_name + '_climatology',
            )[0]
            cube_climatology = iris.load_cube(var_climatology['filename'])

            # Create a spin-up year for pcrglob based on the climatology
            cube = add_spinup_year(cube, cube_climatology)

            # Round times to integer number of days
            time_coord = cube.coord('time')
            time_coord.points = da.floor(time_coord.core_points())
            time_coord.bounds = None
            time_coord.guess_bounds()

            # Set lat from highest to lowest value
            cube = cube[:, ::-1, ...]

            # Workaround for bug in PCRGlob
            # (see https://github.com/UU-Hydro/PCR-GLOBWB_model/pull/13)
            for coord_name in ['latitude', 'longitude']:
                coord = cube.coord(coord_name)
                coord.points = coord.points + 0.001

            # Unit conversion 'kg m-3 day-1' to 'm' precip (divide by density)
            if short_name == "pr":
                cube.units = cube.units / 'kg m-3 day-1'
                cube.data = cube.core_data() / 1000

            # Save data
            basename = '_'.join([
                'pcrglobwb',
                Path(var['filename']).stem,
                cfg['basin'],
            ])
            output_file = get_diagnostic_filename(basename, cfg)
            iris.save(cube, output_file, fill_value=1.e20)

            # Store provenance
            provenance_record = get_provenance_record(
                [var['filename'], var_climatology['filename']])
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(output_file, provenance_record)
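
# The unit arithmetic in the pr branch can be checked directly with
# cf_units (the unit library iris uses); a small sketch, assuming the
# incoming precipitation is a daily accumulation in kg m-2 day-1:
from cf_units import Unit

pr_units = Unit('kg m-2 day-1')
# Dividing by a density flux unit leaves metres of water, matching the
# core_data() / 1000 step (density of water is ~1000 kg m-3)
assert (pr_units / Unit('kg m-3 day-1')) == Unit('m')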
def main(cfg):
    """Run the diagnostic."""
    input_data = (
        select_metadata(cfg['input_data'].values(), short_name='tas') +
        select_metadata(cfg['input_data'].values(), short_name='tasa'))
    if not input_data:
        raise ValueError("This diagnostic needs the 'tas' or 'tasa' "
                         "variable")

    # Get tas data
    tas_cubes = {}
    tas_obs = []
    for (dataset, [data]) in group_metadata(input_data, 'dataset').items():
        cube = iris.load_cube(data['filename'])
        iris.coord_categorisation.add_year(cube, 'time')
        cube = cube.aggregated_by('year', iris.analysis.MEAN)
        tas_cubes[dataset] = cube
        if data['project'] == 'OBS':
            tas_obs.append(dataset)

    # Get time-dependent psi data
    psi_cubes = {}
    psi_obs = []
    for (dataset, [data]) in group_metadata(
            io.netcdf_to_metadata(cfg, pattern='psi_*.nc'),
            'dataset').items():
        cube = iris.load_cube(data['filename'])
        cube = cube.aggregated_by('year', iris.analysis.MEAN)
        psi_cubes[dataset] = cube
        if data['project'] == 'OBS':
            psi_obs.append(dataset)

    # Get psi, ECS and lambda for the models
    (psi_cube, ecs_cube, lambda_cube) = get_external_cubes(cfg)

    # Plots
    for obs_name in tas_obs:
        logger.info("Observation for tas: %s", obs_name)
        plot_temperature_anomaly(cfg, tas_cubes, lambda_cube, obs_name)
    for obs_name in psi_obs:
        logger.info("Observation for psi: %s", obs_name)
        plot_psi(cfg, psi_cubes, lambda_cube, obs_name)
        obs_cube = psi_cubes[obs_name]
        plot_emergent_relationship(cfg, psi_cube, ecs_cube, lambda_cube,
                                   obs_cube)
        plot_pdf(cfg, psi_cube, ecs_cube, obs_cube)
        plot_cdf(cfg, psi_cube, ecs_cube, obs_cube)

        # Print ECS range
        ecs_range = get_ecs_range(cfg, psi_cube, ecs_cube, obs_cube)
        logger.info("Observational constraint: Ψ = (%.2f ± %.2f) K",
                    np.mean(obs_cube.data), np.std(obs_cube.data))
        logger.info(
            "Constrained ECS range: (%.2f - %.2f) K with best "
            "estimate %.2f K", ecs_range[1], ecs_range[2], ecs_range[0])
def get_control_exper_obs(short_name, input_data, cfg, cmip_type):
    """Get control, experiment and observational datasets.

    This function is used when running recipes that need a clear
    distinction between a control dataset, an experiment dataset and
    optional observational (OBS, obs4mips, etc.) datasets; such recipes
    include recipe_validation and all the autoassess ones.

    short_name: variable short name
    input_data: dict containing the input data info
    cfg: config file as used in this module
    cmip_type: project of the model data (e.g. CMIP5, CMIP6)
    """
    # select data per short name and CMIP type
    dataset_selection = select_metadata(input_data,
                                        short_name=short_name,
                                        project=cmip_type)

    # get the obs datasets if specified in recipe
    if 'observational_datasets' in cfg:
        obs_selection = [
            select_metadata(input_data,
                            short_name=short_name,
                            dataset=obs_dataset)[0]
            for obs_dataset in cfg['observational_datasets']
        ]
    else:
        obs_selection = []

    # log the observational datasets
    if obs_selection:
        logger.info("Observations dataset(s) %s",
                    [obs['dataset'] for obs in obs_selection])

    # determine CONTROL and EXPERIMENT datasets;
    # corner case: they could have the same dataset name
    if cfg['control_model'] == cfg['exper_model']:
        logger.info("Identical Control/Experiment dataset names: %s",
                    dataset_selection[0]['dataset'])
        control, experiment = _disentagle_iden_datasets(dataset_selection)
        return control, experiment, obs_selection

    # if they're not the same dataset, assign them directly
    for model in dataset_selection:
        if model['dataset'] == cfg['control_model']:
            logger.info("Control dataset %s", model['dataset'])
            control = model
        elif model['dataset'] == cfg['exper_model']:
            logger.info("Experiment dataset %s", model['dataset'])
            experiment = model

    return control, experiment, obs_selection
def main(cfg):
    """Calculate, visualize and save the bias and change for each model."""
    metadata = cfg['input_data'].values()
    grouped_metadata = group_metadata(metadata, 'variable_group')

    biases = {}
    changes = {}
    ancestors = []
    for group, metadata in grouped_metadata.items():
        model_metadata = select_metadata(metadata, tag='model')
        model_data, model_ancestors = load_data(model_metadata)
        ancestors.extend(model_ancestors)
        variable = model_data.name
        if group.endswith('bias'):
            obs_metadata = select_metadata(metadata, tag='observations')
            obs_data, obs_ancestors = load_data(obs_metadata)
            ancestors.extend(obs_ancestors)
            bias = calculate_bias(model_data, obs_data)
            biases[variable] = bias
        elif group.endswith('change'):
            changes[variable] = model_data
        else:
            logger.warning(
                "Got input for variable group %s"
                " but I don't know what to do with it.", group)

    # Combine all variables
    bias = xr.Dataset(biases)
    change = xr.Dataset(changes)
    combined = xr.concat([bias, change], dim='metric')
    combined['metric'] = [
        'Bias (RMSD of all gridpoints)',
        'Mean change (Future - Reference)',
    ]
    dataframe = combined.rename(
        tas='Temperature (K)',
        pr='Precipitation (kg/m2/s)',
    ).to_dataframe()
    dataframe.columns.name = 'variable'
    tidy_df = dataframe.stack('variable').unstack('metric')

    plot_scatter(tidy_df, ancestors, cfg)
    plot_table(tidy_df, ancestors, cfg)
    plot_htmltable(tidy_df, ancestors, cfg)
def _get_ancestor_files(cfg, obs_name, projects=None):
    """Get ancestor files for provenance."""
    if projects is None:
        projects = _get_project(cfg)
    if isinstance(projects, str):
        projects = [projects]
    datasets = []
    for project in projects:
        datasets.extend(
            select_metadata(cfg['input_data'].values(), project=project))
    datasets.extend(
        select_metadata(cfg['input_data'].values(), dataset=obs_name))
    return [d['filename'] for d in datasets]
def make_plots(cfg, dataset, data, data_tci, varname_sm="mrlsl",
               varname_hf="hfls", varname_tci="tci"):
    """Shim routine between ESMValTool and the generic plot_tci()."""
    meta = select_metadata(data, short_name=varname_sm)[0]
    filename_maps = _get_plot_filename(meta, cfg, varname_tci)
    model_desc = "{:s}, {:s}, {:s}, {:s}, {:d}-{:d}".format(
        meta["project"],
        meta["exp"],
        meta["dataset"],
        meta["ensemble"],
        meta["start_year"],
        meta["end_year"],
    )
    title = ("Terrestrial Coupling Index "
             "({units}) {varname_sm} - {varname_hf}\n{model_desc}".format(
                 units=str(data_tci.units),
                 varname_sm=varname_sm,
                 varname_hf=varname_hf,
                 model_desc=model_desc,
             ))
    plot_tci(data_tci, filename_maps, title=title)
def _get_datasets_for_ec(input_data):
    """Check input data."""
    features = select_metadata(input_data, var_type='feature')
    labels = select_metadata(input_data, var_type='label')
    pred_input = select_metadata(input_data, var_type='prediction_input')
    pred_input_err = select_metadata(input_data,
                                     var_type='prediction_input_error')
    data_to_check = {
        'feature': features,
        'label': labels,
        'prediction_input': pred_input,
        'prediction_input_error': pred_input_err,
    }
    for (name, data) in data_to_check.items():
        _check_datasets(data, name)
    return (features, labels, pred_input, pred_input_err)
def init_mkthe_te(model, wdir, input_data):
    """Compute auxiliary fields or perform time averaging of existing fields.

    Arguments:
    - model: the model name;
    - wdir: the working directory where the outputs are stored;
    - input_data: a list of metadata dictionaries describing the input fields;

    Author: Valerio Lembo, University of Hamburg (2019).
    """
    cdo = Cdo()
    rlut_file = e.select_metadata(input_data, short_name='rlut',
                                  dataset=model)[0]['filename']

    # Compute monthly mean fields from 2D surface daily fields:
    # emission temperature, computed as a double square root of
    # rlut * (1 / sigma), i.e. te = (rlut / sigma) ** 0.25
    te_file = wdir + '/{}_te.nc'.format(model)
    cdo.sqrt(input="-sqrt -mulc,{} {}".format(SIGMAINV, rlut_file),
             output=te_file)
    te_ymm_file = wdir + '/{}_te_ymm.nc'.format(model)
    cdo.yearmonmean(input=te_file, output=te_ymm_file)
    te_gmean_file = wdir + '/{}_te_gmean.nc'.format(model)
    cdo.timmean(input='-fldmean {}'.format(te_ymm_file),
                output=te_gmean_file)
    with Dataset(te_gmean_file) as f_l:
        te_gmean_constant = f_l.variables['rlut'][0, 0, 0]
    return te_ymm_file, te_gmean_constant, te_file
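
# The nested sqrt above is the Stefan-Boltzmann inversion (SIGMAINV is
# presumably 1/sigma). A numpy sketch of the same arithmetic:
import numpy as np

SIGMA = 5.670374419e-8  # W m-2 K-4, Stefan-Boltzmann constant

def emission_temperature(rlut):
    """Blackbody emission temperature (K) from OLR (W m-2)."""
    return np.sqrt(np.sqrt(np.asarray(rlut) / SIGMA))

# A typical global-mean OLR of ~240 W m-2 gives te ~ 255 K
print(emission_temperature(240.0))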
def _get_mmm_tas(rad_var, rad_datasets, tas_datasets):
    """Get multi-model mean for tas data."""
    logger.debug(
        "Calculating multi-model mean 'tas' for radiation variable '%s'",
        rad_var)
    ancestors = []
    dataset_names = []
    mmm = []
    for dataset_name in [d['dataset'] for d in rad_datasets]:
        tas_data = select_metadata(tas_datasets, dataset=dataset_name)
        if not tas_data:
            raise ValueError(
                f"No 'tas' data for dataset '{dataset_name}' available for "
                f"multi-model mean calculation")
        cube = tas_data[0]['cube']
        ancestors.extend(tas_data[0]['ancestors'])
        dataset_names.append(dataset_name)
        mmm.append(cube.data)
    _check_array_shapes(mmm, 'tas')
    mmm = np.ma.array(mmm)
    mmm_cube = cube.copy(data=np.ma.mean(mmm, axis=0))
    attributes = {
        'ancestors': ancestors,
        'dataset': 'MultiModelMean',
        'datasets': '|'.join(dataset_names),
        'project': rad_datasets[0]['project'],
        'short_name': _get_tas_var('MultiModelMean', rad_var),
    }
    mmm_cube.attributes = attributes
    return {**attributes, 'cube': mmm_cube}
def _cmip_envelope(datasetlist, variable, target_year):
    """Determine the change in <variable> PDF of each CMIP model.

    Note: using mf_dataset is not possible due to different calendars.
    """
    cmip = select_metadata(datasetlist, variable_group=f'{variable}_cmip')
    envelope = []
    ancestors = []
    for data_dict in cmip:
        dataset = xr.open_dataset(data_dict['filename'])[variable]
        control = dataset.sel(time=slice('1981', '2010'))
        future = dataset.sel(time=slice(str(target_year - 15),
                                        str(target_year + 15)))

        quantiles = [.05, .1, .25, .5, .75, .90, .95]
        qcontrol = control.groupby('time.season').quantile(quantiles)
        qfuture = future.groupby('time.season').quantile(quantiles)

        if variable == 'tas':
            # absolute diff
            envelope.append(qfuture - qcontrol)
        else:
            # pr; relative diff
            envelope.append((qfuture - qcontrol) / qcontrol * 100)
        ancestors.append(data_dict['filename'])
    cmip = xr.concat(envelope, dim='multimodel')
    provenance = _create_provenance_record(ancestors)

    # Prevent confusion between dimension 'quantile' and method 'quantile'
    return cmip.rename({'quantile': 'percentile'}), provenance
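
# The rename at the end is there because DataArray.quantile() adds a
# coordinate that is itself called 'quantile', which is easy to confuse
# with the method of the same name downstream. A toy illustration
# (hypothetical data):
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(100.0), dims='time')
q = da.quantile([0.05, 0.5, 0.95])  # result has a 'quantile' coordinate
q = q.rename({'quantile': 'percentile'})
print(q.coords['percentile'].values)  # [0.05 0.5  0.95]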
def get_residual_data(cfg):
    """Get residual data."""
    input_data = mlr_plot.get_input_datasets(cfg)
    residual_data = select_metadata(input_data,
                                    var_type='prediction_residual')
    if not residual_data:
        raise ValueError("No 'prediction_residual' data found")
    return group_metadata(residual_data, 'mlr_model_name')
def _set_axx_exfig2b(axx, cfg, datasets, reg_dict, sa_dict):
    """Set up axes and annotations for extended figure 2b."""
    axx.plot(np.linspace(0.2, 1.4, 2), reg_dict["y_rsnst"], color='r')
    for iii, model in enumerate(datasets):
        proj = select_metadata(cfg['input_data'].values(),
                               dataset=model)[0]['project']
        style = e.plot.get_dataset_style(model, style_file=proj.lower())
        axx.plot(sa_dict["rsnstcsdt"][iii],
                 sa_dict["rsnstdt"][iii],
                 marker=style['mark'],
                 color=style['color'],
                 markerfacecolor=style['facecolor'],
                 linestyle='none',
                 markersize=10,
                 markeredgewidth=2.0,
                 label=model)
    axx.set_xlabel(r'drsnstcs/dtas (W m$^{-2}$ K$^{-1}$)')
    axx.set_title(' ')
    axx.set_ylabel(r'drsnst/dtas (W m$^{-2}$ K$^{-1}$)')
    axx.set_xlim([0.45, 1.15])
    axx.set_xticks(np.linspace(0.5, 1.1, 7))
    axx.set_ylim([0.45, 1.15])
    axx.set_yticks(np.linspace(0.5, 1.1, 7))
    axx.text(
        0.85, 1.1,
        'Fit (r={:.2f}, slope={:.2f})'.format(reg_dict["rsnst"].rvalue,
                                              reg_dict["rsnst"].slope))
    axx.legend(loc=2)
    return axx
def make_daily_var(cfg, input_data, short_name, getweights, metadata,
                   scale=1, offset=0):
    """Wrapper for dry_spell_rwr.utc_to_lt.make_daily_var().

    Derives some of its arguments from the ESMValTool config.
    """
    var_meta = select_metadata(input_data, short_name=short_name)[0]
    logger.info(var_meta)
    files_var = [var_meta["filename"], ]
    var_name = var_meta["short_name"]
    local_time = var_meta["local_time"]
    model_grid, file_sftlf = _get_model_grid(input_data)
    ut_var, ts_pad = _get_time_axis(files_var[0])
    logger.info("ts_pad = %s", ts_pad)
    file_out = _get_filename(var_meta, cfg)
    utc.make_daily_var(files_var, var_name, local_time, getweights,
                       model_grid, metadata, ut_var, tsPad=ts_pad,
                       scale=scale, offset=offset, file_out=file_out)
    record_var = _get_provenance_record({}, files_var + [file_sftlf, ])
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(file_out, record_var)
    return file_out
def main(cfg):
    """Compute the time average for each input dataset."""
    # Get a description of the preprocessed data that we will use as input.
    input_data = cfg['input_data'].values()

    # Demonstrate use of metadata access convenience functions.
    selection = select_metadata(input_data, short_name='pr', project='CMIP5')
    logger.info("Example of how to select only CMIP5 precipitation data:\n%s",
                pformat(selection))

    selection = sorted_metadata(selection, sort='dataset')
    logger.info("Example of how to sort this selection by dataset:\n%s",
                pformat(selection))

    grouped_input_data = group_metadata(input_data,
                                        'standard_name',
                                        sort='dataset')
    logger.info(
        "Example of how to group and sort input data by standard_name:"
        "\n%s", pformat(grouped_input_data))

    # Example of how to loop over variables/datasets in alphabetical order
    for standard_name in grouped_input_data:
        logger.info("Processing variable %s", standard_name)
        for attributes in grouped_input_data[standard_name]:
            logger.info("Processing dataset %s", attributes['dataset'])
            input_file = attributes['filename']
            cube = compute_diagnostic(input_file)

            output_basename = os.path.splitext(
                os.path.basename(input_file))[0] + '_mean'
            provenance_record = get_provenance_record(
                attributes, ancestor_files=[input_file])
            plot_diagnostic(cube, output_basename, provenance_record, cfg)
def calculate_ecs(input_data, cfg, description=None):
    """Calculate ECS and net climate feedback parameters."""
    logger.info("Calculating ECS and net climate feedback parameter")
    msg = '' if description is None else f' for {description}'
    ancestors = []
    ecs = {}
    feedback_parameter = {}

    # Iterate over all datasets and save ECS and feedback parameters
    for dataset in select_metadata(input_data, short_name='tas'):
        dataset_name = dataset['dataset']
        logger.debug("Calculating ECS%s of dataset '%s'", msg, dataset_name)
        rtnt_data = select_metadata(input_data, short_name='rtnt',
                                    dataset=dataset_name)
        if not rtnt_data:
            logger.debug(
                "No 'rtmt' or 'rtnt' data for '%s' available, skipping ECS "
                "calculation for it", dataset_name)
            continue
        tas_cube = dataset['cube']
        rtnt_cube = rtnt_data[0]['cube']
        if rtnt_cube.ndim > 2:
            raise ValueError(
                f"Calculating ECS is only supported for cubes with less "
                f"than 3 dimensions, got {rtnt_cube.ndim:d}D cube")
        ancestors.extend(dataset['ancestors'] + rtnt_data[0]['ancestors'])
        coords = [(coord, idx - 1)
                  for (idx, coord) in enumerate(
                      rtnt_cube.coords(dim_coords=True))
                  if coord.name() != 'time']

        # Calculate ECS (using linear regression)
        reg = _vectorized_linregress(_get_data_time_last(tas_cube),
                                     _get_data_time_last(rtnt_cube))
        ecs[dataset_name] = iris.cube.Cube(-reg[1] / (2 * reg[0]),
                                           dim_coords_and_dims=coords)
        feedback_parameter[dataset_name] = iris.cube.Cube(
            reg[0], dim_coords_and_dims=coords)
    ancestors = list(set(ancestors))
    if not ecs:
        logger.info(
            "No 'rtmt' or 'rtnt' data available, skipping ECS calculation")
        return

    # Write data
    _write_scalar_data([ecs, feedback_parameter], ancestors, cfg,
                       description)
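
# The formula -reg[1] / (2 * reg[0]) is the Gregory regression method:
# regress net TOA flux N on the temperature anomaly dT, N = F + lambda*dT;
# the x-intercept -F/lambda is the equilibrium warming for the applied
# forcing (abrupt 4xCO2 here), and halving it gives the per-doubling ECS.
# A sketch with synthetic, made-up numbers:
import numpy as np
from scipy.stats import linregress

rng = np.random.default_rng(1)
delta_t = np.linspace(0.5, 5.5, 150)
# Assume F = 7.4 W m-2 (4xCO2) and lambda = -1.0 W m-2 K-1, plus noise
net_toa = 7.4 - 1.0 * delta_t + rng.normal(scale=0.3, size=150)

reg = linregress(delta_t, net_toa)
ecs = -reg.intercept / (2 * reg.slope)
print(f"ECS ~ {ecs:.2f} K")  # ~3.7 K for these synthetic numbers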
def _get_error_datasets(input_data, **kwargs):
    """Extract error datasets from input data."""
    input_data = select_metadata(input_data, **kwargs)
    error_data = []
    for dataset in input_data:
        if dataset.get('stderr', False):
            error_data.append(dataset)
    return error_data
def _get_sel_files_var(cfg, varnames):
    """Get filenames from cfg for all model mean and difference variables."""
    selection = []
    for var in varnames:
        for hlp in select_metadata(cfg['input_data'].values(),
                                   short_name=var):
            selection.append(hlp['filename'])
    return selection
def _get_ec_ancestors(cfg):
    """Get ancestor files for emergent constraint."""
    input_data = _get_input_data(cfg)
    ancestors = []
    for var_type in ('feature', 'label', 'prediction_input',
                     'prediction_input_error'):
        datasets = select_metadata(input_data, var_type=var_type)
        ancestors.extend([d['filename'] for d in datasets])
    return ancestors
def _get_cube(datasets, short_name):
    """Get cube with specific ``'short_name'`` from datasets."""
    datasets = select_metadata(datasets, short_name=short_name)
    if len(datasets) != 1:
        raise ValueError(
            f"Expected exactly one dataset with short_name '{short_name}', "
            f"got {len(datasets):d}:\n{datasets}")
    return iris.load_cube(datasets[0]['filename'],
                          ih.var_name_constraint(short_name))
def calculate_tcr(cfg):
    """Calculate transient climate response (TCR)."""
    tcr = {}

    # Get data
    input_data = cfg['input_data'].values()
    onepct_data = select_metadata(input_data, short_name='tas',
                                  exp='1pctCO2')

    # Iterate over all datasets
    for dataset in onepct_data:
        pi_data = select_metadata(input_data,
                                  short_name='tas',
                                  exp='piControl',
                                  dataset=dataset['dataset'])
        if not pi_data:
            raise ValueError(f"No 'piControl' data available for dataset "
                             f"'{dataset['dataset']}'")
        onepct_cube = iris.load_cube(dataset['filename'])
        pi_cube = iris.load_cube(pi_data[0]['filename'])

        # Get anomaly cube
        anomaly_cube = _get_anomaly_cube(onepct_cube, pi_cube)

        # Calculate TCR
        tas_2x = anomaly_cube[START_YEAR_IDX:END_YEAR_IDX].collapsed(
            'time', iris.analysis.MEAN).data
        new_tcr = tas_2x
        tcr[dataset['dataset']] = new_tcr
        logger.info("TCR (%s) = %.2f %s", dataset['dataset'], new_tcr,
                    anomaly_cube.units)

        # Plot
        (path, provenance_record) = _plot(cfg, anomaly_cube,
                                          dataset['dataset'], new_tcr)
        if path is not None:
            provenance_record['ancestors'] = [
                dataset['filename'],
                pi_data[0]['filename'],
            ]
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(path, provenance_record)

    return tcr
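
# START_YEAR_IDX and END_YEAR_IDX presumably bracket the conventional TCR
# window: the 20-year mean centred on the year CO2 doubles, which for
# 1 % yr-1 compound growth is year ~70. A quick arithmetic check:
import math

doubling_year = math.log(2) / math.log(1.01)
print(round(doubling_year))  # 70
print((round(doubling_year) - 10, round(doubling_year) + 10))  # (60, 80)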