def main(cfg):
    """Run the diagnostic.

    Calculate the temperature variability metric psi for every dataset
    that provides ``tas`` or ``tasa``, save one netCDF file per dataset,
    save the time-averaged psi of all datasets in a single file and log
    provenance for every output.

    Parameters
    ----------
    cfg : dict
        Diagnostic configuration; ``cfg['input_data']`` is read here.

    Raises
    ------
    ValueError
        If the input contains neither ``tas`` nor ``tasa``.

    """
    input_data = (
        select_metadata(cfg['input_data'].values(), short_name='tas') +
        select_metadata(cfg['input_data'].values(), short_name='tasa'))
    if not input_data:
        raise ValueError("This diagnostics needs 'tas' or 'tasa' variable")

    # Remember the original input files now: the loop below overwrites
    # ``data['filename']`` with the output path, so reading the ancestors
    # afterwards could yield this diagnostic's own output files.
    ancestor_files = [d['filename'] for d in input_data]

    # Calculate psi for every dataset
    psis = {}
    psi_attrs = {
        'short_name': 'psi',
        'long_name': 'Temperature variability metric',
        'units': 'K',
    }
    grouped_data = group_metadata(input_data, 'dataset')
    # Exactly one metadata entry per dataset is expected (the unpacking
    # below fails otherwise).
    for (dataset, [data]) in grouped_data.items():
        logger.info("Processing %s", dataset)
        cube = iris.load_cube(data['filename'])
        iris.coord_categorisation.add_year(cube, 'time')
        cube = cube.aggregated_by('year', iris.analysis.MEAN)
        psi_cube = calculate_psi(cube, cfg)
        data.update(psi_attrs)
        data.pop('standard_name', '')

        # Provenance
        caption = ("Temporal evolution of temperature variability metric psi "
                   "between {start_year} and {end_year} for {dataset}.".format(
                       **data))
        provenance_record = get_provenance_record(caption, [data['filename']])
        out_path = get_diagnostic_filename('psi_' + dataset, cfg)
        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(out_path, provenance_record)

        # Save psi for every dataset
        data['filename'] = out_path
        io.metadata_to_netcdf(psi_cube, data)

        # Save averaged psi
        psis[dataset] = np.mean(psi_cube.data)

    # Save averaged psis for every dataset in one file; the global
    # attributes are taken from the psi cube of the last processed dataset.
    out_path = get_diagnostic_filename('psi', cfg)
    io.save_scalar_data(psis, out_path, psi_attrs,
                        attributes=psi_cube.attributes)

    # Provenance
    caption = "{long_name} for multiple climate models.".format(**psi_attrs)
    provenance_record = get_provenance_record(caption, ancestor_files)
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(out_path, provenance_record)
def select_final_subset(cfg, subsets, prov=None):
    """Select samples with minimal reuse of ensemble segments.

    The final set of eight samples should have minimal reuse of the same
    ensemble member for the same period. From 10.000 randomly selected
    sets of 8 samples, count and penalize re-used segments (1 for
    3*reuse, 5 for 4*reuse). Choose the set with the lowest penalty.

    The selection itself is delegated to ``_best_subset``; this function
    combines the control and future selections per scenario, writes each
    table to a csv file and logs provenance for it.

    Returns a dict mapping every scenario to its selection table.
    """
    sample_count = cfg['n_samples']
    selected = {}

    for scenario, dataframes in subsets.items():
        LOGGER.info("Selecting %s final samples for scenario %s",
                    sample_count, scenario)

        # One selection per period, combined side by side.
        periods = ['control', 'future']
        picks = [
            _best_subset(dataframes[period].combination, sample_count)
            for period in periods
        ]
        table = pd.concat(picks, axis=1, keys=periods)
        selected[scenario] = table

        # Persist the table for this scenario.
        filename = get_diagnostic_filename(f'indices_{scenario}',
                                           cfg,
                                           extension='csv')
        table.to_csv(filename)
        LOGGER.info("Selected recombinations for scenario %s: \n %s",
                    scenario, table)
        LOGGER.info('Output stored as %s', filename)

        # Record where the file came from.
        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(filename, prov)

    return selected
def plot_htmltable(dataframe, ancestors, cfg):
    """Render pandas table as html output.

    The table is unstacked on its ``variable`` index level, styled with a
    colour gradient and written to ``bias_vs_change.html``; provenance is
    logged for the written file.

    # https://pandas.pydata.org/pandas-docs/stable/user_guide/style.html
    """
    styles = [
        {
            "selector": ".index_name",
            "props": [("text-align", "right")]
        },
        {
            "selector": ".row_heading",
            "props": [("text-align", "right")]
        },
        {
            "selector": "td",
            "props": [("padding", "3px 25px")]
        },
    ]
    styler = (
        dataframe
        .unstack('variable')
        .style
        .set_table_styles(styles)
        .background_gradient(cmap='RdYlGn', low=0, high=1, axis=0)
        .format("{:.2e}", na_rep="-")
    )

    # ``Styler.render`` was deprecated in pandas 1.4 and removed in 2.0 in
    # favour of ``Styler.to_html``; fall back to ``render`` on old pandas
    # versions that do not provide ``to_html`` yet.
    to_html = getattr(styler, 'to_html', None)
    styled_table = to_html() if to_html is not None else styler.render()

    filename = get_diagnostic_filename('bias_vs_change', cfg, extension='html')
    with open(filename, 'w') as htmloutput:
        htmloutput.write(styled_table)

    caption = "Bias and change for each variable"
    log_provenance(filename, ancestors, caption, cfg)
def plot_climatology(cfg, metadata):
    """Plot the ``pr`` climatology and store the plotted data as netCDF.

    The x-axis is the last dimension of the variable (i.e. month_number /
    day_of_year). Provenance is logged for the saved data file.
    """
    short_name = 'pr'
    datasets = read_input_data(metadata)
    variable = datasets[short_name]

    axis_name = variable.dims[-1]
    axis_label = axis_name.replace('_', ' ')
    long_name = variable.long_name

    caption = f'{long_name} climatology statistics per {axis_label}'
    name = f'{long_name}_climatology_{axis_name}'
    ancestors = [info['filename'] for info in metadata]

    plot_data(
        cfg=cfg,
        datasets=datasets,
        xaxis=axis_name,
        yaxis=short_name,
        xlabel=axis_label.capitalize(),
        ylabel=f'{long_name} / {variable.units}',
        caption=caption,
        name=name,
        ancestors=ancestors,
    )

    filename_data = get_diagnostic_filename(name, cfg, extension='nc')
    datasets.to_netcdf(filename_data)
    log_provenance(caption, filename_data, cfg, ancestors)
def plot_timeseries(cfg, metadata):
    """Plot the ``pr`` timeseries and store the plotted data as netCDF.

    ``cfg['time_period']`` is used in the axis label, file name and
    caption; its upper-cased first character is passed to numpy as the
    unit for formatting the first and last time stamp.
    """
    short_name = 'pr'
    xaxis = 'time'

    datasets = read_input_data(metadata)
    ancestors = [info['filename'] for info in metadata]
    time_period = cfg['time_period']
    variable = datasets[short_name]
    long_name = variable.long_name

    # Date range covered by the data, formatted at the configured unit.
    unit = time_period[0].upper()
    first = np.datetime_as_string(datasets.time.min(), unit=unit)
    last = np.datetime_as_string(datasets.time.max(), unit=unit)

    name = f'{long_name}_{time_period}'
    caption = f"{long_name} per {time_period} for {first}:{last}"

    plot_data(
        cfg=cfg,
        datasets=datasets,
        xaxis=xaxis,
        yaxis=short_name,
        xlabel=f'{xaxis.capitalize()} / {time_period}',
        ylabel=f'{long_name} / {variable.units}',
        caption=caption,
        name=name,
        ancestors=ancestors,
    )

    filename_data = get_diagnostic_filename(name, cfg, extension='nc')
    datasets.to_netcdf(filename_data)
    log_provenance(caption, filename_data, cfg, ancestors)
def visualize_and_save_independence(independence: 'xr.DataArray', cfg: dict,
                                    ancestors: list):
    """Draw the independence matrix as a heatmap and save plot and data.

    The figure and a netCDF copy of ``independence`` are written, and
    provenance is logged for both files with the same caption.
    """
    variable = independence.variable_group
    tick_labels = list(independence.model_ensemble.values)
    colorbar_label = f'Euclidean distance ({independence.units})'

    figure, axes = plt.subplots(figsize=(15, 15),
                                subplot_kw={'aspect': 'equal'})
    heatmap = sns.heatmap(
        independence,
        linewidths=1,
        cmap="YlGn",
        xticklabels=tick_labels,
        yticklabels=tick_labels,
        cbar_kws={'label': colorbar_label},
        ax=axes,
    )
    heatmap.set_title(f'Distance matrix for {variable}')

    basename = f'independence_{variable}'

    filename_plot = get_plot_filename(basename, cfg)
    figure.savefig(filename_plot, dpi=300, bbox_inches='tight')
    plt.close(figure)

    filename_data = get_diagnostic_filename(basename, cfg, extension='nc')
    independence.to_netcdf(filename_data)

    caption = f'Euclidean distance matrix for variable {variable}'
    for output_file in (filename_plot, filename_data):
        log_provenance(caption, output_file, cfg, ancestors)
def test_get_diagnostic_filename():
    """Without an explicit extension the file name ends in ``.nc``."""
    cfg = {'work_dir': '/some/path'}

    result = shared.get_diagnostic_filename('test', cfg)

    expected = '/some/path/test.nc'
    assert result == expected
def test_get_diagnostic_filename_ext():
    """An explicit ``extension`` replaces the default one."""
    cfg = {'work_dir': '/some/path'}

    result = shared.get_diagnostic_filename('test', cfg, extension='csv')

    expected = '/some/path/test.csv'
    assert result == expected
def main(cfg):
    """Run the diagnostic.

    The diagnostic function is looked up by name (as returned by
    ``check_cfg``) in this module's globals, applied to the input data
    grouped by dataset, and its result is saved together with provenance.
    """
    cfg = get_default_settings(cfg)
    diag = check_cfg(cfg)
    sns.set(**cfg.get('seaborn_settings', {}))

    # Collect preprocessor output plus matching ancestor netCDF files and
    # work on a deep copy of the combined list.
    recipe_data = list(cfg['input_data'].values())
    extra_data = io.netcdf_to_metadata(cfg, pattern=cfg.get('pattern'))
    input_data = deepcopy([*recipe_data, *extra_data])
    check_input_data(input_data)
    grouped_data = group_metadata(input_data, 'dataset')

    # Calculate X-axis of emergent constraint
    diag_func = globals()[diag]
    (diag_data, var_attrs, attrs) = diag_func(grouped_data, cfg)
    attrs.update(get_global_attributes(input_data, cfg))

    # Save data
    netcdf_path = get_diagnostic_filename(diag, cfg)
    io.save_scalar_data(diag_data, netcdf_path, var_attrs, attributes=attrs)
    logger.info("Found data:\n%s", pformat(diag_data))

    # Provenance
    ancestor_files = [dataset['filename'] for dataset in input_data]
    provenance_record = ec.get_provenance_record(
        {diag: attrs},
        [diag],
        caption=attrs['plot_xlabel'],
        ancestors=ancestor_files,
    )
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, provenance_record)
def main(cfg):
    """Process data for use as input to the PCR-GLOBWB hydrological model.

    Every input file gets its time points floored (bounds removed) and
    its second dimension reversed before it is saved again with a
    ``_pcrglobwb`` suffix; provenance is logged per output file.
    """
    grouped_input_data = group_metadata(cfg['input_data'].values(),
                                        'standard_name',
                                        sort='dataset')

    for standard_name, datasets in grouped_input_data.items():
        logger.info("Processing variable %s", standard_name)

        for attributes in datasets:
            logger.info("Processing dataset %s", attributes['dataset'])
            input_file = attributes['filename']
            cube = iris.load_cube(input_file)

            # Round times to integer number of days
            time_coord = cube.coord('time')
            time_coord.points = da.floor(time_coord.core_points())
            time_coord.bounds = None

            # Set lat from highest to lowest value
            cube = cube[:, ::-1, ...]

            # Save data
            basename = Path(input_file).stem + '_pcrglobwb'
            output_file = get_diagnostic_filename(basename, cfg)
            iris.save(cube, output_file, fill_value=1.e20)

            # Store provenance
            provenance_record = get_provenance_record(input_file)
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(output_file, provenance_record)
def _provenance_map_spei(cfg, name_dict, spei, dataset_name):
    """Set provenance for plot_map_spei.

    Saves the plotted data as netCDF and logs a provenance record whose
    references depend on the drought index named in ``cfg['indexname']``
    (compared case-insensitively).
    """
    caption = 'Global map of ' + \
        name_dict['drought_char'] + \
        ' [' + name_dict['unit'] + '] ' + \
        'based on ' + cfg['indexname'] + '.'

    # ``str.lower`` must be *called*: comparing the bound method itself to
    # a string is always False, so the index-specific references were
    # never selected.
    index_name = cfg['indexname'].lower()
    if index_name == "spei":
        set_refs = ['martin18grl', 'vicente10jclim', ]
    elif index_name == "spi":
        set_refs = ['martin18grl', 'mckee93proc', ]
    else:
        set_refs = ['martin18grl', ]

    provenance_record = get_provenance_record([name_dict['input_filenames']],
                                              caption,
                                              ['global'],
                                              set_refs)

    diagnostic_file = get_diagnostic_filename(cfg['indexname'] + '_map' +
                                              name_dict['add_to_filename'] +
                                              '_' + dataset_name, cfg)

    logger.info("Saving analysis results to %s", diagnostic_file)

    cubesave = cube_to_save_ploted(spei, name_dict)
    iris.save(cubesave, target=diagnostic_file)

    logger.info("Recording provenance of %s:\n%s", diagnostic_file,
                pformat(provenance_record))
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(diagnostic_file, provenance_record)
def _provenance_time_series_spei(cfg, data_dict):
    """Provenance for time series plots.

    Saves the plotted time series as netCDF and logs a provenance record
    whose references depend on the drought index named in
    ``cfg['indexname']`` (compared case-insensitively).
    """
    # NOTE(review): there is no space between 'at' and the area name in
    # this caption; confirm whether ``data_dict['area']`` is expected to
    # carry a leading space before changing the string.
    caption = 'Time series of ' + \
        data_dict['var'] + \
        ' at' + data_dict['area'] + '.'

    # ``str.lower`` must be *called*: comparing the bound method itself to
    # a string is always False, so the fallback reference was always used.
    index_name = cfg['indexname'].lower()
    if index_name == "spei":
        set_refs = ['vicente10jclim', ]
    elif index_name == "spi":
        set_refs = ['mckee93proc', ]
    else:
        set_refs = ['martin18grl', ]

    provenance_record = get_provenance_record([data_dict['filename']],
                                              caption,
                                              ['reg'], set_refs,
                                              plot_type='times')

    diagnostic_file = get_diagnostic_filename(cfg['indexname'] +
                                              '_time_series_' +
                                              data_dict['area'] +
                                              '_' +
                                              data_dict['dataset_name'], cfg)

    logger.info("Saving analysis results to %s", diagnostic_file)

    cubesave = cube_to_save_ploted_ts(data_dict)
    iris.save(cubesave, target=diagnostic_file)

    logger.info("Recording provenance of %s:\n%s", diagnostic_file,
                pformat(provenance_record))
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(diagnostic_file, provenance_record)
def _create_regression_file(tas_cube,
                            cube,
                            dataset_name,
                            cfg,
                            description=None):
    """Save regression plot as netcdf file for a given dataset.

    A linear regression of ``cube`` data against ``tas_cube`` data is
    computed and stored in the attributes of the saved cube, which holds
    the data of ``cube`` with the ``tas`` data as auxiliary coordinate.
    For ``rtmt``/``rtnt`` an ``ECS`` attribute is added as well.

    Returns the path of the written netCDF file.
    """
    var = cube.var_name
    regression = stats.linregress(tas_cube.data, cube.data)
    slope = regression.slope
    intercept = regression.intercept

    attrs = {
        'dataset': dataset_name,
        'regression_r_value': regression.rvalue,
        'regression_slope': slope,
        'regression_interception': intercept,
        'feedback_parameter': slope,
    }
    attrs.update(cfg.get('output_attributes', {}))

    filename = f'{var}_regression_{dataset_name}'
    if description is not None:
        attrs['description'] = description
        filename += f"_{description.replace(' ', '_')}"
    if var in ('rtmt', 'rtnt'):
        attrs['ECS'] = -intercept / (2.0 * slope)

    tas_kwargs = extract_variables(cfg, as_iris=True)['tas']
    tas_coord = iris.coords.AuxCoord(tas_cube.data, **tas_kwargs)

    var_kwargs = extract_variables(cfg, as_iris=True)[var]
    regression_cube = iris.cube.Cube(cube.data,
                                     attributes=attrs,
                                     aux_coords_and_dims=[(tas_coord, 0)],
                                     **var_kwargs)

    netcdf_path = get_diagnostic_filename(filename, cfg)
    io.iris_save(regression_cube, netcdf_path)
    return netcdf_path
def main(cfg):
    """Process data for use as input to the HYPE hydrological model.

    For every dataset of every variable a space-separated text table
    (index label ``DATE``, three decimals) is written and provenance is
    logged for it.
    """
    grouped_input_data = group_metadata(cfg['input_data'].values(),
                                        'long_name',
                                        sort='dataset')

    for long_name, datasets in grouped_input_data.items():
        logger.info("Processing variable %s", long_name)

        for attributes in datasets:
            logger.info("Processing dataset %s", attributes['dataset'])

            output_file = get_diagnostic_filename(get_output_stem(attributes),
                                                  cfg, 'txt')
            # Make sure the target directory exists before writing.
            Path(output_file).parent.mkdir(exist_ok=True)

            data, times, ids = get_data_times_and_ids(attributes)
            table = pandas.DataFrame(data, index=times, columns=ids)
            table.to_csv(output_file,
                         sep=' ',
                         index_label="DATE",
                         float_format='%.3f')

            # Store provenance
            provenance_record = get_provenance_record(attributes)
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(output_file, provenance_record)
def main(cfg):
    """Rename preprocessed native6 file.

    Each input file is moved into the diagnostic work directory under a
    name derived from its stem: ``native`` becomes ``OBS``, ``E1hr`` is
    replaced by a matching mip for the ``daily`` diagnostic, and a
    trailing date range is rebuilt from the cube's time coordinate when
    one exists.
    """
    for file, info in cfg['input_data'].items():
        diagnostic = info['diagnostic']
        basename = Path(file).stem.replace('native', 'OBS')

        if diagnostic == 'daily':
            for mip in ['day', 'Eday', 'CFday']:
                if CMOR_TABLES['CMIP6'].get_variable(mip, info['short_name']):
                    basename = basename.replace('E1hr', mip)
            # Fallback in case none of the mips above matched.
            basename = basename.replace('E1hr', 'day')

        cube = iris.load_cube(file)
        try:
            time = cube.coord('time')
        except iris.exceptions.CoordinateNotFoundError:
            # No time coordinate: keep the name as it is.
            pass
        else:
            date_format = "%Y%m" if diagnostic == "monthly" else "%Y%m%d"
            start = time.cell(0).point.strftime(date_format)
            end = time.cell(-1).point.strftime(date_format)
            basename = f"{basename.rstrip('0123456789-')}{start}-{end}"

        outfile = get_diagnostic_filename(basename, cfg)
        logger.info('Moving %s to %s', file, outfile)
        shutil.move(file, outfile)
def plot_bar_deangelis(cfg, data_var_sum, available_exp, available_vars):
    """Plot grouped bars of the model-mean fluxes for every experiment.

    One bar group per variable, one bar per experiment. The plotted
    values are also saved as netCDF with provenance. Nothing is plotted
    or saved when ``cfg[n.WRITE_PLOTS]`` is false.
    """
    if not cfg[n.WRITE_PLOTS]:
        return

    # Plot data
    fig, axx = plt.subplots()

    set_colors = [
        'cornflowerblue', 'orange', 'silver', 'limegreen', 'rosybrown',
        'orchid'
    ]

    n_vars = len(available_vars)
    bar_width = 1.0 / float(n_vars)
    group_positions = np.arange(n_vars)

    for iii, iexp in enumerate(available_exp):
        axx.bar(group_positions + bar_width * float(iii),
                data_var_sum[iexp],
                bar_width,
                color=set_colors[iii],
                label=iexp)

    axx.set_xlabel(' ')
    axx.set_ylabel(r'Model mean (W m$^{-2}$)')
    axx.set_title(' ')
    axx.set_xticks(group_positions + bar_width)
    axx.set_xticklabels(available_vars)
    axx.legend(loc=1)

    fig.tight_layout()
    fig.savefig(get_plot_filename('bar_all', cfg), dpi=300)
    plt.close()

    caption = 'Global average multi-model mean comparing different ' + \
        'model experiments and flux variables.'

    provenance_record = get_provenance_record(
        _get_sel_files_var(cfg, available_vars), caption, ['mean'],
        ['global'])

    diagnostic_file = get_diagnostic_filename('bar_all', cfg)

    logger.info("Saving analysis results to %s", diagnostic_file)

    # Data and per-experiment variable attributes for the saved cubes.
    list_dict = {
        "data": [data_var_sum[iexp] for iexp in available_exp],
        "name": [
            {
                'var_name': iexp + '_all',
                'long_name': 'Fluxes for ' + iexp + ' experiment',
                'units': 'W m-2'
            }
            for iexp in available_exp
        ],
    }

    iris.save(cube_to_save_vars(list_dict), target=diagnostic_file)

    logger.info("Recording provenance of %s:\n%s", diagnostic_file,
                pformat(provenance_record))
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(diagnostic_file, provenance_record)
def plot_cdf(cfg, psi_cube, ecs_cube, obs_cube):
    """Plot cumulative distribution function of ECS.

    The CDF is always saved as netCDF with provenance; the figure is only
    drawn and saved when ``cfg['write_plots']`` is true.
    """
    confidence_level = cfg.get('confidence_level', 0.66)
    obs_name = obs_cube.attributes['dataset']

    obs_mean = np.mean(obs_cube.data)
    obs_std = np.std(obs_cube.data)
    (ecs_lin, ecs_pdf) = ec.gaussian_pdf(psi_cube.data, ecs_cube.data,
                                         obs_mean, obs_std)
    ecs_cdf = ec.cdf(ecs_lin, ecs_pdf)

    # Save the CDF with ECS as auxiliary coordinate
    filename = f'cdf_{obs_name}'
    netcdf_path = get_diagnostic_filename(filename, cfg)
    cdf_cube = iris.cube.Cube(ecs_cdf,
                              var_name='cdf',
                              long_name='Cumulative distribution function',
                              units='1')
    ecs_coord = iris.coords.AuxCoord(ecs_lin,
                                     **ih.convert_to_iris(ECS_ATTRS))
    cdf_cube.add_aux_coord(ecs_coord, 0)
    io.iris_save(cdf_cube, netcdf_path)

    # Provenance
    project = _get_project(cfg)
    provenance_record = get_provenance_record(
        "The CDF for ECS. The horizontal dot-dashed lines show the {}% "
        "confidence limits. The orange histograms show the prior "
        "distributions that arise from equal weighting of the {} models in "
        "0.5 K bins.".format(int(confidence_level * 100), project), ['mean'],
        ['other'], _get_ancestor_files(cfg, obs_name))

    # Plot
    if cfg['write_plots']:
        AXES.plot(ecs_lin,
                  ecs_cdf,
                  color='black',
                  linewidth=2.0,
                  label='Emergent constraint')
        AXES.hist(ecs_cube.data,
                  bins=6,
                  range=(2.0, 5.0),
                  cumulative=True,
                  density=True,
                  color='orange',
                  label='{} models'.format(project))

        # Lower and upper confidence limits
        lower_limit = (1.0 - confidence_level) / 2.0
        upper_limit = (1.0 + confidence_level) / 2.0
        AXES.axhline(lower_limit, color='black', linestyle='dashdot')
        AXES.axhline(upper_limit, color='black', linestyle='dashdot')

        # Plot appearance
        AXES.set_title('CDF of emergent constraint')
        AXES.set_xlabel('ECS / K')
        AXES.set_ylabel('CDF')
        legend = AXES.legend(loc='upper left')

        # Save plot
        provenance_record['plot_file'] = _save_fig(cfg, filename, legend)

    # Write provenance
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, provenance_record)
def su(grouped_data, cfg):
    """Su et al. (2014) constraint.

    Every cube is compared to the reference cube with the similarity
    metric named in ``cfg['metric']``. Two-dimensional cubes are also
    plotted and saved as netCDF with provenance.

    Returns the tuple ``(diag_data, var_attrs, attrs)``.
    """
    metric = cfg['metric']
    logger.info("Found metric '%s' for Su et al. (2014) constraint", metric)

    # Extract cubes
    (var_name, reference_datasets) = _get_su_variable(grouped_data)
    cube_dict = _get_su_cube_dict(grouped_data, var_name, reference_datasets)
    ref_cube = cube_dict[reference_datasets]

    # Variable attributes
    short_name = 'alpha' if metric == 'regression_slope' else 'rho'
    var_attrs = {
        'short_name': short_name,
        'long_name': f"Error in vertically-resolved tropospheric "
                     f"zonal-average {ref_cube.long_name} between 40°N and "
                     f"45°S expressed as {metric.replace('_', ' ')} between "
                     f"model data and observations",
        'units': '1',
    }
    attrs = {
        'plot_xlabel': f'Model performance in {ref_cube.long_name} [1]',
        'plot_title': 'Su et al. (2014) constraint',
        'provenance_authors': ['schlund_manuel'],
        'provenance_domains': ['trop', 'midlat'],
        'provenance_realms': ['atmos'],
        'provenance_references': ['su14jgr'],
        'provenance_statistics': ['corr'],
        'provenance_themes': ['EC'],
    }

    # Calculate constraint
    diag_data = {}
    for (dataset_name, cube) in cube_dict.items():
        logger.info("Processing dataset '%s'", dataset_name)

        # Only two-dimensional cubes are plotted and saved
        if cube.ndim == 2:
            iris.quickplot.contourf(cube)
            filename = f"su_{dataset_name.replace('|', '_')}"
            plot_path = get_plot_filename(filename, cfg)
            plt.savefig(plot_path, **cfg['savefig_kwargs'])
            logger.info("Wrote %s", plot_path)
            plt.close()

            # Provenance
            netcdf_path = get_diagnostic_filename(filename, cfg)
            io.iris_save(cube, netcdf_path)
            provenance_record = ec.get_provenance_record(
                {'su': attrs},
                ['su'],
                caption=f'{cube.long_name} for {dataset_name}.',
                plot_type='zonal',
                plot_file=plot_path,
                ancestors=cube.attributes['ancestors'].split('|'),
            )
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(netcdf_path, provenance_record)

        # Similarity metric
        diag_data[dataset_name] = _similarity_metric(cube, ref_cube, metric)

    return (diag_data, var_attrs, attrs)
def write_data(config, data):
    """Write all the calculated data to output file.

    The reference cube and the three cube collections in ``data`` are
    saved together as ``fig-9-14``; the path of that file is returned.
    """
    all_cubes = [
        data['equatorial_ref'],
        *data['zonal_mean_errors'],
        *data['equatorials'],
        *data['equatorial_errors'],
    ]
    path = get_diagnostic_filename('fig-9-14', config)
    iris.save(iris.cube.CubeList(all_cubes), path)
    return path
def mapplot(dataarray, cfg, title_pattern, filename_part, ancestors,
            **colormesh_args):
    """Visualize weighted temperature.

    Draws ``dataarray`` on a plate-carree map, saves the figure and a
    netCDF copy of the plotted data, and logs provenance for both files.
    ``title_pattern`` is formatted with ``metric`` and ``period``.
    """
    # Period shown in the title; becomes a change description when a
    # 'tas_reference' group is present.
    period = '{start_year}-{end_year}'.format(**read_metadata(cfg)['tas'][0])
    if 'tas_reference' in read_metadata(cfg).keys():
        reference = read_metadata(cfg)['tas_reference']
        period = 'change: {} minus {start_year}-{end_year}'.format(
            period, **reference[0])

    metric = cfg['model_aggregation']
    if isinstance(metric, int):
        metric = f'{metric}perc'

    projection = ccrs.PlateCarree(central_longitude=0)
    figure, axes = plt.subplots(subplot_kw={'projection': projection})

    dataarray = set_antimeridian(dataarray, cfg.get('antimeridian', 'pacific'))
    dataarray = dataarray.dropna('lon', how='all').dropna('lat', how='all')

    # colorbar size often does not fit nicely
    # https://stackoverflow.com/questions/18195758/set-matplotlib-colorbar-size-to-match-graph
    # cbar_kwargs={'fraction': .021}
    dataarray.plot.pcolormesh(
        ax=axes,
        transform=ccrs.PlateCarree(),
        levels=9,
        robust=True,
        extend='both',
        **colormesh_args
    )

    lons = dataarray.lon.values
    lats = dataarray.lat.values
    longitude_formatter = LongitudeFormatter()
    latitude_formatter = LatitudeFormatter()
    # Default ticks every 10 degrees over the plotted extent.
    default_xticks = np.arange(np.floor(lons.min()), np.ceil(lons.max()), 10)
    default_yticks = np.arange(np.floor(lats.min()), np.ceil(lats.max()), 10)

    axes.coastlines()
    axes.set_xticks(cfg.get('xticks', default_xticks), crs=projection)
    axes.set_yticks(cfg.get('yticks', default_yticks), crs=projection)
    for axis, formatter in ((axes.xaxis, longitude_formatter),
                            (axes.yaxis, latitude_formatter)):
        axis.set_ticks_position('both')
    axes.xaxis.set_major_formatter(longitude_formatter)
    axes.yaxis.set_major_formatter(latitude_formatter)
    axes.set_xlabel('')
    axes.set_ylabel('')

    title = title_pattern.format(metric=metric, period=period)
    axes.set_title(title)

    filename_plot = get_plot_filename(filename_part, cfg)
    figure.savefig(filename_plot, dpi=300, bbox_inches='tight')
    plt.close(figure)

    filename_data = get_diagnostic_filename(filename_part, cfg, extension='nc')
    dataarray.to_netcdf(filename_data)

    log_provenance(title, filename_plot, cfg, ancestors)
    log_provenance(title, filename_data, cfg, ancestors)
def save(output, cfg, provenance):
    """Save the output as csv file.

    The table is also printed (rounded to two decimals) and provenance is
    logged for the written file.
    """
    filename = get_diagnostic_filename('scenarios', cfg, extension='csv')

    table = pd.DataFrame(output)
    table.to_csv(filename)

    print(table.round(2))
    print(f"Output written to {filename}")

    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(filename, provenance)
def _get_filename(var_meta, cfg, extension="nc"):
    """Return a filename for output data.

    The basename joins the project, dataset, exp, ensemble and short_name
    entries of ``var_meta`` with underscores.
    """
    name_keys = ("project", "dataset", "exp", "ensemble", "short_name")
    basename = "_".join([var_meta[key] for key in name_keys])
    return get_diagnostic_filename(basename, cfg, extension=extension)
def main(cfg):
    """Process data for use as input to the PCR-GLOBWB hydrological model.

    For every dataset, ``pr`` and ``tas`` are combined with their
    climatology through ``add_spinup_year``, adjusted (time, latitude
    order, coordinate offset, precipitation units) and saved with
    provenance.
    """
    grouped = group_metadata(cfg['input_data'].values(), 'dataset')

    for dataset, metadata in grouped.items():
        for short_name in ("pr", "tas"):
            logger.info("Processing variable %s for dataset %s", short_name,
                        dataset)

            # Load preprocessed cubes for normal data and climatology
            var = select_metadata(metadata, variable_group=short_name)[0]
            cube = iris.load_cube(var['filename'])
            climatology_group = short_name + '_climatology'
            var_climatology = select_metadata(
                metadata, variable_group=climatology_group)[0]
            cube_climatology = iris.load_cube(var_climatology['filename'])

            # Create a spin-up year for pcrglob based on the climatology data
            cube = add_spinup_year(cube, cube_climatology)

            # Round times to integer number of days
            time_coord = cube.coord('time')
            time_coord.points = da.floor(time_coord.core_points())
            time_coord.bounds = None
            time_coord.guess_bounds()

            # Set lat from highest to lowest value
            cube = cube[:, ::-1, ...]

            # Workaround for bug in PCRGlob
            # (see https://github.com/UU-Hydro/PCR-GLOBWB_model/pull/13)
            for coord_name in ['latitude', 'longitude']:
                coord = cube.coord(coord_name)
                coord.points = coord.points + 0.001

            # Unit conversion 'kg m-3 day-1' to 'm' precip (divide by density)
            if short_name == "pr":
                cube.units = cube.units / 'kg m-3 day-1'
                cube.data = cube.core_data() / 1000

            # Save data
            name_parts = ['pcrglobwb', Path(var['filename']).stem,
                          cfg['basin']]
            output_file = get_diagnostic_filename('_'.join(name_parts), cfg)
            iris.save(cube, output_file, fill_value=1.e20)

            # Store provenance
            ancestor_files = [var['filename'], var_climatology['filename']]
            provenance_record = get_provenance_record(ancestor_files)
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(output_file, provenance_record)
def _plot(cfg, cube, dataset_name, tcr):
    """Create scatterplot of temperature anomaly vs. time.

    The cube is saved as netCDF and the figure is written to the plot
    directory.

    Returns
    -------
    tuple
        ``(netcdf_path, provenance_record)``, or ``(None, None)`` when
        plotting is disabled via ``cfg['plot']``.

    """
    if not cfg.get('plot', True):
        return (None, None)
    logger.debug("Plotting temperature anomaly vs. time for '%s'",
                 dataset_name)
    (_, axes) = plt.subplots()

    # Plot data
    x_data = np.arange(cube.shape[0])
    y_data = cube.data
    axes.scatter(x_data, y_data, color='b', marker='o')

    # Plot lines
    line_kwargs = {'color': 'k', 'linewidth': 1.0, 'linestyle': '--'}
    axes.axhline(tcr, **line_kwargs)
    axes.axvline(START_YEAR_IDX, **line_kwargs)
    axes.axvline(END_YEAR_IDX, **line_kwargs)

    # Appearance
    units_str = (cube.units.symbol
                 if cube.units.origin is None else cube.units.origin)
    axes.set_title(dataset_name)
    axes.set_xlabel('Years after experiment start')
    axes.set_ylabel(f'Temperature anomaly / {units_str}')
    # The x-limits follow the data range; this call used ``set_ylim``
    # before and was silently overridden by the y-limits set below.
    axes.set_xlim([x_data[0] - 1, x_data[-1] + 1])
    axes.set_ylim([-1.0, 7.0])
    axes.text(0.0, tcr + 0.1, 'TCR = {:.1f} {}'.format(tcr, units_str))

    # Save cube
    netcdf_path = get_diagnostic_filename(dataset_name, cfg)
    io.iris_save(cube, netcdf_path)

    # Save plot
    plot_path = get_plot_filename(dataset_name, cfg)
    plt.savefig(plot_path, **cfg['savefig_kwargs'])
    logger.info("Wrote %s", plot_path)
    plt.close()

    # Provenance
    provenance_record = get_provenance_record(
        f"Time series of the global mean surface air temperature anomaly "
        f"(relative to the linear fit of the pre-industrial control run) of "
        f"{dataset_name} for the 1% CO2 increase per year experiment. The "
        f"horizontal dashed line indicates the transient climate response "
        f"(TCR) defined as the 20 year average temperature anomaly centered "
        f"at the time of CO2 doubling (vertical dashed lines).")
    provenance_record.update({
        'plot_file': plot_path,
        'plot_types': ['times'],
    })

    return (netcdf_path, provenance_record)
def main(diag_config):
    """Compare the zonal turnover time of models and observations.

    Argument:
    --------
        diag_config - nested dictionary of metadata

    The zonal turnover time is computed per model from ``ctotal`` and
    ``gpp``. Depending on ``write_netcdf`` / ``write_plots`` the cubes
    and the figure are written, each with the same provenance record.
    """
    model_data_dict = group_metadata(diag_config['input_data'].values(),
                                     'dataset')
    fig_config = _get_fig_config(diag_config)

    # Zonal turnover time of every model
    zonal_tau_mod = {}
    for model_name, model_dataset in model_data_dict.items():
        ctotal = _load_variable(model_dataset, 'ctotal')
        gpp = _load_variable(model_dataset, 'gpp')
        zonal_tau_mod[model_name] = _calc_zonal_tau(gpp, ctotal, fig_config)

    # Observation-based turnover time
    zonal_tau_obs = _get_obs_data_zonal(diag_config)
    obs_var = diag_config.get('obs_variable')[0]
    tau_obs = zonal_tau_obs[obs_var]

    obs_info = diag_config['obs_info']
    base_name = (f"{tau_obs.long_name}_{obs_info['source_label']}"
                 f"_{obs_info['grid_label']}z")

    provenance_record = _get_provenance_record(
        "Comparison of latitudinal (zonal) variations of observation-based and"
        " modelled ecosystem carbon turnover time. The zonal turnover time is"
        " calculated as the ratio of zonal `ctotal` and `gpp`. Reproduces "
        " figure 2a and 2b in Carvalhais et al. (2014).", ['mean', 'perc'],
        ['zonal'], _get_ancestor_files(diag_config, obs_var))

    def _cubes_only(mapping):
        """Return the values of ``mapping`` that are iris cubes."""
        return [
            value for value in mapping.values()
            if isinstance(value, iris.cube.Cube)
        ]

    if diag_config['write_netcdf']:
        save_cubes = iris.cube.CubeList(
            _cubes_only(zonal_tau_mod) + _cubes_only(zonal_tau_obs))
        netcdf_path = get_diagnostic_filename(base_name, diag_config)
        iris.save(save_cubes, netcdf_path)
        with ProvenanceLogger(diag_config) as provenance_logger:
            provenance_logger.log(netcdf_path, provenance_record)

    if diag_config['write_plots']:
        plot_path = get_plot_filename(base_name, diag_config)
        _plot_zonal_tau(plot_path, zonal_tau_mod, zonal_tau_obs, diag_config)
        with ProvenanceLogger(diag_config) as provenance_logger:
            provenance_logger.log(plot_path, provenance_record)
def main(cfg, input_data=None, description=None):
    """Run the diagnostic.

    For every tag the multi-model mean of the label datasets is saved
    with provenance. A prediction error is saved when ``mmm_error_type``
    is configured, and residuals are saved when a reference dataset
    exists. The passed ``cfg`` is deep-copied before defaults are set.
    """
    cfg = deepcopy(cfg)
    defaults = {
        'dtype': 'float64',
        'mlr_model_name': 'MMM',
        'weighted_samples': {
            'area_weighted': True,
            'time_weighted': True
        },
    }
    for (option, value) in defaults.items():
        cfg.setdefault(option, value)

    # Get data
    grouped_data = get_grouped_data(cfg, input_data=input_data)
    suffix = '' if description is None else f'_for_{description}'

    # Loop over all tags
    for (tag, datasets) in grouped_data.items():
        logger.info("Processing label '%s'", tag)

        # Get label datasets and reference dataset if possible
        label_datasets = select_metadata(datasets, var_type='label')
        (ref_dataset, pred_name) = get_reference_dataset(datasets, tag)
        if pred_name is None:
            pred_name = cfg.get('prediction_name')

        # Calculate multi-model mean
        logger.info("Calculating multi-model mean")
        mmm_cube = get_mmm_cube(cfg, label_datasets)
        add_general_attributes(mmm_cube, tag=tag, prediction_name=pred_name)
        mmm_path = get_diagnostic_filename(f"mmm_{tag}_prediction{suffix}",
                                           cfg)
        io.iris_save(mmm_cube, mmm_path)
        ancestor_files = [dataset['filename'] for dataset in label_datasets]
        caption = (f"Predicted {mmm_cube.long_name} of MMM model "
                   f"{cfg['mlr_model_name']}.")
        write_provenance(cfg, mmm_path, ancestor_files, caption)

        # Estimate prediction error using cross-validation
        if 'mmm_error_type' in cfg:
            save_error(cfg,
                       label_datasets,
                       mmm_path,
                       tag=tag,
                       prediction_name=pred_name)

        # Calculate residuals
        if ref_dataset is not None:
            save_residuals(cfg,
                           mmm_cube,
                           ref_dataset,
                           label_datasets,
                           tag=tag,
                           prediction_name=pred_name)
def main(cfg):
    """Plot the pre-processed global anomalies as one bar chart per project.

    The input is grouped by project (CMIP5, CMIP6, UKCP) and, within a
    project, by ensemble for ``UKCP18`` and by dataset otherwise. For
    every project one CSV file and one horizontal bar chart are written.

    Raises
    ------
    ValueError
        If a dataset/ensemble group contains more than one file.

    """
    # first read the anomalies in, pop into dictionaries keyed by model name
    # group by project first (CMIP5, CMIP6, UKCP)
    projects = group_metadata(cfg["input_data"].values(), "project")

    results = {}
    for p in projects:
        results[p] = {}

        if p == "UKCP18":
            # loop over ensembles
            models = group_metadata(projects[p], "ensemble")
        else:
            # loop over datasets
            models = group_metadata(projects[p], "dataset")

        for m in models:
            if len(models[m]) > 1:
                raise ValueError("Too many bits of data")

            fname = models[m][0]["filename"]
            data = iris.load_cube(fname)
            # the cube is expected to hold a single value
            results[p][m] = data.data.item()

    # plot and save the results
    for p in projects:
        # use pandas to create data for a csv file
        results_df = pd.DataFrame.from_dict(results[p], orient='index')
        # save data as csv
        results_df.to_csv(get_diagnostic_filename(f"{p}_global_tas_anom", cfg,
                                                  "csv"),
                          header=False)

        # get list of models
        models = results[p].keys()
        # and corresponding values
        vals = [results[p][m] for m in models]

        fig, ax = plt.subplots(figsize=(12.8, 9.6))

        # plot bar chart with alternating red/blue bars
        y_pos = np.arange(len(models))
        colors = np.empty(len(models, ), dtype=str)
        colors[::2] = 'r'
        colors[1::2] = 'b'
        ax.barh(y_pos, vals, color=colors)
        ax.set_yticks(y_pos, labels=models)

        # The layout has to be adjusted *before* saving; calling
        # tight_layout after savefig never reached the written file.
        fig.tight_layout()
        plot_fname = get_plot_filename(f'{p}_global_anomaly', cfg)
        fig.savefig(plot_fname)
        plt.close(fig)
def write_data(cfg, all_data, metadata):
    """Write netcdf file.

    Each entry of ``all_data`` holds dataset names at index 0 and the
    matching values at index 1; they are flattened into a single mapping
    keyed ``<label>-<dataset_name>`` and saved as scalar data.

    Returns the path of the written file.
    """
    new_data = {
        f'{label}-{dataset_name}': xy_data[1][idx]
        for (label, xy_data) in all_data.items()
        for (idx, dataset_name) in enumerate(xy_data[0])
    }

    netcdf_path = get_diagnostic_filename(metadata['var_name'], cfg)

    # The variable attributes use 'short_name' instead of 'var_name';
    # work on a copy so that the caller's metadata is not modified.
    var_attrs = metadata.copy()
    var_attrs['short_name'] = var_attrs.pop('var_name')

    io.save_scalar_data(new_data, netcdf_path, var_attrs)
    return netcdf_path
def get_provenance_record(cfg, basename, caption, extension, ancestor_files):
    """Create a provenance record describing the diagnostic data and plot.

    The record is logged for the diagnostic file derived from
    ``basename`` and ``extension``; nothing is returned.
    """
    diagnostic_file = get_diagnostic_filename(basename, cfg, extension)

    record = dict(
        caption=caption,
        statistics=['other'],
        domains=['global'],
        authors=['berg_peter'],
        references=['acknow_project'],
        ancestors=ancestor_files,
    )

    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(diagnostic_file, record)
def plot_temperature_anomaly(cfg, tas_cubes, lambda_cube, obs_name):
    """Plot temperature anomaly versus time.

    All cubes in ``tas_cubes`` are shifted in place to be relative to
    their 1961-1990 mean, saved as netCDF with provenance and, when
    ``cfg['write_plots']`` is true, plotted.
    """
    # Anomalies relative to the 1961-1990 baseline (modifies the cubes)
    baseline = iris.Constraint(year=lambda cell: 1961 <= cell <= 1990)
    for cube in tas_cubes.values():
        cube.data -= np.mean(cube.extract(baseline).data)

    # Save netcdf file and provenance
    filename = f'temperature_anomaly_{obs_name}'
    netcdf_path = get_diagnostic_filename(filename, cfg)
    io.save_1d_data(tas_cubes, netcdf_path, 'year', TASA_ATTRS)
    project = _get_project(cfg)
    provenance_record = get_provenance_record(
        "Simulated change in global temperature from {} models (coloured "
        "lines), compared to the global temperature anomaly from the {} "
        "dataset (black dots). The anomalies are relative to a baseline "
        "period of 1961-1990.".format(project, obs_name), ['anomaly'],
        ['times'], _get_ancestor_files(cfg, obs_name))

    # Plot
    if cfg['write_plots']:
        # One line per model
        for model in lambda_cube.coord('dataset').points:
            model_cube = tas_cubes[model]
            AXES.plot(model_cube.coord('year').points,
                      model_cube.data,
                      color=_get_model_color(model, lambda_cube))

        # Observations as markers without connecting line
        obs_style = plot.get_dataset_style('OBS', 'cox18nature')
        obs_color = obs_style['color']
        obs_cube = tas_cubes[obs_name]
        AXES.plot(obs_cube.coord('year').points,
                  obs_cube.data,
                  linestyle='none',
                  marker='o',
                  markeredgecolor=obs_color,
                  markerfacecolor=obs_color)

        # Plot appearance
        AXES.set_title('Simulation of global warming record')
        AXES.set_xlabel('Year')
        AXES.set_ylabel('Temperature anomaly / K')
        legend = _get_line_plot_legend()

        # Save plot
        provenance_record['plot_file'] = _save_fig(cfg, filename, legend)

    # Write provenance
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, provenance_record)