Example #1
def main(cfg):
    """Run the diagnostic."""
    input_data = (
        select_metadata(cfg['input_data'].values(), short_name='tas') +
        select_metadata(cfg['input_data'].values(), short_name='tasa'))
    if not input_data:
        raise ValueError("This diagnostics needs 'tas' or 'tasa' variable")

    # Calculate psi for every dataset
    psis = {}
    psi_attrs = {
        'short_name': 'psi',
        'long_name': 'Temperature variability metric',
        'units': 'K',
    }
    grouped_data = group_metadata(input_data, 'dataset')
    for (dataset, [data]) in grouped_data.items():
        logger.info("Processing %s", dataset)
        cube = iris.load_cube(data['filename'])
        iris.coord_categorisation.add_year(cube, 'time')
        cube = cube.aggregated_by('year', iris.analysis.MEAN)
        psi_cube = calculate_psi(cube, cfg)
        data.update(psi_attrs)
        data.pop('standard_name', '')

        # Provenance
        caption = ("Temporal evolution of temperature variability metric psi "
                   "between {start_year} and {end_year} for {dataset}.".format(
                       **data))
        provenance_record = get_provenance_record(caption, [data['filename']])
        out_path = get_diagnostic_filename('psi_' + dataset, cfg)
        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(out_path, provenance_record)

        # Save psi for every dataset
        data['filename'] = out_path
        io.metadata_to_netcdf(psi_cube, data)

        # Save averaged psi
        psis[dataset] = np.mean(psi_cube.data)

    # Save averaged psis for every dataset in one file
    out_path = get_diagnostic_filename('psi', cfg)
    io.save_scalar_data(psis,
                        out_path,
                        psi_attrs,
                        attributes=psi_cube.attributes)

    # Provenance
    caption = "{long_name} for mutliple climate models.".format(**psi_attrs)
    ancestor_files = [d['filename'] for d in input_data]
    provenance_record = get_provenance_record(caption, ancestor_files)
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(out_path, provenance_record)
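Note: the get_provenance_record helper called above is specific to each diagnostic and is not shown here. A minimal sketch, assuming it only needs the caption and the ancestor files (the record layout mirrors the one in the final example below), might look like:

def get_provenance_record(caption, ancestor_files):
    """Build a provenance record dict (hypothetical sketch)."""
    return {
        'caption': caption,
        'statistics': ['mean'],
        'domains': ['global'],
        'ancestors': ancestor_files,
    }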
Example #2
def select_final_subset(cfg, subsets, prov=None):
    """Select sample with minimal reuse of ensemble segments.

    Final set of eight samples should have with minimal reuse
    of the same ensemble member for the same period.
    From 10.000 randomly selected sets of 8 samples, count
    and penalize re-used segments (1 for 3*reuse, 5 for 4*reuse).
    Choose the set with the lowest penalty.
    """
    n_samples = cfg['n_samples']
    all_scenarios = {}
    for scenario, dataframes in subsets.items():
        # Make a table with the final indices
        LOGGER.info("Selecting %s final samples for scenario %s", n_samples,
                    scenario)
        control = _best_subset(dataframes['control'].combination, n_samples)
        future = _best_subset(dataframes['future'].combination, n_samples)
        table = pd.concat([control, future],
                          axis=1,
                          keys=['control', 'future'])
        all_scenarios[scenario] = table

        # Store the output
        filename = get_diagnostic_filename(f'indices_{scenario}',
                                           cfg,
                                           extension='csv')
        table.to_csv(filename)
        LOGGER.info("Selected recombinations for scenario %s: \n %s", scenario,
                    table)
        LOGGER.info('Output stored as %s', filename)

        # Write provenance information
        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(filename, prov)
    return all_scenarios
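The helper _best_subset is not shown. A minimal sketch of the penalty logic described in the docstring, assuming each entry in the combination column is a tuple of ensemble segments, might be:

import collections

def _best_subset(combinations, n_samples=8, n_draws=10_000):
    """Hypothetical sketch: pick the subset with the least segment reuse."""
    best_subset = None
    lowest_penalty = float('inf')
    for _ in range(n_draws):
        subset = combinations.sample(n_samples)
        # Count how often each ensemble segment recurs within the subset
        counts = collections.Counter(
            segment for combination in subset for segment in combination)
        penalty = sum(1 if count == 3 else 5 if count >= 4 else 0
                      for count in counts.values())
        if penalty < lowest_penalty:
            lowest_penalty = penalty
            best_subset = subset
    return best_subset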
Example #3
def plot_htmltable(dataframe, ancestors, cfg):
    """Render pandas table as html output.

    # https://pandas.pydata.org/pandas-docs/stable/user_guide/style.html
    """
    styles = [
        {
            "selector": ".index_name",
            "props": [("text-align", "right")]
        },
        {
            "selector": ".row_heading",
            "props": [("text-align", "right")]
        },
        {
            "selector": "td",
            "props": [("padding", "3px 25px")]
        },
    ]

    styled_table = dataframe\
        .unstack('variable')\
        .style\
        .set_table_styles(styles)\
        .background_gradient(cmap='RdYlGn', low=0, high=1, axis=0)\
        .format("{:.2e}", na_rep="-")\
        .render()

    filename = get_diagnostic_filename('bias_vs_change', cfg, extension='html')
    with open(filename, 'w') as htmloutput:
        htmloutput.write(styled_table)

    caption = "Bias and change for each variable"
    log_provenance(filename, ancestors, caption, cfg)
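Note: Styler.render() works in older pandas releases; recent pandas versions replace it with Styler.to_html(), so the final call in the chain above may need to be .to_html() instead.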
Example #4
def plot_climatology(cfg, metadata):
    """Plot climatology data."""
    short_name = 'pr'

    datasets = read_input_data(metadata)
    var = datasets[short_name]

    xaxis = var.dims[-1]  # i.e. month_number / day_of_year
    xlabel = xaxis.replace('_', ' ')
    caption = f'{var.long_name} climatology statistics per {xlabel}'

    ancestors = [info['filename'] for info in metadata]

    name = f'{var.long_name}_climatology_{xaxis}'

    plot_data(
        cfg=cfg,
        datasets=datasets,
        xaxis=xaxis,
        yaxis=short_name,
        xlabel=xlabel.capitalize(),
        ylabel=f'{var.long_name} / {var.units}',
        caption=caption,
        name=name,
        ancestors=ancestors,
    )

    filename_data = get_diagnostic_filename(name, cfg, extension='nc')
    datasets.to_netcdf(filename_data)
    log_provenance(caption, filename_data, cfg, ancestors)
Example #5
def plot_timeseries(cfg, metadata):
    """Plot timeseries data."""
    short_name = 'pr'
    xaxis = 'time'

    datasets = read_input_data(metadata)
    ancestors = [info['filename'] for info in metadata]

    time_period = cfg['time_period']

    var = datasets[short_name]

    time_unit = time_period[0].upper()
    start_date = np.datetime_as_string(datasets.time.min(), unit=time_unit)
    end_date = np.datetime_as_string(datasets.time.max(), unit=time_unit)

    name = f'{var.long_name}_{time_period}'
    caption = f"{var.long_name} per {time_period} for {start_date}:{end_date}"

    plot_data(
        cfg=cfg,
        datasets=datasets,
        xaxis=xaxis,
        yaxis=short_name,
        xlabel=f'{xaxis.capitalize()} / {time_period}',
        ylabel=f'{var.long_name} / {var.units}',
        caption=caption,
        name=name,
        ancestors=ancestors,
    )

    filename_data = get_diagnostic_filename(name, cfg, extension='nc')
    datasets.to_netcdf(filename_data)
    log_provenance(caption, filename_data, cfg, ancestors)
Example #6
def visualize_and_save_independence(independence: 'xr.DataArray', cfg: dict,
                                    ancestors: list):
    """Visualize independence."""
    variable = independence.variable_group
    labels = list(independence.model_ensemble.values)

    figure, axes = plt.subplots(figsize=(15, 15),
                                subplot_kw={'aspect': 'equal'})
    chart = sns.heatmap(
        independence,
        linewidths=1,
        cmap="YlGn",
        xticklabels=labels,
        yticklabels=labels,
        cbar_kws={'label': f'Euclidean distance ({independence.units})'},
        ax=axes,
    )
    chart.set_title(f'Distance matrix for {variable}')

    filename_plot = get_plot_filename(f'independence_{variable}', cfg)
    figure.savefig(filename_plot, dpi=300, bbox_inches='tight')
    plt.close(figure)

    filename_data = get_diagnostic_filename(f'independence_{variable}',
                                            cfg,
                                            extension='nc')
    independence.to_netcdf(filename_data)

    caption = f'Euclidean distance matrix for variable {variable}'
    log_provenance(caption, filename_plot, cfg, ancestors)
    log_provenance(caption, filename_data, cfg, ancestors)
Example #7
def test_get_diagnostic_filename():

    cfg = {
        'work_dir': '/some/path',
    }
    filename = shared.get_diagnostic_filename('test', cfg)
    assert filename == '/some/path/test.nc'
Example #8
def test_get_diagnostic_filename_ext():

    cfg = {
        'work_dir': '/some/path',
    }
    filename = shared.get_diagnostic_filename('test', cfg, extension='csv')
    assert filename == '/some/path/test.csv'
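Together these two tests pin down the behaviour of get_diagnostic_filename. A minimal sketch that satisfies them (the actual ESMValTool implementation may differ) is:

import os

def get_diagnostic_filename(basename, cfg, extension='nc'):
    """Return a path for diagnostic output data in the work directory."""
    return os.path.join(cfg['work_dir'], f'{basename}.{extension}')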
Example #9
def main(cfg):
    """Run the diagnostic."""
    cfg = get_default_settings(cfg)
    diag = check_cfg(cfg)
    sns.set(**cfg.get('seaborn_settings', {}))

    # Get input data
    input_data = list(cfg['input_data'].values())
    input_data.extend(io.netcdf_to_metadata(cfg, pattern=cfg.get('pattern')))
    input_data = deepcopy(input_data)
    check_input_data(input_data)
    grouped_data = group_metadata(input_data, 'dataset')

    # Calculate X-axis of emergent constraint
    diag_func = globals()[diag]
    (diag_data, var_attrs, attrs) = diag_func(grouped_data, cfg)
    attrs.update(get_global_attributes(input_data, cfg))

    # Save data
    netcdf_path = get_diagnostic_filename(diag, cfg)
    io.save_scalar_data(diag_data, netcdf_path, var_attrs, attributes=attrs)
    logger.info("Found data:\n%s", pformat(diag_data))

    # Provenance
    provenance_record = ec.get_provenance_record(
        {diag: attrs}, [diag],
        caption=attrs['plot_xlabel'],
        ancestors=[d['filename'] for d in input_data])
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, provenance_record)
Example #10
def main(cfg):
    """Process data for use as input to the PCR-GLOBWB hydrological model."""
    input_data = cfg['input_data'].values()
    grouped_input_data = group_metadata(input_data,
                                        'standard_name',
                                        sort='dataset')

    for standard_name in grouped_input_data:
        logger.info("Processing variable %s", standard_name)
        for attributes in grouped_input_data[standard_name]:
            logger.info("Processing dataset %s", attributes['dataset'])
            input_file = attributes['filename']
            cube = iris.load_cube(input_file)

            # Round times to integer number of days
            time_coord = cube.coord('time')
            time_coord.points = da.floor(time_coord.core_points())
            time_coord.bounds = None

            # Set lat from highest to lowest value
            cube = cube[:, ::-1, ...]

            # Save data
            output_file = get_diagnostic_filename(
                Path(input_file).stem + '_pcrglobwb', cfg)
            iris.save(cube, output_file, fill_value=1.e20)

            # Store provenance
            provenance_record = get_provenance_record(input_file)
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(output_file, provenance_record)
Example #11
def _provenance_map_spei(cfg, name_dict, spei, dataset_name):
    """Set provenance for plot_map_spei."""
    caption = 'Global map of ' + \
              name_dict['drought_char'] + \
              ' [' + name_dict['unit'] + '] ' + \
              'based on ' + cfg['indexname'] + '.'

    if cfg['indexname'].lower == "spei":
        set_refs = ['martin18grl', 'vicente10jclim', ]
    elif cfg['indexname'].lower == "spi":
        set_refs = ['martin18grl', 'mckee93proc', ]
    else:
        set_refs = ['martin18grl', ]

    provenance_record = get_provenance_record([name_dict['input_filenames']],
                                              caption,
                                              ['global'],
                                              set_refs)

    diagnostic_file = get_diagnostic_filename(cfg['indexname'] + '_map' +
                                              name_dict['add_to_filename'] +
                                              '_' +
                                              dataset_name, cfg)

    logger.info("Saving analysis results to %s", diagnostic_file)

    cubesave = cube_to_save_ploted(spei, name_dict)
    iris.save(cubesave, target=diagnostic_file)

    logger.info("Recording provenance of %s:\n%s", diagnostic_file,
                pformat(provenance_record))
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(diagnostic_file, provenance_record)
Example #12
def _provenance_time_series_spei(cfg, data_dict):
    """Provenance for time series plots."""
    caption = 'Time series of ' + \
              data_dict['var'] + \
              ' at ' + data_dict['area'] + '.'

    if cfg['indexname'].lower == "spei":
        set_refs = ['vicente10jclim', ]
    elif cfg['indexname'].lower == "spi":
        set_refs = ['mckee93proc', ]
    else:
        set_refs = ['martin18grl', ]

    provenance_record = get_provenance_record([data_dict['filename']],
                                              caption,
                                              ['reg'], set_refs,
                                              plot_type='times')

    diagnostic_file = get_diagnostic_filename(cfg['indexname'] +
                                              '_time_series_' +
                                              data_dict['area'] +
                                              '_' +
                                              data_dict['dataset_name'], cfg)

    logger.info("Saving analysis results to %s", diagnostic_file)

    cubesave = cube_to_save_ploted_ts(data_dict)
    iris.save(cubesave, target=diagnostic_file)

    logger.info("Recording provenance of %s:\n%s", diagnostic_file,
                pformat(provenance_record))
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(diagnostic_file, provenance_record)
Example #13
def _create_regression_file(tas_cube,
                            cube,
                            dataset_name,
                            cfg,
                            description=None):
    """Save regression plot as netcdf file for a given dataset."""
    var = cube.var_name
    reg = stats.linregress(tas_cube.data, cube.data)
    filename = f'{var}_regression_{dataset_name}'
    attrs = {
        'dataset': dataset_name,
        'regression_r_value': reg.rvalue,
        'regression_slope': reg.slope,
        'regression_interception': reg.intercept,
        'feedback_parameter': reg.slope,
    }
    attrs.update(cfg.get('output_attributes', {}))
    if description is not None:
        attrs['description'] = description
        filename += f"_{description.replace(' ', '_')}"
    if var in ('rtmt', 'rtnt'):
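        # Assuming a Gregory-style regression of TOA net flux against tas:
        # the x-intercept (-intercept / slope) estimates the warming under
        # the imposed (4xCO2) forcing, and halving it gives the ECS at 2xCO2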
        attrs['ECS'] = -reg.intercept / (2.0 * reg.slope)
    tas_coord = iris.coords.AuxCoord(
        tas_cube.data,
        **extract_variables(cfg, as_iris=True)['tas'])
    cube = iris.cube.Cube(cube.data,
                          attributes=attrs,
                          aux_coords_and_dims=[(tas_coord, 0)],
                          **extract_variables(cfg, as_iris=True)[var])
    netcdf_path = get_diagnostic_filename(filename, cfg)
    io.iris_save(cube, netcdf_path)
    return netcdf_path
Example #14
def main(cfg):
    """Process data for use as input to the HYPE hydrological model."""
    input_data = cfg['input_data'].values()
    grouped_input_data = group_metadata(input_data,
                                        'long_name',
                                        sort='dataset')

    for long_name in grouped_input_data:
        logger.info("Processing variable %s", long_name)
        for attributes in grouped_input_data[long_name]:
            logger.info("Processing dataset %s", attributes['dataset'])

            output_file = get_diagnostic_filename(get_output_stem(attributes),
                                                  cfg, 'txt')
            Path(output_file).parent.mkdir(exist_ok=True)

            data, times, ids = get_data_times_and_ids(attributes)

            frame = pandas.DataFrame(data, index=times, columns=ids)

            frame.to_csv(output_file,
                         sep=' ',
                         index_label="DATE",
                         float_format='%.3f')

            # Store provenance
            provenance_record = get_provenance_record(attributes)
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(output_file, provenance_record)
Example #15
def main(cfg):
    """Rename preprocessed native6 file."""
    fixed_files = cfg['input_data']

    for file, info in fixed_files.items():
        stem = Path(file).stem
        basename = stem.replace('native', 'OBS')

        if info['diagnostic'] == 'daily':
            for mip in ['day', 'Eday', 'CFday']:
                if CMOR_TABLES['CMIP6'].get_variable(mip, info['short_name']):
                    basename = basename.replace('E1hr', mip)
            basename = basename.replace('E1hr', 'day')

        cube = iris.load_cube(file)
        try:
            time = cube.coord('time')
        except iris.exceptions.CoordinateNotFoundError:
            pass
        else:
            if info['diagnostic'] == "monthly":
                start = time.cell(0).point.strftime("%Y%m")
                end = time.cell(-1).point.strftime("%Y%m")
            else:
                start = time.cell(0).point.strftime("%Y%m%d")
                end = time.cell(-1).point.strftime("%Y%m%d")
            basename = f"{basename.rstrip('0123456789-')}{start}-{end}"

        outfile = get_diagnostic_filename(basename, cfg)
        logger.info('Moving %s to %s', file, outfile)
        shutil.move(file, outfile)
Example #16
def plot_bar_deangelis(cfg, data_var_sum, available_exp, available_vars):
    """Plot linear regression used to calculate ECS."""
    if not cfg[n.WRITE_PLOTS]:
        return

    # Plot data
    fig, axx = plt.subplots()

    set_colors = [
        'cornflowerblue', 'orange', 'silver', 'limegreen', 'rosybrown',
        'orchid'
    ]
    bar_width = 1.0 / float(len(available_vars))

    for iii, iexp in enumerate(available_exp):
        axx.bar(np.arange(len(available_vars)) + bar_width * float(iii),
                data_var_sum[iexp],
                bar_width,
                color=set_colors[iii],
                label=iexp)

    axx.set_xlabel(' ')
    axx.set_ylabel(r'Model mean (W m$^{-2}$)')
    axx.set_title(' ')
    axx.set_xticks(np.arange(len(available_vars)) + bar_width)
    axx.set_xticklabels(available_vars)
    axx.legend(loc=1)

    fig.tight_layout()
    fig.savefig(get_plot_filename('bar_all', cfg), dpi=300)
    plt.close()

    caption = 'Global average multi-model mean comparing different ' + \
              'model experiments and flux variables.'

    provenance_record = get_provenance_record(
        _get_sel_files_var(cfg, available_vars), caption, ['mean'], ['global'])

    diagnostic_file = get_diagnostic_filename('bar_all', cfg)

    logger.info("Saving analysis results to %s", diagnostic_file)

    list_dict = {}
    list_dict["data"] = []
    list_dict["name"] = []
    for iexp in available_exp:
        list_dict["data"].append(data_var_sum[iexp])
        list_dict["name"].append({
            'var_name': iexp + '_all',
            'long_name': 'Fluxes for ' + iexp + ' experiment',
            'units': 'W m-2'
        })

    iris.save(cube_to_save_vars(list_dict), target=diagnostic_file)

    logger.info("Recording provenance of %s:\n%s", diagnostic_file,
                pformat(provenance_record))
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(diagnostic_file, provenance_record)
Example #17
def plot_cdf(cfg, psi_cube, ecs_cube, obs_cube):
    """Plot cumulative distribution function of ECS."""
    confidence_level = cfg.get('confidence_level', 0.66)
    (ecs_lin, ecs_pdf) = ec.gaussian_pdf(psi_cube.data, ecs_cube.data,
                                         np.mean(obs_cube.data),
                                         np.std(obs_cube.data))
    ecs_cdf = ec.cdf(ecs_lin, ecs_pdf)

    # Provenance
    filename = 'cdf_{}'.format(obs_cube.attributes['dataset'])
    netcdf_path = get_diagnostic_filename(filename, cfg)
    cube = iris.cube.Cube(ecs_cdf,
                          var_name='cdf',
                          long_name='Cumulative distribution function',
                          units='1')
    cube.add_aux_coord(
        iris.coords.AuxCoord(ecs_lin, **ih.convert_to_iris(ECS_ATTRS)), 0)
    io.iris_save(cube, netcdf_path)
    project = _get_project(cfg)
    provenance_record = get_provenance_record(
        "The CDF for ECS. The horizontal dot-dashed lines show the {}% "
        "confidence limits. The orange histograms show the prior "
        "distributions that arise from equal weighting of the {} models in "
        "0.5 K bins.".format(int(confidence_level * 100), project), ['mean'],
        ['other'], _get_ancestor_files(cfg, obs_cube.attributes['dataset']))

    # Plot
    if cfg['write_plots']:
        AXES.plot(ecs_lin,
                  ecs_cdf,
                  color='black',
                  linewidth=2.0,
                  label='Emergent constraint')
        AXES.hist(ecs_cube.data,
                  bins=6,
                  range=(2.0, 5.0),
                  cumulative=True,
                  density=True,
                  color='orange',
                  label='{} models'.format(project))
        AXES.axhline((1.0 - confidence_level) / 2.0,
                     color='black',
                     linestyle='dashdot')
        AXES.axhline((1.0 + confidence_level) / 2.0,
                     color='black',
                     linestyle='dashdot')

        # Plot appearance
        AXES.set_title('CDF of emergent constraint')
        AXES.set_xlabel('ECS / K')
        AXES.set_ylabel('CDF')
        legend = AXES.legend(loc='upper left')

        # Save plot
        provenance_record['plot_file'] = _save_fig(cfg, filename, legend)

    # Write provenance
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, provenance_record)
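The helper ec.cdf is not shown. A plausible sketch, assuming it accumulates the PDF with a trapezoidal rule and normalises the result, could be:

import numpy as np

def cdf(data, pdf):
    """Hypothetical sketch: cumulative distribution from a sampled PDF."""
    areas = np.diff(data) * (pdf[:-1] + pdf[1:]) / 2.0  # trapezoid areas
    cdf_values = np.concatenate([[0.0], np.cumsum(areas)])
    return cdf_values / cdf_values[-1]  # normalise so the CDF ends at 1.0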
Example #18
def su(grouped_data, cfg):
    """Su et al. (2014) constraint."""
    metric = cfg['metric']
    logger.info("Found metric '%s' for Su et al. (2014) constraint", metric)

    # Extract cubes
    (var_name, reference_datasets) = _get_su_variable(grouped_data)
    cube_dict = _get_su_cube_dict(grouped_data, var_name, reference_datasets)
    diag_data = {}
    ref_cube = cube_dict[reference_datasets]

    # Variable attributes
    var_attrs = {
        'short_name': 'alpha' if metric == 'regression_slope' else 'rho',
        'long_name': f"Error in vertically-resolved tropospheric "
                     f"zonal-average {ref_cube.long_name} between 40°N and "
                     f"45°S expressed as {metric.replace('_', ' ')} between "
                     f"model data and observations",
        'units': '1',
    }
    attrs = {
        'plot_xlabel': f'Model performance in {ref_cube.long_name} [1]',
        'plot_title': 'Su et al. (2014) constraint',
        'provenance_authors': ['schlund_manuel'],
        'provenance_domains': ['trop', 'midlat'],
        'provenance_realms': ['atmos'],
        'provenance_references': ['su14jgr'],
        'provenance_statistics': ['corr'],
        'provenance_themes': ['EC'],
    }

    # Calculate constraint
    for (dataset_name, cube) in cube_dict.items():
        logger.info("Processing dataset '%s'", dataset_name)

        # Plot cube
        if cube.ndim == 2:
            iris.quickplot.contourf(cube)
            filename = f"su_{dataset_name.replace('|', '_')}"
            plot_path = get_plot_filename(filename, cfg)
            plt.savefig(plot_path, **cfg['savefig_kwargs'])
            logger.info("Wrote %s", plot_path)
            plt.close()

            # Provenance
            netcdf_path = get_diagnostic_filename(filename, cfg)
            io.iris_save(cube, netcdf_path)
            ancestors = cube.attributes['ancestors'].split('|')
            provenance_record = ec.get_provenance_record(
                {'su': attrs}, ['su'],
                caption=f'{cube.long_name} for {dataset_name}.',
                plot_type='zonal', plot_file=plot_path, ancestors=ancestors)
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(netcdf_path, provenance_record)

        # Similarity metric
        diag_data[dataset_name] = _similarity_metric(cube, ref_cube, metric)

    return (diag_data, var_attrs, attrs)
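The helper _similarity_metric is not shown. Given the two metrics named above, a hypothetical sketch comparing the flattened model field with the reference could be:

from scipy import stats

def _similarity_metric(cube, ref_cube, metric):
    """Hypothetical sketch: similarity between model and reference data."""
    reg = stats.linregress(ref_cube.data.ravel(), cube.data.ravel())
    if metric == 'regression_slope':
        return reg.slope
    return reg.rvalue  # correlation between model and observations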
Example #19
def write_data(config, data):
    """Write all the calculated data to output file."""
    cubes = iris.cube.CubeList([data['equatorial_ref']] +
                               data['zonal_mean_errors'] +
                               data['equatorials'] + data['equatorial_errors'])
    path = get_diagnostic_filename('fig-9-14', config)
    iris.save(cubes, path)
    return path
Example #20
def mapplot(dataarray, cfg, title_pattern, filename_part, ancestors,
            **colormesh_args):
    """Visualize weighted temperature."""
    period = '{start_year}-{end_year}'.format(**read_metadata(cfg)['tas'][0])
    if 'tas_reference' in read_metadata(cfg).keys():
        meta = read_metadata(cfg)['tas_reference']
        period = 'change: {} minus {start_year}-{end_year}'.format(
            period, **meta[0])
    metric = cfg['model_aggregation']
    if isinstance(metric, int):
        metric = f'{metric}perc'
    proj = ccrs.PlateCarree(central_longitude=0)
    figure, axes = plt.subplots(subplot_kw={'projection': proj})

    dataarray = set_antimeridian(dataarray, cfg.get('antimeridian', 'pacific'))
    dataarray = dataarray.dropna('lon', how='all').dropna('lat', how='all')

    dataarray.plot.pcolormesh(
        ax=axes,
        transform=ccrs.PlateCarree(),
        levels=9,
        robust=True,
        extend='both',
        **colormesh_args
        # colorbar size often does not fit nicely
        # https://stackoverflow.com/questions/18195758/set-matplotlib-colorbar-size-to-match-graph
        # cbar_kwargs={'fraction': .021}
    )

    lons = dataarray.lon.values
    lats = dataarray.lat.values
    longitude_formatter = LongitudeFormatter()
    latitude_formatter = LatitudeFormatter()
    default_xticks = np.arange(np.floor(lons.min()), np.ceil(lons.max()), 10)
    default_yticks = np.arange(np.floor(lats.min()), np.ceil(lats.max()), 10)

    axes.coastlines()
    axes.set_xticks(cfg.get('xticks', default_xticks), crs=proj)
    axes.set_yticks(cfg.get('yticks', default_yticks), crs=proj)
    axes.xaxis.set_ticks_position('both')
    axes.yaxis.set_ticks_position('both')
    axes.xaxis.set_major_formatter(longitude_formatter)
    axes.yaxis.set_major_formatter(latitude_formatter)
    axes.set_xlabel('')
    axes.set_ylabel('')

    title = title_pattern.format(metric=metric, period=period)
    axes.set_title(title)

    filename_plot = get_plot_filename(filename_part, cfg)
    figure.savefig(filename_plot, dpi=300, bbox_inches='tight')
    plt.close(figure)

    filename_data = get_diagnostic_filename(filename_part, cfg, extension='nc')
    dataarray.to_netcdf(filename_data)

    log_provenance(title, filename_plot, cfg, ancestors)
    log_provenance(title, filename_data, cfg, ancestors)
Example #21
def save(output, cfg, provenance):
    """Save the output as csv file."""
    scenarios = pd.DataFrame(output)
    filename = get_diagnostic_filename('scenarios', cfg, extension='csv')
    scenarios.to_csv(filename)
    print(scenarios.round(2))
    print(f"Output written to {filename}")
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(filename, provenance)
Example #22
def _get_filename(var_meta, cfg, extension="nc"):
    """Return a filename for output data."""
    basename = "_".join([var_meta["project"],
                         var_meta["dataset"],
                         var_meta["exp"],
                         var_meta["ensemble"],
                         var_meta["short_name"]])

    filename = get_diagnostic_filename(basename, cfg, extension=extension)
    return filename
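As a usage example (with hypothetical metadata values), the helper would then return a path in work_dir such as CMIP6_EXAMPLE_historical_r1i1p1f1_tas.nc:

var_meta = {
    'project': 'CMIP6',
    'dataset': 'EXAMPLE',  # hypothetical dataset name
    'exp': 'historical',
    'ensemble': 'r1i1p1f1',
    'short_name': 'tas',
}
filename = _get_filename(var_meta, cfg)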
Example #23
def main(cfg):
    """Process data for use as input to the PCR-GLOBWB hydrological model."""
    for dataset, metadata in group_metadata(cfg['input_data'].values(),
                                            'dataset').items():
        for short_name in "pr", "tas":
            logger.info("Processing variable %s for dataset %s", short_name,
                        dataset)

            # Load preprocessed cubes for normal data and climatology
            var = select_metadata(metadata, variable_group=short_name)[0]
            cube = iris.load_cube(var['filename'])
            var_climatology = select_metadata(
                metadata,
                variable_group=short_name + '_climatology',
            )[0]
            cube_climatology = iris.load_cube(var_climatology['filename'])

            # Create a spin-up year for pcrglob based on the climatology data
            cube = add_spinup_year(cube, cube_climatology)

            # Round times to integer number of days
            time_coord = cube.coord('time')
            time_coord.points = da.floor(time_coord.core_points())
            time_coord.bounds = None
            time_coord.guess_bounds()

            # Set lat from highest to lowest value
            cube = cube[:, ::-1, ...]

            # Workaround for bug in PCRGlob
            # (see https://github.com/UU-Hydro/PCR-GLOBWB_model/pull/13)
            for coord_name in ['latitude', 'longitude']:
                coord = cube.coord(coord_name)
                coord.points = coord.points + 0.001

            # Unit conversion 'kg m-3 day-1' to 'm' precip (divide by density)
            if short_name == "pr":
                cube.units = cube.units / 'kg m-3 day-1'
                cube.data = cube.core_data() / 1000

            # Save data
            basename = '_'.join([
                'pcrglobwb',
                Path(var['filename']).stem,
                cfg['basin'],
            ])
            output_file = get_diagnostic_filename(basename, cfg)
            iris.save(cube, output_file, fill_value=1.e20)

            # Store provenance
            provenance_record = get_provenance_record(
                [var['filename'], var_climatology['filename']])
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(output_file, provenance_record)
Example #24
def _plot(cfg, cube, dataset_name, tcr):
    """Create scatterplot of temperature anomaly vs. time."""
    if not cfg.get('plot', True):
        return (None, None)
    logger.debug("Plotting temperature anomaly vs. time for '%s'",
                 dataset_name)
    (_, axes) = plt.subplots()

    # Plot data
    x_data = np.arange(cube.shape[0])
    y_data = cube.data
    axes.scatter(x_data, y_data, color='b', marker='o')

    # Plot lines
    line_kwargs = {'color': 'k', 'linewidth': 1.0, 'linestyle': '--'}
    axes.axhline(tcr, **line_kwargs)
    axes.axvline(START_YEAR_IDX, **line_kwargs)
    axes.axvline(END_YEAR_IDX, **line_kwargs)

    # Appearance
    units_str = (cube.units.symbol
                 if cube.units.origin is None else cube.units.origin)
    axes.set_title(dataset_name)
    axes.set_xlabel('Years after experiment start')
    axes.set_ylabel(f'Temperature anomaly / {units_str}')
    axes.set_xlim([x_data[0] - 1, x_data[-1] + 1])
    axes.set_ylim([-1.0, 7.0])
    axes.text(0.0, tcr + 0.1, 'TCR = {:.1f} {}'.format(tcr, units_str))

    # Save cube
    netcdf_path = get_diagnostic_filename(dataset_name, cfg)
    io.iris_save(cube, netcdf_path)

    # Save plot
    plot_path = get_plot_filename(dataset_name, cfg)
    plt.savefig(plot_path, **cfg['savefig_kwargs'])
    logger.info("Wrote %s", plot_path)
    plt.close()

    # Provenance
    provenance_record = get_provenance_record(
        f"Time series of the global mean surface air temperature anomaly "
        f"(relative to the linear fit of the pre-industrial control run) of "
        f"{dataset_name} for the 1% CO2 increase per year experiment. The "
        f"horizontal dashed line indicates the transient climate response "
        f"(TCR) defined as the 20 year average temperature anomaly centered "
        f"at the time of CO2 doubling (vertical dashed lines).")
    provenance_record.update({
        'plot_file': plot_path,
        'plot_types': ['times'],
    })

    return (netcdf_path, provenance_record)
Example #25
def main(diag_config):
    """
    Diagnostic function to compare the zonal turnover time.

    Argument:
    --------
        diag_config - nested dictionary of metadata
    """
    model_data_dict = group_metadata(diag_config['input_data'].values(),
                                     'dataset')

    fig_config = _get_fig_config(diag_config)
    zonal_tau_mod = {}
    for model_name, model_dataset in model_data_dict.items():
        zonal_tau_mod[model_name] = {}
        ctotal = _load_variable(model_dataset, 'ctotal')
        gpp = _load_variable(model_dataset, 'gpp')
        zonal_tau_mod[model_name] = _calc_zonal_tau(gpp, ctotal, fig_config)

    zonal_tau_obs = _get_obs_data_zonal(diag_config)

    obs_var = diag_config.get('obs_variable')[0]
    tau_obs = zonal_tau_obs[obs_var]
    base_name = '{title}_{source_label}_{grid_label}z'.format(
        title=tau_obs.long_name,
        source_label=diag_config['obs_info']['source_label'],
        grid_label=diag_config['obs_info']['grid_label'])

    provenance_record = _get_provenance_record(
        "Comparison of latitudinal (zonal) variations of observation-based and"
        " modelled ecosystem carbon turnover time. The zonal turnover time is"
        " calculated as the ratio of zonal `ctotal` and `gpp`. Reproduces "
        " figure 2a and 2b in Carvalhais et al. (2014).", ['mean', 'perc'],
        ['zonal'], _get_ancestor_files(diag_config, obs_var))

    if diag_config['write_netcdf']:
        model_cubes = [
            c for c in zonal_tau_mod.values() if isinstance(c, iris.cube.Cube)
        ]
        obs_cubes = [
            c for c in zonal_tau_obs.values() if isinstance(c, iris.cube.Cube)
        ]
        netcdf_path = get_diagnostic_filename(base_name, diag_config)
        save_cubes = iris.cube.CubeList(model_cubes + obs_cubes)
        iris.save(save_cubes, netcdf_path)
        with ProvenanceLogger(diag_config) as provenance_logger:
            provenance_logger.log(netcdf_path, provenance_record)

    if diag_config['write_plots']:
        plot_path = get_plot_filename(base_name, diag_config)
        _plot_zonal_tau(plot_path, zonal_tau_mod, zonal_tau_obs, diag_config)
        with ProvenanceLogger(diag_config) as provenance_logger:
            provenance_logger.log(plot_path, provenance_record)
Example #26
def main(cfg, input_data=None, description=None):
    """Run the diagnostic."""
    cfg = deepcopy(cfg)
    cfg.setdefault('dtype', 'float64')
    cfg.setdefault('mlr_model_name', 'MMM')
    cfg.setdefault('weighted_samples', {
        'area_weighted': True,
        'time_weighted': True
    })

    # Get data
    grouped_data = get_grouped_data(cfg, input_data=input_data)
    description = '' if description is None else f'_for_{description}'

    # Loop over all tags
    for (tag, datasets) in grouped_data.items():
        logger.info("Processing label '%s'", tag)

        # Get label datasets and reference dataset if possible
        label_datasets = select_metadata(datasets, var_type='label')
        (ref_dataset, pred_name) = get_reference_dataset(datasets, tag)
        if pred_name is None:
            pred_name = cfg.get('prediction_name')

        # Calculate multi-model mean
        logger.info("Calculating multi-model mean")
        mmm_cube = get_mmm_cube(cfg, label_datasets)
        add_general_attributes(mmm_cube, tag=tag, prediction_name=pred_name)
        mmm_path = get_diagnostic_filename(
            f"mmm_{tag}_prediction{description}", cfg)
        io.iris_save(mmm_cube, mmm_path)
        write_provenance(
            cfg, mmm_path, [d['filename'] for d in label_datasets],
            f"Predicted {mmm_cube.long_name} of MMM model "
            f"{cfg['mlr_model_name']}.")

        # Estimate prediction error using cross-validation
        if 'mmm_error_type' in cfg:
            save_error(cfg,
                       label_datasets,
                       mmm_path,
                       tag=tag,
                       prediction_name=pred_name)

        # Calculate residuals
        if ref_dataset is not None:
            save_residuals(cfg,
                           mmm_cube,
                           ref_dataset,
                           label_datasets,
                           tag=tag,
                           prediction_name=pred_name)
Example #27
def main(cfg):
    """Load the pre-processed anomalies and plot them."""

    # first read them in, pop into dictionaries keyed by model name
    # group by project first (CMIP5, CMIP6, UKCP)
    projects = group_metadata(cfg["input_data"].values(), "project")

    results = {}
    for p in projects:
        results[p] = {}
        if p == "UKCP18":
            # loop over ensembles
            models = group_metadata(projects[p], "ensemble")
        else:
            # loop over datasets
            models = group_metadata(projects[p], "dataset")

        for m in models:
            if len(models[m]) > 1:
                raise ValueError("Too many bits of data")
            fname = models[m][0]["filename"]
            data = iris.load_cube(fname)
            results[p][m] = data.data.item()

    # plot and save the results
    for p in projects:
        # use pandas to create data for a csv file
        results_df = pd.DataFrame.from_dict(results[p], orient='index')
        # save data as csv
        results_df.to_csv(get_diagnostic_filename(f"{p}_global_tas_anom", cfg,
                                                  "csv"),
                          header=False)

        # get list of models
        models = results[p].keys()
        # and corresponding values
        vals = [results[p][m] for m in models]

        fig, ax = plt.subplots(figsize=(12.8, 9.6))

        # plot bar chart
        y_pos = np.arange(len(models))
        colors = np.empty(len(models), dtype=str)
        colors[::2] = 'r'
        colors[1::2] = 'b'
        ax.barh(y_pos, vals, color=colors)
        ax.set_yticks(y_pos, labels=models)

        plot_fname = get_plot_filename(f'{p}_global_anomaly', cfg)
        fig.tight_layout()
        fig.savefig(plot_fname)
        plt.close(fig)
Example #28
def write_data(cfg, all_data, metadata):
    """Write netcdf file."""
    new_data = {}
    for (label, xy_data) in all_data.items():
        for (idx, dataset_name) in enumerate(xy_data[0]):
            key = f'{label}-{dataset_name}'
            value = xy_data[1][idx]
            new_data[key] = value
    netcdf_path = get_diagnostic_filename(metadata['var_name'], cfg)
    var_attrs = metadata.copy()
    var_attrs['short_name'] = var_attrs.pop('var_name')
    io.save_scalar_data(new_data, netcdf_path, var_attrs)
    return netcdf_path
Example #29
def get_provenance_record(cfg, basename, caption, extension, ancestor_files):
    """Create a provenance record describing the diagnostic data and plot."""
    record = {
        'caption': caption,
        'statistics': ['other'],
        'domains': ['global'],
        'authors': ['berg_peter'],
        'references': ['acknow_project'],
        'ancestors': ancestor_files,
    }
    diagnostic_file = get_diagnostic_filename(basename, cfg, extension)
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(diagnostic_file, record)
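Note that despite its name this helper logs the provenance record immediately rather than returning it. A call might look like (hypothetical arguments):

get_provenance_record(cfg, 'timeseries', 'Time series of global mean tas.',
                      'nc', ['tas_ERA5.nc'])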
Example #30
def plot_temperature_anomaly(cfg, tas_cubes, lambda_cube, obs_name):
    """Plot temperature anomaly versus time."""
    for cube in tas_cubes.values():
        cube.data -= np.mean(
            cube.extract(
                iris.Constraint(year=lambda cell: 1961 <= cell <= 1990)).data)

    # Save netcdf file and provenance
    filename = 'temperature_anomaly_{}'.format(obs_name)
    netcdf_path = get_diagnostic_filename(filename, cfg)
    io.save_1d_data(tas_cubes, netcdf_path, 'year', TASA_ATTRS)
    project = _get_project(cfg)
    provenance_record = get_provenance_record(
        "Simulated change in global temperature from {} models (coloured "
        "lines), compared to the global temperature anomaly from the {} "
        "dataset (black dots). The anomalies are relative to a baseline "
        "period of 1961-1990.".format(project, obs_name), ['anomaly'],
        ['times'], _get_ancestor_files(cfg, obs_name))

    # Plot
    if cfg['write_plots']:
        models = lambda_cube.coord('dataset').points

        # Plot lines
        for model in models:
            cube = tas_cubes[model]
            AXES.plot(cube.coord('year').points,
                      cube.data,
                      color=_get_model_color(model, lambda_cube))
        obs_style = plot.get_dataset_style('OBS', 'cox18nature')
        obs_cube = tas_cubes[obs_name]
        AXES.plot(obs_cube.coord('year').points,
                  obs_cube.data,
                  linestyle='none',
                  marker='o',
                  markeredgecolor=obs_style['color'],
                  markerfacecolor=obs_style['color'])

        # Plot appearance
        AXES.set_title('Simulation of global warming record')
        AXES.set_xlabel('Year')
        AXES.set_ylabel('Temperature anomaly / K')
        legend = _get_line_plot_legend()

        # Save plot
        provenance_record['plot_file'] = _save_fig(cfg, filename, legend)

    # Write provenance
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, provenance_record)