def get_grouped_data(cfg, input_data=None):
    """Get input files."""
    if input_data is None:
        logger.debug("Loading input data from 'cfg' argument")
        input_data = mlr.get_input_data(cfg,
                                        pattern=cfg.get('pattern'),
                                        ignore=cfg.get('ignore'))
    else:
        logger.debug("Loading input data from 'input_data' argument")
        if not mlr.datasets_have_mlr_attributes(input_data, log_level='error'):
            raise ValueError("At least one input dataset does not have valid "
                             "MLR attributes")
    if not input_data:
        raise ValueError("No input data found")
    paths = [d['filename'] for d in input_data]
    logger.debug("Found files")
    logger.debug(pformat(paths))

    # Extract necessary data
    label_data = select_metadata(input_data, var_type='label')
    if not label_data:
        raise ValueError("No data with var_type 'label' found")
    prediction_reference_data = select_metadata(
        input_data, var_type='prediction_reference')
    extracted_data = label_data + prediction_reference_data
    logger.debug("Found 'label' data")
    logger.debug(pformat([d['filename'] for d in label_data]))
    logger.debug("Found 'prediction_reference' data")
    logger.debug(pformat([d['filename'] for d in prediction_reference_data]))

    # Return grouped data
    return group_metadata(extracted_data, 'tag')
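# Illustrative sketch (not part of the original diagnostics): simplified
# stand-ins mimicking the behaviour of ESMValTool's select_metadata() and
# group_metadata() as they are used throughout these examples. The toy
# metadata below is hypothetical.
def _select(metadata, **attributes):
    """Return all metadata dicts matching every given attribute."""
    return [d for d in metadata
            if all(d.get(key) == value for key, value in attributes.items())]
def _group(metadata, attribute):
    """Return a dict mapping each value of 'attribute' to its datasets."""
    groups = {}
    for dataset in metadata:
        groups.setdefault(dataset.get(attribute), []).append(dataset)
    return groups
toy_input = [
    {'filename': 'a.nc', 'var_type': 'label', 'tag': 'GPP'},
    {'filename': 'b.nc', 'var_type': 'prediction_reference', 'tag': 'GPP'},
    {'filename': 'c.nc', 'var_type': 'feature', 'tag': 'tas'},
]
label_data = _select(toy_input, var_type='label')
extracted = label_data + _select(toy_input, var_type='prediction_reference')
grouped = _group(extracted, 'tag')
# grouped == {'GPP': [<label dict>, <prediction_reference dict>]}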
def wfluxes(model, wdir, input_data):
    """Compute auxiliary fields and perform time averaging of existing fields.

    Arguments:
    - model: the model name;
    - wdir: the working directory where the outputs are stored;
    - input_data: a list of metadata dictionaries describing the input fields;

    Author:
    Valerio Lembo, University of Hamburg (2019).
    """
    cdo = Cdo()
    hfls_file = e.select_metadata(input_data, short_name='hfls',
                                  dataset=model)[0]['filename']
    pr_file = e.select_metadata(input_data, short_name='pr',
                                dataset=model)[0]['filename']
    prsn_file = e.select_metadata(input_data, short_name='prsn',
                                  dataset=model)[0]['filename']
    aux_file = wdir + '/aux.nc'
    evspsbl_file = (wdir + '/{}_evspsbl.nc'.format(model))
    cdo.divc(str(L_C), input="{}".format(hfls_file), output=evspsbl_file)
    # Rainfall precipitation
    prr_file = wdir + '/{}_prr.nc'.format(model)
    cdo.sub(input="{} {}".format(pr_file, prsn_file), output=aux_file)
    cdo.chname('pr,prr', input=aux_file, output=prr_file)
    return evspsbl_file, prr_file
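# Rough numerical check (illustrative, not from the original module): the
# cdo.divc call above converts the surface latent heat flux into an
# evaporation mass flux, assuming L_C holds the latent heat of vaporization
# (about 2.5e6 J kg-1).
L_C_ASSUMED = 2.501e6        # J kg-1 (assumed value of L_C)
hfls_example = 80.0          # W m-2, hypothetical latent heat flux
evap_flux = hfls_example / L_C_ASSUMED      # ~3.2e-5 kg m-2 s-1
evap_mm_day = evap_flux * 86400.0           # ~2.8 mm day-1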
def make_diag_tci(cfg,
                  dataset,
                  input_data,
                  sm_name="mrlsl",
                  hf_name="hfls",
                  tci_name="tci"):
    """Shim routine between ESMValTool and the generic make_tci()."""

    sm_meta = select_metadata(input_data, short_name=sm_name)[0]
    hf_meta = select_metadata(input_data, short_name=hf_name)[0]

    sm_meta[
        "standard_name"] = "depth_integrated_moisture_content_of_soil_layer"  # noqa

    tci_meta = sm_meta.copy()
    tci_meta["short_name"] = tci_name
    tci_meta["standard_name"] = "terrestrial_coupling_index"

    filename_tci = _get_filename(tci_meta, cfg, extension="nc")

    alpha = 0.05  # pvalue rejection threshold.

    tci = make_tci(sm_meta["filename"],
                   hf_meta["filename"],
                   filename_tci,
                   standard_name_sm=sm_meta["standard_name"],
                   standard_name_hf=hf_meta["standard_name"],
                   standard_name_tci=tci_meta["standard_name"],
                   alpha=alpha)

    return tci
def get_anomalies(ds_list, relative=False):
    """Compute the future minus baseline anomaly (in % if relative)."""
    # determine historic and future periods
    start_years = list(group_metadata(ds_list, "start_year"))
    base_clim_start = min(start_years)
    fut_clim_start = max(start_years)

    # construct baseline
    base_metadata = select_metadata(ds_list, start_year=base_clim_start)
    base_file = base_metadata[0]["filename"]
    base_cube = iris.load_cube(base_file)

    # get future
    fut_metadata = select_metadata(ds_list, start_year=fut_clim_start)
    fut_file = fut_metadata[0]["filename"]
    fut_cube = iris.load_cube(fut_file)

    if relative:
        diff = fut_cube - base_cube
        anomaly = (diff / base_cube) * 100
        anomaly.units = "%"
    else:
        anomaly = fut_cube - base_cube

    # ensure longitude coord is on -180 to 180 range
    try:
        anomaly = anomaly.intersection(longitude=(-180.0, 180.0))
    except ValueError:
        # remove and re add bounds to attempt to fix
        anomaly.coord('longitude').bounds = None
        anomaly.coord('longitude').guess_bounds()
        anomaly = anomaly.intersection(longitude=(-180.0, 180.0))

    return anomaly
def get_anomalies(ds_list, base_clim_start, fut_clim_start, relative=False):
    """Compute the future minus baseline anomaly for the given start years."""
    # construct baseline
    base_metadata = select_metadata(ds_list, start_year=base_clim_start)
    if not base_metadata:
        logging.warning(
            f"Base climatology (start {base_clim_start}) not found")
        return None
    base_file = base_metadata[0]["filename"]
    base_cube = iris.load_cube(base_file)

    # get future
    fut_metadata = select_metadata(ds_list, start_year=fut_clim_start)
    if not fut_metadata:
        logging.warning(
            f"Future climatology (start {fut_clim_start}) not found")
        return None
    fut_file = fut_metadata[0]["filename"]
    fut_cube = iris.load_cube(fut_file)

    if relative:
        diff = fut_cube - base_cube
        anomaly = (diff / base_cube) * 100
        anomaly.units = "%"
    else:
        anomaly = fut_cube - base_cube

    return anomaly
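# Minimal standalone sketch (not from the original code) of the anomaly
# arithmetic used in both get_anomalies() variants above: absolute difference
# or relative change in percent.
import numpy as np
base_demo = np.array([2.0, 4.0, 5.0])    # hypothetical baseline climatology
fut_demo = np.array([2.5, 3.0, 6.0])     # hypothetical future climatology
abs_anomaly = fut_demo - base_demo                       # [ 0.5, -1. ,  1. ]
rel_anomaly = (fut_demo - base_demo) / base_demo * 100.  # [ 25., -25.,  20.] %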
def _get_anomaly_cubes(cfg):
    """Get all anomaly cubes."""
    logger.info("Calculating anomalies")
    cubes = {}
    ancestors = {}
    input_data = cfg['input_data'].values()
    onepct_data = select_metadata(input_data, short_name='tas', exp='1pctCO2')

    # Process data
    for dataset in onepct_data:
        dataset_name = dataset['dataset']
        pi_data = select_metadata(input_data,
                                  short_name='tas',
                                  exp='piControl',
                                  dataset=dataset_name)
        if not pi_data:
            raise ValueError("No 'piControl' data available for dataset "
                             "'dataset_name'")
        onepct_cube = iris.load_cube(dataset['filename'])
        pi_cube = iris.load_cube(pi_data[0]['filename'])
        anomaly_cube = _get_anomaly_cube(onepct_cube, pi_cube)
        cubes[dataset_name] = anomaly_cube
        ancestors[dataset_name] = [dataset['filename'], pi_data[0]['filename']]

    # Calculate multi-model mean if desired
    if cfg.get('calculate_mmm', True):
        (mmm_cube, mmm_ancestors) = _get_mmm_anomaly(cubes, ancestors, cfg)
        cubes['MultiModelMean'] = mmm_cube
        ancestors['MultiModelMean'] = mmm_ancestors

    return (cubes, ancestors)
def preprocess_data(cfg):
    """Extract input data."""
    input_data = deepcopy(list(cfg['input_data'].values()))
    if not input_data:
        return ([], [])

    # Use 'rtmt' instead of 'rtnt' if necessary
    for dataset in input_data:
        if dataset['short_name'] == 'rtmt':
            RTMT_DATASETS.add(dataset['dataset'])
            dataset['short_name'] = 'rtnt'
    if RTMT_DATASETS:
        logger.info("Using 'rtmt' instead of 'rtnt' for datasets '%s'",
                    RTMT_DATASETS)

    # Calculate anomalies for every dataset
    input_data = _get_anomaly_data(input_data)

    # Calculate multi-model mean
    if cfg.get('calculate_mmm', True):
        input_data = _get_multi_model_mean(input_data)

    # Group data in terms of dataset
    tas_data = select_metadata(input_data, short_name='tas')
    rtnt_data = select_metadata(input_data, short_name='rtnt')
    tas_data = group_metadata(tas_data, 'dataset')
    rtnt_data = group_metadata(rtnt_data, 'dataset')
    return (tas_data, rtnt_data)
def make_plot(metadata, scenarios, cfg, provenance):
    """Make figure 3, left graph.

    Multimodel values as line, reference value in black square,
    steering variables in dark dots.
    """
    fig, axes = plt.subplots()
    for member in select_metadata(metadata, variable_group='tas_cmip'):
        filename = member['filename']
        dataset = xr.open_dataset(filename)
        if 'MultiModel' not in filename:
            axes.plot(dataset.time.dt.year,
                      dataset.tas.values,
                      c='grey',
                      alpha=0.3,
                      lw=.5,
                      label='CMIP members')
        else:
            # Only display stats for the future period:
            dataset = dataset.sel(time=slice('2010', None, None))
            axes.plot(dataset.time.dt.year,
                      dataset.tas.values,
                      color='k',
                      linewidth=2,
                      label='CMIP ' + Path(filename).stem.split('_')[0][10:])

    for member in select_metadata(metadata, variable_group='tas_target'):
        filename = member['filename']
        dataset = xr.open_dataset(filename)
        if 'MultiModel' not in filename:
            axes.plot(dataset.time.dt.year,
                      dataset.tas.values,
                      color='blue',
                      linewidth=1,
                      label=member['dataset'])

    # Add the scenarios with dots at the CMIP dT and bars for the periods
    for i, scenario in enumerate(scenarios):
        axes.scatter(scenario['year'],
                     scenario['cmip_dt'],
                     s=50,
                     zorder=10,
                     color='r',
                     label=r"Scenarios' steering $\Delta T_{CMIP}$")
        _timeline(axes, i, scenario['period_bounds'])

    handles, labels = plt.gca().get_legend_handles_labels()
    by_label = dict(zip(labels, handles))  # dict removes dupes
    axes.legend(by_label.values(), by_label.keys())
    axes.set_xlabel('Year')
    axes.set_ylabel(r'Global mean $\Delta T$ (K) w.r.t. reference period')

    # Save figure
    filename = get_plot_filename('global_matching', cfg)
    fig.savefig(filename, bbox_inches='tight', dpi=300)
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(filename, provenance)
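# Side note (standalone illustration): the dict(zip(labels, handles)) trick
# used above keeps one legend entry per label because dict keys are unique;
# later handles with a duplicate label simply overwrite earlier ones.
labels_demo = ['CMIP members', 'CMIP members', 'Scenario steering']
handles_demo = ['line1', 'line2', 'marker1']   # placeholders for artists
by_label_demo = dict(zip(labels_demo, handles_demo))
# by_label_demo == {'CMIP members': 'line2', 'Scenario steering': 'marker1'}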
def reform_data_iris_deangelis3b4(input_data):
    """Extract data from IRIS cubes and average or reformat them."""
    # Model data for 'tas', 'rsnstcs'
    cubes = {}
    for my_short_name in ['tas', 'rsnstcs']:
        # my_data: List of dictionaries
        my_data = select_metadata(input_data, short_name=my_short_name)
        # subdata: dictionary
        for subdata in my_data:
            cube = iris.load(subdata['filename'])[0]
            cat.add_year(cube, 'time', name='year')
            cube = cube.aggregated_by('year', iris.analysis.MEAN)
            experiment = subdata['exp']
            if experiment == 'abrupt-4xCO2':
                experiment = 'abrupt4xCO2'
            dataset = subdata['dataset']
            cubetuple = (dataset, my_short_name, experiment)
            if experiment == 'piControl':
                # DeAngelis use a 21 month running mean on piControl but the
                # full extent of 150 years abrupt4xCO2. I could not find out
                # how they treat the edges; currently I just skip the mean for
                # the edges. This is not exactly the same as done in the
                # paper (small differences remain in extended data Fig. 1),
                # but it is closer than other methods I tried, e.g. skipping
                # the edges.
                # For most data sets it would also be possible to extend the
                # piControl for 20 years, but then it would no longer be
                # centered means of piControl for each year of abrupt4xCO2.
                # cube_new = cube.rolling_window('time',iris.analysis.MEAN, 21)
                # endm10 = len(cube.coord('time').points) - 10
                # cube.data[10:endm10] = cube_new.data
                cube.data = scisi.savgol_filter(cube.data, 21, 1, axis=0)
            cubes[cubetuple] = cube.data

    # Model data and observations for 'rsnstcsnorm', and 'prw'
    for my_short_name in ['rsnstcsnorm', 'prw']:
        # my_data: List of dictionaries
        my_data = select_metadata(input_data, short_name=my_short_name)
        # subdata: dictionary
        for subdata in my_data:
            if 'exp' in subdata.keys():
                experiment = subdata['exp']
            else:
                experiment = 'nomodel'
            dataset = subdata['dataset']
            cubetuple = (dataset, my_short_name, experiment)
            if experiment in ['piControl', 'nomodel']:
                cube = iris.load(subdata['filename'])[0]
                total_len = len(cube.coord('time').points) * \
                    len(cube.coord('latitude').points) * \
                    len(cube.coord('longitude').points)
                data_new = np.reshape(cube.data, total_len)
                cubes[cubetuple] = data_new

    return cubes
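# Standalone sketch (illustrative): with polyorder=1, scipy's savgol_filter
# behaves like a centred 21-point running mean in the interior of the series
# while still returning (polynomial-fitted) values at the edges, which
# matches the edge-handling discussion in the comment above.
import numpy as np
from scipy.signal import savgol_filter
series = np.random.default_rng(0).normal(size=150)   # hypothetical 150-yr series
smoothed = savgol_filter(series, 21, 1)               # same length as input
running_mean = np.convolve(series, np.ones(21) / 21, mode='valid')
# In the interior, smoothed[10:-10] equals running_mean (up to rounding).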
def main(cfg):
    """Run the diagnostic."""
    input_data = (
        select_metadata(cfg['input_data'].values(), short_name='tas') +
        select_metadata(cfg['input_data'].values(), short_name='tasa'))
    if not input_data:
        raise ValueError("This diagnostics needs 'tas' or 'tasa' variable")

    # Calculate psi for every dataset
    psis = {}
    psi_attrs = {
        'short_name': 'psi',
        'long_name': 'Temperature variability metric',
        'units': 'K',
    }
    grouped_data = group_metadata(input_data, 'dataset')
    for (dataset, [data]) in grouped_data.items():
        logger.info("Processing %s", dataset)
        cube = iris.load_cube(data['filename'])
        iris.coord_categorisation.add_year(cube, 'time')
        cube = cube.aggregated_by('year', iris.analysis.MEAN)
        psi_cube = calculate_psi(cube, cfg)
        data.update(psi_attrs)
        data.pop('standard_name', '')

        # Provenance
        caption = ("Temporal evolution of temperature variability metric psi "
                   "between {start_year} and {end_year} for {dataset}.".format(
                       **data))
        provenance_record = get_provenance_record(caption, [data['filename']])
        out_path = get_diagnostic_filename('psi_' + dataset, cfg)
        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(out_path, provenance_record)

        # Save psi for every dataset
        data['filename'] = out_path
        io.metadata_to_netcdf(psi_cube, data)

        # Save averaged psi
        psis[dataset] = np.mean(psi_cube.data)

    # Save averaged psis for every dataset in one file
    out_path = get_diagnostic_filename('psi', cfg)
    io.save_scalar_data(psis,
                        out_path,
                        psi_attrs,
                        attributes=psi_cube.attributes)

    # Provenance
    caption = "{long_name} for mutliple climate models.".format(**psi_attrs)
    ancestor_files = [d['filename'] for d in input_data]
    provenance_record = get_provenance_record(caption, ancestor_files)
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(out_path, provenance_record)
def main(cfg):
    """Process data for use as input to the PCR-GLOBWB hydrological model."""
    for dataset, metadata in group_metadata(cfg['input_data'].values(),
                                            'dataset').items():
        for short_name in "pr", "tas":
            logger.info("Processing variable %s for dataset %s", short_name,
                        dataset)

            # Load preprocessed cubes for normal data and climatology
            var = select_metadata(metadata, variable_group=short_name)[0]
            cube = iris.load_cube(var['filename'])
            var_climatology = select_metadata(
                metadata,
                variable_group=short_name + '_climatology',
            )[0]
            cube_climatology = iris.load_cube(var_climatology['filename'])

            # Create a spin-up year for pcrglob based on the climatology data
            cube = add_spinup_year(cube, cube_climatology)

            # Round times to integer number of days
            time_coord = cube.coord('time')
            time_coord.points = da.floor(time_coord.core_points())
            time_coord.bounds = None
            time_coord.guess_bounds()

            # Set lat from highest to lowest value
            cube = cube[:, ::-1, ...]

            # Workaround for bug in PCRGlob
            # (see https://github.com/UU-Hydro/PCR-GLOBWB_model/pull/13)
            for coord_name in ['latitude', 'longitude']:
                coord = cube.coord(coord_name)
                coord.points = coord.points + 0.001

            # Unit conversion of precip from 'kg m-2 day-1' to 'm' (divide by
            # water density)
            if short_name == "pr":
                cube.units = cube.units / 'kg m-3 day-1'
                cube.data = cube.core_data() / 1000

            # Save data
            basename = '_'.join([
                'pcrglobwb',
                Path(var['filename']).stem,
                cfg['basin'],
            ])
            output_file = get_diagnostic_filename(basename, cfg)
            iris.save(cube, output_file, fill_value=1.e20)

            # Store provenance
            provenance_record = get_provenance_record(
                [var['filename'], var_climatology['filename']])
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(output_file, provenance_record)
def main(cfg):
    """Run the diagnostic."""
    input_data = (
        select_metadata(cfg['input_data'].values(), short_name='tas') +
        select_metadata(cfg['input_data'].values(), short_name='tasa'))
    if not input_data:
        raise ValueError("This diagnostics needs 'tas' or 'tasa' variable")

    # Get tas data
    tas_cubes = {}
    tas_obs = []
    for (dataset, [data]) in group_metadata(input_data, 'dataset').items():
        cube = iris.load_cube(data['filename'])
        iris.coord_categorisation.add_year(cube, 'time')
        cube = cube.aggregated_by('year', iris.analysis.MEAN)
        tas_cubes[dataset] = cube
        if data['project'] == 'OBS':
            tas_obs.append(dataset)

    # Get time-dependent psi data
    psi_cubes = {}
    psi_obs = []
    for (dataset, [data]) in group_metadata(
            io.netcdf_to_metadata(cfg, pattern='psi_*.nc'), 'dataset').items():
        cube = iris.load_cube(data['filename'])
        cube = cube.aggregated_by('year', iris.analysis.MEAN)
        psi_cubes[dataset] = cube
        if data['project'] == 'OBS':
            psi_obs.append(dataset)

    # Get psi, ECS and lambda (net feedback parameter) for models
    (psi_cube, ecs_cube, lambda_cube) = get_external_cubes(cfg)

    # Plots
    for obs_name in tas_obs:
        logger.info("Observation for tas: %s", obs_name)
        plot_temperature_anomaly(cfg, tas_cubes, lambda_cube, obs_name)
    for obs_name in psi_obs:
        logger.info("Observation for psi: %s", obs_name)
        plot_psi(cfg, psi_cubes, lambda_cube, obs_name)
        obs_cube = psi_cubes[obs_name]
        plot_emergent_relationship(cfg, psi_cube, ecs_cube, lambda_cube,
                                   obs_cube)
        plot_pdf(cfg, psi_cube, ecs_cube, obs_cube)
        plot_cdf(cfg, psi_cube, ecs_cube, obs_cube)

        # Print ECS range
        ecs_range = get_ecs_range(cfg, psi_cube, ecs_cube, obs_cube)
        logger.info("Observational constraint: Ψ = (%.2f ± %.2f) K",
                    np.mean(obs_cube.data), np.std(obs_cube.data))
        logger.info(
            "Constrained ECS range: (%.2f - %.2f) K with best "
            "estimate %.2f K", ecs_range[1], ecs_range[2], ecs_range[0])
def get_control_exper_obs(short_name, input_data, cfg, cmip_type):
    """
    Get control, exper and obs datasets.
    This function is used when running recipes that need
    a clear distinction between a control dataset, an experiment
    dataset and have optional obs (OBS, obs4mips etc) datasets;
    such recipes include recipe_validation, and all the autoassess
    ones;
    short_name: variable short name
    input_data: dict containing the input data info
    cfg: config file as used in this module
    """
    # select data per short name and CMIP type
    dataset_selection = select_metadata(input_data,
                                        short_name=short_name,
                                        project=cmip_type)

    # get the obs datasets if specified in recipe
    if 'observational_datasets' in cfg:
        obs_selection = [
            select_metadata(input_data,
                            short_name=short_name,
                            dataset=obs_dataset)[0]
            for obs_dataset in cfg['observational_datasets']
        ]
    else:
        obs_selection = []

    # print out OBS's
    if obs_selection:
        logger.info("Observations dataset(s) %s",
                    [obs['dataset'] for obs in obs_selection])

    # determine CONTROL and EXPERIMENT datasets

    # corner case: they could be the same dataset name
    if cfg['control_model'] == cfg['exper_model']:
        logger.info("Identical Control/Experiment dataset names: %s",
                    dataset_selection[0]['dataset'])
        control, experiment = _disentagle_iden_datasets(dataset_selection)
        return control, experiment, obs_selection

    # if they're not the same dataset, fire away
    for model in dataset_selection:
        if model['dataset'] == cfg['control_model']:
            logger.info("Control dataset %s", model['dataset'])
            control = model
        elif model['dataset'] == cfg['exper_model']:
            logger.info("Experiment dataset %s", model['dataset'])
            experiment = model

    return control, experiment, obs_selection
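# Hypothetical illustration (values made up, not from a real recipe) of the
# cfg entries this function relies on and of a typical call:
example_cfg = {
    'control_model': 'UKESM1-0-LL',
    'exper_model': 'HadGEM3-GC31-LL',
    'observational_datasets': ['ERA-Interim'],   # optional
}
# control, experiment, obs = get_control_exper_obs(
#     'tas', input_data, example_cfg, 'CMIP6')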
def main(cfg):
    """Calculate, visualize and save the bias and change for each model."""
    metadata = cfg['input_data'].values()
    grouped_metadata = group_metadata(metadata, 'variable_group')

    biases = {}
    changes = {}
    ancestors = []
    for group, metadata in grouped_metadata.items():

        model_metadata = select_metadata(metadata, tag='model')
        model_data, model_ancestors = load_data(model_metadata)
        ancestors.extend(model_ancestors)

        variable = model_data.name

        if group.endswith('bias'):
            obs_metadata = select_metadata(metadata, tag='observations')
            obs_data, obs_ancestors = load_data(obs_metadata)
            ancestors.extend(obs_ancestors)

            bias = calculate_bias(model_data, obs_data)
            biases[variable] = bias

        elif group.endswith('change'):
            changes[variable] = model_data

        else:
            logger.warning(
                "Got input for variable group %s"
                " but I don't know what to do with it.", group)

    # Combine all variables
    bias = xr.Dataset(biases)
    change = xr.Dataset(changes)
    combined = xr.concat([bias, change], dim='metric')
    combined['metric'] = [
        'Bias (RMSD of all gridpoints)', 'Mean change (Future - Reference)'
    ]

    dataframe = combined.rename(
        tas='Temperature (K)',
        pr='Precipitation (kg/m2/s)',
    ).to_dataframe()
    dataframe.columns.name = 'variable'
    tidy_df = dataframe.stack('variable').unstack('metric')

    plot_scatter(tidy_df, ancestors, cfg)
    plot_table(tidy_df, ancestors, cfg)
    plot_htmltable(tidy_df, ancestors, cfg)

    return
def _get_ancestor_files(cfg, obs_name, projects=None):
    """Get ancestor files for provenance."""
    if projects is None:
        projects = _get_project(cfg)
    if isinstance(projects, str):
        projects = [projects]
    datasets = []
    for project in projects:
        datasets.extend(
            select_metadata(cfg['input_data'].values(), project=project))
    datasets.extend(
        select_metadata(cfg['input_data'].values(), dataset=obs_name))
    return [d['filename'] for d in datasets]
def make_plots(cfg,
               dataset,
               data,
               data_tci,
               varname_sm="mrlsl",
               varname_hf="hfls",
               varname_tci="tci"):
    """Shim routine between ESMValTool and the generic plot_tci()."""

    meta = select_metadata(data, short_name=varname_sm)[0]

    filename_maps = _get_plot_filename(meta, cfg, varname_tci)

    model_desc = "{:s}, {:s}, {:s}, {:s}, {:d}-{:d}".format(
        meta["project"],
        meta["exp"],
        meta["dataset"],
        meta["ensemble"],
        meta["start_year"],
        meta["end_year"],
    )

    title = ("Terrestrial Coupling Index "
             "({units}) {varname_sm} - {varname_hf}\n{model_desc}".format(
                 units=str(data_tci.units),
                 varname_sm=varname_sm,
                 varname_hf=varname_hf,
                 model_desc=model_desc,
             ))

    plot_tci(data_tci, filename_maps, title=title)
    return
def _get_datasets_for_ec(input_data):
    """Check input data."""
    features = select_metadata(input_data, var_type='feature')
    labels = select_metadata(input_data, var_type='label')
    pred_input = select_metadata(input_data, var_type='prediction_input')
    pred_input_err = select_metadata(input_data,
                                     var_type='prediction_input_error')
    data_to_check = {
        'feature': features,
        'label': labels,
        'prediction_input': pred_input,
        'prediction_input_error': pred_input_err,
    }
    for (name, data) in data_to_check.items():
        _check_datasets(data, name)
    return (features, labels, pred_input, pred_input_err)
def init_mkthe_te(model, wdir, input_data):
    """Compute auxiliary fields or perform time averaging of existing fields.

    Arguments:
    - model: the model name;
    - wdir: the working directory where the outputs are stored;
    - input_data: a list of metadata dictionaries describing the input fields;

    Author:
    Valerio Lembo, University of Hamburg (2019).
    """
    cdo = Cdo()
    rlut_file = e.select_metadata(input_data, short_name='rlut',
                                  dataset=model)[0]['filename']
    # Compute monthly mean fields from 2D surface daily fields
    # emission temperature
    te_file = wdir + '/{}_te.nc'.format(model)
    cdo.sqrt(input="-sqrt -mulc,{} {}".format(SIGMAINV, rlut_file),
             output=te_file)
    te_ymm_file = wdir + '/{}_te_ymm.nc'.format(model)
    cdo.yearmonmean(input=te_file, output=te_ymm_file)
    te_gmean_file = wdir + '/{}_te_gmean.nc'.format(model)
    cdo.timmean(input='-fldmean {}'.format(te_ymm_file), output=te_gmean_file)
    with Dataset(te_gmean_file) as f_l:
        te_gmean_constant = f_l.variables['rlut'][0, 0, 0]
    return te_ymm_file, te_gmean_constant, te_file
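# Quick numerical check (illustrative): assuming SIGMAINV holds 1/sigma, the
# nested "-sqrt -mulc,SIGMAINV" call above is the fourth root of rlut/sigma,
# i.e. the emission temperature from the Stefan-Boltzmann law.
SIGMA_SB = 5.67e-8                      # W m-2 K-4, Stefan-Boltzmann constant
rlut_example = 240.0                    # W m-2, hypothetical global-mean OLR
te_example = (rlut_example / SIGMA_SB) ** 0.25   # ~255 K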
def _get_mmm_tas(rad_var, rad_datasets, tas_datasets):
    """Get multi-model mean for tas data."""
    logger.debug(
        "Calculating multi-model mean 'tas' for radiation variable '%s'",
        rad_var)
    ancestors = []
    dataset_names = []
    mmm = []
    for dataset_name in [d['dataset'] for d in rad_datasets]:
        tas_data = select_metadata(tas_datasets, dataset=dataset_name)
        if not tas_data:
            raise ValueError(
                f"No 'tas' data for dataset '{dataset_name}' available for "
                f"multi-model mean calculation")
        cube = tas_data[0]['cube']
        ancestors.extend(tas_data[0]['ancestors'])
        dataset_names.append(dataset_name)
        mmm.append(cube.data)
    _check_array_shapes(mmm, 'tas')
    mmm = np.ma.array(mmm)
    mmm_cube = cube.copy(data=np.ma.mean(mmm, axis=0))
    attributes = {
        'ancestors': ancestors,
        'dataset': 'MultiModelMean',
        'datasets': '|'.join(dataset_names),
        'project': rad_datasets[0]['project'],
        'short_name': _get_tas_var('MultiModelMean', rad_var),
    }
    mmm_cube.attributes = attributes
    return {**attributes, 'cube': mmm_cube}
def _cmip_envelope(datasetlist, variable, target_year):
    """Determine the change in <variable> PDF of each CMIP model.

    Note: using xr.open_mfdataset is not possible due to different calendars.
    """
    cmip = select_metadata(datasetlist, variable_group=f'{variable}_cmip')
    envelope = []
    ancestors = []
    for data_dict in cmip:
        dataset = xr.open_dataset(data_dict['filename'])[variable]
        control = dataset.sel(time=slice('1981', '2010'))
        future = dataset.sel(time=slice(str(target_year -
                                            15), str(target_year + 15)))

        quantiles = [.05, .1, .25, .5, .75, .90, .95]
        qcontrol = control.groupby('time.season').quantile(quantiles)
        qfuture = future.groupby('time.season').quantile(quantiles)

        if variable == 'tas':
            # absolute diff
            envelope.append(qfuture - qcontrol)
        else:
            # pr; relative diff
            envelope.append((qfuture - qcontrol) / qcontrol * 100)
        ancestors.append(data_dict['filename'])

    cmip = xr.concat(envelope, dim='multimodel')
    provenance = _create_provenance_record(ancestors)

    # Prevent confusion between dimension 'quantile' and method 'quantile'
    return cmip.rename({'quantile': 'percentile'}), provenance
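# Standalone sketch (illustrative, synthetic data) of the per-season quantile
# step used above: group daily values by meteorological season and take a set
# of quantiles over time.
import numpy as np
import pandas as pd
import xarray as xr
times = pd.date_range('2000-01-01', '2001-12-31', freq='D')
da = xr.DataArray(np.random.default_rng(1).normal(size=times.size),
                  coords={'time': times}, dims='time')
seasonal_q = da.groupby('time.season').quantile([.05, .5, .95])
# seasonal_q has dims ('season', 'quantile'): DJF/MAM/JJA/SON x 3 quantiles.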
def get_residual_data(cfg):
    """Get residual data."""
    input_data = mlr_plot.get_input_datasets(cfg)
    residual_data = select_metadata(input_data, var_type='prediction_residual')
    if not residual_data:
        raise ValueError("No 'prediction_residual' data found")
    return group_metadata(residual_data, 'mlr_model_name')
def _set_axx_exfig2b(axx, cfg, datasets, reg_dict, sa_dict):
    """Text for exfig2b."""
    axx.plot(np.linspace(0.2, 1.4, 2), reg_dict["y_rsnst"], color='r')

    for iii, model in enumerate(datasets):
        proj = (select_metadata(cfg['input_data'].values(),
                                dataset=model))[0]['project']
        style = e.plot.get_dataset_style(model, style_file=proj.lower())
        axx.plot(sa_dict["rsnstcsdt"][iii],
                 sa_dict["rsnstdt"][iii],
                 marker=style['mark'],
                 color=style['color'],
                 markerfacecolor=style['facecolor'],
                 linestyle='none',
                 markersize=10,
                 markeredgewidth=2.0,
                 label=model)

    axx.set_xlabel(r'drsnstcs/dtas (W m$^{-2}$ K$^{-1}$)')
    axx.set_title(' ')
    axx.set_ylabel(r'drsnst/dtas (W m$^{-2}$ K$^{-1}$)')
    axx.set_xlim([0.45, 1.15])
    axx.set_xticks(np.linspace(0.5, 1.1, 7))
    axx.set_ylim([0.45, 1.15])
    axx.set_yticks(np.linspace(0.5, 1.1, 7))
    axx.text(
        0.85, 1.1, 'Fit (r = {:.2f}, slope = {:.2f})'.format(
            reg_dict["rsnst"].rvalue, reg_dict["rsnst"].slope))
    axx.legend(loc=2)

    return axx
def make_daily_var(cfg, input_data, short_name, getweights, metadata,
                   scale=1, offset=0):
    """Wrapper for dry_spell_rwr.utc_to_lt.make_daily_var() to derive some
    args from the ESMValTool config.
    """

    var_meta = select_metadata(input_data, short_name=short_name)[0]
    logger.info(var_meta)

    files_var = [var_meta["filename"], ]
    var_name = var_meta["short_name"]
    local_time = var_meta["local_time"]

    model_grid, file_sftlf = _get_model_grid(input_data)

    ut_var, ts_pad = _get_time_axis(files_var[0])
    logger.info("ts_pad = %s", ts_pad)

    file_out = _get_filename(var_meta, cfg)

    utc.make_daily_var(files_var, var_name, local_time, getweights,
                       model_grid, metadata, ut_var, tsPad=ts_pad,
                       scale=scale, offset=offset,
                       file_out=file_out)

    record_var = _get_provenance_record({}, files_var + [file_sftlf, ])
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(file_out, record_var)

    return file_out
def main(cfg):
    """Compute the time average for each input dataset."""
    # Get a description of the preprocessed data that we will use as input.
    input_data = cfg['input_data'].values()

    # Demonstrate use of metadata access convenience functions.
    selection = select_metadata(input_data, short_name='pr', project='CMIP5')
    logger.info("Example of how to select only CMIP5 precipitation data:\n%s",
                pformat(selection))

    selection = sorted_metadata(selection, sort='dataset')
    logger.info("Example of how to sort this selection by dataset:\n%s",
                pformat(selection))

    grouped_input_data = group_metadata(input_data,
                                        'standard_name',
                                        sort='dataset')
    logger.info(
        "Example of how to group and sort input data by standard_name:"
        "\n%s", pformat(grouped_input_data))

    # Example of how to loop over variables/datasets in alphabetical order
    for standard_name in grouped_input_data:
        logger.info("Processing variable %s", standard_name)
        for attributes in grouped_input_data[standard_name]:
            logger.info("Processing dataset %s", attributes['dataset'])
            input_file = attributes['filename']
            cube = compute_diagnostic(input_file)

            output_basename = os.path.splitext(
                os.path.basename(input_file))[0] + '_mean'
            provenance_record = get_provenance_record(
                attributes, ancestor_files=[input_file])
            plot_diagnostic(cube, output_basename, provenance_record, cfg)
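# Typical ESMValTool entry point (sketch; the standard pattern for diagnostics
# like the one above, not copied from the original file): run_diagnostic()
# parses the settings file passed by ESMValTool and provides 'cfg'.
from esmvaltool.diag_scripts.shared import run_diagnostic
if __name__ == '__main__':
    with run_diagnostic() as config:
        main(config)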
def calculate_ecs(input_data, cfg, description=None):
    """Calculate ECS and net climate feedback parameters."""
    logger.info("Calculating ECS and net climate feedback parameter")
    msg = '' if description is None else f' for {description}'
    ancestors = []
    ecs = {}
    feedback_parameter = {}

    # Iterate over all datasets and save ECS and feedback parameters
    for dataset in select_metadata(input_data, short_name='tas'):
        dataset_name = dataset['dataset']
        logger.debug("Calculating ECS%s of dataset '%s'", msg, dataset_name)
        rtnt_data = select_metadata(input_data,
                                    short_name='rtnt',
                                    dataset=dataset_name)
        if not rtnt_data:
            logger.debug(
                "No 'rtmt' or 'rtnt' data for '%s' available, skipping ECS "
                "calculation for it", dataset_name)
            continue
        tas_cube = dataset['cube']
        rtnt_cube = rtnt_data[0]['cube']
        if rtnt_cube.ndim > 2:
            raise ValueError(
                f"Calculating ECS is only supported for cubes with less than "
                f"3 dimensions, got {rtnt_cube.ndim:d}D cube")
        ancestors.extend(dataset['ancestors'] + rtnt_data[0]['ancestors'])
        coords = [(coord, idx - 1)
                  for (idx,
                       coord) in enumerate(rtnt_cube.coords(dim_coords=True))
                  if coord.name() != 'time']

        # Calculate ECS (using linear regression)
        reg = _vectorized_linregress(_get_data_time_last(tas_cube),
                                     _get_data_time_last(rtnt_cube))
        ecs[dataset_name] = iris.cube.Cube(-reg[1] / (2 * reg[0]),
                                           dim_coords_and_dims=coords)
        feedback_parameter[dataset_name] = iris.cube.Cube(
            reg[0], dim_coords_and_dims=coords)
    ancestors = list(set(ancestors))
    if not ecs:
        logger.info(
            "No 'rtmt' or 'rtnt' data available, skipping ECS calculation")
        return

    # Write data
    _write_scalar_data([ecs, feedback_parameter], ancestors, cfg, description)
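# Numerical sketch (illustrative, synthetic data) of the Gregory-style ECS
# estimate used above: regress the net TOA flux anomaly (rtnt) on the surface
# temperature anomaly (tas); the regression's x-intercept is the 4xCO2
# equilibrium warming, so ECS = -intercept / (2 * slope) for a CO2 doubling.
import numpy as np
rng_demo = np.random.default_rng(2)
tas_demo = np.linspace(0.5, 6.0, 150)            # hypothetical warming (K)
forcing, feedback = 7.4, -1.0                    # W m-2 and W m-2 K-1 (made up)
rtnt_demo = forcing + feedback * tas_demo + rng_demo.normal(0, 0.3, tas_demo.size)
slope, intercept = np.polyfit(tas_demo, rtnt_demo, 1)
ecs_demo = -intercept / (2 * slope)              # ~3.7 K for these numbers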
def _get_error_datasets(input_data, **kwargs):
    """Extract error datasets from input data."""
    input_data = select_metadata(input_data, **kwargs)
    error_data = []
    for dataset in input_data:
        if dataset.get('stderr', False):
            error_data.append(dataset)
    return error_data
def _get_sel_files_var(cfg, varnames):
    """Get filenames from cfg for all model mean and differen variables."""
    selection = []

    for var in varnames:
        for hlp in select_metadata(cfg['input_data'].values(), short_name=var):
            selection.append(hlp['filename'])

    return selection
def _get_ec_ancestors(cfg):
    """Get ancestor files for emergent constraint."""
    input_data = _get_input_data(cfg)
    ancestors = []
    for var_type in ('feature', 'label', 'prediction_input',
                     'prediction_input_error'):
        datasets = select_metadata(input_data, var_type=var_type)
        ancestors.extend([d['filename'] for d in datasets])
    return ancestors
def _get_cube(datasets, short_name):
    """Get cube with specific ``'short_name'`` from datasets."""
    datasets = select_metadata(datasets, short_name=short_name)
    if len(datasets) != 1:
        raise ValueError(
            f"Expected exactly one dataset with short_name '{short_name}', "
            f"got {len(datasets):d}:\n{datasets}")
    return iris.load_cube(datasets[0]['filename'],
                          ih.var_name_constraint(short_name))
def calculate_tcr(cfg):
    """Calculate transient climate response (TCR)."""
    tcr = {}

    # Get data
    input_data = cfg['input_data'].values()
    onepct_data = select_metadata(input_data, short_name='tas', exp='1pctCO2')

    # Iterate over all datasets
    for dataset in onepct_data:
        pi_data = select_metadata(input_data,
                                  short_name='tas',
                                  exp='piControl',
                                  dataset=dataset['dataset'])
        if not pi_data:
            raise ValueError(f"No 'piControl' data available for dataset "
                             f"'{dataset['dataset']}'")

        onepct_cube = iris.load_cube(dataset['filename'])
        pi_cube = iris.load_cube(pi_data[0]['filename'])

        # Get anomaly cube
        anomaly_cube = _get_anomaly_cube(onepct_cube, pi_cube)

        # Calculate TCR
        tas_2x = anomaly_cube[START_YEAR_IDX:END_YEAR_IDX].collapsed(
            'time', iris.analysis.MEAN).data
        new_tcr = tas_2x
        tcr[dataset['dataset']] = new_tcr
        logger.info("TCR (%s) = %.2f %s", dataset['dataset'], new_tcr,
                    anomaly_cube.units)

        # Plot
        (path, provenance_record) = _plot(cfg, anomaly_cube,
                                          dataset['dataset'], new_tcr)
        if path is not None:
            provenance_record['ancestors'] = [
                dataset['filename'],
                pi_data[0]['filename'],
            ]
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(path, provenance_record)

    return tcr
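# Numerical sketch (illustrative, synthetic data) of the TCR estimate above:
# in a 1 % yr-1 CO2 run, CO2 doubles after roughly 70 years, and TCR is the
# temperature anomaly averaged over a ~20-year window around that time.
# START_YEAR_IDX and END_YEAR_IDX above are assumed to bracket such a window.
import numpy as np
years_demo = np.arange(140)
anomaly_demo = 0.03 * years_demo + np.random.default_rng(3).normal(0, 0.1, 140)
tcr_demo = anomaly_demo[60:80].mean()    # ~2.1 K for this synthetic trend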