def _get_anomaly_data(input_data, year_idx=None):
    """Calculate anomaly data for all variables."""
    logger.info("Calculating anomaly data")
    project = input_data[0]['project']
    new_input_data = []
    for (var, var_data) in group_metadata(input_data, 'short_name').items():
        grouped_data = group_metadata(var_data, 'dataset')
        for (dataset_name, datasets) in grouped_data.items():
            logger.debug("Calculating '%s' anomaly for dataset '%s'", var,
                         dataset_name)
            data_4x = select_metadata(datasets, exp=EXP_4XCO2[project])
            data_pic = select_metadata(datasets, exp='piControl')

            # Check if all experiments are available
            if not data_4x:
                raise ValueError(
                    f"No '{EXP_4XCO2[project]}' data available for '{var}' of "
                    f"'{dataset_name}'")
            if not data_pic:
                raise ValueError(
                    f"No 'piControl' data available for '{var}' of "
                    f"'{dataset_name}'")

            # Calculate anomaly, extract correct years and save it
            cube = calculate_anomaly(data_4x, data_pic)
            _check_cube_dimensions(cube)
            cube = cube[year_idx]
            new_input_data.append({
                **data_4x[0],
                'ancestors': [data_4x[0]['filename'], data_pic[0]['filename']],
                'cube': cube,
            })
    msg = '' if not COORDS else f" with additional coordinates {COORDS['rad']}"
    logger.info("Found %iD 'tas' data and %iD radiation data%s",
                NDIMS.get('tas'), NDIMS.get('rad'), msg)
    return new_input_data
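
A minimal sketch (an illustration under assumptions, not part of the examples) of what the shared helpers used above are expected to return, assuming the usual esmvaltool.diag_scripts.shared import: group_metadata buckets a list of metadata dictionaries by the value of one key, and select_metadata filters such a list by keyword matches.

from esmvaltool.diag_scripts.shared import group_metadata, select_metadata

# Hypothetical metadata for one variable of one dataset
input_data = [
    {'short_name': 'tas', 'dataset': 'MODEL-A', 'exp': 'piControl'},
    {'short_name': 'tas', 'dataset': 'MODEL-A', 'exp': 'abrupt-4xCO2'},
]
by_var = group_metadata(input_data, 'short_name')  # {'tas': [both dicts]}
pic = select_metadata(input_data, exp='piControl')  # [the piControl dict only]
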
Example 2
def _get_anomaly_data(input_data):
    """Calculate anomaly data for all variables."""
    logger.info("Calculating anomaly data")
    project = input_data[0]['project']
    new_input_data = []
    for (var, var_data) in group_metadata(input_data, 'short_name').items():
        grouped_data = group_metadata(var_data, 'dataset')
        for (dataset_name, datasets) in grouped_data.items():
            logger.debug("Calculating '%s' anomaly for dataset '%s'", var,
                         dataset_name)
            data_4x = select_metadata(datasets, exp=EXP_4XCO2[project])
            data_pic = select_metadata(datasets, exp='piControl')

            # Check if all experiments are available
            if not data_4x:
                raise ValueError(
                    f"No '{EXP_4XCO2[project]}' data available for '{var}' of "
                    f"'{dataset_name}'")
            if not data_pic:
                raise ValueError(
                    f"No 'piControl' data available for '{var}' of "
                    f"'{dataset_name}'")

            # Calculate anomaly, extract correct years and save it
            cube = _calculate_anomaly(data_4x, data_pic)
            if cube.ndim != 1:
                raise ValueError(
                    f"This diagnostic supports only 1D (time), input data, "
                    f"got {cube.ndim}D data")
            new_input_data.append({
                **data_4x[0],
                'ancestors': [data_4x[0]['filename'], data_pic[0]['filename']],
                'cube': cube,
            })
    return new_input_data
def main(cfg):
    """Compute the time average for each input dataset."""
    # Get a description of the preprocessed data that we will use as input.
    input_data = cfg['input_data'].values()

    # Demonstrate use of metadata access convenience functions.
    selection = select_metadata(input_data, short_name='tas', project='CMIP5')
    logger.info("Example of how to select only CMIP5 temperature data:\n%s",
                pformat(selection))

    selection = sorted_metadata(selection, sort='dataset')
    logger.info("Example of how to sort this selection by dataset:\n%s",
                pformat(selection))

    grouped_input_data = group_metadata(input_data,
                                        'variable_group',
                                        sort='dataset')
    logger.info(
        "Example of how to group and sort input data by variable groups from "
        "the recipe:\n%s", pformat(grouped_input_data))

    # Example of how to loop over variables/datasets in alphabetical order
    groups = group_metadata(input_data, 'variable_group', sort='dataset')
    for group_name in groups:
        logger.info("Processing variable %s", group_name)
        for attributes in groups[group_name]:
            logger.info("Processing dataset %s", attributes['dataset'])
            input_file = attributes['filename']
            cube = compute_diagnostic(input_file)

            output_basename = Path(input_file).stem
            if group_name != attributes['short_name']:
                output_basename = group_name + '_' + output_basename
            provenance_record = get_provenance_record(
                attributes, ancestor_files=[input_file])
            plot_diagnostic(cube, output_basename, provenance_record, cfg)
def main(cfg):
    # The config object is a dict of all the metadata from the pre-processor
    logger.debug(cfg)

    projects = group_metadata(cfg["input_data"].values(), "project")

    for k, p in projects.items():
        m_list = set()
        for ds in p:
            if k == "CORDEX":
                ds_str = f"{ds['driver']} - {ds['dataset']}"
            else:
                ds_str = ds["dataset"]
            m_list.add(ds_str)
        print(f"{k} - {len(m_list)} models:")
        print(m_list)
def prepare_data(config):
    """Perform data calculations."""
    groups = group_metadata(config['input_data'].values(), 'variable_group')
    zm_g = groups["tos_zm"]
    zm_ref = prepare_reference(zm_g)['cube']
    zm_errors = [calc_error(dataset['cube'], zm_ref) for dataset in zm_g]
    eq_g = groups["tos_eq"]
    eq_ref = mask_equatorial(prepare_reference(eq_g)['cube'])
    eqs = [mask_equatorial(ds['cube']) for ds in eq_g]
    eq_errors = [calc_error(eq, eq_ref) for eq in eqs]
    data = {
        'zonal_mean_errors': zm_errors,
        'equatorials': eqs,
        'equatorial_ref': eq_ref,
        'equatorial_errors': eq_errors,
    }
    return data
Example 6
def do_preamble(cfg):
    """Execute some preamble functionality"""
    # prepare output dirs
    time_chunks = ['alltime', 'DJF', 'MAM', 'JJA', 'SON']
    time_plot_dirs = [
        os.path.join(cfg['plot_dir'], t_dir) for t_dir in time_chunks
    ]
    for time_plot_dir in time_plot_dirs:
        if not os.path.exists(time_plot_dir):
            os.makedirs(time_plot_dir)

    # get data
    input_data = cfg['input_data'].values()
    grouped_input_data = group_metadata(
        input_data, 'short_name', sort='dataset')

    return input_data, grouped_input_data
Example 7
def main(cfg):
    """
    Main function. Handles data wrangling and such.

    Parameters
    ----------
    cfg - Dictionary
        Nested dictionary containing dataset names and variables.

    Returns
    -------
    None.

    Notes
    -----
    * Dictionary returned by preprocessor is keyed by dataset name, value is
      list of metadata dictionaries for variables belonging to that dataset.
      Ex: dict = {'MPI-ESM-LR': [var1, var2...]}, where var1, var2 are dicts
      holding all variable metadata.
    * Since the preprocessor extracts the 1000 hPa level data,
      the cube's data will have shape (36, 180, 360) corresponding
      to time (in months), latitude, longitude. 
    """
    # Plot configuration dictionary.
    plt_config = {
        'ggplot': True,
        'out_dir': cfg['plot_dir'],
        'plt_name': 'time_series-initial_analysis-giss-{}.pdf',
        'time_interval': 'annual',
        'title': 'Annual Area Average - {}'
    }
    file_dict = group_metadata(cfg['input_data'].values(), 'dataset')
    common_emip_funcs.log_meta_dict(file_dict, main_log)
    # Get a dictionary keyed on variable name where the value is a list of
    # variable metadata dict from the various model configs.
    var_groups = common_emip_funcs.group_meta_by_var(file_dict)

    # Iterate over the variable dictionary and process each variable one by one.
    for esm_var, dict_list in var_groups.items():
        # Get list of ESMVariable objects.
        var_list = [
            ESMVariable(var_dict).get_area_statistic('mean')
            for var_dict in dict_list
        ]
        common_emip_funcs.plot_timeseries(var_list, plt_config)
Example 8
def main(cfg):
    """Compute the time average for each input dataset."""
    input_data = group_metadata(cfg['input_data'].values(),
                                'standard_name',
                                sort='dataset')

    for standard_name in input_data:
        logger.info("Processing variable %s", standard_name)
        # Load reference dataset
        for attributes in input_data[standard_name]:
            if attributes['reference_dataset'] == attributes['dataset']:
                reference_name = attributes['dataset']
                logger.info("Using %s as a reference dataset", reference_name)
                reference_filename = attributes['filename']
                reference = iris.load_cube(reference_filename)
                reference = reference.collapsed('time', MEAN)
                logger.info("Reference cube:\n%s\n%s", reference_filename,
                            reference)
                break
        else:
            raise ValueError("No reference_dataset defined in recipe.")

        # Compute and plot correlation
        for attributes in input_data[standard_name]:
            if attributes['dataset'] == reference_name:
                continue
            logger.info("Processing dataset %s", attributes['dataset'])

            filename = attributes['filename']
            dataset = iris.load_cube(filename)
            kwargs = cfg.get('pearsonr', {})
            logger.info(
                "Computing correlation with settings %s between "
                "reference and cube:\n%s\n%s", kwargs, filename, dataset)
            dataset = dataset.collapsed('time', MEAN)
            cube = pearsonr(dataset, reference, **kwargs)

            name = '{}_correlation_with_{}'.format(
                os.path.splitext(os.path.basename(filename))[0],
                reference_name)
            provenance_record = get_provenance_record(
                attributes,
                ancestor_files=[reference_filename, filename],
                plot_type=cfg['plot_type'])
            plot_diagnostic(cube, name, provenance_record, cfg)
Example 9
def main(cfg):
    """Process data for use as input to the LISFLOOD hydrological model."""
    input_metadata = cfg['input_data'].values()
    logger.info(input_metadata)

    for dataset, metadata in group_metadata(input_metadata, 'dataset').items():
        cubes, ancestors = get_input_cubes(metadata)

        if dataset == 'ERA5':
            shift_era5_time_coordinate(cubes['tas'])
            shift_era5_time_coordinate(cubes['tdps'])
            shift_era5_time_coordinate(cubes['uas'])
            shift_era5_time_coordinate(cubes['vas'])

        # Compute additional variables as input for lisvap
        tdps = cubes.pop('tdps')
        uas = cubes.pop('uas')
        vas = cubes.pop('vas')
        cubes['e'] = compute_vapour_pressure(tdps)
        ancestors['e'] = ancestors['tdps']
        cubes['sfcWind'] = compute_windspeed(uas, vas)
        ancestors['sfcWind'] = ancestors['uas'] + ancestors['vas']

        cubes['pr'].units = 'mm d-1'

        for var_name, cube in cubes.items():
            # Western hemisphere longitudes should be negative
            points = cube.coord('longitude').points
            cube.coord('longitude').points = (points + 180) % 360 - 180
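            # ((points + 180) % 360 - 180) maps longitudes from [0, 360) onto
            # [-180, 180); points east of 180 degrees become negative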
            # latitudes decreasing
            cube = cube[:, ::-1, ...]

            # convert to xarray dataset (xrds)
            # remove coordinate bounds, drop extra coordinates and reorder
            xrds = xr.DataArray.from_iris(cube).to_dataset()
            ordered_coords = ['lon', 'lat', 'time']
            extra_coords = np.setdiff1d(xrds.coords, ordered_coords)
            xrds = xrds.drop(extra_coords)[ordered_coords + [var_name]]

            output_file = save(xrds, var_name, dataset, cfg)

            # Store provenance
            provenance_record = get_provenance_record(ancestors[var_name])
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(output_file, provenance_record)
Example 10
def setup_namelist(cfg):
    """Set the namelist file of the cvdp package."""
    input_data = cfg['input_data'].values()
    grouped_selection = group_metadata(input_data, 'alias')

    content = []
    for _, attributes in grouped_selection.items():
        for item in attributes:
            create_link(cfg, item["filename"], item['alias'])
        ppath = "{0}/".format(cfg['lnk_dir'])
        content.append("{0} | {1}{0} | {2} | {3}\n".format(
            attributes[0]["alias"], ppath, attributes[0]["start_year"],
            attributes[0]["end_year"]))

    namelist = os.path.join(cfg['run_dir'], "namelist")

    with open(namelist, 'w') as namelist_file:
        namelist_file.write("\n".join(content))
Example 11
def main(cfg):
    """Load and plot hydro forcing data."""
    plot_type = cfg['plot_type']

    input_data = cfg['input_data'].values()
    variable_groups = group_metadata(input_data, 'variable_group')

    plot_func_mapping = {
        'climatology': plot_climatology,
        'timeseries': plot_timeseries,
    }

    for metadata in variable_groups.values():
        try:
            plot_func = plot_func_mapping[plot_type]
        except KeyError as err:
            raise ValueError(f'Unknown plot_type: {plot_type!r}') from err

        plot_func(cfg, metadata=metadata)
Example 12
def main(cfg):
    """Process data for use as input to the LISFLOOD hydrological model."""
    input_metadata = cfg['input_data'].values()
    logger.info(input_metadata)

    for dataset, metadata in group_metadata(input_metadata, 'dataset').items():
        cubes, ancestors = get_input_cubes(metadata)

        if dataset == 'ERA5':
            shift_era5_time_coordinate(cubes['tas'])
            shift_era5_time_coordinate(cubes['tdps'])
            shift_era5_time_coordinate(cubes['uas'])
            shift_era5_time_coordinate(cubes['vas'])

        # Compute additional variables as input for lisvap
        tdps = cubes.pop('tdps')
        uas = cubes.pop('uas')
        vas = cubes.pop('vas')
        cubes['e'] = compute_vapour_pressure(tdps)
        ancestors['e'] = ancestors['tdps']
        cubes['sfcWind'] = compute_windspeed(uas, vas)
        ancestors['sfcWind'] = ancestors['uas'] + ancestors['vas']

        cubes['pr'].units = 'mm d-1'

        for var_name, cube in cubes.items():
            cube.remove_coord('shape_id')
            # Western hemisphere longitudes should be negative
            points = cube.coord('longitude').points
            cube.coord('longitude').points = (points + 180) % 360 - 180
            # latitudes decreasing
            cube = cube[:, ::-1, ...]

            output_file = save(cube, var_name, dataset, cfg)

            # Store provenance
            provenance_record = get_provenance_record(ancestors[var_name])
            with ProvenanceLogger(cfg) as provenance_logger:
                provenance_logger.log(output_file, provenance_record)
Example 13
def _get_multi_model_mean(input_data):
    """Get multi-model mean for all variables."""
    logger.info("Calculating multi-model means")
    project = input_data[0]['project']
    mmm_data = []
    for (var, datasets) in group_metadata(input_data, 'short_name').items():
        logger.debug("Calculating multi-model mean for variable '%s'", var)
        ancestors = []
        dataset_names = []
        mmm = []
        for dataset in datasets:
            try:
                cube = dataset['cube']
            except KeyError:
                raise KeyError(
                    f"No data for '{var}' of dataset '{dataset['dataset']}' "
                    f"for multi-model mean calculation")
            if cube.ndim > 1:
                raise ValueError(
                    f"Calculation of multi-model mean not supported for input "
                    f"data with more than one dimension (which should be "
                    f"time), got {cube.ndim:d}-dimensional cube")
            ancestors.extend(dataset['ancestors'])
            dataset_names.append(dataset['dataset'])
            mmm.append(cube.data)
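        # Stack the per-dataset 1D time series into a (dataset, time) masked
        # array and average over the dataset axis to get the multi-model mean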
        mmm = np.ma.array(mmm)
        mmm_cube = cube.copy(data=np.ma.mean(mmm, axis=0))
        attributes = {
            'ancestors': ancestors,
            'dataset': 'MultiModelMean',
            'datasets': '|'.join(dataset_names),
            'project': project,
            'short_name': var,
        }
        mmm_cube.attributes = attributes
        mmm_data.append({**attributes, 'cube': mmm_cube})
    input_data.extend(mmm_data)
    return input_data
Example 14
def get_anomalies(ds_list, relative=False):
    # determine historic and future periods
    start_years = list(group_metadata(ds_list, "start_year"))
    base_clim_start = min(start_years)
    fut_clim_start = max(start_years)

    # construct baseline
    base_metadata = select_metadata(ds_list, start_year=base_clim_start)
    base_file = base_metadata[0]["filename"]
    base_cube = iris.load_cube(base_file)

    # get future
    fut_metadata = select_metadata(ds_list, start_year=fut_clim_start)
    fut_file = fut_metadata[0]["filename"]
    fut_cube = iris.load_cube(fut_file)

    if relative:
        diff = fut_cube - base_cube
        anomaly = (diff / base_cube) * 100.0
        anomaly.units = "%"
    else:
        anomaly = fut_cube - base_cube

    return anomaly
    def _compute_dataset(self, alias, dataset):
        var_info = group_metadata(dataset, 'short_name')
        logger.info('Computing %s', alias)
        area_cello = iris.load_cube(var_info['areacello'][0]['filename'])
        cellarea = area_cello.data
        sit = iris.load_cube(var_info['sit'][0]['filename'])
        mask = np.asarray(sit.coord('latitude').points > 80.0, dtype=np.int8)
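        # The latitude mask may have fewer dimensions than the cell-area
        # array; try a direct broadcast first, then retry with a trailing or
        # leading axis added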
        try:
            mask = np.broadcast_to(mask, cellarea.shape)
        except ValueError:
            try:
                mask = np.broadcast_to(np.expand_dims(mask, -1),
                                       cellarea.shape)
            except ValueError:
                mask = np.broadcast_to(np.expand_dims(mask, 0), cellarea.shape)
        volume = self.compute_volume(sit, cellarea, mask=mask)
        del cellarea, sit

        neg_feedback, stats, _ = self.negative_seaice_feedback(
            var_info['sit'][0], volume, period=12, order=2)
        del volume
        logger.info("Negative feedback: %10.4f", neg_feedback)
        logger.info("P-Value:           %10.4f", stats[1])
        return (neg_feedback, stats[1])
def main(cfg):
    """Calculate linear regression between albedo and xxfrac.

    Arguments:
    ---------
        cfg - nested dictionary of metadata
    """
    # Assemble the data dictionary keyed by dataset name
    my_files_dict = group_metadata(cfg['input_data'].values(), 'dataset')
    all_short_names = [
        'alb', 'snc', 'cropFrac', 'treeFrac', 'grassFrac', 'shrubFrac',
        'pastureFrac'
    ]

    # Loop over all datasets
    for dataset_name in my_files_dict:
        dataset_dict = my_files_dict[dataset_name]

        if dataset_name == 'Duveiller2018':
            logger.info("Only do plotting for dataset %s", dataset_name)
            cube = iris.load_cube(dataset_dict[0]['filename'])
            # Set plot title and plot suptitle
            cube.attributes['plottitle'] = cube.coord('time').units.num2date(
                cube.coord('time').points)[0].strftime('%b') + '-'\
                + 'Duveiller2018'
            cube.attributes['model_id'] = 'Duveiller2018'

            _plot_cube(cube, cfg)
            continue

        logger.info("Starting diagnostic for dataset %s", dataset_name)

        # Now reorder the dictionary in a meaningful way, making data
        # accessible by short name
        datadict = {}
        for file_dict in dataset_dict:
            if file_dict['short_name'] in all_short_names:
                datadict[file_dict['short_name']] = file_dict

        # Define the different lc classes
        this_models_xxfracs = [key for key in datadict if 'Frac' in key]
        # Note that lc3 class depends on the classes available for this model
        lc3_class = cfg['params']['lc3_class']
        cfg['params']['lc3_class'] = [
            key for key in this_models_xxfracs if key in lc3_class
        ]

        # Load all data
        model_data = {
            frac_key: iris.load_cube(datadict[frac_key]['filename'])
            for frac_key in this_models_xxfracs
        }
        # Load albedo and snow cover
        model_data['alb'] = iris.load_cube(datadict['alb']['filename'])
        model_data['snc'] = iris.load_cube(datadict['snc']['filename'])

        # Make sure that for each cube the dimension equals 2
        assert {c.ndim for _, c in model_data.items()} == {2}

        # Add the appropriate masks to model_data
        model_data = _add_masks_albedolandcover(model_data,
                                                this_models_xxfracs, cfg)

        # Now get albedo change due to landcover change
        alb_lc = _get_reconstructed_albedos(model_data, cfg)

        # Now mask where albedo values are physically impossible
        alb_lc[alb_lc < 0] = np.nan
        alb_lc[alb_lc > 1] = np.nan

        # Calculate differences between them and save
        _write_albedochanges_to_disk(alb_lc, model_data['snc'], datadict, cfg)

        # Loop through all nc files and plot them
        for ncfile in glob.glob(os.path.join(cfg['work_dir'], '*.nc')):
            transition_cube = iris.load_cube(ncfile)
            _plot_cube(transition_cube, cfg)
Example 17
    def compute(self):
        print('----------- COMPUTE ----------')
        # ---------------------------------------------------------------------
        # Every dataset in the recipe is associated with an alias. We are going
        # to use the alias and the group_metadata shared function to loop over
        # the datasets.
        # ---------------------------------------------------------------------
        data = group_metadata(self.cfg['input_data'].values(), 'alias')
        ssp_trend = {}
        ssp_clim = {}
        hist_trend = {}
        hist_clim = {}
        # Loop over the datasets.
        for alias in data:
            exp = data[alias][0]['exp']
            variables = group_metadata(data[alias], 'short_name')
            # Returns the path to the preprocessed files.
            tas_file = variables['tas'][0]['filename']
            tas = iris.load(tas_file)[0]
            tas.convert_units('degC')

            # Calculate Trends
            nlat = tas.coord('latitude').shape[0]
            nlon = tas.coord('longitude').shape[0]
            lat = tas.coord('latitude').points
            lon = tas.coord('longitude').points
            time_array = np.arange(1, tas.coord('time').shape[0] + 1, 1)
            regr = np.zeros([nlat, nlon])
            for j in range(nlat):
                for k in range(nlon):
                    p = np.polyfit(time_array, tas[:, j, k].data, 1)
                    regr[j, k] = p[0] * 10  # the 10 is to convert to decadal
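            # A possible vectorised alternative (sketch only, assuming
            # unmasked data): np.polyfit accepts a 2D y array and fits each
            # column separately, so the double loop could be replaced by
            #   flat = tas.data.reshape(tas.shape[0], -1)
            #   regr = np.polyfit(time_array, flat, 1)[0].reshape(nlat, nlon) * 10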
            latitude = DimCoord(lat, standard_name='latitude', units='degrees')
            longitude = DimCoord(lon,
                                 standard_name='longitude',
                                 units='degrees')
            regr_cube = Cube(regr,
                             dim_coords_and_dims=[(latitude, 0),
                                                  (longitude, 1)])
            ### ---------- remask -------------- ###
            # Computing the trends makes the earlier mask useless (and pyplot
            # does not honour masked arrays anyway), so another remask is needed.
            output_trend = mask_landsea(regr_cube,
                                        ['/blablabla/where/the/fx/at/'], 'sea',
                                        True)
            # Save the output trends in the cube dict
            output_trend.standard_name = None
            output_trend.long_name = 'tas_trend_med'
            output_trend.short_name = 'tastrend'

            # Calculate Climatology
            output_clim = climate_statistics(tas)
            output_clim.standard_name = None
            output_clim.long_name = 'tas_clim_med'
            output_clim.short_name = 'tasclim'

            # Save diagnosed dataset to dict. TODO: what about averaging first?
            if exp == 'historical':
                hist_trend[alias] = output_trend
                hist_clim[alias] = output_clim
            if exp == 'ssp585':
                ssp_trend[alias] = output_trend
                ssp_clim[alias] = output_clim
            # Save the outputs for each dataset.
            #self.save(output, alias, data)
        # Plot the results.
        #self.plot_2D(total, data)
        print(ssp_trend.keys())
        print(len(ssp_trend))
Example 18
def main(cfg):
    # The config object is a dict of all the metadata from the pre-processor
    # set global plotting settings
    plt.rcParams.update({'font.size': 18})

    # get variable processed
    var = get_var(cfg)

    if var == "pr":
        rel_change = True
    else:
        rel_change = False

    # first group datasets by project..
    # this creates a dict of datasets keyed by project (CMIP5, CMIP6 etc.)
    projects = group_metadata(cfg["input_data"].values(), "project")
    # how to uniquely define a dataset varies by project, for CMIP it's simple, just dataset...
    # for CORDEX, combo of dataset and driver (and possibly also domain if we start adding those)
    # also gets more complex if we start adding in different ensembles..
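    # Illustrative only (not used below): such a unique key could look like
    #   key = ds["dataset"] if proj.startswith("CMIP") else f"{ds['driver']} {ds['dataset']}"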

    # This section of the code loads and organises the data to be ready for plotting
    logger.info("Loading data")
    # empty dict to store results
    projections = {}
    model_lists = {}
    cordex_drivers = []
    cordex_rcms = []
    # loop over projects
    for proj in projects:
        # we now have a list of all the data entries..
        # for CMIPs we can just group metadata again by dataset then work with that..
        models = group_metadata(projects[proj], "dataset")

        # empty dict for results
        if proj == 'non-cordex-rcm':
            proj = 'CORDEX'

        if proj == 'non-cmip5-gcm':
            proj = 'CMIP5'

        if proj not in projections.keys():
            projections[proj] = {}

        # loop over the models
        for m in models:
            if "CORDEX" in proj.upper():
                # then we need to go one deeper in the dictionary to deal with driving models
                drivers = group_metadata(models[m], "driver")
                projections[proj][m] = dict.fromkeys(drivers.keys())
                for d in drivers:
                    logging.info(f"Calculating anomalies for {proj} {m} {d}")
                    anoms = get_anomalies(drivers[d], rel_change)
                    if anoms is None:
                        continue
                    projections[proj][m][d] = anoms
                    if proj not in model_lists:
                        model_lists[proj] = []
                    model_lists[proj].append(f"{m} {d}")
                    cordex_drivers.append(d)
                    cordex_rcms.append(m)
            elif proj == "UKCP18":
                # go deeper to deal with ensembles and datasets
                # split UKCP into separate GCM and RCM
                proj_key = f"UKCP18 {m}"
                ensembles = group_metadata(models[m], "ensemble")
                projections[proj_key] = dict.fromkeys(ensembles.keys())
                for ens in ensembles:
                    logging.info(f"Calculating anomalies for {proj_key} {ens}")
                    anoms = get_anomalies(ensembles[ens], rel_change)
                    if anoms is None:
                        continue
                    projections[proj_key][ens] = anoms
                    if proj_key not in model_lists:
                        model_lists[proj_key] = []
                    model_lists[proj_key].append(f"{proj_key} {ens}")
            else:
                logging.info(f"Calculating anomalies for {proj} {m}")
                anoms = get_anomalies(models[m], rel_change)
                if anoms is None:
                    continue
                projections[proj][m] = anoms
                if proj not in model_lists:
                    model_lists[proj] = []
                model_lists[proj].append(f"{m}")

        # remove any empty categories (i.e. UKCP18 which has been split into rcm and gcm)
        if projections[proj] == {}:
            del projections[proj]
    cordex_drivers = set(cordex_drivers)
    cordex_rcms = set(cordex_rcms)

    # reorganise and extract data for plotting
    n_seasons = len(anoms.coord('season_number').points)
    plotting_dict = proj_dict_to_season_dict(projections, n_seasons)

    for season in plotting_dict.keys():
        # this section of the code does all the plotting..

        # mega scatter plot
        # need to prepare subsets of projects
        gcm_sc, rcm_sc1, labels1 = prepare_scatter_data(
            plotting_dict[season]['CMIP5'], plotting_dict[season]['CORDEX'],
            'CORDEX')
        rcm_sc2, cpm_sc, labels2 = prepare_scatter_data(
            plotting_dict[season]['CORDEX'],
            plotting_dict[season]['cordex-cpm'], 'CPM')

        mega_scatter(gcm_sc, rcm_sc1, rcm_sc2, cpm_sc,
                     list(plotting_dict[season]['CMIP5'].values()),
                     list(plotting_dict[season]['CORDEX'].values()), labels1,
                     labels2, f'{season}')

        # simpler scatter for UKCP
        if 'UKCP18 land-gcm' in plotting_dict[season].keys():
            UKCP_g, UKCP_r, UKCP_labels = prepare_scatter_data(
                plotting_dict[season]['UKCP18 land-gcm'],
                plotting_dict[season]['UKCP18 land-rcm'], "UKCP18")
            simpler_scatter(UKCP_g, UKCP_r, UKCP_labels, f'UKCP_{season}')

        # side by side plots / dots for all models plus Glen's method...
        if 'CMIP6' in plotting_dict[season].keys():
            data_for_plotting = [
                plotting_dict[season]['CMIP6'].values(),
                plotting_dict[season]['CMIP5'].values(), rcm_sc1, cpm_sc,
                UKCP_g, UKCP_r
            ]
            labels_for_plotting = [
                'CMIP6', 'CMIP5', 'CORDEX', 'CPM', 'UKCP_g', 'UKCP_r'
            ]
        else:
            data_for_plotting = [
                plotting_dict[season]['CMIP5'].values(), rcm_sc1, cpm_sc
            ]
            labels_for_plotting = [
                'CMIP5',
                'CORDEX',
                'CPM',
            ]
        plot_datasets(data_for_plotting, labels_for_plotting, season)

        # save some plotting data for notebook experiments
        # create dictionary of all the required data for one particular season
        if season == 'JJA':
            pickle_dict = {}
            pickle_dict['CMIP5_sc'] = gcm_sc
            pickle_dict['RCM_sc1'] = rcm_sc1
            pickle_dict['RCM_sc2'] = rcm_sc2
            pickle_dict['labels1'] = labels1
            pickle_dict['labels2'] = labels2
            pickle_dict['cpm'] = cpm_sc
            pickle_dict['CMIP6'] = list(
                plotting_dict[season]['CMIP6'].values())
            pickle_dict['CMIP5'] = list(
                plotting_dict[season]['CMIP5'].values())
            pickle_dict['CORDEX'] = list(
                plotting_dict[season]['CORDEX'].values())
            pickle_dict['UKCP18 land-gcm'] = plotting_dict[season][
                'UKCP18 land-gcm']
            pickle_dict['UKCP18 land-rcm'] = plotting_dict[season][
                'UKCP18 land-rcm']

            pickle.dump(
                pickle_dict,
                open(f'{cfg["work_dir"]}/sample_plotting_data.pkl', 'wb'))

        # save details of values used for plotting the boxplots
        save_anoms_txt(plotting_dict[season]['CMIP6'],
                       f'{cfg["work_dir"]}/CMIP6_{season}.txt')
        save_anoms_txt(plotting_dict[season]['CMIP5'],
                       f'{cfg["work_dir"]}/CMIP5_{season}.txt')
        save_anoms_txt(plotting_dict[season]['CORDEX'],
                       f'{cfg["work_dir"]}/CORDEX_{season}.txt')
        save_anoms_txt(plotting_dict[season]['cordex-cpm'],
                       f'{cfg["work_dir"]}/CPM_{season}.txt')
        save_anoms_txt(plotting_dict[season]['UKCP18 land-gcm'],
                       f'{cfg["work_dir"]}/UKCP_gcm_{season}.txt')
        save_anoms_txt(plotting_dict[season]['UKCP18 land-rcm'],
                       f'{cfg["work_dir"]}/UKCP_rcm_{season}.txt')

    # print all datasets used
    print("Input models for plots:")
    for p in model_lists.keys():
        print(f"{p}: {len(model_lists[p])} models")
        print(model_lists[p])
        print("")
def main(cfg):
    """Run the diagnostic.

    Parameters
    ----------
    cfg : dict
        Configuration dictionary of the recipe.

    """
    ###########################################################################
    # Read recipe data
    ###########################################################################

    # Dataset data containers
    data = e.Datasets(cfg)
    logging.debug("Found datasets in recipe:\n%s", data)

    # Variables
    # var = e.Variables(cfg)
    available_vars = list(
        group_metadata(cfg['input_data'].values(), 'short_name'))
    logging.debug("Found variables in recipe:\n%s", available_vars)

    available_exp = list(group_metadata(cfg['input_data'].values(), 'exp'))

    if len(available_exp) > 6:
        raise ValueError("The diagnostic can only plot up to 6 different " +
                         "model experiments.")

    ###########################################################################
    # Read data
    ###########################################################################

    # Create iris cube for each dataset and save annual means
    for dataset_path in data:
        cube = iris.load(dataset_path)[0]
        # cube = iris.load(dataset_path, var.standard_names())[0]
        cube = cube.collapsed('time', iris.analysis.MEAN)

        data.set_data(cube.data, dataset_path)

    ###########################################################################
    # Process data
    ###########################################################################

    data_var = OrderedDict()
    for iexp in available_exp:
        data_var[iexp] = OrderedDict()
        for jvar in available_vars:
            # data_var[iexp] = OrderedDict()
            data_var[iexp][jvar] = 0.0

    pathlist = data.get_path_list(short_name=available_vars[0],
                                  exp=available_exp[0])

    for dataset_path in pathlist:

        # Substract piControl experiment from abrupt4xCO2 experiment
        dataset = data.get_info(n.DATASET, dataset_path)

        for jvar in available_vars:
            for iexp in available_exp:
                print(data_var[iexp])
                print((data_var[iexp].values()))
                (data_var[iexp])[jvar] = (data_var[iexp])[jvar] + \
                    data.get_data(short_name=jvar, exp=iexp,
                                  dataset=dataset)

    data_var_sum = {}
    for iexp in available_exp:
        data_var_sum[iexp] = np.fromiter(data_var[iexp].values(),
                                         dtype=float) / float(len(pathlist))

    # Plot ECS regression if desired
    plot_bar_deangelis(cfg, data_var_sum, available_exp, available_vars)
Example 20
def main(cfg):
    """Process data for use as input to the wflow hydrological model."""
    input_metadata = cfg['input_data'].values()

    for dataset, metadata in group_metadata(input_metadata, 'dataset').items():
        all_vars, provenance = get_input_cubes(metadata)

        if dataset == 'ERA5':
            shift_era5_time_coordinate(all_vars['tas'])
            shift_era5_time_coordinate(all_vars['psl'])

        # Interpolating variables onto the dem grid
        # Read the target cube, which contains target grid and target elevation
        dem_path = Path(cfg['auxiliary_data_dir']) / cfg['dem_file']
        dem = load_dem(dem_path)
        check_dem(dem, cfg['region'])
        dem = extract_region(dem, **cfg['region'])

        logger.info("Processing variable precipitation_flux")
        pr_dem = regrid(all_vars['pr'], target_grid=dem, scheme='linear')

        logger.info("Processing variable temperature")
        tas_dem = regrid_temperature(all_vars['tas'], all_vars['orog'], dem)

        logger.info("Processing variable potential evapotranspiration")
        if 'evspsblpot' in all_vars:
            pet = all_vars['evspsblpot']
            pet_dem = regrid(pet, target_grid=dem, scheme='linear')
        else:
            logger.info("Potential evapotransporation not available, deriving")
            psl_dem = regrid(all_vars['psl'], target_grid=dem, scheme='linear')
            rsds_dem = regrid(all_vars['rsds'],
                              target_grid=dem,
                              scheme='linear')
            rsdt_dem = regrid(all_vars['rsdt'],
                              target_grid=dem,
                              scheme='linear')
            pet_dem = debruin_pet(
                tas=tas_dem,
                psl=psl_dem,
                rsds=rsds_dem,
                rsdt=rsdt_dem,
            )
        pet_dem.var_name = 'pet'

        logger.info("Converting units")
        pet_dem.units = pet_dem.units / 'kg m-3'
        pet_dem.data = pet_dem.core_data() / 1000.
        pet_dem.convert_units('mm day-1')

        pr_dem.units = pr_dem.units / 'kg m-3'
        pr_dem.data = pr_dem.core_data() / 1000.
        pr_dem.convert_units('mm day-1')

        tas_dem.convert_units('degC')

        # Adjust longitude coordinate to wflow convention
        for cube in [tas_dem, pet_dem, pr_dem]:
            cube.coord('longitude').points = (cube.coord('longitude').points +
                                              180) % 360 - 180

        cubes = iris.cube.CubeList([pr_dem, tas_dem, pet_dem])
        save(cubes, dataset, provenance, cfg)
def main(cfg):
    """Process data for use as input to the marrmot hydrological model.

    These variables are needed in all_vars:
    tas (air_temperature)
    pr (precipitation_flux)
    psl (air_pressure_at_mean_sea_level)
    rsds (surface_downwelling_shortwave_flux_in_air)
    rsdt (toa_incoming_shortwave_flux)
    """
    input_metadata = cfg['input_data'].values()
    for dataset, metadata in group_metadata(input_metadata, 'dataset').items():
        all_vars, provenance = get_input_cubes(metadata)

        # Fix time coordinate of ERA5 instantaneous variables
        if dataset == 'ERA5':
            _shift_era5_time_coordinate(all_vars['psl'])
            _shift_era5_time_coordinate(all_vars['tas'])

        # Processing variables and unit conversion
        # Unit of the fluxes in marrmot should be in kg m-2 day-1 (or mm/day)
        logger.info("Processing variable PET")
        pet = debruin_pet(
            psl=all_vars['psl'],
            rsds=all_vars['rsds'],
            rsdt=all_vars['rsdt'],
            tas=all_vars['tas'],
        )
        pet = preproc.area_statistics(pet, operator='mean')
        pet.convert_units('kg m-2 day-1')  # equivalent to mm/day

        logger.info("Processing variable tas")
        temp = preproc.area_statistics(all_vars['tas'], operator='mean')
        temp.convert_units('celsius')

        logger.info("Processing variable pr")
        precip = preproc.area_statistics(all_vars['pr'], operator='mean')
        precip.convert_units('kg m-2 day-1')  # equivalent to mm/day

        # Get the start and end times and latitude longitude
        time_start_end, lat_lon = _get_extra_info(temp)

        # make data structure
        # delta_t_days could also be extracted from the cube
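        # For example (sketch, assuming a daily time axis in days since a
        # reference date):
        #   time_points = temp.coord('time').points
        #   delta_t_days = float(time_points[1] - time_points[0])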
        output_data = {
            'forcing': {
                'precip': precip.data,
                'temp': temp.data,
                'pet': pet.data,
                'delta_t_days': float(1),
                'time_unit': 'day',
            },
            'time_start': time_start_end[0],
            'time_end': time_start_end[1],
            'data_origin': lat_lon,
        }

        # Save to matlab structure
        basename = '_'.join([
            'marrmot',
            dataset,
            cfg['basin'],
            str(int(output_data['time_start'][0])),
            str(int(output_data['time_end'][0])),
        ])
        output_name = get_diagnostic_filename(basename, cfg, extension='mat')
        sio.savemat(output_name, output_data)

        # Store provenance
        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(output_name, provenance)
Example 22
    def retrieve_data(self):
        '''
        Here is where all the interesting stuff begins.

        (1) The function loads the variables and stores all the cubes according
        to their experiment (ssp585, historical, reanalysis...)

        (2) Calls the functions that calculate the output data to be shown

        (3) Calls the plotting functions
        '''
        print('----------- DATA RETRIEVAL ----------')
        data = group_metadata(self.cfg['input_data'].values(), 'alias')
        hist_ls_tas, hist_ls_pr = [], []
        ssp_ls_tas, ssp_ls_pr = [], []
        rean_ls_tas, rean_ls_pr = [], []
        start_year_ssp, end_year_ssp = 0, 0
        start_year_hist, end_year_hist = 0, 0
        start_year_rean, end_year_rean = 0, 0
        obs_names = ' '
        w_density = iris.coords.AuxCoord(1000,
                                         long_name='water_density',
                                         units='kg m-3')
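        # Dividing precipitation by this density coordinate converts the mass
        # flux into a volume flux whose units iris can then convert to
        # mm month-1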
        # iteration that sorts the dataset cubes into its own experiment list.
        # (hist_ls, ssp_ls, rean_ls)
        for i, alias in enumerate(data):
            print(alias)
            variables = group_metadata(data[alias], 'short_name')
            pr_exist, tas_exist = False, False
            if 'pr' in variables:
                pr_file = variables['pr'][0]['filename']
                pr = iris.load(pr_file)[0]
                pr = pr / w_density
                pr.convert_units('mm month-1')
                pr_exist = True
            if 'tas' in variables:
                tas_file = variables['tas'][0]['filename']
                tas = iris.load(tas_file)[0]
                tas.convert_units('degC')
                # coord_names = [coord.name() for coord in tas.coords()]
                tas_exist = True
            # LOAD DATA IN CUBES
            # OBS --> rean data, they don't have attribute 'exp'
            if 'OBS' not in alias:
                exp = data[alias][0]['exp']
                activity = data[alias][0]['activity']
                model_names = data[alias][0]['project']  # 'activity' ??
            else:
                exp = 'a'
                obs_names = obs_names + data[alias][0]['dataset'] + ', '
            # LIST THE CUBES ACCORDING TO THE 'EXP' THEY BELONG
            if exp == 'a':
                if (start_year_rean == 0) & (end_year_rean == 0):
                    start_year_rean = variables['tas'][0]['start_year']
                    end_year_rean = variables['tas'][0]['end_year']
                if tas_exist:
                    tas = self.regrid_time(tas, start_year_rean, end_year_rean)
                    rean_ls_tas.append(tas)
                if pr_exist:
                    pr = self.regrid_time(pr, start_year_rean, end_year_rean)
                    rean_ls_pr.append(pr)
            elif ((activity.lower() == 'highresmip') and
                  (exp == 'highres-future')) or (
                      (activity.lower() == 'scenariomip') and
                      (exp == 'ssp585')):
                if (start_year_ssp == 0) & (end_year_ssp == 0):
                    start_year_ssp = variables['tas'][0]['start_year']
                    end_year_ssp = variables['tas'][0]['end_year']
                if tas_exist:
                    tas = self.regrid_time(tas, start_year_ssp, end_year_ssp)
                    ssp_ls_tas.append(tas)
                if pr_exist:
                    pr = self.regrid_time(pr, start_year_ssp, end_year_ssp)
                    ssp_ls_pr.append(pr)
            elif ((activity.lower() == 'highresmip') and
                  (exp == 'hist-1950')) or ((activity.lower() == 'cmip') and
                                            (exp == 'historical')):
                if (start_year_hist == 0) & (end_year_hist == 0):
                    start_year_hist = variables['tas'][0]['start_year']
                    end_year_hist = variables['tas'][0]['end_year']
                if tas_exist:
                    tas = self.regrid_time(tas, start_year_hist, end_year_hist)
                    hist_ls_tas.append(tas)
                if pr_exist:
                    pr = self.regrid_time(pr, start_year_hist, end_year_hist)
                    hist_ls_pr.append(pr)

        start_year_ls = [start_year_ssp, start_year_hist, start_year_rean]
        end_year_ls = [end_year_ssp, end_year_hist, end_year_rean]

        # COMPUTE EVERYTHING & PLOT
        # TAS
        if (not rean_ls_tas) or (not ssp_ls_tas) or (not hist_ls_tas):
            print('No temperature to diagnose')
        else:
            cube_ls_tas, ts_ls_tas = self.compute(rean_ls_tas, ssp_ls_tas,
                                                  hist_ls_tas, 'tas')
            self.tas_plot_caller(cube_ls_tas, obs_names.upper(), model_names,
                                 str(start_year_rean))
            # # Compute standard deviation of the ensembles & plot
            STDs_tas = self.timeseries_std(
                [ssp_ls_tas, hist_ls_tas, rean_ls_tas])
            self.timeseries_plot(ts_ls_tas, start_year_ls, end_year_ls, '_tas',
                                 STDs_tas, '($^o$C)', 'Temperature')
        # PR
        if (not rean_ls_pr) or (not ssp_ls_pr) or (not hist_ls_pr):
            print('No precipitation to diagnose')
        else:
            cube_ls_pr, ts_ls_pr = self.compute(rean_ls_pr, ssp_ls_pr,
                                                hist_ls_pr, 'pr')
            self.pr_plot_caller(cube_ls_pr, obs_names.upper(), model_names,
                                str(start_year_rean))
            # # Compute standard deviation of the ensembles & plot
            STDs_pr = self.timeseries_std([ssp_ls_pr, hist_ls_pr, rean_ls_pr])
            self.timeseries_plot(ts_ls_pr, start_year_ls, end_year_ls, '_pr',
                                 STDs_pr, '(mm month$^{-1}$)', 'Precipitation')
def plot_regressions(input_data, cfg, description=None):
    """Plot linear regressions used to calculate feedback parameters."""
    table = OrderedDict()

    # Iterate over radiation quantities (y axis)
    for (var, datasets) in group_metadata(input_data, 'short_name').items():
        if 'tas' in var:
            continue
        logger.info("Creating regression plots for variable '%s'", var)

        # Iterate over all available datasets
        for dataset in datasets:
            dataset_name = dataset['dataset']
            table.setdefault(dataset_name, {})
            tas_data = select_metadata(input_data,
                                       short_name=_get_tas_var(
                                           dataset_name, var),
                                       dataset=dataset_name)
            if not tas_data:
                raise ValueError(
                    f"No 'tas' data for '{dataset_name}' available")
            tas_cube = tas_data[0]['cube']
            if dataset['cube'].ndim > 1:
                raise ValueError(
                    "Regression plots are not supported for input data with "
                    "more than one dimension (which should be time)")

            # Save plot and netcdf file
            (plot_path, reg) = _create_regression_plot(tas_cube,
                                                       dataset['cube'],
                                                       dataset_name,
                                                       cfg,
                                                       description=description)
            netcdf_path = _create_regression_file(tas_cube,
                                                  dataset['cube'],
                                                  dataset_name,
                                                  cfg,
                                                  description=description)

            # Expand table
            table[dataset_name][var] = reg.slope
            if var == 'rtnt':
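                # Gregory regression: the intercept is the effective 4xCO2
                # forcing F and the slope the feedback parameter, so
                # ECS = -F / (2 * slope) refers to a doubling of CO2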
                table[dataset_name]['ECS'] = (-reg.intercept / 2.0 / reg.slope)
                table[dataset_name]['F'] = reg.intercept

            # Provenance
            caption = (
                'Scatterplot between {} TOA radiance and global mean surface '
                'temperature anomaly{} of the abrupt 4x CO2 experiment '
                'including linear regression for {} (following Andrews et '
                'al., Geophys. Res. Lett., 39, 2012).'.format(
                    FEEDBACK_PARAMETERS.get(var, var),
                    '' if description is None else f' for {description}',
                    dataset_name))
            _write_provenance(netcdf_path,
                              plot_path,
                              caption,
                              dataset['ancestors'] + tas_data[0]['ancestors'],
                              cfg,
                              plot_types=['scatter'])

    # Create summary table
    _create_table(table, cfg, description=description)
def main(cfg):
    # The config object is a dict of all the metadata from the pre-processor

    # get variable processed
    var = list(extract_variables(cfg).keys())
    assert len(var) == 1
    var = var[0]

    if var == "pr":
        rel_change = True
    else:
        rel_change = False

    # establish the time periods of our datasets
    start_years = list(group_metadata(cfg["input_data"].values(),
                                      "start_year"))
    base_start = min(start_years)
    fut_start = max(start_years)

    # first group datasets by project..
    # this creates a dict of datasets keyed by project (CMIP5, CMIP6 etc.)
    projects = group_metadata(cfg["input_data"].values(), "project")
    # how to uniquely define a dataset varies by project, for CMIP it's simple, just dataset...
    # for CORDEX, combo of dataset and driver (and possibly also domain if we start adding those)
    # also gets more complex if we start adding in different ensembles..

    # This section of the code loads and organises the data to be ready for plotting
    logger.info("Loading data")
    # empty dict to store results
    projections = {}
    model_lists = {}
    cordex_drivers = []
    # loop over projects
    for proj in projects:
        # we now have a list of all the data entries..
        # for CMIPs we can just group metadata again by dataset then work with that..
        models = group_metadata(projects[proj], "dataset")

        # empty dict for results
        projections[proj] = {}
        # loop over the models
        for m in models:
            if proj[:6].upper() == "CORDEX":
                # then we need to go one deeper in the dictionary to deal with driving models
                drivers = group_metadata(models[m], "driver")
                projections[proj][m] = dict.fromkeys(drivers.keys())
                for d in drivers:
                    logging.info(f"Calculating anomalies for {proj} {m} {d}")
                    anoms = get_anomalies(drivers[d], base_start, fut_start,
                                          rel_change)
                    if anoms is None:
                        continue
                    projections[proj][m][d] = anoms
                    if proj not in model_lists:
                        model_lists[proj] = []
                    model_lists[proj].append(f"{m} {d}")
                    cordex_drivers.append(d)
            elif proj == "UKCP18":
                # go deeper to deal with ensembles and datasets
                # split UKCP into separate GCM and RCM
                proj_key = f"UKCP18 {m}"
                ensembles = group_metadata(models[m], "ensemble")
                projections[proj_key] = dict.fromkeys(ensembles.keys())
                for ens in ensembles:
                    logging.info(f"Calculating anomalies for {proj_key} {ens}")
                    anoms = get_anomalies(ensembles[ens], base_start,
                                          fut_start, rel_change)
                    if anoms is None:
                        continue
                    projections[proj_key][ens] = anoms
                    if proj_key not in model_lists:
                        model_lists[proj_key] = []
                    model_lists[proj_key].append(f"{proj_key} {ens}")
            else:
                logging.info(f"Calculating anomalies for {proj} {m}")
                anoms = get_anomalies(models[m], base_start, fut_start,
                                      rel_change)
                if anoms is None:
                    continue
                projections[proj][m] = anoms
                if proj not in model_lists:
                    model_lists[proj] = []
                model_lists[proj].append(f"{m}")
        # remove any empty categories (i.e. UKCP18 which has been split into rcm and gcm)
        if projections[proj] == {}:
            del projections[proj]
    cordex_drivers = set(cordex_drivers)

    # this section of the code does the plotting..
    # we now have all the projections in the projections dictionary

    # now lets plot them
    # first we need to process the dictionary, and move the data into a list of vectors
    # the projections object is the key one that contains all our data..
    seasons = {0: "DJF", 1: "MAM", 2: "JJA", 3: "OND"}
    logger.info("Plotting")
    extent = (
        cfg["domain"]["start_longitude"] - 2,
        cfg["domain"]["end_longitude"] + 2,
        cfg["domain"]["start_latitude"] - 2,
        cfg["domain"]["end_latitude"] + 2,
    )
    for s in seasons.keys():
        # make directory
        try:
            os.mkdir(f"{cfg['plot_dir']}/{seasons[s]}")
        except FileExistsError:
            pass
        for p in projections:
            pdata = process_projections_dict(projections[p], s)

            for m in pdata:
                title = f"{p} {m} {seasons[s]} {var} change"
                plt.figure(figsize=(12.8, 9.6))
                ax = plt.axes(projection=ccrs.PlateCarree())
                ax.set_extent(extent)
                # set scales
                if var == "pr":
                    vmn = -50
                    vmx = 50
                    cmap = "brewer_RdYlBu_11"
                else:
                    vmn = 0
                    vmx = 5
                    cmap = "brewer_YlOrRd_09"
                qplt.pcolormesh(pdata[m], vmin=vmn, vmax=vmx, cmap=cmap)
                plt.title(title)
                ax.coastlines()
                ax.add_feature(cartopy.feature.BORDERS, linestyle=":")
                plt.savefig(
                    f"{cfg['plot_dir']}/{seasons[s]}/{p}_{m}_map_{seasons[s]}.png"
                )
                plt.close()

    # print all datasets used
    print("Input models for plots:")
    for p in model_lists.keys():
        print(f"{p}: {len(model_lists[p])} models")
        print(model_lists[p])
        print("")
    def _plot_comparison(self, data, datasets, p_values=False):
        if p_values:
            filename = 'feedback_p_values'
        else:
            filename = 'feedback'

        path = os.path.join(self.cfg[n.PLOT_DIR],
                            f'{filename}.{self.cfg[n.OUTPUT_FILE_TYPE]}')

        plot_options = self.cfg.get('plot', {})
        fig = plt.figure()
        index = np.arange(len(data))
        plt.scatter(
            index,
            data,
            plot_options.get('point_size', 8),
            color=plot_options.get('point_color', 'black'),
        )
        if p_values:
            plt.hlines(0.05, -1, index[-1] + 1, colors='red')
        axes = plt.gca()
        logger.debug(data)
        max_limit = math.ceil(max(data))
        if max_limit < 0:
            max_limit = 0
        min_limit = math.floor(min(data))
        separation = max_limit - min_limit

        if plot_options.get('show_values', False):

            def _get_y_position(value):
                if value > min_limit + separation * 0.75:
                    return value - separation * 0.05
                return value + separation * 0.10

            for i, value in enumerate(data):
                axes.annotate(
                    f'{value:.2f}',
                    xy=(index[i], value),
                    xycoords='data',
                    textcoords='data',
                    xytext=(index[i], _get_y_position(value)),
                    rotation=90,
                )

        # axes and labels
        axes.set_ylim(min_limit, max_limit)
        if p_values:
            axes.set_ylabel('P-value [log]')
            plt.ylim(0, max(0.25, max(data)))
        else:
            axes.set_ylabel('IFE')
        axes.set_title('IFE comparison')
        _, xtick_names = plt.xticks(index, datasets)
        plt.xlim(index[0] - 0.5, index[-1] + 0.5)
        plt.setp(xtick_names, rotation=90, fontsize=10)
        plt.grid(True, 'both', 'y')
        plt.tight_layout()
        fig.savefig(path)
        plt.close(fig)
        self._create_prov_record(
            path, f'IFE {filename} comparison for all datasets',
            group_metadata(self.cfg['input_data'].values(), n.ALIAS))
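
    # A hypothetical usage sketch (the call sites are not part of this
    # excerpt): the method would typically be invoked once with the feedback
    # estimates and once with the corresponding p-values, e.g.
    #     self._plot_comparison(feedback_values, dataset_names)
    #     self._plot_comparison(p_values, dataset_names, p_values=True)
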
Example 26
def main(diag_config):
    """
    Evaluate global distribution of ecosystem carbon turnover time.

    Argument:
    --------
        diag_config - nested dictionary of metadata
    """
    model_data_dict = group_metadata(diag_config['input_data'].values(),
                                     'dataset')

    # get the data from the observation
    global_tau_obs = _get_obs_data(diag_config)
    base_name = ('{title}_{source_label}_'
                 '{grid_label}'.format(
                     title=global_tau_obs['grid']['tau_ctotal'].long_name,
                     source_label=diag_config['obs_info']['source_label'],
                     grid_label=diag_config['obs_info']['grid_label']))

    global_tau_mod = {}
    global_tau_mod['grid'] = {}
    global_tau_mod['global'] = {}

    provenance_record_matrix = _get_provenance_record(
        "Matrix Comparison of global distributions of turnover time of carbon",
        ['mean', 'perc'], ['global'],
        _get_ancestor_files(diag_config, 'tau_ctotal'))

    provenance_record_multimodel = _get_provenance_record(
        "Multimodel bias and agreements of global distributions of turnover"
        "time of carbon. Reproduces figure 3 in Carvalhais et al. (2014).",
        ['mean', 'perc'], ['global'],
        _get_ancestor_files(diag_config, 'tau_ctotal'))

    for model_name, model_dataset in model_data_dict.items():
        global_tau_mod[model_name] = {}

        # load the data
        ctotal = _load_variable(model_dataset, 'ctotal')
        gpp = _load_variable(model_dataset, 'gpp')
        tau_ctotal = _calc_turnover(ctotal, gpp, model_name)
        global_tau_mod['grid'][model_name] = tau_ctotal

        # apply the GPP threshold and set the data in dictionary
        gpp_global = gpp.collapsed(['latitude', 'longitude'],
                                   iris.analysis.SUM)
        ctotal_global = ctotal.collapsed(['latitude', 'longitude'],
                                         iris.analysis.SUM)
        tau_global = ctotal_global / gpp_global
        tau_global.convert_units('yr')

        global_tau_mod['global'][model_name] = float(tau_global.core_data())

        # Build the output name and provenance record for this model up front
        # so that they are available for both the plot and the netCDF output.
        base_name_mod = (
            'global_{title}_{source_label}_'
            '{grid_label}'.format(
                title=global_tau_obs['grid']['tau_ctotal'].long_name,
                source_label=model_name,
                grid_label=diag_config['obs_info']['grid_label']))
        provenance_record_mod = _get_provenance_record(
            "Map of global distribution of turnover time of carbon",
            ['mean', 'perc'], ['global'], {model_name: model_dataset})

        if diag_config['write_plots']:
            plot_path_mod = get_plot_filename(base_name_mod, diag_config)
            # plot_path_list.append(plot_path_mod)
            _plot_single_map(plot_path_mod, tau_ctotal,
                             global_tau_mod['global'][model_name], model_name,
                             provenance_record_mod, diag_config)

        if diag_config['write_netcdf']:
            model_cubes = [
                c for c in global_tau_mod['grid'].values()
                if isinstance(c, iris.cube.Cube)
            ]
            obs_cubes = [
                c for c in global_tau_obs['grid'].values()
                if isinstance(c, iris.cube.Cube)
            ]
            netcdf_path = get_diagnostic_filename(base_name_mod, diag_config)
            save_cubes = iris.cube.CubeList(model_cubes + obs_cubes)
            iris.save(save_cubes, netcdf_path)
        else:
            netcdf_path = None

        if netcdf_path is not None:
            with ProvenanceLogger(diag_config) as provenance_logger:
                provenance_logger.log(netcdf_path, provenance_record_mod)

    if diag_config['write_plots']:
        # multimodel agreement
        base_name_multimodel = '{prefix}_{base_name}'.format(
            prefix='global_multimodelAgreement', base_name=base_name)
        plot_path_multimodel = get_plot_filename(base_name_multimodel,
                                                 diag_config)
        _plot_multimodel_agreement(plot_path_multimodel, global_tau_mod,
                                   global_tau_obs, diag_config)
        with ProvenanceLogger(diag_config) as provenance_logger:
            provenance_logger.log(plot_path_multimodel,
                                  provenance_record_multimodel)

        # map of observation
        base_name_obs = '{prefix}_{base_name}'.format(prefix='global',
                                                      base_name=base_name)
        plot_path_obs = get_plot_filename(base_name_obs, diag_config)
        provenance_record_obs = _get_provenance_record(
            "Map of observed global distribution of turnover time of carbon",
            ['mean', 'perc'], ['global'],
            global_tau_obs['input_files'].tolist())

        _plot_single_map(plot_path_obs, global_tau_obs['grid']['tau_ctotal'],
                         global_tau_obs['global']['tau_ctotal'],
                         diag_config['obs_info']['source_label'],
                         provenance_record_obs, diag_config)

        # matrix of maps
        base_name_matrix = '{prefix}_{base_name}'.format(
            prefix='global_matrix_map', base_name=base_name)
        plot_path_matrix = get_plot_filename(base_name_matrix, diag_config)
        _plot_matrix_map(plot_path_matrix, global_tau_mod, global_tau_obs,
                         diag_config)

        with ProvenanceLogger(diag_config) as provenance_logger:
            provenance_logger.log(plot_path_matrix, provenance_record_matrix)
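

# `_calc_turnover` is not shown in this excerpt. A minimal sketch, assuming it
# simply derives the turnover time as the ratio of total carbon to GPP (the
# same calculation is done explicitly in the zonal-correlation example below):
def _calc_turnover(ctotal, gpp, model_name):
    """Calculate ecosystem carbon turnover time (ctotal / gpp) in years."""
    tau_ctotal = ctotal / gpp
    tau_ctotal.convert_units('yr')
    tau_ctotal.var_name = 'tau_ctotal'
    tau_ctotal.long_name = f'turnover time of carbon ({model_name})'
    return tau_ctotal
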
Example 27
def main(cfg):
    """Run the diagnostic."""
    ###########################################################################
    # Read recipe data
    ###########################################################################

    # Dataset data containers
    data = e.Datasets(cfg)
    logging.debug("Found datasets in recipe:\n%s", data)

    # Variables
    var = e.Variables(cfg)
    logging.debug("Found variables in recipe:\n%s", var)

    # Check that the required variables are available
    if not var.vars_available('pr', 'ua', 'va', 'ts'):
        raise ValueError(
            "This diagnostic needs 'pr', 'ua', 'va', and 'ts'")

    available_exp = list(group_metadata(cfg['input_data'].values(), 'exp'))

    if 'historical' not in available_exp:
        raise ValueError("The diagnostic needs a historical experiment "
                         "and one other experiment.")

    if len(available_exp) != 2:
        raise ValueError("The diagnostic needs exactly two experiments: "
                         "one historical and one other.")

    available_exp.remove('historical')
    future_exp = available_exp[0]
    ###########################################################################
    # Read data
    ###########################################################################

    # Create iris cube for each dataset and save annual means
    for dataset_path in data:
        cube = iris.load(dataset_path)[0]
        cat.add_month_number(cube, 'time', name='month_number')
        # MJJAS mean (monsoon season)
        cube = cube[np.where(
            np.absolute(cube.coord('month_number').points - 7) <= 2)]
        cube = cube.collapsed('time', iris.analysis.MEAN)

        short_name = data.get_info(n.SHORT_NAME, dataset_path)
        if short_name == 'pr':
            # convert from kg m-2 s-1 to mm d-1
            # cube.convert_units('mm d-1') doesn't work.
            cube.data = cube.data * (60.0 * 60.0 * 24.0)
            cube.units = 'mm d-1'
            # Caching lats/lons once is possible because all data are
            # interpolated to the same grid.
            if 'lats' not in locals():
                lats = cube.coord('latitude').points
                lons = cube.coord('longitude').points

        data.set_data(cube.data, dataset_path)
    ###########################################################################
    # Process data
    ###########################################################################

    data_ar = substract_li(cfg, data, lats, lons, future_exp)

    # data_ar {"datasets": datasets, "ar_diff_rain": ar_diff_rain,
    #          "ar_diff_ua": ar_diff_ua, "ar_diff_va": ar_diff_va,
    #          "ar_hist_rain": ar_hist_rain, "mism_diff_rain": mism_diff_rain,
    #          "mwp_hist_rain": mwp_hist_rain}

    plot_rain_and_wind(cfg, 'Multi-model_mean',
                       {'ar_diff_rain': data_ar["ar_diff_rain"],
                        'ar_diff_ua': data_ar["ar_diff_ua"],
                        'ar_diff_va': data_ar["ar_diff_va"],
                        'lats': lats, 'lons': lons}, future_exp)

    # Regression between mean ISM rain difference and historical rain
    reg2d = get_reg_2d_li(data_ar["mism_diff_rain"], data_ar["ar_hist_rain"],
                          lats, lons)

    plot_2dcorrelation_li(cfg, reg2d, lats, lons)

    plot_reg_li(cfg, data_ar, future_exp)

    # Regression between mean WP rain and rain difference for each location
    reg2d_wp = get_reg_2d_li(data_ar["mwp_hist_rain"], data_ar["ar_diff_rain"],
                             lats, lons)

    data_ar2 = correct_li(data_ar, lats, lons, reg2d_wp)
    # return {"datasets": data["datasets"], "ar_diff_cor": ar_diff_cor,
    #         "proj_err": proj_err, "mism_diff_cor": mism_diff_cor,
    #         "mism_hist_rain": mism_hist_rain, "mwp_hist_cor": mwp_hist_cor}

    plot_reg_li2(cfg, data_ar["datasets"], data_ar["mism_diff_rain"],
                 data_ar2["mism_diff_cor"], data_ar2["mism_hist_rain"])

    plot_rain(cfg, 'Multi-model mean rainfall change due to model error',
              np.mean(data_ar2["proj_err"], axis=2), lats, lons)
    plot_rain(cfg, 'Corrected multi-model mean rainfall change',
              np.mean(data_ar2["ar_diff_cor"], axis=2), lats, lons)
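

# The MJJAS (May-September) selection above indexes the cube with numpy. An
# arguably more idiomatic alternative (a sketch, not the original author's
# code) uses an iris Constraint on the month_number coordinate:
import iris
import iris.analysis
import iris.coord_categorisation as cat


def mjjas_mean(cube):
    """Return the May-September (monsoon season) time mean of a cube."""
    if not cube.coords('month_number'):
        cat.add_month_number(cube, 'time', name='month_number')
    mjjas = cube.extract(iris.Constraint(month_number=lambda m: 5 <= m <= 9))
    return mjjas.collapsed('time', iris.analysis.MEAN)

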
def main(diag_config):
    """
    Diagnostic to evaluate zonal correlation between turnover time and climate.

    Argument:
    --------
        diag_config - nested dictionary of metadata
    """
    model_data_dict = group_metadata(diag_config['input_data'].values(),
                                     'dataset')
    fig_config = _get_fig_config(diag_config)
    zonal_correlation_mod = {}
    for model_name, model_dataset in model_data_dict.items():
        zonal_correlation_mod[model_name] = {}
        mod_coords = {}
        ctotal = _load_variable(model_dataset, 'ctotal')
        gpp = _load_variable(model_dataset, 'gpp')
        precip = _load_variable(model_dataset, 'pr')
        tas = _load_variable(model_dataset, 'tas')
        tau_ctotal = (ctotal / gpp)
        tau_ctotal.convert_units('yr')
        # set the attributes
        tau_ctotal.var_name = 'tau_ctotal'
        for coord in gpp.coords():
            mod_coords[coord.name()] = coord

        _tau_dat = _remove_invalid(tau_ctotal.data, fill_value=np.nan)
        _precip_dat = _remove_invalid(precip.data, fill_value=np.nan)
        _tas_dat = _remove_invalid(tas.data, fill_value=np.nan)
        zon_corr = _calc_zonal_correlation(_tau_dat, _precip_dat, _tas_dat,
                                           mod_coords['latitude'].points,
                                           fig_config)
        zonal_correlation_mod[model_name]['data'] = zon_corr
        zonal_correlation_mod[model_name]['latitude'] = mod_coords['latitude']
    zonal_correlation_obs = _get_obs_data_zonal(diag_config)

    base_name = '{title}_{corr}_{source_label}_{grid_label}z'.format(
        title='r_tau_ctotal_climate',
        corr=fig_config['correlation_method'],
        source_label=diag_config['obs_info']['source_label'],
        grid_label=diag_config['obs_info']['grid_label'])

    provenance_record = _get_provenance_record(
        "Comparison of latitudinal (zonal) variations of pearson"
        " correlation between turnover time and climate: turnover"
        " time and precipitation, controlled for temperature"
        " (left) and vice-versa (right). Reproduces figures 2c"
        " and 2d in Carvalhais et al. (2014).", ['corr', 'perc'], ['zonal'],
        _get_ancestor_files(diag_config, 'tau_ctotal'))

    if diag_config['write_netcdf']:
        model_cubes = [
            c for c in zonal_correlation_mod.values()
            if isinstance(c, iris.cube.Cube)
        ]
        obs_cubes = [
            c for c in zonal_correlation_obs.values()
            if isinstance(c, iris.cube.Cube)
        ]
        netcdf_path = get_diagnostic_filename(base_name, diag_config)
        save_cubes = iris.cube.CubeList(model_cubes + obs_cubes)
        iris.save(save_cubes, netcdf_path)

        with ProvenanceLogger(diag_config) as provenance_logger:
            provenance_logger.log(netcdf_path, provenance_record)

    if diag_config['write_plots']:
        plot_path = get_plot_filename(base_name, diag_config)
        _plot_zonal_correlation(plot_path, zonal_correlation_mod,
                                zonal_correlation_obs, diag_config)
        provenance_record['plot_file'] = plot_path

        with ProvenanceLogger(diag_config) as provenance_logger:
            provenance_logger.log(plot_path, provenance_record)
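

# `_remove_invalid` is used above but not shown in this excerpt. A minimal
# sketch, assuming it simply replaces masked and non-finite values with the
# requested fill value so that the correlation code can operate on plain
# numpy arrays:
import numpy as np


def _remove_invalid(arr, fill_value=np.nan):
    """Return a plain array with masked/non-finite values set to fill_value."""
    return np.ma.filled(np.ma.masked_invalid(arr), fill_value)

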
def main(cfg):
    """Run the diagnostic.

    The cfg object is a dict of all the metadata from the pre-processor.
    """

    # get variable processed
    var = get_var(cfg)

    if var == "pr":
        rel_change = True
    else:
        rel_change = False

    # establish the time periods of our datasets
    start_years = list(group_metadata(cfg["input_data"].values(),
                                      "start_year"))
    base_start = min(start_years)
    fut_start = max(start_years)

    # First group datasets by project; this creates a dict of datasets keyed
    # by project (CMIP5, CMIP6, etc.).
    projects = group_metadata(cfg["input_data"].values(), "project")
    # How a dataset is uniquely identified varies by project: for CMIP it is
    # just the dataset name; for CORDEX it is the combination of dataset and
    # driver (and possibly domain); UKCP18 additionally needs the ensemble.

    # This section of the code loads and organises the data to be ready for plotting
    logger.info("Loading data")
    # empty dict to store results
    projections = {}
    model_lists = {}
    cordex_drivers = []
    # loop over projects
    for proj in projects:
        # we now have a list of all the data entries for this project;
        # for CMIP we can simply group the metadata again by dataset
        models = group_metadata(projects[proj], "dataset")

        # empty dict for results
        projections[proj] = {}
        # loop over the models
        for m in models:
            if proj[:6].upper() == "CORDEX":
                # then we need to go one deeper in the dictionary to deal with driving models
                drivers = group_metadata(models[m], "driver")
                projections[proj][m] = dict.fromkeys(drivers.keys())
                for d in drivers:
                    logging.info(f"Calculating anomalies for {proj} {m} {d}")
                    anoms = get_anomalies(drivers[d], base_start, fut_start,
                                          rel_change)
                    if anoms is None:
                        continue
                    projections[proj][m][d] = anoms
                    if proj not in model_lists:
                        model_lists[proj] = []
                    model_lists[proj].append(f"{m} {d}")
                    cordex_drivers.append(d)
            elif proj == "UKCP18":
                # go deeper to deal with ensembles and datasets
                # split UKCP18 into separate GCM and RCM entries
                proj_key = f"UKCP18 {m}"
                ensembles = group_metadata(models[m], "ensemble")
                projections[proj_key] = dict.fromkeys(ensembles.keys())
                for ens in ensembles:
                    logging.info(f"Calculating anomalies for {proj_key} {ens}")
                    anoms = get_anomalies(ensembles[ens], base_start,
                                          fut_start, rel_change)
                    if anoms is None:
                        continue
                    projections[proj_key][ens] = anoms
                    if proj_key not in model_lists:
                        model_lists[proj_key] = []
                    model_lists[proj_key].append(f"{proj_key} {ens}")
            else:
                logging.info(f"Calculating anomalies for {proj} {m}")
                anoms = get_anomalies(models[m], base_start, fut_start,
                                      rel_change)
                if anoms is None:
                    continue
                projections[proj][m] = anoms
                if proj not in model_lists:
                    model_lists[proj] = []
                model_lists[proj].append(f"{m}")
        # remove any empty categories (e.g. UKCP18, which has been split into RCM and GCM entries)
        if projections[proj] == {}:
            del projections[proj]
    cordex_drivers = set(cordex_drivers)

    # This section of the code does all the plotting.
    plot_boxplots(projections, cordex_drivers)
    simple_dots_plot(projections, cordex_drivers)

    # print all datasets used
    print("Input models for plots:")
    for p in model_lists.keys():
        print(f"{p}: {len(model_lists[p])} models")
        print(model_lists[p])
        print("")
Example 30
    def compute(self):
        print('----------- COMPUTE ----------')
        # ---------------------------------------------------------------------
        # Every dataset in the recipe is associated with an alias. We are going
        # to use the alias and the group_metadata shared function to loop over
        # the datasets.
        # ---------------------------------------------------------------------
        data = group_metadata(self.cfg['input_data'].values(), 'alias')
        ssp_ts = {}
        hist_ts = {} 
        rean_ts = {}
        hist = 0
        ssp = 0
        rean = 0
        # Loop over the datasets.
        for i, alias in enumerate(data):
            exp = data[alias][0]['exp']
            variables = group_metadata(data[alias], 'short_name')
            # Returns the path to the preprocessed files.
            tas_file = variables['tas'][0]['filename']
            tas = iris.load(tas_file)[0]
            tas.convert_units('degC')
            if i == 0:
                climatology = self.ref_clim(tas, 1960, 1962)
            #anomaly = tas - climatology
            #timeseries = anomaly.collapsed(['longitude', 'latitude'], iris.analysis.MEAN)
            timeseries = tas.collapsed(['longitude', 'latitude'], iris.analysis.MEAN)
            #timeseries.long_name = 'med_r_timeseries_tas'
  
            # Calculate Trends
            nlat = tas.coord('latitude').shape[0]
            nlon = tas.coord('longitude').shape[0]
            lat = tas.coord('latitude').points
            lon = tas.coord('longitude').points
            time_array = np.arange(1, tas.coord('time').shape[0] + 1)
            regr = np.zeros([nlat, nlon])
            for j in range(nlat):
                for k in range(nlon):
                    p = np.polyfit(time_array, tas[:,j,k].data, 1)
                    regr[j, k] = p[0]*10 # the 10 is to convert to decadal
            latitude = DimCoord(lat, standard_name='latitude', units='degrees')
            longitude = DimCoord(lon, standard_name='longitude', units='degrees')
            regr_cube = Cube(regr, dim_coords_and_dims=[(latitude, 0), (longitude, 1)])
            # ---------- remask ----------
            # Computing the trends yields a plain (unmasked) array, so the
            # earlier land/sea remask is lost; another remask is needed here.
            output_trend = mask_landsea(regr_cube, ['/blablabla/where/the/fx/at/'], 'sea', True)
            # Save the output trends in the cube dict
            output_trend.standard_name = None 
            output_trend.long_name = 'tas_trend_med'
            output_trend.short_name = 'tastrend'
        
            # Calculate Climatology
            output_clim = climate_statistics(tas)           
            output_clim.standard_name = None
            output_clim.long_name = 'tas_clim_med'
            output_clim.short_name = 'tasclim' 

            # Save diagnosed dataset to dict. TODO: what about averaging first?
            if exp == 'historical':
                hist_ts[alias] = timeseries
                if hist == 0:
                    mean_hist_trend = output_trend
                    mean_hist_clim = output_clim
                else:
                    mean_hist_trend = mean_hist_trend + output_trend
                    mean_hist_clim = mean_hist_clim + output_clim
                hist += 1
            if exp == 'ssp585':
                ssp_ts[alias] = timeseries
                if ssp == 0:
                    mean_ssp_trend = output_trend
                    mean_ssp_clim = output_clim
                else:
                    mean_ssp_trend = mean_ssp_trend + output_trend
                    mean_ssp_clim = mean_ssp_clim + output_clim
                ssp += 1
            if exp == 'reanaly':
                rean_ts[alias] = timeseries
                if rean == 0:
                    mean_rean_trend = output_trend
                    mean_rean_clim = output_clim
                else:
                    mean_rean_trend = mean_rean_trend + output_trend
                    mean_rean_clim = mean_rean_clim + output_clim
                rean += 1

        mean_hist_trend = mean_hist_trend/hist
        mean_hist_clim = mean_hist_clim/hist
        mean_ssp_trend = mean_ssp_trend/ssp
        mean_ssp_clim = mean_ssp_clim/ssp
        # mean_rean_trend = mean_rean_trend / rean
        # mean_rean_clim = mean_rean_clim / rean
        mean_ssp_trend.long_name = 'ssp_trend_Med'
        mean_ssp_clim.long_name = 'ssp_clim_Med'
        mean_hist_trend.long_name = 'hist_trend_Med'
        mean_hist_clim.long_name = 'hist_clim_Med'
        # mean_rean_trend.long_name = 'rean_trend_Med'
        # mean_rean_clim.long_name = 'rean_clim_Med'
                
        ##### Biases #####
        #trend_bias = mean_hist_trend - mean_rean_trend
        #clim_bias = mean_hist_clim - mean_rean_clim


        # Save the outputs for each dataset.
        # self.save(output, alias, data)
        # Plot the results.
        self.plot_2D(mean_ssp_trend)
        self.plot_1D(timeseries)   
        print(mean_ssp_trend)
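

# The double loop over grid points above fits one trend per cell with
# np.polyfit. A sketch of an equivalent vectorised alternative (an
# illustration, not the original author's code): flatten the spatial
# dimensions and fit every cell in a single least-squares call.
import numpy as np


def decadal_trend(data, time_array):
    """Per-grid-cell linear trend (per decade) of a (time, lat, lon) array."""
    ntime, nlat, nlon = data.shape
    flat = data.reshape(ntime, nlat * nlon)
    # np.polyfit accepts a 2-D y array and fits each column independently
    slopes = np.polyfit(time_array, flat, 1)[0]
    return slopes.reshape(nlat, nlon) * 10.0  # factor 10 converts to per decade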