Example 1
# Imports assumed by these snippets (ESMValTool shared diagnostic helpers;
# later examples additionally use numpy, matplotlib, cartopy, iris.quickplot,
# os, logging and further helpers from the same shared module):
import iris
from scipy import stats

from esmvaltool.diag_scripts.shared import (extract_variables,
                                            get_diagnostic_filename, io)


def _create_regression_file(tas_cube,
                            cube,
                            dataset_name,
                            cfg,
                            description=None):
    """Save regression plot as netcdf file for a given dataset."""
    var = cube.var_name
    reg = stats.linregress(tas_cube.data, cube.data)
    filename = f'{var}_regression_{dataset_name}'
    attrs = {
        'dataset': dataset_name,
        'regression_r_value': reg.rvalue,
        'regression_slope': reg.slope,
        'regression_interception': reg.intercept,
        'feedback_parameter': reg.slope,
    }
    attrs.update(cfg.get('output_attributes', {}))
    if description is not None:
        attrs['description'] = description
        filename += f"_{description.replace(' ', '_')}"
    if var in ('rtmt', 'rtnt'):
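        # Gregory method: rtnt ~ F + lambda * tas, so the regression slope is
        # the feedback parameter and the intercept the 4xCO2 forcing F;
        # ECS = -F / (2 * lambda), the factor 2 scaling 4xCO2 to 2xCO2 forcing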
        attrs['ECS'] = -reg.intercept / (2.0 * reg.slope)
    tas_coord = iris.coords.AuxCoord(
        tas_cube.data,
        **extract_variables(cfg, as_iris=True)['tas'])
    cube = iris.cube.Cube(cube.data,
                          attributes=attrs,
                          aux_coords_and_dims=[(tas_coord, 0)],
                          **extract_variables(cfg, as_iris=True)[var])
    netcdf_path = get_diagnostic_filename(filename, cfg)
    io.iris_save(cube, netcdf_path)
    return netcdf_path
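
A minimal, self-contained sketch of the Gregory regression behind the ECS
attribute above, using synthetic data (the forcing of 7.4 W m-2 and feedback
of -1.0 W m-2 K-1 are made-up illustration values):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
tas = np.linspace(0.5, 6.0, 150)                  # global mean tas anomaly / K
rtnt = 7.4 - 1.0 * tas + rng.normal(0, 0.3, 150)  # net TOA flux / W m-2
reg = stats.linregress(tas, rtnt)
ecs = -reg.intercept / (2.0 * reg.slope)          # recovers roughly 3.7 K
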
Example 2
def get_var(cfg):
    # get variable processed
    var = list(extract_variables(cfg).keys())
    assert len(var) == 1
    var = var[0]

    return var
Example 3
def write_data(cfg, hist_cubes, pi_cubes, ecs_cube):
    """Write netcdf file."""
    datasets = []
    data_ecs = []
    data_hist = []
    data_pi = []
    for dataset in list(hist_cubes):
        ecs = ecs_cube.extract(iris.Constraint(dataset=dataset))
        if ecs is None:
            raise ValueError(f"No ECS data for '{dataset}' available")
        datasets.append(dataset)
        data_ecs.append(ecs.data)
        data_hist.append(hist_cubes[dataset].data)
        data_pi.append(pi_cubes[dataset].data)

    # Create cube
    dataset_coord = iris.coords.AuxCoord(datasets, long_name='dataset')
    tas_hist_coord = iris.coords.AuxCoord(data_hist,
                                          attributes={'exp': 'historical'},
                                          **extract_variables(
                                              cfg, as_iris=True)['tas'])
    tas_picontrol_coord = iris.coords.AuxCoord(data_pi,
                                               attributes={'exp': 'piControl'},
                                               **extract_variables(
                                                   cfg, as_iris=True)['tas'])
    cube = iris.cube.Cube(data_ecs,
                          var_name='ecs',
                          long_name='Equilibrium Climate Sensitivity (ECS)',
                          aux_coords_and_dims=[(dataset_coord, 0),
                                               (tas_hist_coord, 0),
                                               (tas_picontrol_coord, 0)])

    # Save file
    path = get_diagnostic_filename('ch09_fig09_42a', cfg)
    io.iris_save(cube, path)
    return path
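
A minimal sketch of the cube layout produced above: per-dataset scalars become
the cube data, with matching auxiliary coordinates attached along dimension 0
(the names and values here are illustrative only):

import numpy as np
import iris.coords
import iris.cube

dataset_coord = iris.coords.AuxCoord(['MODEL-A', 'MODEL-B'],
                                     long_name='dataset')
ecs_cube = iris.cube.Cube(np.array([3.2, 4.1]),
                          var_name='ecs',
                          long_name='Equilibrium Climate Sensitivity (ECS)',
                          aux_coords_and_dims=[(dataset_coord, 0)])
print(ecs_cube.coord('dataset').points)  # ['MODEL-A' 'MODEL-B']
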
Example 4
def plot_ecs_regression(cfg, dataset_name, tas_cube, rtnt_cube, reg_stats):
    """Plot linear regression used to calculate ECS."""
    if not cfg['write_plots']:
        return (None, None)
    ecs = -reg_stats.intercept / (2 * reg_stats.slope)

    # Regression line
    x_reg = np.linspace(-1.0, 9.0, 2)
    y_reg = reg_stats.slope * x_reg + reg_stats.intercept

    # Plot data
    text = r'r = {:.2f}, $\lambda$ = {:.2f}, F = {:.2f}, ECS = {:.2f}'.format(
        reg_stats.rvalue, -reg_stats.slope, reg_stats.intercept, ecs)
    plot_path = get_plot_filename(dataset_name, cfg)
    plot.scatterplot(
        [tas_cube.data, x_reg],
        [rtnt_cube.data, y_reg],
        plot_path,
        plot_kwargs=[{
            'linestyle': 'none',
            'markeredgecolor': 'b',
            'markerfacecolor': 'none',
            'marker': 's',
        }, {
            'color': 'k',
            'linestyle': '-',
        }],
        save_kwargs={
            'bbox_inches': 'tight',
            'orientation': 'landscape',
        },
        axes_functions={
            'set_title': dataset_name,
            'set_xlabel': 'tas / ' + tas_cube.units.origin,
            'set_ylabel': 'rtnt / ' + rtnt_cube.units.origin,
            'set_xlim': [0.0, 8.0],
            'set_ylim': [-2.0, 10.0],
            'text': {
                'args': [0.05, 0.9, text],
                'kwargs': {
                    'transform': 'transAxes'
                },
            },
        },
    )

    # Write netcdf file for every plot
    tas_coord = iris.coords.AuxCoord(
        tas_cube.data,
        **extract_variables(cfg, as_iris=True)['tas'])
    attrs = {
        'model': dataset_name,
        'regression_r_value': reg_stats.rvalue,
        'regression_slope': reg_stats.slope,
        'regression_interception': reg_stats.intercept,
        'Climate Feedback Parameter': -reg_stats.slope,
        'ECS': ecs,
    }
    cube = iris.cube.Cube(rtnt_cube.data,
                          attributes=attrs,
                          aux_coords_and_dims=[(tas_coord, 0)],
                          **extract_variables(cfg, as_iris=True)['rtnt'])
    netcdf_path = get_diagnostic_filename('ecs_regression_' + dataset_name,
                                          cfg)
    io.iris_save(cube, netcdf_path)

    # Provenance
    provenance_record = get_provenance_record(
        f"Scatterplot between TOA radiance and global mean surface "
        f"temperature anomaly for 150 years of the abrupt 4x CO2 experiment "
        f"including linear regression to calculate ECS for {dataset_name}.")
    provenance_record.update({
        'plot_file': plot_path,
        'plot_types': ['scatter'],
    })

    return (netcdf_path, provenance_record)
Example 5
def main(cfg):
    # The config object is a dict of all the metadata from the pre-processor

    # get variable processed
    var = list(extract_variables(cfg).keys())
    assert len(var) == 1
    var = var[0]

    if var == "pr":
        rel_change = True
    else:
        rel_change = False

    # establish the time periods of our datasets
    start_years = list(group_metadata(cfg["input_data"].values(),
                                      "start_year"))
    base_start = min(start_years)
    fut_start = max(start_years)

    # first group datasets by project;
    # this creates a dict of datasets keyed by project (CMIP5, CMIP6 etc.)
    projects = group_metadata(cfg["input_data"].values(), "project")
    # how to uniquely identify a dataset varies by project: for CMIP it is
    # simply the dataset name; for CORDEX it is the combination of dataset and
    # driver (and possibly also domain, if we start adding those); it gets
    # more complex again if we add different ensembles
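    # (for illustration: group_metadata is assumed to return a dict mapping
    # each value of the given attribute to the list of matching metadata
    # dicts, e.g. {'CMIP5': [...], 'CORDEX': [...]})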

    # This section of the code loads and organises the data to be ready for plotting
    logger.info("Loading data")
    # empty dict to store results
    projections = {}
    model_lists = {}
    cordex_drivers = []
    # loop over projects
    for proj in projects:
        # we now have a list of all the data entries;
        # for CMIPs we can just group metadata again by dataset and work with that
        models = group_metadata(projects[proj], "dataset")

        # empty dict for results
        projections[proj] = {}
        # loop over the models
        for m in models:
            if proj[:6].upper() == "CORDEX":
                # then we need to go one deeper in the dictionary to deal with driving models
                drivers = group_metadata(models[m], "driver")
                projections[proj][m] = dict.fromkeys(drivers.keys())
                for d in drivers:
                    logger.info(f"Calculating anomalies for {proj} {m} {d}")
                    anoms = get_anomalies(drivers[d], base_start, fut_start,
                                          rel_change)
                    if anoms is None:
                        continue
                    projections[proj][m][d] = anoms
                    if proj not in model_lists:
                        model_lists[proj] = []
                    model_lists[proj].append(f"{m} {d}")
                    cordex_drivers.append(d)
            elif proj == "UKCP18":
                # go deeper to deal with ensembles and datasets
                # split UKCP into separate GCM and RCM entries
                proj_key = f"UKCP18 {m}"
                ensembles = group_metadata(models[m], "ensemble")
                projections[proj_key] = dict.fromkeys(ensembles.keys())
                for ens in ensembles:
                    logger.info(f"Calculating anomalies for {proj_key} {ens}")
                    anoms = get_anomalies(ensembles[ens], base_start,
                                          fut_start, rel_change)
                    if anoms is None:
                        continue
                    projections[proj_key][ens] = anoms
                    if proj_key not in model_lists:
                        model_lists[proj_key] = []
                    model_lists[proj_key].append(f"{proj_key} {ens}")
            else:
                logger.info(f"Calculating anomalies for {proj} {m}")
                anoms = get_anomalies(models[m], base_start, fut_start,
                                      rel_change)
                if anoms is None:
                    continue
                projections[proj][m] = anoms
                if proj not in model_lists:
                    model_lists[proj] = []
                model_lists[proj].append(f"{m}")
        # remove any empty categories (e.g. UKCP18, which has been split into
        # separate GCM and RCM entries)
        if projections[proj] == {}:
            del projections[proj]
    cordex_drivers = set(cordex_drivers)

    # this section of the code does the plotting;
    # we now have all the projections in the projections dictionary

    # now let's plot them
    # first we need to process the dictionary and move the data into a list of vectors
    # the projections object is the key one that contains all our data
    seasons = {0: "DJF", 1: "MAM", 2: "JJA", 3: "SON"}
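    # season numbers 0-3 correspond to iris's default meteorological seasons
    # ('djf', 'mam', 'jja', 'son'), assuming add_season_number was applied
    # during preprocessing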
    logger.info("Plotting")
    extent = (
        cfg["domain"]["start_longitude"] - 2,
        cfg["domain"]["end_longitude"] + 2,
        cfg["domain"]["start_latitude"] - 2,
        cfg["domain"]["end_latitude"] + 2,
    )
    for s in seasons.keys():
        # make directory
        try:
            os.mkdir(f"{cfg['plot_dir']}/{seasons[s]}")
        except FileExistsError:
            pass
        for p in projections:
            pdata = process_projections_dict(projections[p], s)

            for m in pdata:
                title = f"{p} {m} {seasons[s]} {var} change"
                plt.figure(figsize=(12.8, 9.6))
                ax = plt.axes(projection=ccrs.PlateCarree())
                ax.set_extent(extent)
                # set scales
                if var == "pr":
                    vmn = -50
                    vmx = 50
                    cmap = "brewer_RdYlBu_11"
                else:
                    vmn = 0
                    vmx = 5
                    cmap = "brewer_YlOrRd_09"
                qplt.pcolormesh(pdata[m], vmin=vmn, vmax=vmx, cmap=cmap)
                plt.title(title)
                ax.coastlines()
                ax.add_feature(cartopy.feature.BORDERS, linestyle=":")
                plt.savefig(
                    f"{cfg['plot_dir']}/{seasons[s]}/{p}_{m}_map_{seasons[s]}.png"
                )
                plt.close()

    # print all datasets used
    print("Input models for plots:")
    for p in model_lists.keys():
        print(f"{p}: {len(model_lists[p])} models")
        print(model_lists[p])
        print("")
Example 6
def test_extract_variables(as_iris):

    cfg = {
        'input_data': {
            'file1.nc': {
                'short_name': 'ta',
                'standard_name': 'air_temperature',
                'long_name': 'Air Temperature',
                'units': 'K',
            },
            'file2.nc': {
                'short_name': 'ta',
                'standard_name': 'air_temperature',
                'long_name': 'Air Temperature',
            },
            'file3.nc': {
                'short_name': 'pr',
                'standard_name': 'precipitation_flux',
                'long_name': 'Precipitation',
                'extra_attribute': 1,
            },
            'file4.nc': {
                'short_name': 'toz',
                'standard_name': '',
                'long_name': 'Total Ozone Column',
            },
        }
    }

    if as_iris:
        expected = {
            'ta': {
                'var_name': 'ta',
                'standard_name': 'air_temperature',
                'long_name': 'Air Temperature',
                'units': 'K',
            },
            'pr': {
                'var_name': 'pr',
                'standard_name': 'precipitation_flux',
                'long_name': 'Precipitation',
            },
            'toz': {
                'var_name': 'toz',
                'standard_name': None,
                'long_name': 'Total Ozone Column',
            },
        }
    else:
        expected = {
            'ta': {
                'short_name': 'ta',
                'standard_name': 'air_temperature',
                'long_name': 'Air Temperature',
                'units': 'K',
            },
            'pr': {
                'short_name': 'pr',
                'standard_name': 'precipitation_flux',
                'long_name': 'Precipitation',
            },
            'toz': {
                'short_name': 'toz',
                'standard_name': '',
                'long_name': 'Total Ozone Column',
            },
        }

    result = shared.extract_variables(cfg, as_iris)

    assert result == expected
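
# The as_iris argument is presumably injected by pytest parametrization,
# e.g. (hypothetical): @pytest.mark.parametrize('as_iris', [True, False])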
Example 7
def main(cfg):
    # The config object is a dict of all the metadata from the pre-processor

    # get variable processed
    var = list(extract_variables(cfg).keys())
    assert len(var) == 1
    var = var[0]

    if var == "pr":
        rel_change = True
    else:
        rel_change = False

    # first group datasets by project;
    # this creates a dict of datasets keyed by project (CMIP5, CMIP6 etc.)
    projects = group_metadata(cfg["input_data"].values(), "project")
    # how to uniquely identify a dataset varies by project: for CMIP it is
    # simply the dataset name; for CORDEX it is the combination of dataset and
    # driver (and possibly also domain, if we start adding those); it gets
    # more complex again if we add different ensembles

    # This section of the code loads and organises the data to be ready for plotting
    logger.info("Loading data")
    # empty dict to store results
    projections = {}
    model_lists = {}
    cordex_drivers = []
    # loop over projects
    for proj in projects:
        # we now have a list of all the data entries;
        # for CMIPs we can just group metadata again by dataset and work with that
        models = group_metadata(projects[proj], "dataset")

        # fold supplementary projects into their parent project
        if proj == 'non-cordex-rcm':
            proj = 'CORDEX'
        if proj == 'non-cmip5-gcm':
            proj = 'CMIP5'

        # empty dict for results
        if proj not in projections:
            projections[proj] = {}

        proj_key = proj
        # loop over the models
        for m in models:
            if proj == "CORDEX":
                # then we need to go one deeper in the dictionary to deal with driving models
                drivers = group_metadata(models[m], "driver")
                projections[proj][m] = dict.fromkeys(drivers.keys())
                for d in drivers:
                    logger.info(f"Calculating anomalies for {proj} {m} {d}")
                    anoms = get_anomalies(drivers[d], rel_change)
                    if anoms is None:
                        continue
                    projections[proj][m][d] = anoms
                    if proj not in model_lists:
                        model_lists[proj] = []
                    model_lists[proj].append(f"{m} {d}")

                    # fix shorthand driver names
                    if d == 'HadGEM':
                        d = 'MOHC-HadGEM2-ES'
                    elif d == 'MPI':
                        d = 'MPI-M-MPI-ESM-LR'

                    if proj == "CORDEX":
                        cordex_drivers.append(d)
            elif proj == "UKCP18":
                # go deeper to deal with ensembles and datasets
                # split UKCP into separate GCM and RCM entries
                proj_key = f"UKCP18 {m}"
                ensembles = group_metadata(models[m], "ensemble")
                projections[proj_key] = dict.fromkeys(ensembles.keys())
                for ens in ensembles:
                    logger.info(f"Calculating anomalies for {proj_key} {ens}")
                    anoms = get_anomalies(ensembles[ens], rel_change)
                    if anoms is None:
                        continue
                    projections[proj_key][ens] = anoms
                    if proj_key not in model_lists:
                        model_lists[proj_key] = []
                    model_lists[proj_key].append(f"{proj_key} {ens}")
            elif "cordex-cpm" in proj:
                # we need to split by domain here, as the same model spec is
                # sometimes used in multiple domains
                domains = group_metadata(models[m], "domain")
                proj_key = "cordex-cpm"
                projections[proj_key][m] = dict.fromkeys(domains.keys())
                for dom in domains:
                    logger.info(
                        f"Calculating anomalies for {proj_key} {dom} {m}")
                    anoms = get_anomalies(domains[dom], rel_change)
                    if anoms is None:
                        continue
                    projections[proj_key][m][dom] = anoms
                    if proj_key not in model_lists:
                        model_lists[proj_key] = []
                    model_lists[proj_key].append(f"{dom} {m}")
            else:
                logger.info(f"Calculating anomalies for {proj} {m}")
                anoms = get_anomalies(models[m], rel_change)
                if anoms is None:
                    continue
                projections[proj][m] = anoms
                if proj not in model_lists:
                    model_lists[proj] = []
                model_lists[proj].append(f"{m}")
        # remove any empty categories (e.g. UKCP18, which has been split into
        # separate GCM and RCM entries)
        if projections[proj] == {}:
            del projections[proj]

    cordex_drivers = set(cordex_drivers)

    # create two extra subsets containing CORDEX drivers, and CPM drivers
    projections['CORDEX_drivers'] = {}
    cmip5_driving_models = []
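    # remove_institute_from_driver is assumed to strip the institute prefix,
    # e.g. 'MOHC-HadGEM2-ES' -> 'HadGEM2-ES', so drivers match CMIP5 dataset
    # names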
    for m in cordex_drivers:
        cmip5_driving_models.append(remove_institute_from_driver(m))

    for m in projections['CMIP5']:
        if m in cmip5_driving_models:
            projections['CORDEX_drivers'][m] = projections['CMIP5'][m]

    projections['CPM_drivers'] = {}
    for rcm in projections['CORDEX']:
        for d in projections['CORDEX'][rcm]:
            if f'{rcm} {d}' in list(CPM_DRIVERS.values()):
                projections['CPM_drivers'][f'{rcm} {d}'] = projections[
                    'CORDEX'][rcm][d]

    # compute multi model means
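    # NestedDictValues is assumed to yield every leaf cube of the (possibly
    # nested) per-project dict, so the mean is taken over all members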
    for p in projections:
        mm_mean = compute_multi_model_stats(
            list(NestedDictValues(projections[p])), iris.analysis.MEAN)
        projections[p]['mean'] = mm_mean

    # compute regridded versions for CORDEX and CPMs
    for p in projections:
        grid = None
        if p == 'CORDEX':
            grid = projections['CORDEX_drivers']['mean']
            scheme = 'area_weighted'
        elif p == 'cordex-cpm':
            grid = projections['CPM_drivers']['mean']
            scheme = 'area_weighted'

        if grid is not None:
            src = projections[p]['mean']
            regrid_mean = regrid(src, grid, scheme)
            projections[p]['mean_rg'] = regrid_mean

    # compute regrid diffs
    for p in projections:
        if p == 'CORDEX':
            diff = projections[p]['mean_rg'] - projections['CORDEX_drivers'][
                'mean']
            projections[p]['diff_rg'] = diff
        elif p == 'cordex-cpm':
            diff = projections[p]['mean_rg'] - projections['CPM_drivers'][
                'mean']
            projections[p]['diff_rg'] = diff

    # this section of the code does the plotting;
    # we now have all the projections in the projections dictionary

    # now let's plot them
    # first we need to process the dictionary and move the data into a list of vectors
    # the projections object is the key one that contains all our data
    seasons = {0: "DJF", 1: "MAM", 2: "JJA", 3: "SON"}
    logger.info("Plotting")
    extent = (
        cfg["domain"]["start_longitude"] - 2,
        cfg["domain"]["end_longitude"] + 2,
        cfg["domain"]["start_latitude"] - 2,
        cfg["domain"]["end_latitude"] + 2,
    )
    for s in seasons.keys():
        # make directory
        try:
            os.mkdir(f"{cfg['plot_dir']}/{seasons[s]}")
        except FileExistsError:
            pass
        for p in projections:
            pdata = process_projections_dict(projections[p], s)

            for m in pdata:
                # don't plot driving model data twice
                if '_drivers' in p:
                    if m != 'mean':
                        continue

                title = f"{p} {m} {seasons[s]} {var} change"
                plt.figure(figsize=(12.8, 9.6))
                ax = plt.axes(projection=ccrs.PlateCarree())
                plot_map(pdata[m], extent, var, ax, True)
                plt.title(title)
                logger.info(f'Saving plot for {p} {m} {seasons[s]}')
                plt.savefig(
                    f"{cfg['plot_dir']}/{seasons[s]}/{p}_{m}_map_{seasons[s]}.png"
                )
                plt.close()

                # save calculated anomaly data, in case we want to work with it later
                # make directory
                try:
                    os.mkdir(f"{cfg['work_dir']}/{seasons[s]}")
                except FileExistsError:
                    pass

                iris.save(
                    pdata[m],
                    f"{cfg['work_dir']}/{seasons[s]}/{p}_{m}_anom_{seasons[s]}.nc"
                )

        # now make panel plots for the mean data
        # only if we have CPM data though
        if 'cordex-cpm' in projections:
            scon = iris.Constraint(season_number=s)
            logger.info(f'Making {seasons[s]} panel plot')
            plt.figure(figsize=(12.8, 9.6))
            # plots should include: all CMIP5, CORDEX drivers, CORDEX,
            # CPM drivers, and CPM
            ax = plt.subplot(331, projection=ccrs.PlateCarree())
            cmesh = plot_map(projections['CMIP5']['mean'].extract(scon),
                             extent, var, ax)
            plt.title('CMIP5')

            ax = plt.subplot(334, projection=ccrs.PlateCarree())
            plot_map(projections['CORDEX_drivers']['mean'].extract(scon),
                     extent, var, ax)
            plt.title('CORDEX driving models')

            ax = plt.subplot(335, projection=ccrs.PlateCarree())
            plot_map(projections['CORDEX']['mean'].extract(scon), extent, var,
                     ax)
            plt.title('CORDEX')

            # plot diff of CORDEX to CMIP
            ax = plt.subplot(336, projection=ccrs.PlateCarree())
            cmesh_diff = plot_map(
                projections['CORDEX']['diff_rg'].extract(scon), extent,
                f'{var}_diff', ax)
            plt.title('CORDEX - CMIP5 diff')

            ax = plt.subplot(337, projection=ccrs.PlateCarree())
            plot_map(projections['CPM_drivers']['mean'].extract(scon), extent,
                     var, ax)
            plt.title('CPM driving models')

            ax = plt.subplot(338, projection=ccrs.PlateCarree())
            plot_map(projections['cordex-cpm']['mean'].extract(scon), extent,
                     var, ax)
            plt.title('CPM')

            # plot diff of CPM to CORDEX
            ax = plt.subplot(339, projection=ccrs.PlateCarree())
            plot_map(projections['cordex-cpm']['diff_rg'].extract(scon),
                     extent, f'{var}_diff', ax)
            plt.title('CPM - CORDEX diff')

            # add colorbars
            ax = plt.subplot(332)
            ax.axis("off")
            plt.colorbar(cmesh, orientation="horizontal")

            ax = plt.subplot(333)
            ax.axis("off")
            plt.colorbar(cmesh_diff, orientation="horizontal")

            plt.suptitle(f'{seasons[s]} {var} change')
            plt.savefig(
                f"{cfg['plot_dir']}/{seasons[s]}/all_means_map_{seasons[s]}.png"
            )

    # print all datasets used
    print("Input models for plots:")
    for p in model_lists.keys():
        print(f"{p}: {len(model_lists[p])} models")
        print(model_lists[p])
        print("")