Example #1
0
def calib_land_TRENDYv7(mod_region,
                        biome_specific_process=True,
                        path_in='calib_data/',
                        path_out='input_data/parameters/',
                        **useless):
    '''
    Calibrate preindustrial land carbon-cycle parameters on the TRENDYv7 model ensemble.

    Input:
    ------
    mod_region (str)                regional aggregation name

    Output:
    -------
    Par (xr.Dataset)                calibrated parameters, also saved as
                                    'land_TRENDYv7__' + mod_region + '.nc' in path_out

    Options:
    --------
    biome_specific_process (bool)   whether wildfire, harvest and grazing are restricted
                                    to specific biomes; default = True
    path_in (str)                   folder containing the calibration data;
                                    default = 'calib_data/'
    path_out (str)                  folder in which the parameters are saved;
                                    default = 'input_data/parameters/'
    '''

    ## load original data
    with xr.open_dataset(path_in + 'land_TRENDYv7.nc') as TMP:
        ds = TMP.sel(sim=['S0', 'S4']).sel(weight='area3', drop=True).load()

    ## aggregate over regions
    ds = aggreg_region(ds, mod_region)

    ## separate natural and anthropogenic biomes:
    ## natural biomes are taken from the control run (S0) averaged over all years,
    ## anthropogenic biomes from the S4 run averaged over 1990-2010
    ds_nat = ds.sel(bio_land=['Forest', 'Non-Forest']).sel(
        sim='S0', drop=True).mean('year')
    ds_ant = ds.sel(bio_land=['Cropland', 'Pasture', 'Urban']).sel(
        sim='S4', drop=True).sel(year=slice(1990, 2010)).mean('year')
    ds2 = xr.merge([ds_nat, ds_ant]).sel(bio_land=['Forest', 'Non-Forest'] +
                                         ['Cropland', 'Pasture', 'Urban'])

    ## test existing variables per model
    ## (a variable 'exists' for a model if its global total is defined and non-zero;
    ## min_count=1 keeps the total as NaN when all summed values are missing)
    exist = xr.Dataset()
    for var in [
            'area', 'npp', 'cVeg', 'cLitter', 'cSoil', 'fFire', 'fHarvest',
            'fGrazing', 'fVegLitter', 'fVegSoil', 'fLitterSoil', 'rh', 'fDOC',
            'cRoot', 'cWood'
    ]:
        exist[var] = ds2[var].sum('reg_land', min_count=1).sum(
            'bio_land', min_count=1).notnull() & (ds2[var].sum(
                'reg_land', min_count=1).sum('bio_land', min_count=1) != 0)

    ## test critical variables
    for var in ['npp', 'cVeg', 'cSoil']:
        if exist[var].sum() < len(exist.model):
            raise RuntimeError(
                "'{0}' must be defined for all models!".format(var))

    ## tests whether 3- or 2-box model
    is_3box = exist.cLitter & exist.fVegLitter & exist.fLitterSoil

    ## calculate Fsoil2, Fmort and Rh depending on existing data
    fSoilIn = (ds2.fVegSoil.fillna(0) +
               ds2.fLitterSoil.fillna(0)).where(ds2.fVegSoil.notnull()
                                                | ds2.fLitterSoil.notnull())
    fMort = (ds2.fVegLitter.fillna(0) +
             ds2.fVegSoil.fillna(0)).where(ds2.fVegLitter.notnull()
                                           | ds2.fVegSoil.notnull())
    fMort = fMort.where(
        exist.fVegLitter | exist.fVegSoil, ds2.npp - ds2.fFire.fillna(0) -
        ds2.fHarvest.fillna(0) - ds2.fGrazing.fillna(0))
    Rh = ds2.rh.where(
        exist.rh, ds2.npp - ds2.fFire.fillna(0) - ds2.fHarvest.fillna(0) -
        ds2.fGrazing.fillna(0) - ds2.fDOC.fillna(0))
    cSoilTot = (ds2.cLitter.fillna(0) +
                ds2.cSoil.fillna(0)).where(ds2.cLitter.notnull()
                                           | ds2.cSoil.notnull())

    ## initialisation of final array
    Par = xr.Dataset()

    ## areal net primary productivity
    Par['npp_0'] = ds2.npp / ds2.area

    ## wildfire emission rate
    Par['igni_0'] = (ds2.fFire / ds2.cVeg).where(exist.fFire)

    ## harvest index
    Par['harv_0'] = (ds2.fHarvest / ds2.cVeg).where(exist.fHarvest)

    ## grazing rate
    Par['graz_0'] = (ds2.fGrazing / ds2.cVeg).where(exist.fGrazing)

    ## mortality rates
    Par['mu1_0'] = (ds2.fVegLitter / ds2.cVeg).where(is_3box, 0)
    Par['mu2_0'] = (ds2.fVegSoil.where(exist.fVegSoil, 0) / ds2.cVeg).where(
        is_3box, 0) + (fMort / ds2.cVeg).where(~is_3box, 0)

    ## metabolization rate
    Par['muM_0'] = (ds2.fLitterSoil / ds2.cLitter).where(is_3box, 0)

    ## respiration rates
    Par['rho1_0'] = ((ds2.fVegLitter - ds2.fLitterSoil) / ds2.cLitter).where(
        is_3box, 0)
    Par['rho2_0'] = (fSoilIn / ds2.cSoil).where(
        is_3box, 0) + (Rh / cSoilTot).where(~is_3box, 0)

    ## above-ground biomass fraction
    Par['p_agb'] = 1 - ds2.cRoot / ds2.cVeg

    ## additional processing (some conditional)
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')

        ## fill missing anthropogenic biomes:
        ## using Non-Forest parameters (except Urban NPP set to zero)
        not_fill = Par.npp_0.notnull()
        for var in Par:
            Par[var] = Par[var].where(
                not_fill, Par[var].sel(bio_land='Non-Forest', drop=True))
        Par['npp_0'] = Par.npp_0.where((Par.bio_land != 'Urban') | not_fill, 0)

        ## fill 'Unknown' region if needed:
        ## using average of other regions
        if 'Unknown' in Par.reg_land_long_name:
            for var in Par:
                Par[var] = Par[var].where(
                    Par[var].notnull(), Par[var].where(
                        Par.reg_land_long_name != 'Unknown').mean('reg_land'))

        ## assume biome-specific processes:
        ## no wildfire on Urban, harvest only on Cropland, grazing only on Pasture
        if biome_specific_process:
            Par['igni_0'] = Par.igni_0.where(Par.bio_land != 'Urban',
                                             0).where(exist.fFire)
            Par['harv_0'] = Par.harv_0.where(Par.bio_land == 'Cropland',
                                             0).where(exist.fHarvest)
            Par['graz_0'] = Par.graz_0.where(Par.bio_land == 'Pasture',
                                             0).where(exist.fGrazing)

    ## add units
    Par['npp_0'].attrs['units'] = 'PgC yr-1 Mha-1'
    Par['p_agb'].attrs['units'] = '1'
    for var in [
            'igni_0', 'harv_0', 'graz_0', 'mu1_0', 'mu2_0', 'muM_0', 'rho1_0',
            'rho2_0'
    ]:
        Par[var].attrs['units'] = 'yr-1'

    ## create model option axes
    Par['igni_0'] = Par.igni_0.dropna('model', how='all').rename(
        {'model': 'mod_Efire_preind'})
    Par['harv_0'] = Par.harv_0.dropna('model', how='all').rename(
        {'model': 'mod_Eharv_preind'})
    Par['graz_0'] = Par.graz_0.dropna('model', how='all').rename(
        {'model': 'mod_Egraz_preind'})
    Par['p_agb'] = Par.p_agb.dropna('model', how='all').rename(
        {'model': 'mod_Eluc_agb'})
    Par = Par.rename({'model': 'mod_Fland_preind'})

    ## create multi-model mean values
    mmm = _collapse_model_axes(Par, [
        'mod_Fland_preind', 'mod_Efire_preind', 'mod_Eharv_preind',
        'mod_Egraz_preind', 'mod_Eluc_agb'
    ], 'mean_TRENDYv7')

    ## create off values (zeroed parameters used to switch processes off)
    off = _collapse_model_axes(
        Par, ['mod_Efire_preind', 'mod_Eharv_preind', 'mod_Egraz_preind'],
        'off_',
        zero=True)

    ## merge, save and return
    Par = Par.combine_first(mmm).combine_first(off)
    Par.to_netcdf(path_out + 'land_TRENDYv7__' + mod_region + '.nc')
    return Par


def _collapse_model_axes(Par, mod_axes, label, zero=False):
    '''
    Average 'Par' over each model axis listed in 'mod_axes' and put the result
    back on that axis under the single coordinate value 'label'; if 'zero' is
    True, the collapsed values are set to zero instead of the multi-model mean.
    '''
    out = xr.Dataset()
    for mod in mod_axes:
        mean = xr.Dataset({
            var: Par[var].mean(mod)
            for var in Par if mod in Par[var].dims
        })
        if zero:
            mean = mean * 0
        ## keyword unpacking (rather than a dict argument) is used because it
        ## is accepted by every xarray version, so no version fallback is needed
        out = xr.merge(
            [out, mean.assign_coords(**{mod: label}).expand_dims(mod, -1)])
    return out
Example #2
0
def load_landuse_scen(mod_region,
                      datasets=['LUH1', 'LUH2'],
                      LCC='all',
                      **useless):
    '''
    Function to load and format primary scenario land-use datasets, taken from the 'input_data' folder.
    
    Input:
    ------
    mod_region (str)        regional aggregation name

    Output:
    -------
    For (xr.Dataset)        dataset that contains the loaded datasets aggregated over 'mod_region'

    Options:
    --------
    datasets (list)         names of primary datasets to be loaded;
                            default = ['LUH1', 'LUH2']
    LCC (str)               which of 'gross' or 'net' land-cover transitions should be kept;
                            unless both are kept ('all'), the driver is renamed to 'd_Acover';
                            default = 'all'
    '''

    ## loop over requested primary datasets
    loaded, units = [], {}
    for data in datasets:

        ## load dataset if available
        ds = load_existing('land-use_' + data + '.nc')

        ## record units on first occurrence, fail on any later mismatch
        for VAR in ds:
            if 'units' in ds[VAR].attrs:
                ref = units.setdefault(VAR, ds[VAR].units)
                if ref != ds[VAR].units:
                    raise RuntimeWarning(
                        'inconsistent units: {0} (internal dic) vs. {1} ({2} in {3})'
                        .format(ref, ds[VAR].units, "'" + VAR + "'",
                                "'" + data + "'"))

        ##---

        ## keep scenario runs only (discard historical ones)
        if data in ['LUH1', 'LUH2']:
            kept = [sc for sc in ds.scen.values if 'historical' not in sc]
            ds = ds.sel(scen=kept).dropna('year', how='all')

        ##---

        ## interpolate onto a yearly time axis when years are not contiguous
        years = ds.year.values
        if (np.diff(years) != 1).any():
            ds = ds.interp(
                {'year': np.arange(int(years[0]),
                                   int(years[-1]) + 1, 1)})

        ## aggregate onto model regions
        if 'reg_iso' in ds.coords:
            ds = aggreg_region(ds, mod_region)
        else:
            ds = aggreg_region(ds,
                               mod_region,
                               old_axis='region',
                               dataset=data)

        ## stack for later merging
        loaded.append(ds)
        del ds

    ## merge everything and rename the scenario dimension
    For = xr.merge(loaded)
    For = For.rename({'scen': 'scen_LULCC'})

    ## enforce dimension order
    dims = ['year', 'reg_land', 'bio_land', 'bio_from', 'bio_to']
    dims += [var for var in For.coords if 'scen' in var]
    For = For.transpose(*dims)

    ## reapply recorded units
    for VAR in For:
        if VAR in units:
            For[VAR].attrs['units'] = units[VAR]

    ## drop initial land area, then return with requested net/gross LCC selection
    For = For.drop('Aland_0')
    if LCC == 'net':
        return For.rename({'d_Anet': 'd_Acover'}).drop('d_Agross')
    elif LCC == 'gross':
        return For.rename({'d_Agross': 'd_Acover'}).drop('d_Anet')
    return For
Example #3
0
def load_emissions_hist(mod_region,
                        datasets=[
                            'ACCMIP', 'CDIAC', 'CEDS', 'EDGAR-HYDEv13',
                            'EDGAR-HYDEv14', 'EDGARv42', 'EDGARv42-FT2010',
                            'EDGARv432', 'EDGARv432-FT2016', 'EPA',
                            'Meinshausen_2011', 'PRIMAP'
                        ],
                        dropped_species=['PM10', 'PM2p5'],
                        **useless):
    '''
    Function to load and format primary historical emissions datasets, taken from the 'input_data' folder.
    
    Input:
    ------
    mod_region (str)            regional aggregation name

    Output:
    -------
    For (xr.Dataset)            dataset that contains the loaded datasets aggregated over 'mod_region'

    Options:
    --------
    datasets (list)             names of primary datasets to be loaded;
                                default = ['ACCMIP', 'CDIAC', 'CEDS', 'EDGAR-HYDEv13', 'EDGAR-HYDEv14', 
                                           'EDGARv42', 'EDGARv42-FT2010', 'EDGARv432', 'EDGARv432-FT2016', 
                                           'EPA', 'Meinshausen_2011', 'PRIMAP']
    dropped_species (list)      species to be excluded from the loaded datasets;
                                default = ['PM10', 'PM2p5']
    '''

    ## list of missing halogenated species taken from Meinshausen_2011
    missing_halo = [
        'CFC-11', 'CFC-12', 'CFC-113', 'CFC-114', 'CFC-115', 'CCl4', 'CH3CCl3',
        'HCFC-22', 'Halon-1211', 'Halon-1202', 'Halon-1301', 'Halon-2402',
        'CH3Br', 'CH3Cl'
    ]

    ## dictionaries for ignoring sectors
    ## (sector codes are dataset-specific; keys must match the 'datasets' names)
    ## CO2 emissions
    CO2_ignored_sectors = {
        'ACCMIP': ['agr', 'awb', 'for', 'gra', 'wst'],
        'CEDS': [
            '3B_Manure-management', '3D_Rice-Cultivation', '3D_Soil-emissions',
            '3E_Enteric-fermentation', '3I_Agriculture-other',
            '5A_Solid-waste-disposal', '5C_Waste-combustion',
            '5D_Wastewater-handling', '5E_Other-waste-handling'
        ],
        'EDGAR-HYDEv13': ['agr', 'bfc', 'liv', 'sav', 'def', 'awb', 'lfl'],
        'EDGAR-HYDEv14': ['BF3', 'AGL', 'ANM', 'SAV', 'DEF', 'AGR', 'LAN'],
        'EDGARv42': [
            '4A', '4B', '4C', '4D1', '4D2', '4D3', '4D4', '4E', '4F', '5A',
            '5C', '5D', '5F', '5F1', '5F2', '5FL', '5FL1', '6A', '6B', '6C',
            '6D'
        ],
        'EDGARv42-FT2010': ['5A', '5C', '5D', '5F2'],
        'EDGARv432': [
            '4A', '4B', '4C', '4D1', '4D2', '4D3', '4D4', '4F', '6A', '6B',
            '6C', '6D'
        ],
        'EDGARv432-FT2016': [],
        'EPA': [
            'agr1', 'agr2', 'agr3', 'agr4', 'agr5', 'was1', 'was2', 'was3',
            'was4'
        ],
        'PRIMAP': ['4', '5', '6']
    }
    ## non-CO2 emissions
    nonCO2_ignored_sectors = {
        'ACCMIP': ['for', 'gra'],
        'CEDS': [],
        'EDGAR-HYDEv13': ['sav', 'def'],
        'EDGAR-HYDEv14': ['SAV', 'DEF'],
        'EDGARv42':
        ['4E', '5A', '5C', '5D', '5F', '5F1', '5F2', '5FL', '5FL1'],
        'EDGARv42-FT2010': ['4E', '5A', '5C', '5D', '5F2'],
        'EDGARv432': [],
        'EDGARv432-FT2016': [],
        'EPA': [
            'agr5'
        ],  # slightly inconsistent as mixing e.g. agricultural waste burning & savannah burning
        'PRIMAP': ['5']
    }

    ## main loading loop
    For0 = []
    units = {}
    for data in datasets:

        ## load data if available
        For1 = load_existing('emissions_' + data + '.nc')

        ## get and check units
        for VAR in For1:
            if 'units' in For1[VAR].attrs:
                if VAR not in units.keys():
                    units[VAR] = For1[VAR].units
                else:
                    if units[VAR] != For1[VAR].units:
                        # NOTE(review): raising a Warning subclass stops execution
                        # here; presumably a hard failure is intended — confirm
                        raise RuntimeWarning(
                            'inconsistent units: {0} (internal dic) vs. {1} ({2} in {3})'
                            .format(units[VAR], For1[VAR].units,
                                    "'" + VAR + "'", "'" + data + "'"))

        ##---

        ## take only historical period
        if data in ['ACCMIP', 'Meinshausen_2011']:
            For1 = For1.sel(scen='historical', drop=True).dropna('year',
                                                                 how='all')

        ## take only national-based data
        if data in ['CDIAC']:
            For1 = For1.sel(data='national', drop=True)

        ## aggregate over fuels
        ## (min_count=1 keeps the total as NaN when all summed values are missing)
        if data in ['CDIAC']:
            For1 = For1.sum('fuel', min_count=1)

        ## aggregate over sectors (ignoring some)
        if data in [
                'ACCMIP', 'CEDS', 'EDGAR-HYDEv13', 'EDGAR-HYDEv14', 'EDGARv42',
                'EDGARv42-FT2010', 'EDGARv432', 'EDGARv432-FT2016', 'EPA',
                'PRIMAP'
        ]:
            ## selection and aggregation
            ## (CO2 and non-CO2 species use different ignored-sector lists)
            for VAR in For1:
                if VAR == 'E_CO2':
                    For1['E_CO2'] = For1['E_CO2'].sel(sector=[
                        sec for sec in For1.sector.values
                        if sec not in CO2_ignored_sectors[data]
                    ]).sum('sector', min_count=1)
                else:
                    For1[VAR] = For1[VAR].sel(sector=[
                        sec for sec in For1.sector.values
                        if sec not in nonCO2_ignored_sectors[data]
                    ]).sum('sector', min_count=1)
            ## dropping useless coords
            For1 = For1.drop('sector')
            if 'sector_long_name' in For1.coords:
                For1 = For1.drop('sector_long_name')

        ## take only missing halogenated species
        if data in ['Meinshausen_2011']:
            For1 = For1.drop([VAR for VAR in For1 if VAR != 'E_Xhalo'])
            For1 = For1.sel(spc_halo=missing_halo)

        ## put global data on regional axis
        ## (reg_iso 999 presumably denotes 'global'; confirm against region defs)
        if data in ['Meinshausen_2011']:
            For1 = For1.expand_dims('reg_iso', -1).assign_coords(reg_iso=[999])

        ##---

        ## interpolate to yearly data
        if not all(np.diff(For1.year.values) == 1):
            For1 = For1.interp({
                'year':
                np.arange(int(For1.year[0]),
                          int(For1.year[-1]) + 1, 1)
            })

        ## rename CO2 emissions
        if 'E_CO2' in For1:
            For1 = For1.rename({'E_CO2': 'Eff'})

        ## aggregate to model regions
        if 'reg_iso' in For1.coords:
            For1 = aggreg_region(For1, mod_region)
        else:
            For1 = aggreg_region(For1,
                                 mod_region,
                                 old_axis='region',
                                 dataset=data)

        ## append to final list (with new dimension)
        For0.append(For1.expand_dims('data', -1).assign_coords(data=[data]))
        del For1

    ## merge into one xarray
    For0 = xr.merge(For0)

    ## create one data axis per driver
    ## (a dataset is kept on a driver's axis only if it provides non-NaN data for it)
    For = xr.Dataset()
    for VAR in For0:
        TMP = [
            For0[VAR].sel(data=data).rename({'data': 'data_' + VAR})
            for data in For0.data.values
            if not np.isnan(For0[VAR].sel(data=data).sum(min_count=1))
        ]
        For[VAR] = xr.concat(TMP, dim='data_' + VAR)
        del TMP

    ## order dimensions
    For = For.transpose(*(['year', 'reg_land', 'spc_halo'] +
                          [var for var in For.coords if 'data' in var]))

    ## drop requested species
    ## (scan both data variables and coordinates for names containing a dropped species)
    for vars_then_coords in [For, For.coords]:
        for VAR in vars_then_coords:
            if any([spc in VAR for spc in dropped_species]):
                For = For.drop(VAR)

    ## reapply units
    for VAR in For:
        if VAR in units.keys():
            For[VAR].attrs['units'] = units[VAR]

    ## return
    return For
Example #4
0
def load_emissions_scen(
        mod_region,
        datasets=['Meinshausen_2011', 'RCPdb', 'SRES', 'ScenarioMIP'],
        all_SRES=False,
        all_SSPdb=False,
        dropped_species=['CCS'],
        Xhalo_offset={
            'CF4': {},
            'C2F6': {},
            'HFC-23': {},
            'CH3Br': {},
            'CH3Cl': {
                'RCPdb': 3100.211,
                'Meinshausen_2011': 3100.211
            }
        },
        **useless):
    '''
    Function to load and format primary scenario emissions datasets, taken from the 'input_data' folder.
    
    Input:
    ------
    mod_region (str)            regional aggregation name

    Output:
    -------
    For (xr.Dataset)            dataset that contains the loaded datasets tentatively aggregated over 'mod_region'

    Options:
    --------
    datasets (list)             names of primary datasets to be loaded;
                                default = ['Meinshausen_2011', 'RCPdb', 'SRES', 'ScenarioMIP']
    all_SRES (bool)             whether to take all SRES scenarios (if loaded) or just markers;
                                default = False
    all_SSPdb (bool)            whether to take all SSP database scenarios (if loaded) or just markers;
                                default = False
    dropped_species (list)      species to be excluded from the loaded datasets;
                                default = ['CCS']
    Xhalo_offset (dict)         how the offset by Xhalo preindustrial emissions is handled;
                                keys are species whose emissions must be offset;
                                values are another dict being:
                                    either empty, in which case the offset is made on RCP2.6;
                                    or whose keys are dataset names and values are floats, for offset by the specified values;
                                default = {'CF4':{}, 'C2F6':{}, 'HFC-23':{}, 'CH3Br':{},
                                           'CH3Cl':{'RCPdb':3100.211, 'Meinshausen_2011':3100.211}}
    '''

    ## dictionaries for ignoring sectors
    ## non-CO2 emissions
    nonCO2_ignored_sectors = {
        'RCPdb': ['for', 'gra'],
        'ScenarioMIP': ['Forest Burning', 'Grassland Burning', 'Peat Burning']
    }

    ## main loading loop
    For0 = []
    units = {}
    for data in datasets:

        ## load data if available
        For1 = load_existing('emissions_' + data + '.nc')

        ## get and check units
        for VAR in For1:
            if 'units' in For1[VAR].attrs:
                if VAR not in units.keys():
                    units[VAR] = For1[VAR].units
                else:
                    if units[VAR] != For1[VAR].units:
                        # NOTE(review): raising a Warning subclass stops execution
                        # here; presumably a hard failure is intended — confirm
                        raise RuntimeWarning(
                            'inconsistent units: {0} (internal dic) vs. {1} ({2} in {3})'
                            .format(units[VAR], For1[VAR].units,
                                    "'" + VAR + "'", "'" + data + "'"))

        ##---

        ## take only halogenated species over right scenario
        ## note: all RCPs are supposed to be the same, however some inconsistent values appear but not in RCP2.6
        if data in ['Meinshausen_2011']:
            For1 = For1['E_Xhalo'].to_dataset(name='E_Xhalo')
            For1 = For1.sel(scen='RCP2.6').dropna('year', how='all')
            For1 = For1.assign_coords(scen='CMIP5').expand_dims('scen', -1)

        ## put global data on regional axis
        ## (reg_iso 999 presumably denotes 'global'; confirm against region defs)
        if data in ['Meinshausen_2011']:
            For1 = For1.expand_dims('reg_iso', -1).assign_coords(reg_iso=[999])

        ## offset with preindustrial emissions level:
        ## either by the value prescribed in Xhalo_offset for this dataset,
        ## or by the last available year of RCP2.6 emissions for the species,
        ## or (if RCP2.6 is absent) by the species' own last available year
        if data in ['Meinshausen_2011', 'RCPdb']:
            for VAR in Xhalo_offset.keys():
                if data in Xhalo_offset[VAR].keys():
                    For1['E_Xhalo'] = xr.where(
                        For1.spc_halo == VAR,
                        For1['E_Xhalo'].sel(spc_halo=VAR) -
                        Xhalo_offset[VAR][data], For1['E_Xhalo'])
                else:
                    if 'RCP2.6' in For1.scen:
                        For1['E_Xhalo'] = xr.where(
                            For1.spc_halo == VAR,
                            For1['E_Xhalo'].sel(spc_halo=VAR) -
                            For1['E_Xhalo'].sel(
                                spc_halo=VAR, scen='RCP2.6').dropna(
                                    'year', how='all').isel(year=-1),
                            For1['E_Xhalo'])
                    else:
                        For1['E_Xhalo'] = xr.where(
                            For1.spc_halo == VAR,
                            For1['E_Xhalo'].sel(spc_halo=VAR) -
                            For1['E_Xhalo'].sel(spc_halo=VAR).dropna(
                                'year', how='all').isel(year=-1),
                            For1['E_Xhalo'])

        ## aggregate over sectors (ignoring some)
        ## (min_count=1 keeps the total as NaN when all summed values are missing)
        if data in ['RCPdb', 'ScenarioMIP']:
            ## selection and aggregation
            for VAR in For1:
                if 'sector' in For1[VAR].coords:
                    For1[VAR] = For1[VAR].sel(sector=[
                        sec for sec in For1.sector.values
                        if sec not in nonCO2_ignored_sectors[data]
                    ]).sum('sector', min_count=1)
            ## dropping useless coords
            For1 = For1.drop('sector')
            if 'sector_long_name' in For1.coords:
                For1 = For1.drop('sector_long_name')

        ## select SRES scenarios
        if data in ['SRES']:
            ## take all scenarios (flatten array)
            if all_SRES:
                For1 = For1.stack(new_scen=('scen', 'model'))
                For1['new_scen'] = [
                    'SRES-' + var1 + ' (' + var2 + ')'
                    for var1, var2 in For1.new_scen.values
                ]
                For1 = For1.dropna('new_scen').rename({'new_scen': 'scen'})
            ## take only markers
            else:
                For1 = For1.where(For1.is_marker, drop=True).sum('model',
                                                                 min_count=1)
                For1['scen'] = [
                    data + '-' + var + ' (marker)' for var in For1.scen.values
                ]

        ## select SSP scenarios
        ## (only applies when 'SSPdb' is explicitly passed in 'datasets')
        if data in ['SSPdb']:
            ## take all scenarios (flatten array)
            if all_SSPdb:
                For1 = For1.stack(new_scen=('scen_ssp', 'scen_rcp', 'model'))
                For1['new_scen'] = [
                    var1 + '-' + var2 + ' (' + var3 + ')'
                    for var1, var2, var3 in For1.new_scen.values
                ]
                For1 = For1.dropna('new_scen').rename({'new_scen': 'scen'})
            ## take only markers (also flattened)
            else:
                For1 = For1.where(For1.is_marker, drop=True).sum('model',
                                                                 min_count=1)
                For1 = For1.stack(new_scen=('scen_ssp', 'scen_rcp'))
                For1['new_scen'] = [
                    var1 + '-' + var2 + ' (marker)'
                    for var1, var2 in For1.new_scen.values
                ]
                For1 = For1.dropna('new_scen').rename({'new_scen': 'scen'})

        ##---

        ## interpolate to yearly data
        if not all(np.diff(For1.year.values) == 1):
            For1 = For1.interp({
                'year':
                np.arange(int(For1.year[0]),
                          int(For1.year[-1]) + 1, 1)
            })

        ## aggregate to model regions
        if 'reg_iso' in For1.coords:
            For1 = aggreg_region(For1, mod_region)
        else:
            For1 = aggreg_region(For1,
                                 mod_region,
                                 old_axis='region',
                                 dataset=data)

        ## append to final list
        For0.append(For1)
        del For1

    ## merge into one xarray
    For0 = xr.merge(For0)

    ## create one data axis per driver
    ## (a scenario is kept on a driver's axis only if it provides non-NaN data for it)
    For = xr.Dataset()
    for VAR in For0:
        TMP = [
            For0[VAR].sel(scen=scen).rename({'scen': 'scen_' + VAR})
            for scen in For0.scen.values
            if not np.isnan(For0[VAR].sel(scen=scen).sum(min_count=1))
        ]
        For[VAR] = xr.concat(TMP, dim='scen_' + VAR)
        del TMP

    ## order dimensions
    ## ('spc_halo' is included only if present, via bool-as-int list repetition)
    For = For.transpose(*(['year', 'reg_land'] + ['spc_halo'] *
                          ('spc_halo' in For.coords) +
                          [var for var in For.coords if 'scen' in var]))

    ## drop requested species
    ## (scan both data variables and coordinates for names containing a dropped species)
    for vars_then_coords in [For, For.coords]:
        for VAR in vars_then_coords:
            if any([spc in VAR for spc in dropped_species]):
                For = For.drop(VAR)

    ## reapply units
    for VAR in For:
        if VAR in units.keys():
            For[VAR].attrs['units'] = units[VAR]

    ## return
    return For