Example 1
def get_cmip(model: str,
             scenario: str,
             member: str,
             region: Dict = None,
             account_key: str = None) -> xr.Dataset:
    """Load downscaled CMIP data, concatenating historical and future scenarios along their time dimension

    Parameters
    ----------
    model : str
        CMIP model_id
    scenario : str
        CMIP scenario
    member : str
        CMIP member_id
    region : dict
        Dictionary of slices defining a single chunk, e.g. {'x': slice(1150, 1200), 'y': slice(700, 750), 'time': slice(None)}
    account_key : str
        Secret key giving Zarr access to Azure store
    """
    # open the historical simulation dataset
    hist_mapper = get_store(
        f'cmip6/{downscale_method}/conus/4000m/monthly/{model}.historical.{member}.zarr',
        account_key=account_key,
    )
    ds_hist = xr.open_zarr(hist_mapper, consolidated=True)[force_vars]
    ds_hist = maybe_slice_region(ds_hist, region)
    ds_hist = ds_hist.pipe(preprocess).pipe(load_coords)

    # open the future simulation dataset
    scen_mapper = get_store(
        f'cmip6/{downscale_method}/conus/4000m/monthly/{model}.{scenario}.{member}.zarr',
        account_key=account_key,
    )
    ds_scen = xr.open_zarr(scen_mapper, consolidated=True)[force_vars]
    ds_scen = maybe_slice_region(ds_scen, region)
    ds_scen = ds_scen.pipe(preprocess).pipe(load_coords)

    # combine the historical and future simulation datasets together
    ds_in = xr.concat([ds_hist, ds_scen],
                      dim='time',
                      data_vars=force_vars,
                      coords='minimal',
                      compat='override')

    for v in force_vars:
        ds_in[v] = ds_in[v].astype(np.float32)

    return ds_in
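
A brief usage sketch for get_cmip. The model, scenario, and member strings below are placeholder CMIP6 identifiers, and the call assumes the module-level helpers and globals used above (get_store, maybe_slice_region, preprocess, load_coords, downscale_method, force_vars) are defined elsewhere in the project:

import os

# slice a single 50x50 chunk, mirroring the region example in the docstring
region = {'x': slice(1150, 1200), 'y': slice(700, 750), 'time': slice(None)}
ds = get_cmip('CanESM5', 'ssp370', 'r1i1p1f1',
              region=region,
              account_key=os.environ.get('BLOB_ACCOUNT_KEY'))
# the historical and scenario records arrive concatenated along 'time'
print(ds.time.values[0], ds.time.values[-1])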
Example 2
def load_cmip(model, scenario, member, bias_corrected=False):
    """Open a regridded (or bias-corrected) monthly CMIP6 Zarr store as an xarray Dataset"""
    if bias_corrected:
        prefix = f'cmip6/bias-corrected/conus/4000m/monthly/{model}.{scenario}.{member}.zarr'
    else:
        prefix = f'cmip6/regridded/conus/monthly/4000m/{model}.{scenario}.{member}.zarr'

    store = get_store(prefix)
    ds = xr.open_zarr(store, consolidated=True)
    return ds
Example 3
def get_out_mapper(account_key: str) -> zarr.storage.ABSStore:
    """Get output dataset mapper to Azure Blob store

    Parameters
    ----------
    account_key : str
        Secret key giving Zarr access to Azure store

    Returns
    -------
    mapper : zarr.storage.ABSStore
    """
    return get_store(target, account_key=account_key)
Example 4
def get_obs(region: dict = None, account_key: str = None) -> xr.Dataset:
    """Load observed climate data and auxillary variables

    Parameters
    ----------
    region : dict
        Dictionary of slices defining a single chunk, e.g. {'x': slice(1150, 1200), 'y': slice(700, 750), 'time': slice(None)}
    account_key : str
        Secret key giving Zarr access to Azure store
    """

    obs_mapper = get_store('obs/conus/4000m/monthly/terraclimate_plus_v3.zarr',
                           account_key=account_key)
    obs = xr.open_zarr(obs_mapper, consolidated=True).pipe(load_coords)
    obs = maybe_slice_region(obs, region)
    return obs
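
Because get_obs and get_cmip accept the same region dictionary, the observed training target and the simulated predictor can be loaded for a matching chunk before fitting; a hedged sketch with placeholder identifiers:

region = {'x': slice(1150, 1200), 'y': slice(700, 750), 'time': slice(None)}
obs = get_obs(region=region)                        # observed training target
sim = get_cmip('CanESM5', 'ssp370', 'r1i1p1f1',     # simulated predictor
               region=region)
# both datasets now cover the same 50x50 window
assert obs.sizes['x'] == sim.sizes['x'] and obs.sizes['y'] == sim.sizes['y']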
def main(model, scenario, member):
    print('---------->', model, scenario, member)

    # get the output store
    key = f'{model}.{scenario}.{member}'

    target_uri = target.format(key=key)
    print(target_uri)
    store = get_store(target_uri)

    if skip_existing and '.zmetadata' in store:
        print(f'{key} in store, skipping...')
        return 'skipped'

    y_hist = get_obs().pipe(load_coords)

    if xy_region:
        y_hist = y_hist.isel(**xy_region)

    print('y_hist:\n', y_hist)

    x_hist = open_single(model, 'historical', member).pipe(process_cmip)

    if xy_region:
        x_hist = x_hist.isel(**xy_region)

    print('x_hist:\n', x_hist)

    x_scen = open_single(model, scenario, member).pipe(process_cmip)

    if xy_region:
        x_scen = x_scen.isel(**xy_region)
    if 'hist' in scenario:
        x_scen = x_scen.sel(time=hist_time)
    else:
        x_scen = x_scen.sel(time=future_time)
    print('x_scen:\n', x_scen)

    print('fitting models')

    models = {}
    y_scen = xr.Dataset()

    for v in bc_vars:
        print(v)

        models[v] = PointWiseDownscaler(
            TrendAwareQuantileMappingRegressor(QuantileMappingReressor(extrapolate='1to1'))
        )

        # train models with historical data
        models[v].fit(x_hist[v].sel(time=train_time), y_hist[v].sel(time=train_time))

        # predict this ensemble member
        y_scen[v] = models[v].predict(x_scen[v])

    y_scen = y_scen.chunk(chunks)
    print('y_scen:\n', y_scen)

    if dry_run:
        print('skipping write of ... dry_run=True')
        return 'skipped'
    else:
        store.clear()
        write = y_scen.to_zarr(store, compute=False, mode='w')
        write.compute(retries=3)
        zarr.consolidate_metadata(store)
        return 'done'
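
For reference, here is a self-contained sketch of the pointwise quantile-mapping pattern used in the loop above, run on synthetic data. It assumes these classes are importable from skdownscale.pointwise_models (the spelling QuantileMappingReressor matches the name used above) and is illustrative only, not the project's actual training pipeline:

import numpy as np
import pandas as pd
import xarray as xr
from skdownscale.pointwise_models import (
    PointWiseDownscaler,
    TrendAwareQuantileMappingRegressor,
    QuantileMappingReressor,
)

# synthetic monthly series on a tiny 2x2 grid
time = pd.date_range('1990-01-01', periods=240, freq='MS')
coords = {'time': time, 'y': [0, 1], 'x': [0, 1]}
dims = ('time', 'y', 'x')
rng = np.random.default_rng(0)
x_hist = xr.DataArray(15 + 8 * rng.random((240, 2, 2)), dims=dims, coords=coords)
y_hist = xr.DataArray(14 + 6 * rng.random((240, 2, 2)), dims=dims, coords=coords)
x_scen = x_hist + 1.5  # a uniformly warmer "future" with the same variability

# one quantile-mapping model per grid cell, trained on the historical period
model = PointWiseDownscaler(
    TrendAwareQuantileMappingRegressor(QuantileMappingReressor(extrapolate='1to1'))
)
model.fit(x_hist, y_hist)
y_scen = model.predict(x_scen)  # bias-corrected future series, same dims as x_scen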
def open_single(model, scenario, member):
    uri = f'cmip6/regridded/conus/4000m/monthly/{model}.{scenario}.{member}.zarr'
    store = get_store(uri)
    return xr.open_zarr(store, consolidated=True)
def main():  # function name assumed; the original excerpt begins mid-function
    # read the table of SSP runs with matching historical members
    # (the CSV written at the end of Example 9)
    with fsspec.open(
        'az://carbonplan-downscaling/cmip6/ssps_with_matching_historical_members.csv',
        account_name='carbonplan',
        account_key=os.environ['BLOB_ACCOUNT_KEY'],
    ) as f:
        df = pd.read_csv(f)

    # target grid
    for grid in ['conus', 'ak']:

        grid_ds = cat.grids.albers4k(region=grid).read()

        for i, row in df.iterrows():

            if skip_unmatched and not row.has_match:
                continue

            target_key = f'{row.model}.{row.scenario}.{row.member}'
            target_path = target.format(region=grid, key=target_key)
            target_store = get_store(target_path)

            # skip if existing
            if skip_existing and '.zmetadata' in target_store:
                print(f'{target_key} in store, skipping...')
                continue

            source_store = get_store(row.path)
            source_ds = xr.open_zarr(source_store, consolidated=True)

            # perform the regridding
            print(f'regridding {target_path}')
            ds = regrid_one_model(source_ds, grid_ds).chunk({'time': 198, 'x': 50, 'y': 50})

            # write output dataset to store
            ds.update(grid_ds[update_vars])
Example 8
def main(model, scenario, member):
    print('---------->', model, scenario, member)

    # get the output store
    key = f'{model}.{scenario}.{member}'

    target_uri = target.format(key=key)
    print(target_uri)
    store = get_store(target_uri)

    if skip_existing and '.zmetadata' in store:
        print(f'{key} in store, skipping...')
        return

    absolute_model = MontlyBiasCorrection(correction='absolute')
    relative_model = MontlyBiasCorrection(correction='relative')

    y_hist = get_obs().pipe(load_coords)

    if xy_region:
        y_hist = y_hist.isel(**xy_region)

    print('y_hist:\n', y_hist)

    x_hist = open_single(model, 'historical', member).pipe(process_cmip)

    if xy_region:
        x_hist = x_hist.isel(**xy_region)

    print('x_hist:\n', x_hist)
    print('fitting models')

    # train models with historical data
    absolute_model.fit(x_hist[absolute_vars].sel(time=train_time),
                       y_hist[absolute_vars].sel(time=train_time))
    relative_model.fit(x_hist[relative_vars].sel(time=train_time),
                       y_hist[relative_vars].sel(time=train_time))
    print('absolute_model:\n', absolute_model.correction_)

    absolute_model = absolute_model.compute()
    relative_model = relative_model.compute()

    x_scen = open_single(model, scenario, member).pipe(process_cmip)

    if xy_region:
        x_scen = x_scen.isel(**xy_region)

    print('x_scen:\n', x_scen)

    if 'hist' in scenario:
        x_scen = x_scen.sel(time=hist_time)
    else:
        x_scen = x_scen.sel(time=future_time)

    # predict this ensemble member
    y_scen = absolute_model.predict(x_scen[absolute_vars])
    y_scen.update(relative_model.predict(x_scen[relative_vars]))

    y_scen = y_scen.chunk(chunks)
    print('y_scen:\n', y_scen)

    if dry_run:
        print('skipping write of ... dry_run=True')
        return 'skip'
    else:
        store.clear()
        write = y_scen.to_zarr(store, compute=False, mode='w')
        write.compute(retries=3)
        zarr.consolidate_metadata(store)
        return 'done'
Example 9
def main():

    model_dict, data = cmip()

    written_keys = []
    for full_key, ds in data.items():

        valid_members = 0

        for member_id in ds.member_id.values:

            # only extract `max_members` members (at most)
            if valid_members >= max_members:
                break

            # get the output zarr store
            member_key = slim_cmip_key(full_key, member_id)
            prefix = target.format(key=member_key)
            store = get_store(prefix)
            print(prefix)

            # extract a single member and rechunk
            member_ds = ds.sel(member_id=member_id).chunk({'lat': -1, 'lon': -1, 'time': 198})

            # check that there is data for the full record
            if (
                member_ds.isel(lon=0, lat=0)
                .isnull()
                .any()
                .to_array(name='variables')
                .any()
                .load()
                .item()
            ):
                print('--> skipping, missing some data')
                store.clear()
                continue

            # clean encoding
            for v in member_ds:
                if 'chunks' in member_ds[v].encoding:
                    del member_ds[v].encoding['chunks']

            # write store
            if skip_existing and '.zmetadata' in store:
                print('++++ skipping write', prefix)
            else:
                store.clear()
                member_ds.to_zarr(store, consolidated=True, mode='w')
            valid_members += 1
            written_keys.append(prefix)

    d = {}
    for k in written_keys:
        if 'historical' in k:
            if k not in d:
                d[k] = False
        else:
            pieces = k.split('.')
            pieces[1] = 'historical'
            k2 = '.'.join(pieces)
            if k2 in written_keys:
                d[k2] = True
                d[k] = True
            else:
                d[k] = False

    df = (
        pd.DataFrame.from_dict(d, orient='index')
        .reset_index()
        .rename(columns={0: 'has_match', 'index': 'path'})
    )
    for i, row in df.iterrows():
        model, scenario, member, _ = row.path.split('/')[-1].split('.')

        df.loc[i, 'model'] = model
        df.loc[i, 'scenario'] = scenario
        df.loc[i, 'member'] = member

    with fsspec.open(
        'az://carbonplan-downscaling/cmip6/ssps_with_matching_historical_members.csv',
        'w',
        account_name='carbonplan',
        account_key=os.environ['BLOB_ACCOUNT_KEY'],
    ) as f:
        df.to_csv(f)