def get_cmip(
    model: str, scenario: str, member: str, region: Dict = None, account_key: str = None
) -> xr.Dataset:
    """Load downscaled CMIP data, concatenating historical and future scenarios
    along their time dimension.

    Parameters
    ----------
    model : str
        CMIP model_id
    scenario : str
        CMIP scenario
    member : str
        CMIP member_id
    region : dict
        Dictionary of slices defining a single chunk, e.g.
        {'x': slice(1150, 1200), 'y': slice(700, 750), 'time': slice(None)}
    account_key : str
        Secret key giving Zarr access to Azure store
    """
    # open the historical simulation dataset
    hist_mapper = get_store(
        f'cmip6/{downscale_method}/conus/4000m/monthly/{model}.historical.{member}.zarr',
        account_key=account_key,
    )
    ds_hist = xr.open_zarr(hist_mapper, consolidated=True)[force_vars]
    ds_hist = maybe_slice_region(ds_hist, region)
    ds_hist = ds_hist.pipe(preprocess).pipe(load_coords)

    # open the future simulation dataset
    scen_mapper = get_store(
        f'cmip6/{downscale_method}/conus/4000m/monthly/{model}.{scenario}.{member}.zarr',
        account_key=account_key,
    )
    ds_scen = xr.open_zarr(scen_mapper, consolidated=True)[force_vars]
    ds_scen = maybe_slice_region(ds_scen, region)
    ds_scen = ds_scen.pipe(preprocess).pipe(load_coords)

    # combine the historical and future simulation datasets together
    ds_in = xr.concat(
        [ds_hist, ds_scen],
        dim='time',
        data_vars=force_vars,
        coords='minimal',
        compat='override',
    )

    # cast the forcing variables to float32
    for v in force_vars:
        ds_in[v] = ds_in[v].astype(np.float32)

    return ds_in
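# A minimal sketch of the `maybe_slice_region` helper used above, assuming
# `region` maps dimension names to positional slices (as in the docstring
# example); inferred from how it is called here, not the repo's actual helper.
def maybe_slice_region(ds: xr.Dataset, region: Dict = None) -> xr.Dataset:
    """Subset ``ds`` with the region's slices when a region is given."""
    if region is not None:
        return ds.isel(**region)
    return ds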
def load_cmip(model, scenario, member, bias_corrected=False):
    """Open a single CMIP store, optionally the bias-corrected product."""
    if bias_corrected:
        prefix = f'cmip6/bias-corrected/conus/4000m/monthly/{model}.{scenario}.{member}.zarr'
    else:
        prefix = f'cmip6/regridded/conus/monthly/4000m/{model}.{scenario}.{member}.zarr'
    store = get_store(prefix)
    ds = xr.open_zarr(store, consolidated=True)
    return ds
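# Example usage (illustrative model/scenario/member ids): the flag switches
# between the regridded inputs and the bias-corrected product.
#
#     raw = load_cmip('CanESM5', 'ssp370', 'r1i1p1f1')
#     bc = load_cmip('CanESM5', 'ssp370', 'r1i1p1f1', bias_corrected=True)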
def get_out_mapper(account_key: str) -> zarr.storage.ABSStore:
    """Get output dataset mapper to Azure Blob store

    Parameters
    ----------
    account_key : str
        Secret key giving Zarr access to Azure store

    Returns
    -------
    mapper : zarr.storage.ABSStore
    """
    return get_store(target, account_key=account_key)
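# A minimal sketch of the `get_store` helper this module relies on, assuming
# data lives in the 'carbonplan-downscaling' container of the 'carbonplan'
# Azure account (both names taken from the URIs elsewhere in this module);
# a plausible implementation, not necessarily the repo's actual one.
def get_store(prefix: str, account_key: str = None) -> zarr.storage.ABSStore:
    """Return a Zarr store for ``prefix`` inside the blob container."""
    if account_key is None:
        account_key = os.environ.get('BLOB_ACCOUNT_KEY')
    return zarr.storage.ABSStore(
        'carbonplan-downscaling',
        prefix=prefix,
        account_name='carbonplan',
        account_key=account_key,
    )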
def get_obs(region: dict = None, account_key: str = None) -> xr.Dataset:
    """Load observed climate data and auxiliary variables

    Parameters
    ----------
    region : dict
        Dictionary of slices defining a single chunk, e.g.
        {'x': slice(1150, 1200), 'y': slice(700, 750), 'time': slice(None)}
    account_key : str
        Secret key giving Zarr access to Azure store
    """
    obs_mapper = get_store(
        'obs/conus/4000m/monthly/terraclimate_plus_v3.zarr', account_key=account_key
    )
    obs = xr.open_zarr(obs_mapper, consolidated=True).pipe(load_coords)
    obs = maybe_slice_region(obs, region)
    return obs
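# Example usage (chunk taken from the docstring example): pull one 50x50
# spatial block of the observed data without loading the full CONUS domain.
#
#     region = {'x': slice(1150, 1200), 'y': slice(700, 750), 'time': slice(None)}
#     obs = get_obs(region=region)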
def main(model, scenario, member):
    print('---------->', model, scenario, member)

    # get the output store
    key = f'{model}.{scenario}.{member}'
    target_uri = target.format(key=key)
    print(target_uri)
    store = get_store(target_uri)

    if skip_existing and '.zmetadata' in store:
        print(f'{key} in store, skipping...')
        return 'skipped'

    y_hist = get_obs().pipe(load_coords)
    if xy_region:
        y_hist = y_hist.isel(**xy_region)
    print('y_hist:\n', y_hist)

    x_hist = open_single(model, 'historical', member).pipe(process_cmip)
    if xy_region:
        x_hist = x_hist.isel(**xy_region)
    print('x_hist:\n', x_hist)

    x_scen = open_single(model, scenario, member).pipe(process_cmip)
    if xy_region:
        x_scen = x_scen.isel(**xy_region)
    if 'hist' in scenario:
        x_scen = x_scen.sel(time=hist_time)
    else:
        x_scen = x_scen.sel(time=future_time)
    print('x_scen:\n', x_scen)

    print('fitting models')
    models = {}
    y_scen = xr.Dataset()
    for v in bc_vars:
        print(v)
        # note: `QuantileMappingReressor` is the class name as spelled in skdownscale
        models[v] = PointWiseDownscaler(
            TrendAwareQuantileMappingRegressor(QuantileMappingReressor(extrapolate='1to1'))
        )

        # train the model for this variable with historical data
        models[v].fit(x_hist[v].sel(time=train_time), y_hist[v].sel(time=train_time))

        # predict for this ensemble member
        y_scen[v] = models[v].predict(x_scen[v])

    y_scen = y_scen.chunk(chunks)
    print('y_scen:\n', y_scen)

    if dry_run:
        print('skipping write of ... dry_run=True')
        return 'skipped'
    else:
        store.clear()
        write = y_scen.to_zarr(store, compute=False, mode='w')
        write.compute(retries=3)
        zarr.consolidate_metadata(store)
        return 'done'
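# Hypothetical driver (ids and scenario list are illustrative): in practice
# runs like this were likely fanned out to a cluster, but serially it would
# look something like:
#
#     for scenario in ['ssp245', 'ssp370', 'ssp585']:
#         main('CanESM5', scenario, 'r1i1p1f1')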
def open_single(model, scenario, member):
    """Open the regridded monthly store for a single model/scenario/member."""
    uri = f'cmip6/regridded/conus/4000m/monthly/{model}.{scenario}.{member}.zarr'
    store = get_store(uri)
    return xr.open_zarr(store, consolidated=True)
# read the catalog of extracted stores; the opening of this `with` block is
# reconstructed on the assumption that it reads the CSV written by the
# extraction script in this module
with fsspec.open(
    'az://carbonplan-downscaling/cmip6/ssps_with_matching_historical_members.csv',
    account_name='carbonplan',
) as f:
    df = pd.read_csv(f)

# target grid
for grid in ['conus', 'ak']:
    grid_ds = cat.grids.albers4k(region=grid).read()

    for i, row in df.iterrows():
        if skip_unmatched and not row.has_match:
            continue

        target_key = f'{row.model}.{row.scenario}.{row.member}'
        target_path = target.format(region=grid, key=target_key)
        target_store = get_store(target_path)

        # skip if existing
        if skip_existing and '.zmetadata' in target_store:
            print(f'{target_key} in store, skipping...')
            continue

        source_store = get_store(row.path)
        source_ds = xr.open_zarr(source_store, consolidated=True)

        # perform the regridding
        print(f'regridding {target_path}')
        ds = regrid_one_model(source_ds, grid_ds).chunk({'time': 198, 'x': 50, 'y': 50})

        # write output dataset to store
        ds.update(grid_ds[update_vars])
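        # assumed continuation (following the write pattern used elsewhere in
        # this module): write the regridded dataset and consolidate metadata
        target_store.clear()
        ds.to_zarr(target_store, mode='w')
        zarr.consolidate_metadata(target_store)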
def main(model, scenario, member):
    print('---------->', model, scenario, member)

    # get the output store
    key = f'{model}.{scenario}.{member}'
    target_uri = target.format(key=key)
    print(target_uri)
    store = get_store(target_uri)

    if skip_existing and '.zmetadata' in store:
        print(f'{key} in store, skipping...')
        return

    # note: `MontlyBiasCorrection` is the upstream class name, spelled as-is
    absolute_model = MontlyBiasCorrection(correction='absolute')
    relative_model = MontlyBiasCorrection(correction='relative')

    y_hist = get_obs().pipe(load_coords)
    if xy_region:
        y_hist = y_hist.isel(**xy_region)
    print('y_hist:\n', y_hist)

    x_hist = open_single(model, 'historical', member).pipe(process_cmip)
    if xy_region:
        x_hist = x_hist.isel(**xy_region)
    print('x_hist:\n', x_hist)

    print('fitting models')

    # train models with historical data
    absolute_model.fit(
        x_hist[absolute_vars].sel(time=train_time), y_hist[absolute_vars].sel(time=train_time)
    )
    relative_model.fit(
        x_hist[relative_vars].sel(time=train_time), y_hist[relative_vars].sel(time=train_time)
    )
    print('absolute_model:\n', absolute_model.correction_)

    absolute_model = absolute_model.compute()
    relative_model = relative_model.compute()

    x_scen = open_single(model, scenario, member).pipe(process_cmip)
    if xy_region:
        x_scen = x_scen.isel(**xy_region)
    print('x_scen:\n', x_scen)

    if 'hist' in scenario:
        x_scen = x_scen.sel(time=hist_time)
    else:
        x_scen = x_scen.sel(time=future_time)

    # predict this ensemble member
    y_scen = absolute_model.predict(x_scen[absolute_vars])
    y_scen.update(relative_model.predict(x_scen[relative_vars]))
    y_scen = y_scen.chunk(chunks)
    print('y_scen:\n', y_scen)

    if dry_run:
        print('skipping write of ... dry_run=True')
        return 'skip'
    else:
        store.clear()
        write = y_scen.to_zarr(store, compute=False, mode='w')
        write.compute(retries=3)
        zarr.consolidate_metadata(store)
        return 'done'
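# A rough sketch of the idea behind the monthly bias correction above, not
# skdownscale's implementation: an 'absolute' correction learns a per-month
# mean offset between obs and the historical simulation, then adds it to the
# matching scenario months; a 'relative' correction would use ratios instead.
def monthly_absolute_correction(
    x_hist: xr.Dataset, y_hist: xr.Dataset, x_scen: xr.Dataset
) -> xr.Dataset:
    # per-month climatological offset: obs minus simulated historical
    offset = y_hist.groupby('time.month').mean('time') - x_hist.groupby('time.month').mean('time')
    # apply each month's offset to every scenario timestep in that month
    return x_scen.groupby('time.month') + offset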
def main():
    # load the raw CMIP6 catalog (`cmip` is defined elsewhere in this module)
    model_dict, data = cmip()

    written_keys = []
    for full_key, ds in data.items():
        valid_members = 0
        for member_id in ds.member_id.values:
            # only extract `max_members` members (at most)
            if valid_members >= max_members:
                break

            # get the output zarr store
            member_key = slim_cmip_key(full_key, member_id)
            prefix = target.format(key=member_key)
            store = get_store(prefix)
            print(prefix)

            # extract a single member and rechunk
            member_ds = ds.sel(member_id=member_id).chunk({'lat': -1, 'lon': -1, 'time': 198})

            # check that there is data for the full record
            if (
                member_ds.isel(lon=0, lat=0)
                .isnull()
                .any()
                .to_array(name='variables')
                .any()
                .load()
                .item()
            ):
                print('--> skipping, missing some data')
                store.clear()
                continue

            # clean encoding
            for v in member_ds:
                if 'chunks' in member_ds[v].encoding:
                    del member_ds[v].encoding['chunks']

            # write store
            if skip_existing and '.zmetadata' in store:
                print('++++ skipping write', prefix)
            else:
                store.clear()
                member_ds.to_zarr(store, consolidated=True, mode='w')

            valid_members += 1
            written_keys.append(prefix)

    # flag scenario runs that have a matching historical member
    d = {}
    for k in written_keys:
        if 'historical' in k:
            if k not in d:
                d[k] = False
        else:
            pieces = k.split('.')
            pieces[1] = 'historical'
            k2 = '.'.join(pieces)
            if k2 in written_keys:
                d[k2] = True
                d[k] = True
            else:
                d[k] = False

    df = (
        pd.DataFrame.from_dict(d, orient='index')
        .reset_index()
        .rename(columns={0: 'has_match', 'index': 'path'})
    )

    # split each store path back into model/scenario/member columns
    for i, row in df.iterrows():
        model, scenario, member, _ = row.path.split('/')[-1].split('.')
        df.loc[i, 'model'] = model
        df.loc[i, 'scenario'] = scenario
        df.loc[i, 'member'] = member

    with fsspec.open(
        'az://carbonplan-downscaling/cmip6/ssps_with_matching_historical_members.csv',
        'w',
        account_name='carbonplan',
        account_key=os.environ['BLOB_ACCOUNT_KEY'],
    ) as f:
        df.to_csv(f)
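# Worked example of the `has_match` pairing above (hypothetical store paths):
# given written keys
#
#     cmip6/raw/CanESM5.historical.r1i1p1f1.zarr
#     cmip6/raw/CanESM5.ssp370.r1i1p1f1.zarr
#
# the ssp370 row gets has_match=True because replacing its scenario piece
# with 'historical' yields a key that was also written, and the historical
# row is flipped to True at the same time.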