def process_custom_climate_data(gdir):
    """Processes and writes the climate data from a user-defined climate file.

    The input file must have a specific format (see
    oggm-sample-data/test-files/histalp_merged_hef.nc for an example).

    Uses caching for faster retrieval. This is the way OGGM does it for the
    Alps (HISTALP).

    Parameters
    ----------
    gdir : GlacierDirectory
        the glacier directory to process

    Raises
    ------
    IOError
        if ``cfg.PATHS['climate_file']`` is not set or the file is missing.
    ValueError
        if the file does not contain an integer number of (hydrological)
        years.
    """

    if not (('climate_file' in cfg.PATHS) and
            os.path.exists(cfg.PATHS['climate_file'])):
        raise IOError('Custom climate file not found')

    # read the file
    fpath = cfg.PATHS['climate_file']
    nc_ts = salem.GeoNetcdf(fpath)

    # set temporal subset for the ts data (hydro years, Oct-Sep)
    yrs = nc_ts.time.year
    y0, y1 = yrs[0], yrs[-1]
    nc_ts.set_period(t0='{}-10-01'.format(y0), t1='{}-09-01'.format(y1))
    time = nc_ts.time
    ny, r = divmod(len(time), 12)
    if r != 0:
        raise ValueError('Climate data should be N full years exclusively')

    # Units
    assert nc_ts._nc.variables['hgt'].units.lower() in \
        ['m', 'meters', 'meter', 'metres', 'metre']
    assert nc_ts._nc.variables['temp'].units.lower() in \
        ['degc', 'degrees', 'degree', 'c']
    assert nc_ts._nc.variables['prcp'].units.lower() in \
        ['kg m-2', 'l m-2', 'mm', 'millimeters', 'millimeter']

    # geoloc
    lon = nc_ts._nc.variables['lon'][:]
    lat = nc_ts._nc.variables['lat'][:]

    # BUG FIX: the dataset was never closed before (file-handle leak).
    # Everything we need has been read above; the cached reader below
    # re-opens the file from its path.
    nc_ts.close()

    # Gradient defaults
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    def_grad = cfg.PARAMS['temp_default_gradient']
    g_minmax = cfg.PARAMS['temp_local_gradient_bounds']

    # nearest grid point to the glacier center
    ilon = np.argmin(np.abs(lon - gdir.cenlon))
    ilat = np.argmin(np.abs(lat - gdir.cenlat))
    ref_pix_lon = lon[ilon]
    ref_pix_lat = lat[ilat]

    # cached read of the local climate timeseries (prcp, temp, grad, hgt)
    iprcp, itemp, igrad, ihgt = utils.joblib_read_climate(fpath, ilon, ilat,
                                                          def_grad, g_minmax,
                                                          use_grad)
    gdir.write_monthly_climate_file(time, iprcp, itemp, igrad, ihgt,
                                    ref_pix_lon, ref_pix_lat)
    # metadata: hydro year 0 starts in October of calendar year y0
    out = {'climate_source': fpath, 'hydro_yr_0': y0 + 1, 'hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')
def process_cru_data(gdir):
    """Processes and writes the climate data for this glacier.

    Interpolates the CRU TS data to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.

    Parameters
    ----------
    gdir : GlacierDirectory
        the glacier directory to process

    Raises
    ------
    RuntimeError
        if no valid climate pixel can be found around the glacier.
    """

    # read the climatology
    clfile = utils.get_cru_cl_file()
    ncclim = salem.GeoNetcdf(clfile)
    # and the TS data
    nc_ts_tmp = salem.GeoNetcdf(utils.get_cru_file('tmp'), monthbegin=True)
    nc_ts_pre = salem.GeoNetcdf(utils.get_cru_file('pre'), monthbegin=True)

    # set temporal subset for the ts data (hydro years, Oct-Sep)
    yrs = nc_ts_pre.time.year
    y0, y1 = yrs[0], yrs[-1]
    nc_ts_tmp.set_period(t0='{}-10-01'.format(y0), t1='{}-09-01'.format(y1))
    nc_ts_pre.set_period(t0='{}-10-01'.format(y0), t1='{}-09-01'.format(y1))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    # gradient default params
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    def_grad = cfg.PARAMS['temp_default_gradient']
    g_minmax = cfg.PARAMS['temp_local_gradient_bounds']

    lon = gdir.cenlon
    lat = gdir.cenlat

    # This is guaranteed to work because I prepared the file (I hope)
    ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # get climatology data
    loc_hgt = ncclim.get_vardata('elev')
    loc_tmp = ncclim.get_vardata('temp')
    loc_pre = ncclim.get_vardata('prcp')
    loc_lon = ncclim.get_vardata('lon')
    loc_lat = ncclim.get_vardata('lat')

    # see if the center is ok
    if not np.isfinite(loc_hgt[1, 1]):
        # take another candidate where finite
        isok = np.isfinite(loc_hgt)

        # wait: some areas are entirely NaNs, make the subset larger
        _margin = 1
        while not np.any(isok):
            _margin += 1
            ncclim.set_subset(corners=((lon, lat), (lon, lat)),
                              margin=_margin)
            loc_hgt = ncclim.get_vardata('elev')
            isok = np.isfinite(loc_hgt)
        if _margin > 1:
            log.debug('%s: I had to look up for far climate pixels: %s',
                      gdir.rgi_id, _margin)

        # Take the first candidate (doesn't matter which)
        lon, lat = ncclim.grid.ll_coordinates
        lon = lon[isok][0]
        lat = lat[isok][0]
        # Resubset
        ncclim.set_subset()
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        loc_lon = ncclim.get_vardata('lon')
        loc_lat = ncclim.get_vardata('lat')

    assert np.isfinite(loc_hgt[1, 1])
    isok = np.isfinite(loc_hgt)
    hgt_f = loc_hgt[isok].flatten()
    assert len(hgt_f) > 0.

    # monthly gradient from a temp-elevation regression (if enough pixels)
    ts_grad = np.zeros(12) + def_grad
    if use_grad and len(hgt_f) >= 5:
        for i in range(12):
            loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
            slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
            ts_grad[i] = slope if (p_val < 0.01) else def_grad
    # ... but dont exaggerate too much
    ts_grad = np.clip(ts_grad, g_minmax[0], g_minmax[1])
    # convert to timeserie and hydroyears
    ts_grad = ts_grad.tolist()
    ts_grad = ts_grad[9:] + ts_grad[0:9]
    ts_grad = np.asarray(ts_grad * ny)

    # maybe this will throw out of bounds warnings
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # compute monthly anomalies
    # of temp
    ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
    ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
    ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
    # of precip
    ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
    ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
    ts_pre = ts_pre.groupby('time.month') - ts_pre_avg

    # interpolate to HR grid
    if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
        # Extreme case, middle pix is not valid
        # take any valid pix from the 3*3 (and hope there's one)
        found_it = False
        # BUG FIX: the subset is 3x3 (margin=1), so indices run 0..2.
        # These loops used `range(2)` before, missing the last row/column
        # of candidate pixels.
        for idi in range(3):
            for idj in range(3):
                if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                    ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                    ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                    found_it = True
        if not found_it:
            msg = '{}: OMG there is no climate data'.format(gdir.rgi_id)
            raise RuntimeError(msg)
    elif np.any(~np.isfinite(ts_tmp)):
        # maybe the side is nan, but we can do nearest
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='nearest')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='nearest')
    else:
        # We can do bilinear
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='linear')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='linear')

    # take the center pixel and add it to the CRU CL clim
    # for temp
    loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                           coords={'month': ts_tmp_avg.month})
    ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    # for prcp
    loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                           coords={'month': ts_pre_avg.month})
    ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_pre = ts_pre.groupby('time.month') + loc_pre

    # done
    loc_hgt = loc_hgt[1, 1]
    loc_lon = loc_lon[1]
    loc_lat = loc_lat[1]
    assert np.isfinite(loc_hgt)
    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))
    assert np.all(np.isfinite(ts_grad))
    gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                    ts_grad, loc_hgt, loc_lon, loc_lat)
    ncclim._nc.close()
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()
    # metadata
    out = {'climate_source': 'CRU data', 'hydro_yr_0': y0+1, 'hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')
def process_cesm_data(gdir, filesuffix=''):
    """Processes and writes the climate data for this glacier.

    This function is made for interpolating the Community Earth System Model
    Last Millenial Ensemble (CESM-LME) climate simulations, from
    Otto-Bliesner et al. (2016), to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.

    Parameters
    ----------
    gdir : GlacierDirectory
        the glacier directory to process
    filesuffix : str
        append a suffix to the filename (useful for model runs).
    """

    # GCM temperature and precipitation data
    if not (('gcm_temp_file' in cfg.PATHS) and
            os.path.exists(cfg.PATHS['gcm_temp_file'])):
        raise IOError('GCM temp file not found')
    if not (('gcm_precc_file' in cfg.PATHS) and
            os.path.exists(cfg.PATHS['gcm_precc_file'])):
        raise IOError('GCM precc file not found')
    if not (('gcm_precl_file' in cfg.PATHS) and
            os.path.exists(cfg.PATHS['gcm_precl_file'])):
        raise IOError('GCM precl file not found')

    # read the files
    fpath_temp = cfg.PATHS['gcm_temp_file']
    fpath_precc = cfg.PATHS['gcm_precc_file']
    fpath_precl = cfg.PATHS['gcm_precl_file']
    tempds = xr.open_dataset(fpath_temp)
    # precip files are opened with raw time values (decoded later via salem)
    precpcds = xr.open_dataset(fpath_precc, decode_times=False)
    preclpds = xr.open_dataset(fpath_precl, decode_times=False)

    # select for location
    lon = gdir.cenlon
    lat = gdir.cenlat

    # CESM files are in 0-360
    if lon <= 0:
        lon += 360

    # take the closest
    # TODO: consider GCM interpolation?
    temp = tempds.TREFHT.sel(lat=lat, lon=lon, method='nearest')
    # total precip = convective (PRECC) + large-scale (PRECL)
    precp = precpcds.PRECC.sel(lat=lat, lon=lon, method='nearest') + \
        preclpds.PRECL.sel(lat=lat, lon=lon, method='nearest')

    # from normal years to hydrological years
    # (drop Jan-Sep of the first year and Oct-Dec of the last)
    precp = precp[9:-3]
    temp = temp[9:-3]
    y0 = int(temp.time.values[0].strftime('%Y'))
    y1 = int(temp.time.values[-1].strftime('%Y'))
    temp['time'] = pd.period_range('{}-10'.format(y0), '{}-9'.format(y1),
                                   freq='M')
    precp['time'] = pd.period_range('{}-10'.format(y0), '{}-9'.format(y1),
                                    freq='M')
    ny, r = divmod(len(temp.time), 12)
    assert r == 0

    # Convert m s-1 to mm mth-1
    # month lengths for a noleap calendar, hydro order Oct..Sep
    ndays = np.tile([31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30],
                    (y1 - y0))
    precp = precp * ndays * (60 * 60 * 24 * 1000)

    # compute monthly anomalies
    year = np.array([t.year for t in temp.time.values])
    # of temp (reference period 1961-1990)
    ts_tmp_avg = temp.isel(time=(year >= 1961) & (year <= 1990))
    ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
    ts_tmp = temp.groupby('time.month') - ts_tmp_avg
    # of precip
    ts_pre_avg = precp.isel(time=(year >= 1961) & (year <= 1990))
    ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
    ts_pre = precp.groupby('time.month') - ts_pre_avg

    # Get CRU to apply the anomaly to
    fpath = gdir.get_filepath('climate_monthly')
    dscru = xr.open_dataset(fpath)

    # Here we assume the gradient is a monthly average
    ts_grad = np.tile(dscru.grad[0:12], ny)

    # Add climate anomaly to CRU clim
    dscru = dscru.sel(time=slice('1961', '1990'))
    # for temp
    loc_tmp = dscru.temp.groupby('time.month').mean()
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    # for prcp
    loc_pre = dscru.prcp.groupby('time.month').mean()
    ts_pre = ts_pre.groupby('time.month') + loc_pre

    # load dates in right format to save
    dsindex = salem.GeoNetcdf(fpath_temp, monthbegin=True)
    time1 = dsindex.variables['time']
    # NOTE(review): subtracting each month's length presumably shifts the
    # stamps to the beginning of the month — confirm against the CESM files
    time2 = time1[9:-3] - ndays  # from normal years to hydrological years
    time2 = netCDF4.num2date(time2, time1.units, calendar='noleap')

    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))
    assert np.all(np.isfinite(ts_grad))

    # back to -180 - 180
    loc_lon = precp.lon if precp.lon <= 180 else precp.lon - 360

    gdir.write_monthly_climate_file(time2, ts_pre.values, ts_tmp.values,
                                    ts_grad, dscru.ref_hgt, loc_lon,
                                    precp.lat.values,
                                    time_unit=time1.units,
                                    file_name='cesm_data',
                                    filesuffix=filesuffix)
    dsindex._nc.close()
    tempds.close()
    precpcds.close()
    preclpds.close()
def process_custom_climate_data(gdir):
    """Processes and writes the climate data from a user-defined climate file.

    The input file must have a specific format (see
    oggm-sample-data/test-files/histalp_merged_hef.nc for an example).

    Uses caching for faster retrieval. This is the way OGGM does it for the
    Alps (HISTALP).
    """

    # Without a configured (and existing) custom file there is nothing to do
    if ('climate_file' not in cfg.PATHS) or \
            not os.path.exists(cfg.PATHS['climate_file']):
        raise IOError('Custom climate file not found')

    fpath = cfg.PATHS['climate_file']
    nc_ts = salem.GeoNetcdf(fpath)

    # Restrict the timeseries to full hydrological years
    start_month = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    end_month = start_month - 1 if (start_month > 1) else 12
    years = nc_ts.time.year
    y0, y1 = years[0], years[-1]
    nc_ts.set_period(t0='{}-{:02d}-01'.format(y0, start_month),
                     t1='{}-{:02d}-01'.format(y1, end_month))
    time = nc_ts.time
    ny, r = divmod(len(time), 12)
    if r != 0:
        raise ValueError('Climate data should be N full years exclusively')

    # Sanity check: the variables must come in the units we expect
    expected_units = {
        'hgt': ['m', 'meters', 'meter', 'metres', 'metre'],
        'temp': ['degc', 'degrees', 'degree', 'c'],
        'prcp': ['kg m-2', 'l m-2', 'mm', 'millimeters', 'millimeter'],
    }
    for varname, allowed in expected_units.items():
        assert nc_ts._nc.variables[varname].units.lower() in allowed

    # Grid point closest to the glacier center
    lon = nc_ts._nc.variables['lon'][:]
    lat = nc_ts._nc.variables['lat'][:]
    ix = np.argmin(np.abs(lon - gdir.cenlon))
    iy = np.argmin(np.abs(lat - gdir.cenlat))
    ref_pix_lon = lon[ix]
    ref_pix_lat = lat[iy]

    # Defaults for the local temperature gradient
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    def_grad = cfg.PARAMS['temp_default_gradient']
    g_minmax = cfg.PARAMS['temp_local_gradient_bounds']

    # Extract the data on a 3x3 window centered on the reference pixel
    temp = nc_ts.get_vardata('temp')
    prcp = nc_ts.get_vardata('prcp')
    hgt = nc_ts.get_vardata('hgt')
    temp_box = temp[:, iy - 1:iy + 2, ix - 1:ix + 2]
    itemp = temp_box[:, 1, 1]
    hgt_box = hgt[iy - 1:iy + 2, ix - 1:ix + 2]
    ihgt = hgt_box[1, 1]
    hgt_flat = hgt_box.flatten()
    iprcp = prcp[:, iy, ix]
    nc_ts.close()

    # Monthly temperature gradients: regression of temp on elevation over
    # the 3x3 window, falling back to the default where not significant
    igrad = np.full(len(time), def_grad, dtype=float)
    if use_grad:
        for t, month_field in enumerate(temp_box):
            slope, _, _, p_val, _ = stats.linregress(hgt_flat,
                                                     month_field.flatten())
            if p_val < 0.01:
                igrad[t] = slope
        # keep the gradients within plausible bounds
        igrad = np.clip(igrad, g_minmax[0], g_minmax[1])

    gdir.write_monthly_climate_file(time, iprcp, itemp, igrad, ihgt,
                                    ref_pix_lon, ref_pix_lat)

    # Metadata about what was written
    out = {'climate_source': fpath, 'hydro_yr_0': y0 + 1, 'hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')
def process_cesm_data(gdir, filesuffix='', fpath_temp=None, fpath_precc=None,
                      fpath_precl=None):
    """Processes and writes the climate data for this glacier.

    This function is made for interpolating the Community Earth System Model
    Last Millenial Ensemble (CESM-LME) climate simulations, from
    Otto-Bliesner et al. (2016), to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.

    Parameters
    ----------
    gdir : GlacierDirectory
        the glacier directory to process
    filesuffix : str
        append a suffix to the filename (useful for ensemble experiments).
    fpath_temp : str
        path to the temp file (default: cfg.PATHS['gcm_temp_file'])
    fpath_precc : str
        path to the precc file (default: cfg.PATHS['gcm_precc_file'])
    fpath_precl : str
        path to the precl file (default: cfg.PATHS['gcm_precl_file'])
    """

    # GCM temperature and precipitation data
    if fpath_temp is None:
        if not ('gcm_temp_file' in cfg.PATHS):
            raise ValueError("Need to set cfg.PATHS['gcm_temp_file']")
        fpath_temp = cfg.PATHS['gcm_temp_file']
    if fpath_precc is None:
        if not ('gcm_precc_file' in cfg.PATHS):
            raise ValueError("Need to set cfg.PATHS['gcm_precc_file']")
        fpath_precc = cfg.PATHS['gcm_precc_file']
    if fpath_precl is None:
        if not ('gcm_precl_file' in cfg.PATHS):
            raise ValueError("Need to set cfg.PATHS['gcm_precl_file']")
        fpath_precl = cfg.PATHS['gcm_precl_file']

    # read the files
    with warnings.catch_warnings():
        # Long time series are currently a pain pandas
        warnings.filterwarnings("ignore",
                                message='Unable to decode time axis')
        tempds = xr.open_dataset(fpath_temp)
        precpcds = xr.open_dataset(fpath_precc, decode_times=False)
        preclpds = xr.open_dataset(fpath_precl, decode_times=False)

    # select for location
    lon = gdir.cenlon
    lat = gdir.cenlat

    # CESM files are in 0-360
    if lon <= 0:
        lon += 360

    # take the closest
    # TODO: consider GCM interpolation?
    temp = tempds.TREFHT.sel(lat=lat, lon=lon, method='nearest')
    # total precip = convective (PRECC) + large-scale (PRECL)
    precp = precpcds.PRECC.sel(lat=lat, lon=lon, method='nearest') + \
        preclpds.PRECL.sel(lat=lat, lon=lon, method='nearest')

    # from normal years to hydrological years
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    # TODO: we don't check if the files actually start in January but we
    # should
    precp = precp[sm - 1:sm - 13].load()
    temp = temp[sm - 1:sm - 13].load()
    y0 = int(temp.time.values[0].strftime('%Y'))
    y1 = int(temp.time.values[-1].strftime('%Y'))
    time = pd.period_range('{}-{:02d}'.format(y0, sm),
                           '{}-{:02d}'.format(y1, em), freq='M')
    temp['time'] = time
    precp['time'] = time
    # Workaround for https://github.com/pydata/xarray/issues/1565
    temp['month'] = ('time', time.month)
    precp['month'] = ('time', time.month)
    temp['year'] = ('time', time.year)
    precp['year'] = ('time', time.year)
    ny, r = divmod(len(time), 12)
    assert r == 0

    # Convert m s-1 to mm mth-1
    # month lengths rolled so they start at the hydro month
    ndays = np.tile(np.roll(cfg.DAYS_IN_MONTH, 13 - sm), y1 - y0)
    precp = precp * ndays * (60 * 60 * 24 * 1000)

    # compute monthly anomalies (reference period 1961-1990)
    # of temp
    # NOTE(review): `.sel` here vs `.isel` below for the same boolean
    # selection — presumably equivalent; confirm
    ts_tmp_avg = temp.sel(time=(temp.year >= 1961) & (temp.year <= 1990))
    ts_tmp_avg = ts_tmp_avg.groupby(ts_tmp_avg.month).mean(dim='time')
    ts_tmp = temp.groupby(temp.month) - ts_tmp_avg
    # of precip -- scaled anomalies
    ts_pre_avg = precp.isel(time=(precp.year >= 1961) & (precp.year <= 1990))
    ts_pre_avg = ts_pre_avg.groupby(ts_pre_avg.month).mean(dim='time')
    ts_pre_ano = precp.groupby(precp.month) - ts_pre_avg
    # scaled anomalies is the default. Standard anomalies above
    # are used later for where ts_pre_avg == 0
    ts_pre = precp.groupby(precp.month) / ts_pre_avg

    # Get CRU to apply the anomaly to
    fpath = gdir.get_filepath('climate_monthly')
    ds_cru = xr.open_dataset(fpath)

    # Here we assume the gradient is a monthly average
    ts_grad = np.tile(ds_cru.grad[0:12], ny)

    # Add climate anomaly to CRU clim
    dscru = ds_cru.sel(time=slice('1961', '1990'))
    # for temp
    loc_tmp = dscru.temp.groupby('time.month').mean()
    ts_tmp = ts_tmp.groupby(ts_tmp.month) + loc_tmp
    # for prcp
    loc_pre = dscru.prcp.groupby('time.month').mean()
    # scaled anomalies
    ts_pre = ts_pre.groupby(ts_pre.month) * loc_pre
    # standard anomalies
    ts_pre_ano = ts_pre_ano.groupby(ts_pre_ano.month) + loc_pre
    # Correct infinite values with standard anomalies
    # (division by a zero climatology above yields inf)
    ts_pre.values = np.where(np.isfinite(ts_pre.values),
                             ts_pre.values,
                             ts_pre_ano.values)
    # The last step might create negative values (unlikely). Clip them
    ts_pre.values = ts_pre.values.clip(0)

    # load dates in right format to save
    dsindex = salem.GeoNetcdf(fpath_temp, monthbegin=True)
    time1 = dsindex.variables['time']
    time2 = time1[sm - 1:sm - 13] - ndays  # to hydrological years
    time2 = netCDF4.num2date(time2, time1.units, calendar='noleap')

    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))
    assert np.all(np.isfinite(ts_grad))

    # back to -180 - 180
    loc_lon = precp.lon if precp.lon <= 180 else precp.lon - 360

    gdir.write_monthly_climate_file(time2, ts_pre.values, ts_tmp.values,
                                    ts_grad, float(dscru.ref_hgt), loc_lon,
                                    precp.lat.values,
                                    time_unit=time1.units,
                                    file_name='cesm_data',
                                    filesuffix=filesuffix)
    dsindex._nc.close()
    tempds.close()
    precpcds.close()
    preclpds.close()
    ds_cru.close()
def _distribute_cru_style_nonparallel(gdirs):
    """More general solution for OGGM globally.

    It uses the CRU CL2 ten-minutes climatology as baseline
    (provided with OGGM)

    Parameters
    ----------
    gdirs : list of GlacierDirectory
        the glacier directories to process sequentially
    """

    # read the climatology
    clfile = utils.get_cru_cl_file()
    ncclim = salem.GeoNetcdf(clfile)
    # and the TS data
    nc_ts_tmp = salem.GeoNetcdf(utils.get_cru_file('tmp'), monthbegin=True)
    nc_ts_pre = salem.GeoNetcdf(utils.get_cru_file('pre'), monthbegin=True)

    # set temporal subset for the ts data (hydro years)
    nc_ts_tmp.set_period(t0='1901-10-01', t1='2014-09-01')
    nc_ts_pre.set_period(t0='1901-10-01', t1='2014-09-01')
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    # gradient default params
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    def_grad = cfg.PARAMS['temp_default_gradient']
    g_minmax = cfg.PARAMS['temp_local_gradient_bounds']
    prcp_scaling_factor = cfg.PARAMS['prcp_scaling_factor']

    for gdir in gdirs:
        log.info('%s: %s', gdir.rgi_id, 'distribute_cru_style')
        lon = gdir.cenlon
        lat = gdir.cenlat

        # This is guaranteed to work because I prepared the file (I hope)
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

        # get monthly gradient ...
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        isok = np.isfinite(loc_hgt)
        hgt_f = loc_hgt[isok].flatten()
        ts_grad = np.zeros(12) + def_grad
        if use_grad and len(hgt_f) >= 5:
            for i in range(12):
                loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
                slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
                ts_grad[i] = slope if (p_val < 0.01) else def_grad
        # ... but dont exaggerate too much
        ts_grad = np.clip(ts_grad, g_minmax[0], g_minmax[1])
        # convert to timeserie and hydroyears
        ts_grad = ts_grad.tolist()
        ts_grad = ts_grad[9:] + ts_grad[0:9]
        ts_grad = np.asarray(ts_grad * ny)

        # maybe this will throw out of bounds warnings
        nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

        # compute monthly anomalies (reference period 1961-1990)
        # of temp
        ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
        ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
        ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
        ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
        # of precip
        ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
        ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
        ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
        ts_pre = ts_pre.groupby('time.month') - ts_pre_avg

        # interpolate to HR grid
        if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
            # Extreme case, middle pix is not valid
            # take any valid pix from the 3*3 (and hope theres one)
            found_it = False
            # NOTE(review): the window is 3x3 (margin=1) so indices run
            # 0..2 — `range(2)` looks like an off-by-one that misses the
            # last row/column of candidates; confirm and fix upstream
            for idi in range(2):
                for idj in range(2):
                    if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                        ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                        ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                        found_it = True
            if not found_it:
                msg = '{}: OMG there is no climate data'.format(gdir.rgi_id)
                raise RuntimeError(msg)
        elif np.any(~np.isfinite(ts_tmp)):
            # maybe the side is nan, but we can do nearest
            ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values,
                                                  nc_ts_tmp.grid,
                                                  interp='nearest')
            ts_pre = ncclim.grid.map_gridded_data(ts_pre.values,
                                                  nc_ts_pre.grid,
                                                  interp='nearest')
        else:
            # We can do bilinear
            ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values,
                                                  nc_ts_tmp.grid,
                                                  interp='linear')
            ts_pre = ncclim.grid.map_gridded_data(ts_pre.values,
                                                  nc_ts_pre.grid,
                                                  interp='linear')

        # take the center pixel and add it to the CRU CL clim
        # for temp
        # NOTE(review): month coords taken from ts_pre_avg here (not
        # ts_tmp_avg) — the values should be the same 1..12, but confirm
        loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                               coords={'month': ts_pre_avg.month})
        ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
                              coords={'time': time})
        ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
        # for prcp
        loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                               coords={'month': ts_pre_avg.month})
        ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
                              coords={'time': time})
        ts_pre = ts_pre.groupby('time.month') + loc_pre * prcp_scaling_factor

        # done
        loc_hgt = loc_hgt[1, 1]
        assert np.isfinite(loc_hgt)
        assert np.all(np.isfinite(ts_pre.values))
        assert np.all(np.isfinite(ts_tmp.values))
        assert np.all(np.isfinite(ts_grad))
        gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                        ts_grad, loc_hgt)
def process_histalp_data(gdir):
    """Processes and writes the climate data for this glacier.

    Extracts the nearest timeseries and writes everything to a NetCDF file.

    Parameters
    ----------
    gdir : GlacierDirectory
        the glacier directory to process

    Raises
    ------
    ValueError
        if cfg.PARAMS['baseline_climate'] is not 'HISTALP'.
    """

    if cfg.PARAMS['baseline_climate'] != 'HISTALP':
        raise ValueError("cfg.PARAMS['baseline_climate'] should be set to "
                         "HISTALP.")

    # read the time out of the pure netcdf file
    ft = utils.get_histalp_file('tmp')
    fp = utils.get_histalp_file('pre')
    with utils.ncDataset(ft) as nc:
        vt = nc.variables['time']
        # temp file: integer month offsets starting at 0
        assert vt[0] == 0
        assert vt[-1] == vt.shape[0] - 1
        t0 = vt.units.split(' since ')[1][:7]
        time_t = pd.date_range(start=t0, periods=vt.shape[0], freq='MS')
    with utils.ncDataset(fp) as nc:
        vt = nc.variables['time']
        # precip file: half-month offsets (mid-month stamps)
        assert vt[0] == 0.5
        assert vt[-1] == vt.shape[0] - .5
        t0 = vt.units.split(' since ')[1][:7]
        time_p = pd.date_range(start=t0, periods=vt.shape[0], freq='MS')

    # Now open with salem
    nc_ts_tmp = salem.GeoNetcdf(ft, time=time_t)
    nc_ts_pre = salem.GeoNetcdf(fp, time=time_p)

    # set temporal subset for the ts data (hydro years)
    # the reference time is given by precip, which is shorter
    sm = cfg.PARAMS['hydro_month_nh']
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts_pre.time.year
    y0, y1 = yrs[0], yrs[-1]
    # user-requested period overrides the file's period
    if cfg.PARAMS['baseline_y0'] != 0:
        y0 = cfg.PARAMS['baseline_y0']
    if cfg.PARAMS['baseline_y1'] != 0:
        y1 = cfg.PARAMS['baseline_y1']

    nc_ts_tmp.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    nc_ts_pre.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    # Units
    assert nc_ts_tmp._nc.variables['HSURF'].units.lower() in \
        ['m', 'meters', 'meter', 'metres', 'metre']
    assert nc_ts_tmp._nc.variables['T_2M'].units.lower() in \
        ['degc', 'degrees', 'degrees celcius', 'degree', 'c']
    assert nc_ts_pre._nc.variables['TOT_PREC'].units.lower() in \
        ['kg m-2', 'l m-2', 'mm', 'millimeters', 'millimeter']

    # geoloc: 3x3 window centered on the glacier
    lon = gdir.cenlon
    lat = gdir.cenlat
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # read the data
    temp = nc_ts_tmp.get_vardata('T_2M')
    prcp = nc_ts_pre.get_vardata('TOT_PREC')
    hgt = nc_ts_tmp.get_vardata('HSURF')
    ref_lon = nc_ts_tmp.get_vardata('lon')
    ref_lat = nc_ts_tmp.get_vardata('lat')
    source = nc_ts_tmp._nc.title[:7]
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    igrad = None
    if use_grad:
        # monthly temp-elevation regression over the window;
        # NaN where the slope is not significant (p >= 0.01)
        igrad = np.zeros(len(time)) * np.NaN
        for t, loct in enumerate(temp):
            slope, _, _, p_val, _ = stats.linregress(hgt.flatten(),
                                                     loct.flatten())
            igrad[t] = slope if (p_val < 0.01) else np.NaN

    # center pixel of the 3x3 window is the reference timeseries
    gdir.write_monthly_climate_file(time, prcp[:, 1, 1], temp[:, 1, 1],
                                    hgt[1, 1], ref_lon[1], ref_lat[1],
                                    gradient=igrad)
    # metadata
    out = {'baseline_climate_source': source,
           'baseline_hydro_yr_0': y0 + 1,
           'baseline_hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')
def process_cru_data(gdir):
    """Processes and writes the climate data for this glacier.

    Interpolates the CRU TS data to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.

    Parameters
    ----------
    gdir : GlacierDirectory
        the glacier directory to process

    Raises
    ------
    ValueError
        if cfg.PARAMS['baseline_climate'] is not 'CRU'.
    RuntimeError
        if no valid climate pixel can be found around the glacier.
    """

    if cfg.PARAMS['baseline_climate'] != 'CRU':
        raise ValueError("cfg.PARAMS['baseline_climate'] should be set to CRU")

    # read the climatology
    clfile = utils.get_cru_cl_file()
    ncclim = salem.GeoNetcdf(clfile)
    # and the TS data
    nc_ts_tmp = salem.GeoNetcdf(utils.get_cru_file('tmp'), monthbegin=True)
    nc_ts_pre = salem.GeoNetcdf(utils.get_cru_file('pre'), monthbegin=True)

    # set temporal subset for the ts data (hydro years)
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts_pre.time.year
    y0, y1 = yrs[0], yrs[-1]
    # user-requested period overrides the file's period
    if cfg.PARAMS['baseline_y0'] != 0:
        y0 = cfg.PARAMS['baseline_y0']
    if cfg.PARAMS['baseline_y1'] != 0:
        y1 = cfg.PARAMS['baseline_y1']

    nc_ts_tmp.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    nc_ts_pre.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    lon = gdir.cenlon
    lat = gdir.cenlat

    # This is guaranteed to work because I prepared the file (I hope)
    ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # get climatology data
    loc_hgt = ncclim.get_vardata('elev')
    loc_tmp = ncclim.get_vardata('temp')
    loc_pre = ncclim.get_vardata('prcp')
    loc_lon = ncclim.get_vardata('lon')
    loc_lat = ncclim.get_vardata('lat')

    # see if the center is ok
    if not np.isfinite(loc_hgt[1, 1]):
        # take another candidate where finite
        isok = np.isfinite(loc_hgt)

        # wait: some areas are entirely NaNs, make the subset larger
        _margin = 1
        while not np.any(isok):
            _margin += 1
            ncclim.set_subset(corners=((lon, lat), (lon, lat)),
                              margin=_margin)
            loc_hgt = ncclim.get_vardata('elev')
            isok = np.isfinite(loc_hgt)
        if _margin > 1:
            log.debug('(%s) I had to look up for far climate pixels: %s',
                      gdir.rgi_id, _margin)

        # Take the first candidate (doesn't matter which)
        lon, lat = ncclim.grid.ll_coordinates
        lon = lon[isok][0]
        lat = lat[isok][0]
        # Resubset
        ncclim.set_subset()
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        loc_lon = ncclim.get_vardata('lon')
        loc_lat = ncclim.get_vardata('lat')

    assert np.isfinite(loc_hgt[1, 1])
    isok = np.isfinite(loc_hgt)
    hgt_f = loc_hgt[isok].flatten()
    assert len(hgt_f) > 0.

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    ts_grad = None
    if use_grad and len(hgt_f) >= 5:
        # monthly temp-elevation regression; NaN where not significant
        ts_grad = np.zeros(12) * np.NaN
        for i in range(12):
            loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
            slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
            ts_grad[i] = slope if (p_val < 0.01) else np.NaN
        # convert to a timeseries and hydrological years
        ts_grad = ts_grad.tolist()
        ts_grad = ts_grad[em:] + ts_grad[0:em]
        ts_grad = np.asarray(ts_grad * ny)

    # maybe this will throw out of bounds warnings
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # compute monthly anomalies (reference period 1961-1990)
    # of temp
    ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
    ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
    ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
    # of precip
    ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
    ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
    ts_pre_ano = ts_pre.groupby('time.month') - ts_pre_avg
    # scaled anomalies is the default. Standard anomalies above
    # are used later for where ts_pre_avg == 0
    ts_pre = ts_pre.groupby('time.month') / ts_pre_avg

    # interpolate to HR grid
    if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
        # Extreme case, middle pix is not valid
        # take any valid pix from the 3*3 (and hope there's one)
        found_it = False
        # BUG FIX: the subset is 3x3 (margin=1), so indices run 0..2.
        # These loops used `range(2)` before, missing the last row/column
        # of candidate pixels.
        for idi in range(3):
            for idj in range(3):
                if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                    ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                    ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                    ts_pre_ano[:, 1, 1] = ts_pre_ano[:, idj, idi]
                    found_it = True
        if not found_it:
            msg = '({}) there is no climate data'.format(gdir.rgi_id)
            raise RuntimeError(msg)
    elif np.any(~np.isfinite(ts_tmp)):
        # maybe the side is nan, but we can do nearest
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='nearest')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='nearest')
        ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
                                                  nc_ts_pre.grid,
                                                  interp='nearest')
    else:
        # We can do bilinear
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='linear')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='linear')
        ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
                                                  nc_ts_pre.grid,
                                                  interp='linear')

    # take the center pixel and add it to the CRU CL clim
    # for temp
    loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                           coords={'month': ts_tmp_avg.month})
    ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    # for prcp
    loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                           coords={'month': ts_pre_avg.month})
    ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_pre_ano = xr.DataArray(ts_pre_ano[:, 1, 1], dims=['time'],
                              coords={'time': time})
    # scaled anomalies
    ts_pre = ts_pre.groupby('time.month') * loc_pre
    # standard anomalies
    ts_pre_ano = ts_pre_ano.groupby('time.month') + loc_pre
    # Correct infinite values with standard anomalies
    # (division by a zero climatology above yields inf)
    ts_pre.values = np.where(np.isfinite(ts_pre.values),
                             ts_pre.values,
                             ts_pre_ano.values)
    # The last step might create negative values (unlikely). Clip them
    ts_pre.values = ts_pre.values.clip(0)

    # done
    loc_hgt = loc_hgt[1, 1]
    loc_lon = loc_lon[1]
    loc_lat = loc_lat[1]
    assert np.isfinite(loc_hgt)
    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))
    gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                    loc_hgt, loc_lon, loc_lat,
                                    gradient=ts_grad)
    source = nc_ts_tmp._nc.title[:10]
    ncclim._nc.close()
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()
    # metadata
    out = {'baseline_climate_source': source,
           'baseline_hydro_yr_0': y0 + 1,
           'baseline_hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')
def process_custom_climate_data(gdir):
    """Processes and writes the climate data from a user-defined climate file.

    The input file must have a specific format (see
    oggm-sample-data/test-files/histalp_merged_hef.nc for an example).

    This is the way OGGM used to do it for HISTALP before it got automatised.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process

    Raises
    ------
    IOError
        if ``cfg.PATHS['climate_file']`` is not set or does not exist
    ValueError
        if the baseline climate param is inconsistent, or if the file does
        not contain an integer number of (hydrological) years
    """

    if not (('climate_file' in cfg.PATHS) and
            os.path.exists(cfg.PATHS['climate_file'])):
        raise IOError('Custom climate file not found')

    if cfg.PARAMS['baseline_climate'] not in ['', 'CUSTOM']:
        raise ValueError("When using custom climate data please set "
                         "PARAMS['baseline_climate'] to an empty string "
                         "or `CUSTOM`. Note that you can now use the "
                         "`process_histalp_data` task for automated HISTALP "
                         "data processing.")

    # read the file
    fpath = cfg.PATHS['climate_file']
    nc_ts = salem.GeoNetcdf(fpath)

    # set temporal subset for the ts data (hydro years)
    # sm is the first month of the hydrological year, em the last one
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts.time.year
    y0, y1 = yrs[0], yrs[-1]
    nc_ts.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                     t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts.time
    ny, r = divmod(len(time), 12)
    if r != 0:
        raise ValueError('Climate data should be N full years exclusively')

    # Units
    assert nc_ts._nc.variables['hgt'].units.lower() in ['m', 'meters', 'meter',
                                                        'metres', 'metre']
    assert nc_ts._nc.variables['temp'].units.lower() in ['degc', 'degrees',
                                                         'degree', 'c']
    assert nc_ts._nc.variables['prcp'].units.lower() in ['kg m-2', 'l m-2',
                                                         'mm', 'millimeters',
                                                         'millimeter']

    # geoloc: pick the grid point closest to the glacier center
    lon = nc_ts._nc.variables['lon'][:]
    lat = nc_ts._nc.variables['lat'][:]

    ilon = np.argmin(np.abs(lon - gdir.cenlon))
    ilat = np.argmin(np.abs(lat - gdir.cenlat))
    ref_pix_lon = lon[ilon]
    ref_pix_lat = lat[ilat]

    # read the data
    temp = nc_ts.get_vardata('temp')
    prcp = nc_ts.get_vardata('prcp')
    hgt = nc_ts.get_vardata('hgt')
    # 3x3 window around the reference pixel (used for the gradient below)
    ttemp = temp[:, ilat-1:ilat+2, ilon-1:ilon+2]
    itemp = ttemp[:, 1, 1]
    thgt = hgt[ilat-1:ilat+2, ilon-1:ilon+2]
    ihgt = thgt[1, 1]
    thgt = thgt.flatten()
    iprcp = prcp[:, ilat, ilon]
    nc_ts.close()

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    igrad = None
    if use_grad:
        igrad = np.zeros(len(time)) * np.NaN
        # monthly temperature lapse rate from a regression of the 3x3
        # window temperatures against elevation; only kept when the
        # regression is significant (p < 0.01), NaN otherwise
        for t, loct in enumerate(ttemp):
            slope, _, _, p_val, _ = stats.linregress(thgt,
                                                     loct.flatten())
            igrad[t] = slope if (p_val < 0.01) else np.NaN

    gdir.write_monthly_climate_file(time, iprcp, itemp, ihgt,
                                    ref_pix_lon, ref_pix_lat,
                                    gradient=igrad)
    # metadata (+1 because the hydro year is labelled by its end year)
    out = {'baseline_climate_source': fpath,
           'baseline_hydro_yr_0': y0+1,
           'baseline_hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')
def process_cru_data(gdir, tmp_file=None, pre_file=None, y0=None, y1=None,
                     output_filesuffix=None):
    """Processes and writes the CRU baseline climate data for this glacier.

    Interpolates the CRU TS data to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    tmp_file : str
        path to the CRU temperature file (defaults to the current OGGM chosen
        CRU version)
    pre_file : str
        path to the CRU precip file (defaults to the current OGGM chosen
        CRU version)
    y0 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    y1 : int
        the ending year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str
        this add a suffix to the output file (useful to avoid overwriting
        previous experiments)
    """

    if cfg.PATHS.get('climate_file', None):
        warnings.warn("You seem to have set a custom climate file for this "
                      "run, but are using the default CRU climate "
                      "file instead.")

    if cfg.PARAMS['baseline_climate'] != 'CRU':
        raise InvalidParamsError("cfg.PARAMS['baseline_climate'] should be "
                                 "set to CRU")

    # read the climatology
    ncclim = salem.GeoNetcdf(get_cru_cl_file())

    # and the TS data
    if tmp_file is None:
        tmp_file = get_cru_file('tmp')
    if pre_file is None:
        pre_file = get_cru_file('pre')
    nc_ts_tmp = salem.GeoNetcdf(tmp_file, monthbegin=True)
    nc_ts_pre = salem.GeoNetcdf(pre_file, monthbegin=True)

    # set temporal subset for the ts data (hydro years)
    # sm is the first month of the hydrological year, em the last one
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts_pre.time.year
    y0 = yrs[0] if y0 is None else y0
    y1 = yrs[-1] if y1 is None else y1

    nc_ts_tmp.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    nc_ts_pre.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    lon = gdir.cenlon
    lat = gdir.cenlat

    # This is guaranteed to work because I prepared the file (I hope)
    ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # get climatology data (3x3 window around the glacier center)
    loc_hgt = ncclim.get_vardata('elev')
    loc_tmp = ncclim.get_vardata('temp')
    loc_pre = ncclim.get_vardata('prcp')
    loc_lon = ncclim.get_vardata('lon')
    loc_lat = ncclim.get_vardata('lat')

    # see if the center is ok
    if not np.isfinite(loc_hgt[1, 1]):
        # take another candidate where finite
        isok = np.isfinite(loc_hgt)

        # wait: some areas are entirely NaNs, make the subset larger
        _margin = 1
        while not np.any(isok):
            _margin += 1
            ncclim.set_subset(corners=((lon, lat), (lon, lat)),
                              margin=_margin)
            loc_hgt = ncclim.get_vardata('elev')
            isok = np.isfinite(loc_hgt)
        if _margin > 1:
            log.debug('(%s) I had to look up for far climate pixels: %s',
                      gdir.rgi_id, _margin)

        # Take the first candidate (doesn't matter which)
        lon, lat = ncclim.grid.ll_coordinates
        lon = lon[isok][0]
        lat = lat[isok][0]
        # Resubset around the (now finite) candidate pixel
        ncclim.set_subset()
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        loc_lon = ncclim.get_vardata('lon')
        loc_lat = ncclim.get_vardata('lat')

    assert np.isfinite(loc_hgt[1, 1])
    isok = np.isfinite(loc_hgt)
    hgt_f = loc_hgt[isok].flatten()
    assert len(hgt_f) > 0.

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    ts_grad = None
    if use_grad and len(hgt_f) >= 5:
        # one lapse rate per climatological month, from a regression of the
        # valid window pixels against elevation (NaN if not significant)
        ts_grad = np.zeros(12) * np.NaN
        for i in range(12):
            loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
            slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
            ts_grad[i] = slope if (p_val < 0.01) else np.NaN
        # convert to a timeseries and hydrological years
        ts_grad = ts_grad.tolist()
        ts_grad = ts_grad[em:] + ts_grad[0:em]
        ts_grad = np.asarray(ts_grad * ny)

    # maybe this will throw out of bounds warnings
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # compute monthly anomalies w.r.t. the 1961-1990 reference period
    # of temp
    ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
    ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
    ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
    # of precip
    ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
    ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
    ts_pre_ano = ts_pre.groupby('time.month') - ts_pre_avg
    # scaled anomalies is the default. Standard anomalies above
    # are used later for where ts_pre_avg == 0
    ts_pre = ts_pre.groupby('time.month') / ts_pre_avg

    # interpolate to HR grid
    if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
        # Extreme case, middle pix is not valid
        # take any valid pix from the 3*3 (and hope there's one)
        # NOTE(review): range(2) only scans a 2x2 corner of the 3x3 window;
        # looks like it should be range(3) to cover it fully — confirm
        found_it = False
        for idi in range(2):
            for idj in range(2):
                if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                    ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                    ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                    ts_pre_ano[:, 1, 1] = ts_pre_ano[:, idj, idi]
                    found_it = True
        if not found_it:
            msg = '({}) there is no climate data'.format(gdir.rgi_id)
            raise MassBalanceCalibrationError(msg)
    elif np.any(~np.isfinite(ts_tmp)):
        # maybe the side is nan, but we can do nearest
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='nearest')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='nearest')
        ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
                                                  nc_ts_pre.grid,
                                                  interp='nearest')
    else:
        # We can do bilinear
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='linear')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='linear')
        ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
                                                  nc_ts_pre.grid,
                                                  interp='linear')

    # take the center pixel and add it to the CRU CL clim
    # for temp
    loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                           coords={'month': ts_tmp_avg.month})
    ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    # for prcp
    loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                           coords={'month': ts_pre_avg.month})
    ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_pre_ano = xr.DataArray(ts_pre_ano[:, 1, 1], dims=['time'],
                              coords={'time': time})
    # scaled anomalies
    ts_pre = ts_pre.groupby('time.month') * loc_pre
    # standard anomalies
    ts_pre_ano = ts_pre_ano.groupby('time.month') + loc_pre
    # Correct infinite values with standard anomalies
    # (scaled anomaly is inf where the reference climatology is 0)
    ts_pre.values = np.where(np.isfinite(ts_pre.values),
                             ts_pre.values,
                             ts_pre_ano.values)
    # The last step might create negative values (unlikely). Clip them
    ts_pre.values = utils.clip_min(ts_pre.values, 0)

    # done
    loc_hgt = loc_hgt[1, 1]
    loc_lon = loc_lon[1]
    loc_lat = loc_lat[1]
    assert np.isfinite(loc_hgt)
    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))

    gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                    loc_hgt, loc_lon, loc_lat,
                                    filesuffix=output_filesuffix,
                                    gradient=ts_grad,
                                    source=nc_ts_tmp._nc.title[:10])

    ncclim._nc.close()
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()
def process_dummy_cru_file(gdir, sigma_temp=2, sigma_prcp=0.5, seed=None,
                           y0=None, y1=None, output_filesuffix=None):
    """Create a simple baseline climate file for this glacier - for testing!

    This simply reproduces the climatology with a little randomness in it.

    TODO: extend the functionality by allowing a monthly varying sigma

    Parameters
    ----------
    gdir : GlacierDirectory
        the glacier directory
    sigma_temp : float
        the standard deviation of the random timeseries (set to 0 for
        constant ts)
    sigma_prcp : float
        the standard deviation of the random timeseries (set to 0 for
        constant ts)
    seed : int
        the RandomState seed
    y0 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    y1 : int
        the ending year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str
        this add a suffix to the output file (useful to avoid overwriting
        previous experiments)
    """

    # read the climatology
    clfile = get_cru_cl_file()
    ncclim = salem.GeoNetcdf(clfile)

    # set temporal subset for the ts data (hydro years)
    # sm is the first month of the hydrological year, em the last one
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12

    y0 = 1901 if y0 is None else y0
    y1 = 2018 if y1 is None else y1

    time = pd.date_range(start='{}-{:02d}-01'.format(y0, sm),
                         end='{}-{:02d}-01'.format(y1, em),
                         freq='MS')
    ny, r = divmod(len(time), 12)
    assert r == 0

    lon = gdir.cenlon
    lat = gdir.cenlat

    # This is guaranteed to work because I prepared the file (I hope)
    ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # get climatology data (3x3 window around the glacier center)
    loc_hgt = ncclim.get_vardata('elev')
    loc_tmp = ncclim.get_vardata('temp')
    loc_pre = ncclim.get_vardata('prcp')
    loc_lon = ncclim.get_vardata('lon')
    loc_lat = ncclim.get_vardata('lat')

    # see if the center is ok
    if not np.isfinite(loc_hgt[1, 1]):
        # take another candidate where finite
        isok = np.isfinite(loc_hgt)

        # wait: some areas are entirely NaNs, make the subset larger
        _margin = 1
        while not np.any(isok):
            _margin += 1
            ncclim.set_subset(corners=((lon, lat), (lon, lat)),
                              margin=_margin)
            loc_hgt = ncclim.get_vardata('elev')
            isok = np.isfinite(loc_hgt)
        if _margin > 1:
            log.debug('(%s) I had to look up for far climate pixels: %s',
                      gdir.rgi_id, _margin)

        # Take the first candidate (doesn't matter which)
        lon, lat = ncclim.grid.ll_coordinates
        lon = lon[isok][0]
        lat = lat[isok][0]
        # Resubset around the (now finite) candidate pixel
        ncclim.set_subset()
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        loc_lon = ncclim.get_vardata('lon')
        loc_lat = ncclim.get_vardata('lat')

    assert np.isfinite(loc_hgt[1, 1])
    isok = np.isfinite(loc_hgt)
    hgt_f = loc_hgt[isok].flatten()
    assert len(hgt_f) > 0.

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    ts_grad = None
    if use_grad and len(hgt_f) >= 5:
        # one lapse rate per climatological month, from a regression of the
        # valid window pixels against elevation (NaN if not significant)
        ts_grad = np.zeros(12) * np.NaN
        for i in range(12):
            loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
            slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
            ts_grad[i] = slope if (p_val < 0.01) else np.NaN
        # convert to a timeseries and hydrological years
        ts_grad = ts_grad.tolist()
        ts_grad = ts_grad[em:] + ts_grad[0:em]
        ts_grad = np.asarray(ts_grad * ny)

    # Make DataArrays
    rng = np.random.RandomState(seed)
    loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                           coords={'month': np.arange(1, 13)})
    ts_tmp = rng.randn(len(time)) * sigma_temp
    ts_tmp = xr.DataArray(ts_tmp, dims=['time'],
                          coords={'time': time})

    loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                           coords={'month': np.arange(1, 13)})
    # precip noise is multiplicative (centered on 1) and clipped at 0
    ts_pre = utils.clip_min(rng.randn(len(time)) * sigma_prcp + 1, 0)
    ts_pre = xr.DataArray(ts_pre, dims=['time'],
                          coords={'time': time})

    # Create the time series
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    ts_pre = ts_pre.groupby('time.month') * loc_pre

    # done
    loc_hgt = loc_hgt[1, 1]
    loc_lon = loc_lon[1]
    loc_lat = loc_lat[1]
    assert np.isfinite(loc_hgt)

    gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                    loc_hgt, loc_lon, loc_lat,
                                    gradient=ts_grad,
                                    filesuffix=output_filesuffix,
                                    source='CRU CL2 and some randomness')
    ncclim._nc.close()
def process_histalp_data(gdir, y0=None, y1=None, output_filesuffix=None):
    """Processes and writes the HISTALP baseline climate data for this glacier.

    Extracts the nearest timeseries and writes everything to a NetCDF file.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    y0 : int
        the starting year of the timeseries to write. The default is to take
        1850 (because the data is quite bad before that)
    y1 : int
        the ending year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str
        this add a suffix to the output file (useful to avoid overwriting
        previous experiments)
    """

    if cfg.PATHS.get('climate_file', None):
        warnings.warn("You seem to have set a custom climate file for this "
                      "run, but are using the default HISTALP climate file "
                      "instead.")

    if cfg.PARAMS['baseline_climate'] != 'HISTALP':
        raise InvalidParamsError("cfg.PARAMS['baseline_climate'] should be "
                                 "set to HISTALP.")

    # read the time out of the pure netcdf file
    # (the asserts check the expected index-like time encoding of each file)
    ft = get_histalp_file('tmp')
    fp = get_histalp_file('pre')
    with utils.ncDataset(ft) as nc:
        vt = nc.variables['time']
        assert vt[0] == 0
        assert vt[-1] == vt.shape[0] - 1
        t0 = vt.units.split(' since ')[1][:7]
        time_t = pd.date_range(start=t0, periods=vt.shape[0], freq='MS')
    with utils.ncDataset(fp) as nc:
        vt = nc.variables['time']
        assert vt[0] == 0.5
        assert vt[-1] == vt.shape[0] - .5
        t0 = vt.units.split(' since ')[1][:7]
        time_p = pd.date_range(start=t0, periods=vt.shape[0], freq='MS')

    # Now open with salem
    nc_ts_tmp = salem.GeoNetcdf(ft, time=time_t)
    nc_ts_pre = salem.GeoNetcdf(fp, time=time_p)

    # Some default
    if y0 is None:
        y0 = 1850

    # set temporal subset for the ts data (hydro years)
    # the reference time is given by precip, which is shorter
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts_pre.time.year
    # NOTE(review): y0 can no longer be None here (defaulted to 1850 above),
    # so the `yrs[0]` branch below is dead code — confirm before removing
    y0 = yrs[0] if y0 is None else y0
    y1 = yrs[-1] if y1 is None else y1

    nc_ts_tmp.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    nc_ts_pre.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    # Units
    assert nc_ts_tmp._nc.variables['HSURF'].units.lower() in [
        'm', 'meters', 'meter', 'metres', 'metre']
    assert nc_ts_tmp._nc.variables['T_2M'].units.lower() in [
        'degc', 'degrees', 'degrees celcius', 'degree', 'c']
    assert nc_ts_pre._nc.variables['TOT_PREC'].units.lower() in [
        'kg m-2', 'l m-2', 'mm', 'millimeters', 'millimeter']

    # geoloc: 3x3 window around the glacier center
    lon = gdir.cenlon
    lat = gdir.cenlat
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # read the data
    temp = nc_ts_tmp.get_vardata('T_2M')
    prcp = nc_ts_pre.get_vardata('TOT_PREC')
    hgt = nc_ts_tmp.get_vardata('HSURF')
    ref_lon = nc_ts_tmp.get_vardata('lon')
    ref_lat = nc_ts_tmp.get_vardata('lat')
    source = nc_ts_tmp._nc.title[:7]
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    igrad = None
    if use_grad:
        igrad = np.zeros(len(time)) * np.NaN
        # monthly temperature lapse rate from a regression of the 3x3
        # window temperatures against elevation (NaN if not significant)
        for t, loct in enumerate(temp):
            slope, _, _, p_val, _ = stats.linregress(hgt.flatten(),
                                                     loct.flatten())
            igrad[t] = slope if (p_val < 0.01) else np.NaN

    gdir.write_monthly_climate_file(time, prcp[:, 1, 1], temp[:, 1, 1],
                                    hgt[1, 1], ref_lon[1], ref_lat[1],
                                    gradient=igrad,
                                    filesuffix=output_filesuffix,
                                    source=source)
def process_ccsm_data(gdir, PI_path):
    """Processes CCSM data into anomalies and writes them for this glacier.

    First attempt at a method to process the CCSM data into
    temperature/precip anomalies. The anomalies (w.r.t. the pre-industrial
    control run) are added to the glacier's CRU baseline climatology and
    written as 'gcm_data'.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    PI_path : str
        path to the pre-industrial control dataset used to compute the
        anomalies

    Raises
    ------
    IOError
        if ``cfg.PATHS['climate_file']`` is not set or does not exist
    """
    # Put this in the function def (see process_cesm_data)
    filesuffix = ''

    if not (('climate_file' in cfg.PATHS) and
            os.path.exists(cfg.PATHS['climate_file'])):
        raise IOError('Custom climate file not found')

    # open dataset for precip use (raw time axis needed)
    fpath = cfg.PATHS['climate_file']
    xr_ccsm = xr.open_dataset(fpath, decode_times=False)
    # open dataset for temp use (decoded time axis)
    xr_ccsm_ts = xr.open_dataset(fpath)
    # repeating for pi
    xr_pi = xr.open_dataset(PI_path, decode_times=False)

    # selecting location
    lon = gdir.cenlon
    lat = gdir.cenlat

    # CESM files are in a 0-360 longitude grid
    if lon <= 0:
        lon += 360

    # take the closest grid point
    # TODO: consider GCM interpolation?
    precp = (xr_ccsm.PRECC.sel(lat=lat, lon=lon, method='nearest') +
             xr_ccsm.PRECL.sel(lat=lat, lon=lon, method='nearest'))
    temp = xr_ccsm_ts.TS.sel(lat=lat, lon=lon, method='nearest')
    precp_pi = (xr_pi.PRECC.sel(lat=lat, lon=lon, method='nearest') +
                xr_pi.PRECL.sel(lat=lat, lon=lon, method='nearest'))
    temp_pi = xr_pi.TS.sel(lat=lat, lon=lon, method='nearest')

    # convert temp from K to C
    temp = temp - 273.15
    temp_pi = temp_pi - 273.15

    # Take anomaly for CCSM data (with preindustrial control)
    for i in range(12):
        temp.values[i] = temp.values[i] - temp_pi.values[i]
    for i in range(12):
        precp.values[i] = precp.values[i] - precp_pi.values[i]

    # from normal years to hydrological years
    sm = cfg.PARAMS['hydro_month_'+gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    y0 = int(pd.to_datetime(str(temp.time.values[0])).strftime('%Y'))
    y1 = int(pd.to_datetime(str(temp.time.values[-1])).strftime('%Y'))

    # time string for temp/precip (hydro years)
    time = pd.period_range('{}-{:02d}'.format(y0, sm),
                           '{}-{:02d}'.format(y1, em), freq='M')

    # reorder single year of equil data & add correct time
    # calculate place in array to concat from (2 for ccsm data)
    conc_start = sm-2
    conc_end = sm-14
    temp_hydro = xr.concat([temp[conc_start:], temp[:conc_end]], dim="time")
    precp_hydro = xr.concat([precp[conc_start:], precp[:conc_end]],
                            dim="time")
    temp_hydro['time'] = time
    precp_hydro['time'] = time
    temp = temp_hydro
    precp = precp_hydro

    # Workaround for https://github.com/pydata/xarray/issues/1565
    temp['month'] = ('time', time.month)
    precp['month'] = ('time', time.month)
    temp['year'] = ('time', time.year)
    # BUGFIX: was `temp['year']` assigned twice; the second assignment
    # should mirror the month pair above and go to `precp`
    precp['year'] = ('time', time.year)

    ny, r = divmod(len(temp.time), 12)
    assert r == 0

    # Convert m s-1 to mm mth-1 (for precp)
    ndays = np.tile(cfg.DAYS_IN_MONTH, y1-y0)
    precp = precp * ndays * (60 * 60 * 24 * 1000)

    # Get CRU to apply the anomaly to
    fpath = gdir.get_filepath('climate_monthly')
    ds_cru = xr.open_dataset(fpath)

    # Add the climate anomaly to CRU clim (1961-1990 reference period)
    dscru = ds_cru.sel(time=slice('1961', '1990'))

    # temp
    loc_temp = dscru.temp.groupby('time.month').mean()
    # Oct-Sept format preserved
    ts_tmp = temp.groupby(temp.month) + loc_temp
    # for prcp
    loc_pre = dscru.prcp.groupby('time.month').mean()
    ts_pre = precp.groupby(precp.month) + loc_pre

    # load dates into save format
    fpath = cfg.PATHS['climate_file']
    dsindex = salem.GeoNetcdf(fpath, monthbegin=True)
    time1 = dsindex.variables['time']

    # weird recursive way of getting the dates in the correct format to
    # save. Only necessary for 1 year of data, in order to rearrange the
    # months and get the correct year.
    time_array = [datetime(temp.time.year[i], temp.time.month[i], 1)
                  for i in range(12)]
    time_nums = netCDF4.date2num(time_array, time1.units,
                                 calendar='noleap')
    time2 = netCDF4.num2date(time_nums[:], time1.units, calendar='noleap')

    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))

    # back to -180 - 180 longitudes
    loc_lon = precp.lon if precp.lon <= 180 else precp.lon - 360

    # write to netcdf
    gdir.write_monthly_climate_file(time2, ts_pre.values, ts_tmp.values,
                                    float(dscru.ref_hgt), loc_lon,
                                    precp.lat.values,
                                    time_unit=time1.units,
                                    file_name='gcm_data',
                                    filesuffix=filesuffix)

    # NOTE(review): dsindex is never closed (the close call was commented
    # out) — presumably deliberate, but it leaks a file handle; confirm
    # dsindex._nc.close()
    xr_ccsm.close()
    xr_ccsm_ts.close()
    xr_pi.close()
    ds_cru.close()