def test_get_ecmwf_file(self):
    """Check that all known ECMWF files resolve and bad keys are rejected.

    For every dataset/variable combination listed in ``ecmwf.BASENAMES``
    the path returned by ``ecmwf.get_ecmwf_file`` must point to an existing
    file. Unknown dataset or variable names must raise a ``ValueError``.
    """
    from oggm.shop import ecmwf

    # `basenames` maps the variable names of one dataset to their files;
    # only the keys (variable names) are needed here.
    for dataset, basenames in ecmwf.BASENAMES.items():
        for var in basenames:
            assert os.path.isfile(ecmwf.get_ecmwf_file(dataset, var))

    # known dataset, unknown variable
    with pytest.raises(ValueError):
        ecmwf.get_ecmwf_file('ERA5', 'zoup')
    # unknown dataset
    with pytest.raises(ValueError):
        ecmwf.get_ecmwf_file('zoup', 'tmp')
def test_ERA5_daily_dataset(self):
    """Sanity-check the ERA5 daily temperature file against monthly ERA5.

    Checks the units (Kelvin), a plausible upper temperature bound and the
    covered time period (January 1979 to December 2018) of the daily file.
    Then, at the gridpoint nearest to Hintereisferner (HEF), the monthly
    means of the daily temperatures are compared to the monthly ERA5
    temperatures.
    """
    dataset = 'ERA5_daily'

    # coordinates of Hintereisferner
    lon = 10.7584
    lat = 46.8003

    # Use context managers so that the files are closed again, even when
    # one of the assertions fails.
    with xr.open_dataset(get_ecmwf_file(dataset, 'tmp')) as ds_ERA5_daily:
        # checks if it is in Kelvin
        assert np.all(ds_ERA5_daily.t2m > 0)
        # not too high temperatures
        assert np.max(ds_ERA5_daily.t2m) < 350

        # ERA5 daily should start in January 1979 and end in December 2018
        assert ds_ERA5_daily['time.year'][0] == 1979
        assert ds_ERA5_daily['time.year'][-1] == 2018
        assert ds_ERA5_daily['time.month'][0] == 1
        assert ds_ERA5_daily['time.month'][-1] == 12

        # compare the daily dataset to the monthly one:
        with xr.open_dataset(get_ecmwf_file('ERA5', 'tmp')) as ds_ERA5:
            # compute all the distances and choose the nearest gridpoint
            # this also checks if the flattened version is used
            # (selection along the `points` dimension)!
            c = ((ds_ERA5_daily.longitude - lon)**2 +
                 (ds_ERA5_daily.latitude - lat)**2)
            ds_ERA5_daily_g = ds_ERA5_daily.isel(points=c.argmin())

            ds_ERA5_g = ds_ERA5.sel(time=slice('1979-01-01', '2018-12-01'))
            ds_ERA5_g = ds_ERA5_g.sel(longitude=lon, latitude=lat,
                                      method='nearest')

            # do we use the same longitude/latitude in both datasets?
            assert (ds_ERA5_daily_g.longitude.values ==
                    ds_ERA5_g.longitude.values)
            assert (ds_ERA5_daily_g.latitude.values ==
                    ds_ERA5_g.latitude.values)

            # do the two datasets have the same monthly temperatures at the
            # nearest gridpoint to HEF? (some rounding errors are allowed)
            daily_as_monthly = ds_ERA5_daily_g.resample(time='1M').mean()
            assert_allclose(daily_as_monthly.t2m.values,
                            ds_ERA5_g.t2m.values, rtol=1e-4)
def test_WFDE5_W5E5_daily_dataset(self):
    """Sanity-check the daily WFDE5_CRU and W5E5 files.

    Checks the covered time period, the temperature units (Kelvin), a
    plausible upper temperature bound, non-negative precipitation and the
    (different) precipitation units of both datasets. Then, at the
    gridpoint nearest to Hintereisferner (HEF), the monthly means of the
    daily temperatures must correlate well with the monthly ERA5
    temperatures.
    """
    dataset = 'WFDE5_CRU_daily'
    path_wfde5_tmp = get_w5e5_file(dataset, 'tmp')
    path_wfde5_prcp = get_w5e5_file(dataset, 'prcp')

    dataset = 'W5E5_daily'
    path_w5e5_tmp = get_w5e5_file(dataset, 'tmp')
    path_w5e5_prcp = get_w5e5_file(dataset, 'prcp')

    # coordinates of Hintereisferner
    lon = 10.7584
    lat = 46.8003

    # Use context managers so that the files are closed again, even when
    # one of the assertions fails.
    with xr.open_dataset(path_wfde5_tmp) as ds_WFDE5_daily_tmp, \
            xr.open_dataset(path_wfde5_prcp) as ds_WFDE5_daily_prcp, \
            xr.open_dataset(path_w5e5_tmp) as ds_W5E5_daily_tmp, \
            xr.open_dataset(path_w5e5_prcp) as ds_W5E5_daily_prcp:

        # all files start in January 1979 and end in a December
        for ds in [ds_WFDE5_daily_tmp, ds_WFDE5_daily_prcp,
                   ds_W5E5_daily_tmp, ds_W5E5_daily_prcp]:
            assert ds['time.month'][0] == 1
            assert ds['time.month'][-1] == 12
            assert ds['time.year'][0] == 1979

        # W5E5 should end in 2019
        for ds_W5E5 in [ds_W5E5_daily_tmp, ds_W5E5_daily_prcp]:
            assert ds_W5E5['time.year'][-1] == 2019
        # WFDE5 (old version) should end in 2018
        for ds_WFDE5 in [ds_WFDE5_daily_tmp, ds_WFDE5_daily_prcp]:
            assert ds_WFDE5['time.year'][-1] == 2018

        # temperature checks
        # checks if it is in Kelvin
        assert np.all(ds_WFDE5_daily_tmp.Tair > 0)
        assert np.all(ds_W5E5_daily_tmp.tas > 0)
        # not too high temperatures
        assert np.max(ds_WFDE5_daily_tmp.Tair) < 350
        assert np.max(ds_W5E5_daily_tmp.tas) < 350

        # prcp checks
        assert np.all(ds_WFDE5_daily_prcp.tp >= 0)
        assert np.all(ds_W5E5_daily_prcp.pr >= 0)
        # they have different prcp units
        # (is accounted for in process_W5E5_data)
        assert ds_W5E5_daily_prcp.pr.units == 'kg m-2 s-1'
        assert ds_W5E5_daily_prcp.pr.max() < 1e-2
        assert ds_WFDE5_daily_prcp.tp.units == 'kg m-2 day-1 ~ mm/day'
        assert ds_WFDE5_daily_prcp.tp.max() > 100

        # compare the daily datasets to the monthly ERA5 dataset:
        # check if for Hintereisferner the daily datasets, resampled to
        # monthly means, produce a monthly temperature time series similar
        # to ERA5
        with xr.open_dataset(get_ecmwf_file('ERA5', 'tmp')) as ds_ERA5:
            # compute all the distances and choose the nearest gridpoint
            # this also checks if the flattened version is used
            # (selection along the `points` dimension)!
            c_wfde5 = ((ds_WFDE5_daily_tmp.longitude - lon)**2 +
                       (ds_WFDE5_daily_tmp.latitude - lat)**2)
            ds_WFDE5_daily_tmp_g = ds_WFDE5_daily_tmp.isel(
                points=c_wfde5.argmin())

            c_w5e5 = ((ds_W5E5_daily_tmp.longitude - lon)**2 +
                      (ds_W5E5_daily_tmp.latitude - lat)**2)
            ds_W5E5_daily_tmp_g = ds_W5E5_daily_tmp.isel(
                points=c_w5e5.argmin())
            # W5E5 goes until 2019: crop it to the common period. The daily
            # series has to include all days of December 2018, otherwise
            # the last monthly mean would be built from a single day.
            ds_W5E5_daily_tmp_g = ds_W5E5_daily_tmp_g.sel(
                time=slice('1979-01-01', '2018-12-31'))

            # monthly ERA5 data: the last timestamp is the first of December
            ds_ERA5_g = ds_ERA5.sel(time=slice('1979-01-01', '2018-12-01'))
            ds_ERA5_g = ds_ERA5_g.sel(longitude=lon, latitude=lat,
                                      method='nearest')

            # do we use a similar longitude/latitude? (not exactly the
            # same, as ERA5 is finer than WFDE5/W5E5)
            assert_allclose(ds_WFDE5_daily_tmp_g.longitude.values,
                            ds_ERA5_g.longitude.values, atol=0.6)
            assert_allclose(ds_W5E5_daily_tmp_g.longitude.values,
                            ds_ERA5_g.longitude.values, atol=0.6)
            assert_allclose(ds_WFDE5_daily_tmp_g.latitude.values,
                            ds_ERA5_g.latitude.values, atol=0.6)
            assert_allclose(ds_W5E5_daily_tmp_g.latitude.values,
                            ds_ERA5_g.latitude.values, atol=0.6)

            # do the three datasets have similar monthly temperatures at
            # the nearest gridpoint to HEF? (WFDE5 and W5E5 monthly means
            # are each correlated with the monthly ERA5 temperature)
            wfde5_tmp_m = ds_WFDE5_daily_tmp_g.resample(
                time='MS').mean().Tair.values
            w5e5_tmp_m = ds_W5E5_daily_tmp_g.resample(
                time='MS').mean().tas.values
            era5_tmp_m = ds_ERA5_g.t2m.values
            tmp_corr_wfde5 = np.corrcoef(wfde5_tmp_m, era5_tmp_m)[0][1]
            tmp_corr_w5e5 = np.corrcoef(w5e5_tmp_m, era5_tmp_m)[0][1]
            assert tmp_corr_wfde5 > 0.95
            assert tmp_corr_w5e5 > 0.95
def process_era5_daily_data(gdir, y0=None, y1=None, output_filesuffix='_daily',
                            cluster=False):
    """Processes and writes the ERA5 daily baseline climate data for a glacier.

    Extracts the timeseries of the gridpoint nearest to the glacier and
    hands everything to ``write_climate_file`` (``file_name`` is
    'climate_historical', combined with ``output_filesuffix``).

    Only the temperature comes from the ERA5 daily dataset. Precipitation
    and lapse rates are read from the monthly ERA5dr dataset and repeated
    for each day of the respective month. No temperature standard deviation
    is written (``temp_std`` is passed as None).

    TODO: see _verified_download_helper: no known hash for
    era5_daily_t2m_1979-2018_flat.nc and era5_glacier_invariant_flat

    Parameters
    ----------
    gdir :
        the glacier directory to process. Its ``cenlon``, ``cenlat`` and
        ``hemisphere`` attributes are read.
    y0 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    y1 : int
        the ending year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str
        this adds a suffix to the output file (useful to avoid overwriting
        previous experiments)
    cluster : bool
        default is False. If this is run on the cluster, set it to True:
        the ERA5 daily files are then read from the local cluster path
        instead of being fetched with ``get_ecmwf_file``.

    Raises
    ------
    InvalidParamsError
        if ``gdir.hemisphere`` is neither 'nh' nor 'sh', or if the requested
        period is not within 1979--2018.
    """

    # era5daily only for temperature
    dataset = 'ERA5_daily'
    # for the other variables use the data of ERA5dr
    dataset_othervars = 'ERA5dr'

    # get the central longitude/latitude of the glacier
    # (negative longitudes are shifted by 360 to be non-negative)
    lon = gdir.cenlon + 360 if gdir.cenlon < 0 else gdir.cenlon
    lat = gdir.cenlat

    # set temporal subset for the ts data (hydro years):
    # sm is the first and em the last month of the hydrological year
    if gdir.hemisphere == 'nh':
        sm = cfg.PARAMS['hydro_month_nh']
    elif gdir.hemisphere == 'sh':
        sm = cfg.PARAMS['hydro_month_sh']
    else:
        # fail explicitly instead of with an unbound `sm` further below
        raise InvalidParamsError("gdir.hemisphere should be 'nh' or 'sh', "
                                 'got: {!r}'.format(gdir.hemisphere))
    em = sm - 1 if (sm > 1) else 12

    cluster_path = '/home/www/oggm/climate/'

    if cluster:
        path = cluster_path + BASENAMES[dataset]['tmp']
    else:
        path = get_ecmwf_file(dataset, 'tmp')

    # Use xarray to read the data
    # would go faster with netCDF -.-
    with xr.open_dataset(path) as ds:
        assert ds.longitude.min() >= 0

        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1

        if y1 > 2018 or y0 < 1979:
            text = ('The climate files only go from 1979--2018, '
                    'choose another y0 and y1')
            raise InvalidParamsError(text)

        # number of days of the last month of the period
        # (if default settings: the last day in March or September)
        time_f = '{}-{:02d}'.format(y1, em)
        end_day = int(ds.sel(time=time_f).time.dt.daysinmonth[-1].values)

        # this was tested also for hydro_month = 1
        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-{}'.format(y1, em, end_day)))

        try:
            # flattened file: compute all the distances and choose the
            # nearest gridpoint along the `points` dimension
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())
        except ValueError:
            # fallback for a file which is not flattened; this should
            # normally not occur
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')

        # temperature should be in degree Celsius for the glacier climate
        # files
        temp = ds['t2m'].data - 273.15
        time = ds.time.data

        ref_lon = float(ds['longitude'])
        ref_lat = float(ds['latitude'])
        # back to the [-180, 180] longitude convention
        ref_lon = ref_lon - 360 if ref_lon > 180 else ref_lon

    # precipitation is taken from the monthly ERA5dr dataset
    with xr.open_dataset(get_ecmwf_file(dataset_othervars, 'pre')) as ds:
        assert ds.longitude.min() >= 0

        # Attention: here we take the same y0 and y1 as used for the daily
        # temperature dataset (which goes until the end of 2018)
        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-01'.format(y1, em)))
        try:
            # prcp is not flattened, so this should normally work
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
        except ValueError:
            # fallback for a flattened precipitation file
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())

        # The monthly prcp needs to be restructured to have values for each
        # day: just assume that precipitation is the same on every day of
        # the month.
        # Attention: the unit is now prcp per day, not per month as in the
        # OGGM default (ds['tp'].data * 1000 * ds['time.daysinmonth'])
        prcp = ds['tp'].data * 1000
        prcp = np.repeat(prcp, ds['time.daysinmonth'])

    if cluster:
        path_inv = cluster_path + BASENAMES[dataset]['inv']
    else:
        path_inv = get_ecmwf_file(dataset, 'inv')

    with xr.open_dataset(path_inv) as ds:
        assert ds.longitude.min() >= 0
        ds = ds.isel(time=0)
        try:
            # Flattened ERA5_invariant (only possibility at the moment)
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())
        except ValueError:
            # this should not occur
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')

        # reference height: `z` divided by cfg.G
        G = cfg.G  # 9.80665
        hgt = ds['z'].data / G

    # no temperature standard deviation is written for the daily data
    temp_std = None

    path_lapserates = get_ecmwf_file(dataset_othervars, 'lapserates')
    with xr.open_dataset(path_lapserates) as ds:
        assert ds.longitude.min() >= 0

        # Attention: here we take the same y0 and y1 as used for the daily
        # temperature dataset (which goes until the end of 2018)
        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-01'.format(y1, em)))

        # no flattening done for the ERA5dr gradient dataset
        ds = ds.sel(longitude=lon, latitude=lat, method='nearest')

        # The monthly gradient values need to be restructured to have
        # values for each day: assume the same gradient for each day of the
        # month.
        gradient = ds['lapserate'].data
        gradient = np.repeat(gradient, ds['time.daysinmonth'])

    # OK, ready to write
    write_climate_file(gdir, time, prcp, temp, hgt, ref_lon, ref_lat,
                       filesuffix=output_filesuffix,
                       gradient=gradient,
                       temp_std=temp_std,
                       source=dataset,
                       file_name='climate_historical')