Example #1
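These test methods are excerpted from a larger test module; the helpers they call (get_ecmwf_file, get_w5e5_file, assert_allclose, and so on) are imported elsewhere in that module. A plausible import header is sketched below; the module providing get_w5e5_file is not shown in the snippet and is left as a commented placeholder rather than guessed.

import os

import numpy as np
import pytest
import xarray as xr
from numpy.testing import assert_allclose

from oggm.shop.ecmwf import get_ecmwf_file
# from <your_package> import get_w5e5_file  # module not shown in this excerpt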
    def test_get_ecmwf_file(self):
        from oggm.shop import ecmwf
        for d, vars in ecmwf.BASENAMES.items():
            for v, _ in vars.items():
                assert os.path.isfile(ecmwf.get_ecmwf_file(d, v))

        with pytest.raises(ValueError):
            ecmwf.get_ecmwf_file('ERA5', 'zoup')
        with pytest.raises(ValueError):
            ecmwf.get_ecmwf_file('zoup', 'tmp')

    def test_ERA5_daily_dataset(self):

        dataset = 'ERA5_daily'

        ds_ERA5_daily = xr.open_dataset(get_ecmwf_file(dataset, 'tmp'))

        # checks if it is in Kelvin
        assert np.all(ds_ERA5_daily.t2m > 0)
        # not too high temperatures
        assert np.max(ds_ERA5_daily.t2m) < 350

        # ERA5 daily should start in 1979 and end in 2018
        assert ds_ERA5_daily['time.year'][0] == 1979
        assert ds_ERA5_daily['time.year'][-1] == 2018
        assert ds_ERA5_daily['time.month'][0] == 1
        assert ds_ERA5_daily['time.month'][-1] == 12

        # compare the daily dataset to the monthly:
        ds_ERA5 = xr.open_dataset(get_ecmwf_file('ERA5', 'tmp'))

        # check if for Hintereisferner both datasets produce similar monthly
        # temperature time series when ERA5_daily is resampled to months
        lon = 10.7584
        lat = 46.8003

        # compute all the distances and choose nearest gridpoint
        # this also checks if the flattened version is used!
        c = (ds_ERA5_daily.longitude - lon)**2 + (ds_ERA5_daily.latitude - lat)**2
        ds_ERA5_daily_g = ds_ERA5_daily.isel(points=c.argmin())

        ds_ERA5_g = ds_ERA5.sel(time=slice('1979-01-01',
                                           '2018-12-01')).sel(longitude=lon,
                                                              latitude=lat,
                                                              method='nearest')
        # do we use the same longitude/latitudes
        assert ds_ERA5_daily_g.longitude.values == ds_ERA5_g.longitude.values
        assert ds_ERA5_daily_g.latitude.values == ds_ERA5_g.latitude.values

        # do the two datasets have the same monthly temperatures?
        # at the nearest gridpoint to HEF (some rounding errors are allowed)
        assert_allclose(ds_ERA5_daily_g.resample(time='1M').mean().t2m.values,
                        ds_ERA5_g.t2m.values, rtol=1e-4)

    def test_WFDE5_W5E5_daily_dataset(self):

        dataset = 'WFDE5_CRU_daily'
        path_tmp = get_w5e5_file(dataset, 'tmp')
        path_prcp = get_w5e5_file(dataset, 'prcp')
        ds_WFDE5_daily_tmp = xr.open_dataset(path_tmp)
        ds_WFDE5_daily_prcp = xr.open_dataset(path_prcp)

        dataset = 'W5E5_daily'
        path_tmp = get_w5e5_file(dataset, 'tmp')
        path_prcp = get_w5e5_file(dataset, 'prcp')
        ds_W5E5_daily_tmp = xr.open_dataset(path_tmp)
        ds_W5E5_daily_prcp = xr.open_dataset(path_prcp)

        for ds in [ds_WFDE5_daily_tmp, ds_WFDE5_daily_prcp,
                   ds_W5E5_daily_tmp, ds_W5E5_daily_prcp]:
            assert ds['time.month'][0] == 1
            assert ds['time.month'][-1] == 12
            assert ds['time.year'][0] == 1979

        for ds_W5E5 in [ds_W5E5_daily_tmp, ds_W5E5_daily_prcp]:
            assert ds_W5E5['time.year'][-1] == 2019

        # WFDE5 old version should start in 1979 and end in 2018
        for ds_WFDE5 in [ds_WFDE5_daily_tmp, ds_WFDE5_daily_prcp]:
            assert ds_WFDE5['time.year'][-1] == 2018

        # temperature checks
        # checks if it is in Kelvin
        assert np.all(ds_WFDE5_daily_tmp.Tair > 0)
        assert np.all(ds_W5E5_daily_tmp.tas > 0)
        # not too high temperatures
        assert np.max(ds_WFDE5_daily_tmp.Tair) < 350
        assert np.max(ds_W5E5_daily_tmp.tas) < 350

        # prcp checks
        assert np.all(ds_WFDE5_daily_prcp.tp >= 0)
        assert np.all(ds_W5E5_daily_prcp.pr >= 0)
        # they have different prcp units
        # (is accounted for in process_W5E5_data)
        assert ds_W5E5_daily_prcp.pr.units == 'kg m-2 s-1'
        assert ds_W5E5_daily_prcp.pr.max() < 1e-2
        assert ds_WFDE5_daily_prcp.tp.units == 'kg m-2 day-1 ~ mm/day'
        assert ds_WFDE5_daily_prcp.tp.max() > 100

        # compare the daily dataset to the monthly:
        ds_ERA5 = xr.open_dataset(get_ecmwf_file('ERA5', 'tmp'))

        # check if for Hintereisferner the WFDE5/W5E5 daily datasets produce
        # monthly temperature time series similar to ERA5 when resampled
        lon = 10.7584
        lat = 46.8003

        # compute all the distances and choose nearest gridpoint
        # this also checks if the flattened version is used!
        c_wfde5 = ((ds_WFDE5_daily_tmp.longitude - lon)**2
                   + (ds_WFDE5_daily_tmp.latitude - lat)**2)
        ds_WFDE5_daily_tmp_g = ds_WFDE5_daily_tmp.isel(points=c_wfde5.argmin())
        c_w5e5 = ((ds_W5E5_daily_tmp.longitude - lon)**2
                  + (ds_W5E5_daily_tmp.latitude - lat)**2)
        ds_W5E5_daily_tmp_g = ds_W5E5_daily_tmp.isel(points=c_w5e5.argmin())
        ds_W5E5_daily_tmp_g = ds_W5E5_daily_tmp_g.sel(
            time=slice('1979-01-01', '2018-12-01'))

        ds_ERA5_g = ds_ERA5.sel(time=slice('1979-01-01',
                                           '2018-12-01')).sel(longitude=lon,
                                                              latitude=lat,
                                                              method='nearest')
        # do we use a similar longitude/latitude? (not exactly the same, as ERA5 is
        # finer than WFDE5/W5E5)
        assert_allclose(ds_WFDE5_daily_tmp_g.longitude.values,
                        ds_ERA5_g.longitude.values,
                        atol=0.6)
        assert_allclose(ds_W5E5_daily_tmp_g.longitude.values,
                        ds_ERA5_g.longitude.values,
                        atol=0.6)
        assert_allclose(ds_WFDE5_daily_tmp_g.latitude.values,
                        ds_ERA5_g.latitude.values,
                        atol=0.6)
        assert_allclose(ds_W5E5_daily_tmp_g.latitude.values,
                        ds_ERA5_g.latitude.values,
                        atol=0.6)

        # do the three datasets have similar monthly temperatures
        # at the nearest gridpoint to HEF (WFDE5/W5E5 against ERA5)?
        wfde5_tmp_m = ds_WFDE5_daily_tmp_g.resample(time='MS').mean().Tair.values
        w5e5_tmp_m = ds_W5E5_daily_tmp_g.resample(time='MS').mean().tas.values

        tmp_corr_wfde5 = np.corrcoef(wfde5_tmp_m,
                                     ds_ERA5_g.t2m.values)[0][1]
        tmp_corr_w5e5 = np.corrcoef(w5e5_tmp_m,
                                    ds_ERA5_g.t2m.values)[0][1]
        assert tmp_corr_wfde5 > 0.95
        assert tmp_corr_w5e5 > 0.95
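
Both daily tests pick the nearest grid point from a flattened file (a single 'points' dimension instead of a 2D longitude/latitude grid) by minimising the squared coordinate distance, rather than using .sel(..., method='nearest'). A minimal, self-contained sketch of that pattern on synthetic data (the names ds_flat, lon and lat are only illustrative) looks like this:

import numpy as np
import xarray as xr

# tiny flattened dataset: one 'points' dimension instead of a 2D lon/lat grid
ds_flat = xr.Dataset(
    {'t2m': ('points', np.array([271.0, 272.5, 270.2]))},
    coords={'longitude': ('points', np.array([10.5, 10.75, 11.0])),
            'latitude': ('points', np.array([46.5, 46.75, 47.0]))})

# same pattern as in the tests: squared distance, then isel on the argmin
lon, lat = 10.7584, 46.8003
c = (ds_flat.longitude - lon)**2 + (ds_flat.latitude - lat)**2
nearest = ds_flat.isel(points=int(c.argmin()))
print(float(nearest.longitude), float(nearest.latitude), float(nearest.t2m))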
Example #4
def process_era5_daily_data(gdir,
                            y0=None,
                            y1=None,
                            output_filesuffix='_daily',
                            cluster=False):
    """Processes and writes the era5 daily baseline climate data for a glacier.
    into climate_historical_daily.nc

    Extracts the nearest timeseries and writes everything to a NetCDF file.
    This uses only the ERA5 daily temperatures. The precipitation, lapse
    rate and standard deviations are taken from ERA5dr.

    TODO: see _verified_download_helper; no known hash for
    era5_daily_t2m_1979-2018_flat.nc and era5_glacier_invariant_flat

    Parameters
    ----------
    y0 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    y1 : int
        the ending year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str
        this adds a suffix to the output file (useful to avoid overwriting
        previous experiments)
    cluster : bool
        default is False; set it to True when running on the cluster, where
        the files are already available and do not need to be downloaded

    """

    # era5daily only for temperature
    dataset = 'ERA5_daily'
    # for the other variables use the data of ERA5dr
    dataset_othervars = 'ERA5dr'

    # get the central longitude/latitude of the glacier
    lon = gdir.cenlon + 360 if gdir.cenlon < 0 else gdir.cenlon
    lat = gdir.cenlat

    cluster_path = '/home/www/oggm/climate/'

    if cluster:
        path = cluster_path + BASENAMES[dataset]['tmp']
    else:
        path = get_ecmwf_file(dataset, 'tmp')

    # Use xarray to read the data
    # (reading the file with netCDF4 directly would be faster)
    with xr.open_dataset(path) as ds:
        assert ds.longitude.min() >= 0

        # set temporal subset for the ts data (hydro years)
        if gdir.hemisphere == 'nh':
            sm = cfg.PARAMS['hydro_month_nh']
        elif gdir.hemisphere == 'sh':
            sm = cfg.PARAMS['hydro_month_sh']

        em = sm - 1 if (sm > 1) else 12
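        # e.g. the default NH hydrological year starts in October (sm = 10),
        # which gives em = 9: the series runs Oct of y0 through Sep of y1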

        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1

        if y1 > 2018 or y0 < 1979:
            text = ('The climate files only go from 1979 to 2018; '
                    'choose another y0 and y1')
            raise InvalidParamsError(text)
        # if default settings: this is the last day in March or September
        time_f = '{}-{:02d}'.format(y1, em)
        end_day = int(ds.sel(time=time_f).time.dt.daysinmonth[-1].values)

        #  this was tested also for hydro_month = 1
        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-{}'.format(y1, em, end_day)))

        try:
            # compute all the distances and choose the nearest gridpoint
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())
        except ValueError:
            # fallback for a non-flattened file; with the flattened dataset
            # this should not occur
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')

        # temperature should be in degree Celsius for the glacier climate files
        temp = ds['t2m'].data - 273.15
        time = ds.time.data

        ref_lon = float(ds['longitude'])
        ref_lat = float(ds['latitude'])

        ref_lon = ref_lon - 360 if ref_lon > 180 else ref_lon

    # precipitation ('pre') is taken from the ERA5dr dataset
    with xr.open_dataset(get_ecmwf_file(dataset_othervars, 'pre')) as ds:
        assert ds.longitude.min() >= 0

        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1
        # Attention here we take the same y0 and y1 as given from the
        # daily tmp dataset (goes till end of 2018)

        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-01'.format(y1, em)))
        try:
            # prcp is not flattened, so this here should work normally
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
        except ValueError:
            # fallback in case the precipitation file is flattened
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())

        # the prcp dataset needs to be restructured to have values for each day
        prcp = ds['tp'].data * 1000
        # assume that precipitation is the same on every day of the month:
        prcp = np.repeat(prcp, ds['time.daysinmonth'])
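        # e.g. monthly values [a, b] with daysinmonth [31, 28] become
        # [a]*31 followed by [b]*28, i.e. one identical value per day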
        # attention: the unit is now prcp per day
        # (not per month as in the OGGM default, which would be
        # prcp = ds['tp'].data * 1000 * ds['time.daysinmonth'])

    if cluster:
        path_inv = cluster_path + BASENAMES[dataset]['inv']
    else:
        path_inv = get_ecmwf_file(dataset, 'inv')
    with xr.open_dataset(path_inv) as ds:
        assert ds.longitude.min() >= 0
        ds = ds.isel(time=0)
        try:
            # Flattened ERA5_invariant (only possibility at the moment)
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())
        except ValueError:
            # this should not occur
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')

        G = cfg.G  # 9.80665
        hgt = ds['z'].data / G

    gradient = None
    temp_std = None
    path_lapserates = get_ecmwf_file(dataset_othervars, 'lapserates')
    with xr.open_dataset(path_lapserates) as ds:
        assert ds.longitude.min() >= 0

        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1
        # Attention here we take the same y0 and y1 as given from the
        # daily tmp dataset (goes till end of 2018)

        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-01'.format(y1, em)))

        # no flattening done for the ERA5dr gradient dataset
        ds = ds.sel(longitude=lon, latitude=lat, method='nearest')

        # get the monthly gradient values
        gradient = ds['lapserate'].data

        # gradient needs to be restructured to have values for each day
        gradient = np.repeat(gradient, ds['time.daysinmonth'])
        # assume same gradient for each day

    # OK, ready to write
    write_climate_file(gdir,
                       time,
                       prcp,
                       temp,
                       hgt,
                       ref_lon,
                       ref_lat,
                       filesuffix=output_filesuffix,
                       gradient=gradient,
                       temp_std=temp_std,
                       source=dataset,
                       file_name='climate_historical')
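
A hedged usage sketch follows. It assumes gdir is an already initialised oggm.GlacierDirectory (for example Hintereisferner, RGI60-11.00897) whose setup is not shown, and that the written file exposes the usual OGGM variable names ('temp', 'prcp'); treat those names as assumptions if your version differs.

import xarray as xr

# 'gdir' is assumed to be an already initialised oggm.GlacierDirectory
# (e.g. Hintereisferner, RGI60-11.00897); its setup is not shown here

# restrict to the 1979-2018 period covered by the ERA5 daily files
process_era5_daily_data(gdir, y0=1979, y1=2018, output_filesuffix='_daily')

# the data end up in 'climate_historical_daily.nc' inside the glacier
# directory and can be inspected with xarray
path = gdir.get_filepath('climate_historical', filesuffix='_daily')
with xr.open_dataset(path) as ds:
    print(ds['temp'].mean(), ds['prcp'].sum())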