Example #1
def process_custom_climate_data(gdir):
    """Processes and writes the climate data from a user-defined climate file.

    The input file must have a specific format (see
    oggm-sample-data/test-files/histalp_merged_hef.nc for an example).

    Uses caching for faster retrieval.

    This is the way OGGM does it for the Alps (HISTALP).
    """

    if not (('climate_file' in cfg.PATHS)
            and os.path.exists(cfg.PATHS['climate_file'])):
        raise IOError('Custom climate file not found')

    # read the file
    fpath = cfg.PATHS['climate_file']
    nc_ts = salem.GeoNetcdf(fpath)

    # set temporal subset for the ts data (hydro years)
    yrs = nc_ts.time.year
    y0, y1 = yrs[0], yrs[-1]
    nc_ts.set_period(t0='{}-10-01'.format(y0), t1='{}-09-01'.format(y1))
    time = nc_ts.time
    ny, r = divmod(len(time), 12)
    if r != 0:
        raise ValueError('Climate data should be N full years exclusively')

    # Units
    assert nc_ts._nc.variables['hgt'].units.lower() in [
        'm', 'meters', 'meter', 'metres', 'metre'
    ]
    assert nc_ts._nc.variables['temp'].units.lower() in [
        'degc', 'degrees', 'degree', 'c'
    ]
    assert nc_ts._nc.variables['prcp'].units.lower() in [
        'kg m-2', 'l m-2', 'mm', 'millimeters', 'millimeter'
    ]

    # geoloc
    lon = nc_ts._nc.variables['lon'][:]
    lat = nc_ts._nc.variables['lat'][:]

    # Gradient defaults
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    def_grad = cfg.PARAMS['temp_default_gradient']
    g_minmax = cfg.PARAMS['temp_local_gradient_bounds']

    ilon = np.argmin(np.abs(lon - gdir.cenlon))
    ilat = np.argmin(np.abs(lat - gdir.cenlat))
    ref_pix_lon = lon[ilon]
    ref_pix_lat = lat[ilat]
    iprcp, itemp, igrad, ihgt = utils.joblib_read_climate(
        fpath, ilon, ilat, def_grad, g_minmax, use_grad)
    gdir.write_monthly_climate_file(time, iprcp, itemp, igrad, ihgt,
                                    ref_pix_lon, ref_pix_lat)
    # metadata
    out = {'climate_source': fpath, 'hydro_yr_0': y0 + 1, 'hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')
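
# A minimal usage sketch (not from the original source): `gdir` is assumed
# to be a ready oggm.GlacierDirectory and the path below is a placeholder.
from oggm import cfg

cfg.initialize()
cfg.PATHS['climate_file'] = '/path/to/histalp_merged_hef.nc'
process_custom_climate_data(gdir)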
Example #2
def process_cru_data(gdir):
    """Processes and writes the climate data for this glacier.

    Interpolates the CRU TS data to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.
    """

    # read the climatology
    clfile = utils.get_cru_cl_file()
    ncclim = salem.GeoNetcdf(clfile)
    # and the TS data
    nc_ts_tmp = salem.GeoNetcdf(utils.get_cru_file('tmp'), monthbegin=True)
    nc_ts_pre = salem.GeoNetcdf(utils.get_cru_file('pre'), monthbegin=True)

    # set temporal subset for the ts data (hydro years)
    yrs = nc_ts_pre.time.year
    y0, y1 = yrs[0], yrs[-1]
    nc_ts_tmp.set_period(t0='{}-10-01'.format(y0), t1='{}-09-01'.format(y1))
    nc_ts_pre.set_period(t0='{}-10-01'.format(y0), t1='{}-09-01'.format(y1))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    # gradient default params
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    def_grad = cfg.PARAMS['temp_default_gradient']
    g_minmax = cfg.PARAMS['temp_local_gradient_bounds']

    lon = gdir.cenlon
    lat = gdir.cenlat

    # This is guaranteed to work because I prepared the file (I hope)
    ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # get climatology data
    loc_hgt = ncclim.get_vardata('elev')
    loc_tmp = ncclim.get_vardata('temp')
    loc_pre = ncclim.get_vardata('prcp')
    loc_lon = ncclim.get_vardata('lon')
    loc_lat = ncclim.get_vardata('lat')

    # see if the center is ok
    if not np.isfinite(loc_hgt[1, 1]):
        # take another candidate where finite
        isok = np.isfinite(loc_hgt)

        # wait: some areas are entirely NaNs, make the subset larger
        _margin = 1
        while not np.any(isok):
            _margin += 1
            ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=_margin)
            loc_hgt = ncclim.get_vardata('elev')
            isok = np.isfinite(loc_hgt)
        if _margin > 1:
            log.debug('%s: I had to search far for climate pixels: %s',
                      gdir.rgi_id, _margin)

        # Take the first candidate (doesn't matter which)
        lon, lat = ncclim.grid.ll_coordinates
        lon = lon[isok][0]
        lat = lat[isok][0]
        # Resubset
        ncclim.set_subset()
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        loc_lon = ncclim.get_vardata('lon')
        loc_lat = ncclim.get_vardata('lat')

    assert np.isfinite(loc_hgt[1, 1])
    isok = np.isfinite(loc_hgt)
    hgt_f = loc_hgt[isok].flatten()
    assert len(hgt_f) > 0.
    ts_grad = np.zeros(12) + def_grad
    if use_grad and len(hgt_f) >= 5:
        for i in range(12):
            loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
            slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
            ts_grad[i] = slope if (p_val < 0.01) else def_grad
    # ... but don't exaggerate too much
    ts_grad = np.clip(ts_grad, g_minmax[0], g_minmax[1])
    # convert to a timeseries and hydrological years
    ts_grad = ts_grad.tolist()
    ts_grad = ts_grad[9:] + ts_grad[0:9]
    ts_grad = np.asarray(ts_grad * ny)

    # maybe this will throw out of bounds warnings
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # compute monthly anomalies
    # of temp
    ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
    ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
    ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
    # of precip
    ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
    ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
    ts_pre = ts_pre.groupby('time.month') - ts_pre_avg

    # interpolate to HR grid
    if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
        # Extreme case, middle pix is not valid
        # take any valid pix from the 3*3 (and hope there's one)
        found_it = False
        for idi in range(3):
            for idj in range(3):
                if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                    ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                    ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                    found_it = True
        if not found_it:
            msg = '{}: OMG there is no climate data'.format(gdir.rgi_id)
            raise RuntimeError(msg)
    elif np.any(~np.isfinite(ts_tmp)):
        # maybe the side is nan, but we can do nearest
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='nearest')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='nearest')
    else:
        # We can do bilinear
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='linear')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='linear')

    # take the center pixel and add it to the CRU CL clim
    # for temp
    loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                           coords={'month': ts_tmp_avg.month})
    ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    # for prcp
    loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                           coords={'month': ts_pre_avg.month})
    ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_pre = ts_pre.groupby('time.month') + loc_pre

    # done
    loc_hgt = loc_hgt[1, 1]
    loc_lon = loc_lon[1]
    loc_lat = loc_lat[1]
    assert np.isfinite(loc_hgt)
    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))
    assert np.all(np.isfinite(ts_grad))
    gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                    ts_grad, loc_hgt, loc_lon, loc_lat)
    ncclim._nc.close()
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()
    # metadata
    out = {'climate_source': 'CRU data', 'hydro_yr_0': y0+1, 'hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')
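
# The core of the routine above is the 1961-1990 anomaly arithmetic. A
# self-contained sketch of the same xarray pattern on synthetic data
# (all names here are illustrative, not from the original):
import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range('1951-01-01', '2000-12-01', freq='MS')
ts = xr.DataArray(np.random.randn(len(time)) + 5., dims=['time'],
                  coords={'time': time})
# monthly climatology over the reference period
avg = ts.sel(time=slice('1961-01-01', '1990-12-01'))
avg = avg.groupby('time.month').mean(dim='time')
# anomalies: each value minus its calendar-month mean
ano = ts.groupby('time.month') - avg
# adding a 12-value climatology back yields absolute values again
clim = xr.DataArray(np.linspace(-5., 6., 12), dims=['month'],
                    coords={'month': np.arange(1, 13)})
out = ano.groupby('time.month') + clim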
Example #3
def process_cesm_data(gdir, filesuffix=''):
    """Processes and writes the climate data for this glacier.

    This function interpolates the Community Earth System Model Last
    Millennial Ensemble (CESM-LME) climate simulations from
    Otto-Bliesner et al. (2016) to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.

    Parameters
    ----------
    filesuffix : str
        append a suffix to the filename (useful for model runs).
    """

    # GCM temperature and precipitation data
    if not (('gcm_temp_file' in cfg.PATHS) and
            os.path.exists(cfg.PATHS['gcm_temp_file'])):
        raise IOError('GCM temp file not found')

    if not (('gcm_precc_file' in cfg.PATHS) and
            os.path.exists(cfg.PATHS['gcm_precc_file'])):
        raise IOError('GCM precc file not found')

    if not (('gcm_precl_file' in cfg.PATHS) and
            os.path.exists(cfg.PATHS['gcm_precl_file'])):
        raise IOError('GCM precl file not found')

    # read the files
    fpath_temp = cfg.PATHS['gcm_temp_file']
    fpath_precc = cfg.PATHS['gcm_precc_file']
    fpath_precl = cfg.PATHS['gcm_precl_file']

    tempds = xr.open_dataset(fpath_temp)
    precpcds = xr.open_dataset(fpath_precc, decode_times=False)
    preclpds = xr.open_dataset(fpath_precl, decode_times=False)

    # select for location
    lon = gdir.cenlon
    lat = gdir.cenlat

    # CESM files are in 0-360
    if lon <= 0:
        lon += 360

    # take the closest
    # TODO: consider GCM interpolation?
    temp = tempds.TREFHT.sel(lat=lat, lon=lon, method='nearest')
    precp = precpcds.PRECC.sel(lat=lat, lon=lon, method='nearest') + \
            preclpds.PRECL.sel(lat=lat, lon=lon, method='nearest')

    # from normal years to hydrological years
    precp = precp[9:-3]
    temp = temp[9:-3]
    y0 = int(temp.time.values[0].strftime('%Y'))
    y1 = int(temp.time.values[-1].strftime('%Y'))
    temp['time'] = pd.period_range('{}-10'.format(y0), '{}-9'.format(y1),
                                   freq='M')
    precp['time'] = pd.period_range('{}-10'.format(y0), '{}-9'.format(y1),
                                    freq='M')
    ny, r = divmod(len(temp.time), 12)
    assert r == 0

    # Convert m s-1 to mm mth-1
    ndays = np.tile([31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30],
                    (y1 - y0))
    precp = precp * ndays * (60 * 60 * 24 * 1000)

    # compute monthly anomalies
    year = np.array([t.year for t in temp.time.values])
    # of temp
    ts_tmp_avg = temp.isel(time=(year >= 1961) & (year <= 1990))
    ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
    ts_tmp = temp.groupby('time.month') - ts_tmp_avg
    # of precip
    ts_pre_avg = precp.isel(time=(year >= 1961) & (year <= 1990))
    ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
    ts_pre = precp.groupby('time.month') - ts_pre_avg

    # Get CRU to apply the anomaly to
    fpath = gdir.get_filepath('climate_monthly')
    dscru = xr.open_dataset(fpath)

    # Here we assume the gradient is a monthly average
    ts_grad = np.tile(dscru.grad[0:12], ny)

    # Add climate anomaly to CRU clim
    dscru = dscru.sel(time=slice('1961', '1990'))
    # for temp
    loc_tmp = dscru.temp.groupby('time.month').mean()
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    # for prcp
    loc_pre = dscru.prcp.groupby('time.month').mean()
    ts_pre = ts_pre.groupby('time.month') + loc_pre

    # load the dates in the right format for saving
    dsindex = salem.GeoNetcdf(fpath_temp, monthbegin=True)
    time1 = dsindex.variables['time']
    time2 = time1[9:-3] - ndays  # from normal years to hydrological years
    time2 = netCDF4.num2date(time2, time1.units, calendar='noleap')

    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))
    assert np.all(np.isfinite(ts_grad))

    # back to -180 - 180
    loc_lon = precp.lon if precp.lon <= 180 else precp.lon - 360

    gdir.write_monthly_climate_file(time2, ts_pre.values, ts_tmp.values,
                                    ts_grad, dscru.ref_hgt,
                                    loc_lon, precp.lat.values,
                                    time_unit=time1.units,
                                    file_name='cesm_data',
                                    filesuffix=filesuffix)

    dsindex._nc.close()
    tempds.close()
    precpcds.close()
    preclpds.close()
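
# A hedged usage sketch with placeholder paths. The routine reads the
# glacier's 'climate_monthly' file, so the CRU baseline must exist first:
from oggm import cfg

cfg.PATHS['gcm_temp_file'] = '/path/to/cesm.TREFHT.nc'
cfg.PATHS['gcm_precc_file'] = '/path/to/cesm.PRECC.nc'
cfg.PATHS['gcm_precl_file'] = '/path/to/cesm.PRECL.nc'

process_cru_data(gdir)                      # writes 'climate_monthly'
process_cesm_data(gdir, filesuffix='_lme')  # writes 'cesm_data' + suffix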
Example #4
def process_custom_climate_data(gdir):
    """Processes and writes the climate data from a user-defined climate file.

    The input file must have a specific format (see
    oggm-sample-data/test-files/histalp_merged_hef.nc for an example).

    Uses caching for faster retrieval.

    This is the way OGGM does it for the Alps (HISTALP).
    """

    if not (('climate_file' in cfg.PATHS)
            and os.path.exists(cfg.PATHS['climate_file'])):
        raise IOError('Custom climate file not found')

    # read the file
    fpath = cfg.PATHS['climate_file']
    nc_ts = salem.GeoNetcdf(fpath)

    # set temporal subset for the ts data (hydro years)
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts.time.year
    y0, y1 = yrs[0], yrs[-1]
    nc_ts.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                     t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts.time
    ny, r = divmod(len(time), 12)
    if r != 0:
        raise ValueError('Climate data should be N full years exclusively')

    # Units
    assert nc_ts._nc.variables['hgt'].units.lower() in [
        'm', 'meters', 'meter', 'metres', 'metre'
    ]
    assert nc_ts._nc.variables['temp'].units.lower() in [
        'degc', 'degrees', 'degree', 'c'
    ]
    assert nc_ts._nc.variables['prcp'].units.lower() in [
        'kg m-2', 'l m-2', 'mm', 'millimeters', 'millimeter'
    ]

    # geoloc
    lon = nc_ts._nc.variables['lon'][:]
    lat = nc_ts._nc.variables['lat'][:]

    # Gradient defaults
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    def_grad = cfg.PARAMS['temp_default_gradient']
    g_minmax = cfg.PARAMS['temp_local_gradient_bounds']

    ilon = np.argmin(np.abs(lon - gdir.cenlon))
    ilat = np.argmin(np.abs(lat - gdir.cenlat))
    ref_pix_lon = lon[ilon]
    ref_pix_lat = lat[ilat]

    # read the data
    temp = nc_ts.get_vardata('temp')
    prcp = nc_ts.get_vardata('prcp')
    hgt = nc_ts.get_vardata('hgt')
    igrad = np.zeros(len(time)) + def_grad
    ttemp = temp[:, ilat - 1:ilat + 2, ilon - 1:ilon + 2]
    itemp = ttemp[:, 1, 1]
    thgt = hgt[ilat - 1:ilat + 2, ilon - 1:ilon + 2]
    ihgt = thgt[1, 1]
    thgt = thgt.flatten()
    iprcp = prcp[:, ilat, ilon]
    nc_ts.close()

    # Now the gradient
    if use_grad:
        for t, loct in enumerate(ttemp):
            slope, _, _, p_val, _ = stats.linregress(thgt, loct.flatten())
            igrad[t] = slope if (p_val < 0.01) else def_grad
        # don't exaggerate too much
        igrad = np.clip(igrad, g_minmax[0], g_minmax[1])

    gdir.write_monthly_climate_file(time, iprcp, itemp, igrad, ihgt,
                                    ref_pix_lon, ref_pix_lat)
    # metadata
    out = {'climate_source': fpath, 'hydro_yr_0': y0 + 1, 'hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')
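
# The local gradient above is a per-timestep regression of the 3x3
# temperature box on elevation, kept only when significant. The same test
# in isolation, with illustrative numbers (the bounds stand in for
# cfg.PARAMS['temp_local_gradient_bounds']):
import numpy as np
from scipy import stats

hgt = np.array([200., 450., 700., 300., 550., 800., 400., 650., 900.])
temp = 10. - 0.0065 * hgt + np.random.randn(9) * 0.1
slope, _, _, p_val, _ = stats.linregress(hgt, temp)
def_grad = -0.0065  # fallback when the regression is not significant
grad = slope if (p_val < 0.01) else def_grad
grad = np.clip(grad, -0.009, -0.003)  # don't exaggerate too much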
Example #5
def process_cesm_data(gdir,
                      filesuffix='',
                      fpath_temp=None,
                      fpath_precc=None,
                      fpath_precl=None):
    """Processes and writes the climate data for this glacier.

    This function interpolates the Community Earth System Model Last
    Millennial Ensemble (CESM-LME) climate simulations from
    Otto-Bliesner et al. (2016) to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.

    Parameters
    ----------
    filesuffix : str
        append a suffix to the filename (useful for ensemble experiments).
    fpath_temp : str
        path to the temp file (default: cfg.PATHS['gcm_temp_file'])
    fpath_precc : str
        path to the precc file (default: cfg.PATHS['gcm_precc_file'])
    fpath_precl : str
        path to the precl file (default: cfg.PATHS['gcm_precl_file'])
    """

    # GCM temperature and precipitation data
    if fpath_temp is None:
        if not ('gcm_temp_file' in cfg.PATHS):
            raise ValueError("Need to set cfg.PATHS['gcm_temp_file']")
        fpath_temp = cfg.PATHS['gcm_temp_file']
    if fpath_precc is None:
        if not ('gcm_precc_file' in cfg.PATHS):
            raise ValueError("Need to set cfg.PATHS['gcm_precc_file']")
        fpath_precc = cfg.PATHS['gcm_precc_file']
    if fpath_precl is None:
        if not ('gcm_precl_file' in cfg.PATHS):
            raise ValueError("Need to set cfg.PATHS['gcm_precl_file']")
        fpath_precl = cfg.PATHS['gcm_precl_file']

    # read the files
    with warnings.catch_warnings():
        # Long time series are currently a pain in pandas
        warnings.filterwarnings("ignore", message='Unable to decode time axis')
        tempds = xr.open_dataset(fpath_temp)
    precpcds = xr.open_dataset(fpath_precc, decode_times=False)
    preclpds = xr.open_dataset(fpath_precl, decode_times=False)

    # select for location
    lon = gdir.cenlon
    lat = gdir.cenlat

    # CESM files are in 0-360
    if lon <= 0:
        lon += 360

    # take the closest
    # TODO: consider GCM interpolation?
    temp = tempds.TREFHT.sel(lat=lat, lon=lon, method='nearest')
    precp = precpcds.PRECC.sel(lat=lat, lon=lon, method='nearest') + \
            preclpds.PRECL.sel(lat=lat, lon=lon, method='nearest')

    # from normal years to hydrological years
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    # TODO: we don't check if the files actually start in January but we should
    precp = precp[sm - 1:sm - 13].load()
    temp = temp[sm - 1:sm - 13].load()
    y0 = int(temp.time.values[0].strftime('%Y'))
    y1 = int(temp.time.values[-1].strftime('%Y'))
    time = pd.period_range('{}-{:02d}'.format(y0, sm),
                           '{}-{:02d}'.format(y1, em),
                           freq='M')
    temp['time'] = time
    precp['time'] = time
    # Workaround for https://github.com/pydata/xarray/issues/1565
    temp['month'] = ('time', time.month)
    precp['month'] = ('time', time.month)
    temp['year'] = ('time', time.year)
    precp['year'] = ('time', time.year)
    ny, r = divmod(len(time), 12)
    assert r == 0

    # Convert m s-1 to mm mth-1
    ndays = np.tile(np.roll(cfg.DAYS_IN_MONTH, 13 - sm), y1 - y0)
    precp = precp * ndays * (60 * 60 * 24 * 1000)

    # compute monthly anomalies
    # of temp
    ts_tmp_avg = temp.sel(time=(temp.year >= 1961) & (temp.year <= 1990))
    ts_tmp_avg = ts_tmp_avg.groupby(ts_tmp_avg.month).mean(dim='time')
    ts_tmp = temp.groupby(temp.month) - ts_tmp_avg
    # of precip -- scaled anomalies
    ts_pre_avg = precp.isel(time=(precp.year >= 1961) & (precp.year <= 1990))
    ts_pre_avg = ts_pre_avg.groupby(ts_pre_avg.month).mean(dim='time')
    ts_pre_ano = precp.groupby(precp.month) - ts_pre_avg
    # scaled anomalies are the default. The standard anomalies above
    # are used later where ts_pre_avg == 0
    ts_pre = precp.groupby(precp.month) / ts_pre_avg

    # Get CRU to apply the anomaly to
    fpath = gdir.get_filepath('climate_monthly')
    ds_cru = xr.open_dataset(fpath)

    # Here we assume the gradient is a monthly average
    ts_grad = np.tile(ds_cru.grad[0:12], ny)

    # Add climate anomaly to CRU clim
    dscru = ds_cru.sel(time=slice('1961', '1990'))
    # for temp
    loc_tmp = dscru.temp.groupby('time.month').mean()
    ts_tmp = ts_tmp.groupby(ts_tmp.month) + loc_tmp
    # for prcp
    loc_pre = dscru.prcp.groupby('time.month').mean()
    # scaled anomalies
    ts_pre = ts_pre.groupby(ts_pre.month) * loc_pre
    # standard anomalies
    ts_pre_ano = ts_pre_ano.groupby(ts_pre_ano.month) + loc_pre
    # Correct infinite values with standard anomalies
    ts_pre.values = np.where(np.isfinite(ts_pre.values), ts_pre.values,
                             ts_pre_ano.values)
    # The last step might create negative values (unlikely). Clip them
    ts_pre.values = ts_pre.values.clip(0)

    # load the dates in the right format for saving
    dsindex = salem.GeoNetcdf(fpath_temp, monthbegin=True)
    time1 = dsindex.variables['time']
    time2 = time1[sm - 1:sm - 13] - ndays  # to hydrological years
    time2 = netCDF4.num2date(time2, time1.units, calendar='noleap')

    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))
    assert np.all(np.isfinite(ts_grad))

    # back to -180 - 180
    loc_lon = precp.lon if precp.lon <= 180 else precp.lon - 360

    gdir.write_monthly_climate_file(time2,
                                    ts_pre.values,
                                    ts_tmp.values,
                                    ts_grad,
                                    float(dscru.ref_hgt),
                                    loc_lon,
                                    precp.lat.values,
                                    time_unit=time1.units,
                                    file_name='cesm_data',
                                    filesuffix=filesuffix)

    dsindex._nc.close()
    tempds.close()
    precpcds.close()
    preclpds.close()
    ds_cru.close()
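
# Example #5 switches precipitation to scaled (multiplicative) anomalies
# and keeps the additive ones as a fallback. The same logic in toy numbers:
prcp_month = 30.    # GCM value for one month (mm)
ref_avg = 25.       # 1961-1990 GCM mean for that calendar month
cru_clim = 40.      # CRU climatology for that month

scaled = (prcp_month / ref_avg) * cru_clim    # 48.0 mm
standard = (prcp_month - ref_avg) + cru_clim  # 45.0 mm
# where ref_avg == 0 the scaled value is inf, so the standard anomaly is
# substituted instead, and negative results are clipped to zero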
Example #6
def _distribute_cru_style_nonparallel(gdirs):
    """More general solution for OGGM globally.

    It uses the CRU CL2 ten-minutes climatology as baseline
    (provided with OGGM)
    """

    # read the climatology
    clfile = utils.get_cru_cl_file()
    ncclim = salem.GeoNetcdf(clfile)
    # and the TS data
    nc_ts_tmp = salem.GeoNetcdf(utils.get_cru_file('tmp'), monthbegin=True)
    nc_ts_pre = salem.GeoNetcdf(utils.get_cru_file('pre'), monthbegin=True)

    # set temporal subset for the ts data (hydro years)
    nc_ts_tmp.set_period(t0='1901-10-01', t1='2014-09-01')
    nc_ts_pre.set_period(t0='1901-10-01', t1='2014-09-01')
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    # gradient default params
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    def_grad = cfg.PARAMS['temp_default_gradient']
    g_minmax = cfg.PARAMS['temp_local_gradient_bounds']
    prcp_scaling_factor = cfg.PARAMS['prcp_scaling_factor']

    for gdir in gdirs:
        log.info('%s: %s', gdir.rgi_id, 'distribute_cru_style')
        lon = gdir.cenlon
        lat = gdir.cenlat

        # This is guaranteed to work because I prepared the file (I hope)
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

        # get monthly gradient ...
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        isok = np.isfinite(loc_hgt)
        hgt_f = loc_hgt[isok].flatten()
        ts_grad = np.zeros(12) + def_grad
        if use_grad and len(hgt_f) >= 5:
            for i in range(12):
                loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
                slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
                ts_grad[i] = slope if (p_val < 0.01) else def_grad
        # ... but don't exaggerate too much
        ts_grad = np.clip(ts_grad, g_minmax[0], g_minmax[1])
        # convert to a timeseries and hydrological years
        ts_grad = ts_grad.tolist()
        ts_grad = ts_grad[9:] + ts_grad[0:9]
        ts_grad = np.asarray(ts_grad * ny)

        # maybe this will throw out of bounds warnings
        nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

        # compute monthly anomalies
        # of temp
        ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
        ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
        ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
        ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
        # of precip
        ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
        ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
        ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
        ts_pre = ts_pre.groupby('time.month') - ts_pre_avg

        # interpolate to HR grid
        if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
            # Extreme case, middle pix is not valid
            # take any valid pix from the 3*3 (and hope there's one)
            found_it = False
            for idi in range(3):
                for idj in range(3):
                    if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                        ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                        ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                        found_it = True
            if not found_it:
                msg = '{}: OMG there is no climate data'.format(gdir.rgi_id)
                raise RuntimeError(msg)
        elif np.any(~np.isfinite(ts_tmp)):
            # maybe the side is nan, but we can do nearest
            ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values,
                                                  nc_ts_tmp.grid,
                                                  interp='nearest')
            ts_pre = ncclim.grid.map_gridded_data(ts_pre.values,
                                                  nc_ts_pre.grid,
                                                  interp='nearest')
        else:
            # We can do bilinear
            ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values,
                                                  nc_ts_tmp.grid,
                                                  interp='linear')
            ts_pre = ncclim.grid.map_gridded_data(ts_pre.values,
                                                  nc_ts_pre.grid,
                                                  interp='linear')

        # take the center pixel and add it to the CRU CL clim
        # for temp
        loc_tmp = xr.DataArray(loc_tmp[:, 1, 1],
                               dims=['month'],
                               coords={'month': ts_pre_avg.month})
        ts_tmp = xr.DataArray(ts_tmp[:, 1, 1],
                              dims=['time'],
                              coords={'time': time})
        ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
        # for prcp
        loc_pre = xr.DataArray(loc_pre[:, 1, 1],
                               dims=['month'],
                               coords={'month': ts_pre_avg.month})
        ts_pre = xr.DataArray(ts_pre[:, 1, 1],
                              dims=['time'],
                              coords={'time': time})
        ts_pre = ts_pre.groupby('time.month') + loc_pre * prcp_scaling_factor

        # done
        loc_hgt = loc_hgt[1, 1]
        assert np.isfinite(loc_hgt)
        assert np.all(np.isfinite(ts_pre.values))
        assert np.all(np.isfinite(ts_tmp.values))
        assert np.all(np.isfinite(ts_grad))
        gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                        ts_grad, loc_hgt)
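
# The twelve calendar-month gradients are rotated into hydrological-year
# order (October first) and tiled over all years. A minimal illustration:
import numpy as np

grad_jan_dec = list(range(1, 13))                 # stand-ins for Jan..Dec
grad_hydro = grad_jan_dec[9:] + grad_jan_dec[:9]  # Oct..Sep
ts_grad = np.asarray(grad_hydro * 3)              # tiled for ny = 3 years
assert ts_grad[0] == 10                           # October leads the series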
Example #7
def process_histalp_data(gdir):
    """Processes and writes the climate data for this glacier.

    Extracts the nearest timeseries and writes everything to a NetCDF file.
    """

    if cfg.PARAMS['baseline_climate'] != 'HISTALP':
        raise ValueError("cfg.PARAMS['baseline_climate'] should be set to "
                         "HISTALP.")

    # read the time out of the pure netcdf file
    ft = utils.get_histalp_file('tmp')
    fp = utils.get_histalp_file('pre')
    with utils.ncDataset(ft) as nc:
        vt = nc.variables['time']
        assert vt[0] == 0
        assert vt[-1] == vt.shape[0] - 1
        t0 = vt.units.split(' since ')[1][:7]
        time_t = pd.date_range(start=t0, periods=vt.shape[0], freq='MS')
    with utils.ncDataset(fp) as nc:
        vt = nc.variables['time']
        assert vt[0] == 0.5
        assert vt[-1] == vt.shape[0] - .5
        t0 = vt.units.split(' since ')[1][:7]
        time_p = pd.date_range(start=t0, periods=vt.shape[0], freq='MS')

    # Now open with salem
    nc_ts_tmp = salem.GeoNetcdf(ft, time=time_t)
    nc_ts_pre = salem.GeoNetcdf(fp, time=time_p)

    # set temporal subset for the ts data (hydro years)
    # the reference time is given by precip, which is shorter
    sm = cfg.PARAMS['hydro_month_nh']
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts_pre.time.year
    y0, y1 = yrs[0], yrs[-1]
    if cfg.PARAMS['baseline_y0'] != 0:
        y0 = cfg.PARAMS['baseline_y0']
    if cfg.PARAMS['baseline_y1'] != 0:
        y1 = cfg.PARAMS['baseline_y1']

    nc_ts_tmp.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    nc_ts_pre.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    # Units
    assert nc_ts_tmp._nc.variables['HSURF'].units.lower() in ['m', 'meters',
                                                              'meter',
                                                              'metres',
                                                              'metre']
    assert nc_ts_tmp._nc.variables['T_2M'].units.lower() in ['degc', 'degrees',
                                                             'degrees celcius',
                                                             'degree', 'c']
    assert nc_ts_pre._nc.variables['TOT_PREC'].units.lower() in ['kg m-2',
                                                                 'l m-2', 'mm',
                                                                 'millimeters',
                                                                 'millimeter']

    # geoloc
    lon = gdir.cenlon
    lat = gdir.cenlat
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # read the data
    temp = nc_ts_tmp.get_vardata('T_2M')
    prcp = nc_ts_pre.get_vardata('TOT_PREC')
    hgt = nc_ts_tmp.get_vardata('HSURF')
    ref_lon = nc_ts_tmp.get_vardata('lon')
    ref_lat = nc_ts_tmp.get_vardata('lat')
    source = nc_ts_tmp._nc.title[:7]
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    igrad = None
    if use_grad:
        igrad = np.zeros(len(time)) * np.NaN
        for t, loct in enumerate(temp):
            slope, _, _, p_val, _ = stats.linregress(hgt.flatten(),
                                                     loct.flatten())
            igrad[t] = slope if (p_val < 0.01) else np.NaN

    gdir.write_monthly_climate_file(time, prcp[:, 1, 1], temp[:, 1, 1],
                                    hgt[1, 1], ref_lon[1], ref_lat[1],
                                    gradient=igrad)
    # metadata
    out = {'baseline_climate_source': source,
           'baseline_hydro_yr_0': y0 + 1,
           'baseline_hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')
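
# Usage sketch with illustrative years; baseline_y0/y1 of 0 mean 'use the
# file limits', as the checks above show:
from oggm import cfg

cfg.PARAMS['baseline_climate'] = 'HISTALP'
cfg.PARAMS['baseline_y0'] = 1850
cfg.PARAMS['baseline_y1'] = 2014
process_histalp_data(gdir)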
Example #8
def process_cru_data(gdir):
    """Processes and writes the climate data for this glacier.

    Interpolates the CRU TS data to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.
    """

    if cfg.PARAMS['baseline_climate'] != 'CRU':
        raise ValueError("cfg.PARAMS['baseline_climate'] should be set to CRU")

    # read the climatology
    clfile = utils.get_cru_cl_file()
    ncclim = salem.GeoNetcdf(clfile)
    # and the TS data
    nc_ts_tmp = salem.GeoNetcdf(utils.get_cru_file('tmp'), monthbegin=True)
    nc_ts_pre = salem.GeoNetcdf(utils.get_cru_file('pre'), monthbegin=True)

    # set temporal subset for the ts data (hydro years)
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts_pre.time.year
    y0, y1 = yrs[0], yrs[-1]
    if cfg.PARAMS['baseline_y0'] != 0:
        y0 = cfg.PARAMS['baseline_y0']
    if cfg.PARAMS['baseline_y1'] != 0:
        y1 = cfg.PARAMS['baseline_y1']

    nc_ts_tmp.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    nc_ts_pre.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    lon = gdir.cenlon
    lat = gdir.cenlat

    # This is guaranteed to work because I prepared the file (I hope)
    ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # get climatology data
    loc_hgt = ncclim.get_vardata('elev')
    loc_tmp = ncclim.get_vardata('temp')
    loc_pre = ncclim.get_vardata('prcp')
    loc_lon = ncclim.get_vardata('lon')
    loc_lat = ncclim.get_vardata('lat')

    # see if the center is ok
    if not np.isfinite(loc_hgt[1, 1]):
        # take another candidate where finite
        isok = np.isfinite(loc_hgt)

        # wait: some areas are entirely NaNs, make the subset larger
        _margin = 1
        while not np.any(isok):
            _margin += 1
            ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=_margin)
            loc_hgt = ncclim.get_vardata('elev')
            isok = np.isfinite(loc_hgt)
        if _margin > 1:
            log.debug('(%s) I had to search far for climate pixels: %s',
                      gdir.rgi_id, _margin)

        # Take the first candidate (doesn't matter which)
        lon, lat = ncclim.grid.ll_coordinates
        lon = lon[isok][0]
        lat = lat[isok][0]
        # Resubset
        ncclim.set_subset()
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        loc_lon = ncclim.get_vardata('lon')
        loc_lat = ncclim.get_vardata('lat')

    assert np.isfinite(loc_hgt[1, 1])
    isok = np.isfinite(loc_hgt)
    hgt_f = loc_hgt[isok].flatten()
    assert len(hgt_f) > 0.

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    ts_grad = None
    if use_grad and len(hgt_f) >= 5:
        ts_grad = np.zeros(12) * np.NaN
        for i in range(12):
            loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
            slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
            ts_grad[i] = slope if (p_val < 0.01) else np.NaN
        # convert to a timeseries and hydrological years
        ts_grad = ts_grad.tolist()
        ts_grad = ts_grad[em:] + ts_grad[0:em]
        ts_grad = np.asarray(ts_grad * ny)

    # maybe this will throw out of bounds warnings
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # compute monthly anomalies
    # of temp
    ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
    ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
    ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
    # of precip
    ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
    ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
    ts_pre_ano = ts_pre.groupby('time.month') - ts_pre_avg
    # scaled anomalies are the default. The standard anomalies above
    # are used later where ts_pre_avg == 0
    ts_pre = ts_pre.groupby('time.month') / ts_pre_avg

    # interpolate to HR grid
    if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
        # Extreme case, middle pix is not valid
        # take any valid pix from the 3*3 (and hope there's one)
        found_it = False
        for idi in range(3):
            for idj in range(3):
                if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                    ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                    ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                    ts_pre_ano[:, 1, 1] = ts_pre_ano[:, idj, idi]
                    found_it = True
        if not found_it:
            msg = '({}) there is no climate data'.format(gdir.rgi_id)
            raise RuntimeError(msg)
    elif np.any(~np.isfinite(ts_tmp)):
        # maybe the side is nan, but we can do nearest
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='nearest')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='nearest')
        ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
                                                  nc_ts_pre.grid,
                                                  interp='nearest')
    else:
        # We can do bilinear
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='linear')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='linear')
        ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
                                                  nc_ts_pre.grid,
                                                  interp='linear')

    # take the center pixel and add it to the CRU CL clim
    # for temp
    loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                           coords={'month': ts_tmp_avg.month})
    ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    # for prcp
    loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                           coords={'month': ts_pre_avg.month})
    ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_pre_ano = xr.DataArray(ts_pre_ano[:, 1, 1], dims=['time'],
                              coords={'time': time})
    # scaled anomalies
    ts_pre = ts_pre.groupby('time.month') * loc_pre
    # standard anomalies
    ts_pre_ano = ts_pre_ano.groupby('time.month') + loc_pre
    # Correct infinite values with standard anomalies
    ts_pre.values = np.where(np.isfinite(ts_pre.values),
                             ts_pre.values,
                             ts_pre_ano.values)
    # The last step might create negative values (unlikely). Clip them
    ts_pre.values = ts_pre.values.clip(0)

    # done
    loc_hgt = loc_hgt[1, 1]
    loc_lon = loc_lon[1]
    loc_lat = loc_lat[1]
    assert np.isfinite(loc_hgt)
    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))

    gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                    loc_hgt, loc_lon, loc_lat,
                                    gradient=ts_grad)

    source = nc_ts_tmp._nc.title[:10]
    ncclim._nc.close()
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()
    # metadata
    out = {'baseline_climate_source': source,
           'baseline_hydro_yr_0': y0+1,
           'baseline_hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')
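
# The NaN handling above grows the subset margin until a finite elevation
# appears, then re-centres on the first finite pixel. The search pattern in
# isolation (get_subset is a hypothetical stand-in for the
# ncclim.set_subset / get_vardata pair):
import numpy as np

def first_finite_pixel(get_subset, max_margin=10):
    margin = 1
    data = get_subset(margin)
    while not np.any(np.isfinite(data)) and margin < max_margin:
        margin += 1
        data = get_subset(margin)
    # first finite candidate (which one doesn't matter)
    return np.argwhere(np.isfinite(data))[0], margin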
Example #9
def process_custom_climate_data(gdir):
    """Processes and writes the climate data from a user-defined climate file.

    The input file must have a specific format (see
    oggm-sample-data/test-files/histalp_merged_hef.nc for an example).

    This is the way OGGM used to do it for HISTALP before it was automated.
    """

    if not (('climate_file' in cfg.PATHS) and
            os.path.exists(cfg.PATHS['climate_file'])):
        raise IOError('Custom climate file not found')

    if cfg.PARAMS['baseline_climate'] not in ['', 'CUSTOM']:
        raise ValueError("When using custom climate data please set "
                         "PARAMS['baseline_climate'] to an empty string "
                         "or `CUSTOM`. Note that you can now use the "
                         "`process_histalp_data` task for automated HISTALP "
                         "data processing.")

    # read the file
    fpath = cfg.PATHS['climate_file']
    nc_ts = salem.GeoNetcdf(fpath)

    # set temporal subset for the ts data (hydro years)
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts.time.year
    y0, y1 = yrs[0], yrs[-1]
    nc_ts.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                     t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts.time
    ny, r = divmod(len(time), 12)
    if r != 0:
        raise ValueError('Climate data should be N full years exclusively')

    # Units
    assert nc_ts._nc.variables['hgt'].units.lower() in ['m', 'meters', 'meter',
                                                        'metres', 'metre']
    assert nc_ts._nc.variables['temp'].units.lower() in ['degc', 'degrees',
                                                         'degree', 'c']
    assert nc_ts._nc.variables['prcp'].units.lower() in ['kg m-2', 'l m-2',
                                                         'mm', 'millimeters',
                                                         'millimeter']

    # geoloc
    lon = nc_ts._nc.variables['lon'][:]
    lat = nc_ts._nc.variables['lat'][:]

    ilon = np.argmin(np.abs(lon - gdir.cenlon))
    ilat = np.argmin(np.abs(lat - gdir.cenlat))
    ref_pix_lon = lon[ilon]
    ref_pix_lat = lat[ilat]

    # read the data
    temp = nc_ts.get_vardata('temp')
    prcp = nc_ts.get_vardata('prcp')
    hgt = nc_ts.get_vardata('hgt')
    ttemp = temp[:, ilat-1:ilat+2, ilon-1:ilon+2]
    itemp = ttemp[:, 1, 1]
    thgt = hgt[ilat-1:ilat+2, ilon-1:ilon+2]
    ihgt = thgt[1, 1]
    thgt = thgt.flatten()
    iprcp = prcp[:, ilat, ilon]
    nc_ts.close()

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    igrad = None
    if use_grad:
        igrad = np.zeros(len(time)) * np.NaN
        for t, loct in enumerate(ttemp):
            slope, _, _, p_val, _ = stats.linregress(thgt,
                                                     loct.flatten())
            igrad[t] = slope if (p_val < 0.01) else np.NaN

    gdir.write_monthly_climate_file(time, iprcp, itemp, ihgt,
                                    ref_pix_lon, ref_pix_lat,
                                    gradient=igrad)
    # metadata
    out = {'baseline_climate_source': fpath,
           'baseline_hydro_yr_0': y0+1,
           'baseline_hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')
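
# The reference pixel above is the nearest neighbour along each coordinate
# axis independently. A compact illustration with stand-in vectors:
import numpy as np

lon = np.arange(-10., 10.5, 0.5)
lat = np.arange(40., 50.5, 0.5)
cenlon, cenlat = 3.3, 46.7
ilon = np.argmin(np.abs(lon - cenlon))
ilat = np.argmin(np.abs(lat - cenlat))
print(lon[ilon], lat[ilat])  # 3.5 46.5 -> the reference pixel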
Example #10
def process_cru_data(gdir, tmp_file=None, pre_file=None, y0=None, y1=None,
                     output_filesuffix=None):
    """Processes and writes the CRU baseline climate data for this glacier.

    Interpolates the CRU TS data to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    tmp_file : str
        path to the CRU temperature file (defaults to the current OGGM chosen
        CRU version)
    pre_file : str
        path to the CRU precip file (defaults to the current OGGM chosen
        CRU version)
    y0 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    y1 : int
        the ending year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str
        this adds a suffix to the output file (useful to avoid overwriting
        previous experiments)
    """

    if cfg.PATHS.get('climate_file', None):
        warnings.warn("You seem to have set a custom climate file for this "
                      "run, but are using the default CRU climate "
                      "file instead.")

    if cfg.PARAMS['baseline_climate'] != 'CRU':
        raise InvalidParamsError("cfg.PARAMS['baseline_climate'] should be "
                                 "set to CRU")

    # read the climatology
    ncclim = salem.GeoNetcdf(get_cru_cl_file())

    # and the TS data
    if tmp_file is None:
        tmp_file = get_cru_file('tmp')
    if pre_file is None:
        pre_file = get_cru_file('pre')
    nc_ts_tmp = salem.GeoNetcdf(tmp_file, monthbegin=True)
    nc_ts_pre = salem.GeoNetcdf(pre_file, monthbegin=True)

    # set temporal subset for the ts data (hydro years)
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts_pre.time.year
    y0 = yrs[0] if y0 is None else y0
    y1 = yrs[-1] if y1 is None else y1

    nc_ts_tmp.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    nc_ts_pre.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    lon = gdir.cenlon
    lat = gdir.cenlat

    # This is guaranteed to work because I prepared the file (I hope)
    ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # get climatology data
    loc_hgt = ncclim.get_vardata('elev')
    loc_tmp = ncclim.get_vardata('temp')
    loc_pre = ncclim.get_vardata('prcp')
    loc_lon = ncclim.get_vardata('lon')
    loc_lat = ncclim.get_vardata('lat')

    # see if the center is ok
    if not np.isfinite(loc_hgt[1, 1]):
        # take another candidate where finite
        isok = np.isfinite(loc_hgt)

        # wait: some areas are entirely NaNs, make the subset larger
        _margin = 1
        while not np.any(isok):
            _margin += 1
            ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=_margin)
            loc_hgt = ncclim.get_vardata('elev')
            isok = np.isfinite(loc_hgt)
        if _margin > 1:
            log.debug('(%s) I had to search far for climate pixels: %s',
                      gdir.rgi_id, _margin)

        # Take the first candidate (doesn't matter which)
        lon, lat = ncclim.grid.ll_coordinates
        lon = lon[isok][0]
        lat = lat[isok][0]
        # Resubset
        ncclim.set_subset()
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        loc_lon = ncclim.get_vardata('lon')
        loc_lat = ncclim.get_vardata('lat')

    assert np.isfinite(loc_hgt[1, 1])
    isok = np.isfinite(loc_hgt)
    hgt_f = loc_hgt[isok].flatten()
    assert len(hgt_f) > 0.

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    ts_grad = None
    if use_grad and len(hgt_f) >= 5:
        ts_grad = np.zeros(12) * np.NaN
        for i in range(12):
            loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
            slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
            ts_grad[i] = slope if (p_val < 0.01) else np.NaN
        # convert to a timeseries and hydrological years
        ts_grad = ts_grad.tolist()
        ts_grad = ts_grad[em:] + ts_grad[0:em]
        ts_grad = np.asarray(ts_grad * ny)

    # maybe this will throw out of bounds warnings
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # compute monthly anomalies
    # of temp
    ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
    ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
    ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
    # of precip
    ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
    ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
    ts_pre_ano = ts_pre.groupby('time.month') - ts_pre_avg
    # scaled anomalies are the default. The standard anomalies above
    # are used later where ts_pre_avg == 0
    ts_pre = ts_pre.groupby('time.month') / ts_pre_avg

    # interpolate to HR grid
    if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
        # Extreme case, middle pix is not valid
        # take any valid pix from the 3*3 (and hope there's one)
        found_it = False
        for idi in range(3):
            for idj in range(3):
                if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                    ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                    ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                    ts_pre_ano[:, 1, 1] = ts_pre_ano[:, idj, idi]
                    found_it = True
        if not found_it:
            msg = '({}) there is no climate data'.format(gdir.rgi_id)
            raise MassBalanceCalibrationError(msg)
    elif np.any(~np.isfinite(ts_tmp)):
        # maybe the side is nan, but we can do nearest
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='nearest')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='nearest')
        ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
                                                  nc_ts_pre.grid,
                                                  interp='nearest')
    else:
        # We can do bilinear
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='linear')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='linear')
        ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
                                                  nc_ts_pre.grid,
                                                  interp='linear')

    # take the center pixel and add it to the CRU CL clim
    # for temp
    loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                           coords={'month': ts_tmp_avg.month})
    ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    # for prcp
    loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                           coords={'month': ts_pre_avg.month})
    ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_pre_ano = xr.DataArray(ts_pre_ano[:, 1, 1], dims=['time'],
                              coords={'time': time})
    # scaled anomalies
    ts_pre = ts_pre.groupby('time.month') * loc_pre
    # standard anomalies
    ts_pre_ano = ts_pre_ano.groupby('time.month') + loc_pre
    # Correct infinite values with standard anomalies
    ts_pre.values = np.where(np.isfinite(ts_pre.values),
                             ts_pre.values,
                             ts_pre_ano.values)
    # The last step might create negative values (unlikely). Clip them
    ts_pre.values = utils.clip_min(ts_pre.values, 0)

    # done
    loc_hgt = loc_hgt[1, 1]
    loc_lon = loc_lon[1]
    loc_lat = loc_lat[1]
    assert np.isfinite(loc_hgt)
    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))

    gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                    loc_hgt, loc_lon, loc_lat,
                                    filesuffix=output_filesuffix,
                                    gradient=ts_grad,
                                    source=nc_ts_tmp._nc.title[:10])

    ncclim._nc.close()
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()
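
# Usage sketch for this variant: the CRU files default to OGGM's own
# download helpers, so only the optional cropping and suffix are shown
# (the years are illustrative):
from oggm import cfg

cfg.PARAMS['baseline_climate'] = 'CRU'
process_cru_data(gdir, y0=1951, y1=2000, output_filesuffix='_1951_2000')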
Example #11
def process_dummy_cru_file(gdir, sigma_temp=2, sigma_prcp=0.5, seed=None,
                           y0=None, y1=None, output_filesuffix=None):
    """Create a simple baseline climate file for this glacier - for testing!

    This simply reproduces the climatology with a little randomness in it.

    TODO: extend the functionality by allowing a monthly varying sigma

    Parameters
    ----------
    gdir : GlacierDirectory
        the glacier directory
    sigma_temp : float
        the standard deviation of the random temperature timeseries (set to
        0 for a constant ts)
    sigma_prcp : float
        the standard deviation of the random precipitation timeseries (set
        to 0 for a constant ts)
    seed : int
        the RandomState seed
    y0 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    y1 : int
        the ending year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str
        this adds a suffix to the output file (useful to avoid overwriting
        previous experiments)
    """

    # read the climatology
    clfile = get_cru_cl_file()
    ncclim = salem.GeoNetcdf(clfile)

    # set temporal subset for the ts data (hydro years)
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12

    y0 = 1901 if y0 is None else y0
    y1 = 2018 if y1 is None else y1

    time = pd.date_range(start='{}-{:02d}-01'.format(y0, sm),
                         end='{}-{:02d}-01'.format(y1, em),
                         freq='MS')
    ny, r = divmod(len(time), 12)
    assert r == 0

    lon = gdir.cenlon
    lat = gdir.cenlat

    # This is guaranteed to work because I prepared the file (I hope)
    ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # get climatology data
    loc_hgt = ncclim.get_vardata('elev')
    loc_tmp = ncclim.get_vardata('temp')
    loc_pre = ncclim.get_vardata('prcp')
    loc_lon = ncclim.get_vardata('lon')
    loc_lat = ncclim.get_vardata('lat')

    # see if the center is ok
    if not np.isfinite(loc_hgt[1, 1]):
        # take another candidate where finite
        isok = np.isfinite(loc_hgt)

        # wait: some areas are entirely NaNs, make the subset larger
        _margin = 1
        while not np.any(isok):
            _margin += 1
            ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=_margin)
            loc_hgt = ncclim.get_vardata('elev')
            isok = np.isfinite(loc_hgt)
        if _margin > 1:
            log.debug('(%s) I had to search far for climate pixels: %s',
                      gdir.rgi_id, _margin)

        # Take the first candidate (doesn't matter which)
        lon, lat = ncclim.grid.ll_coordinates
        lon = lon[isok][0]
        lat = lat[isok][0]
        # Resubset
        ncclim.set_subset()
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        loc_lon = ncclim.get_vardata('lon')
        loc_lat = ncclim.get_vardata('lat')

    assert np.isfinite(loc_hgt[1, 1])
    isok = np.isfinite(loc_hgt)
    hgt_f = loc_hgt[isok].flatten()
    assert len(hgt_f) > 0.

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    ts_grad = None
    if use_grad and len(hgt_f) >= 5:
        ts_grad = np.zeros(12) * np.NaN
        for i in range(12):
            loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
            slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
            ts_grad[i] = slope if (p_val < 0.01) else np.NaN
        # convert to a timeseries and hydrological years
        ts_grad = ts_grad.tolist()
        ts_grad = ts_grad[em:] + ts_grad[0:em]
        ts_grad = np.asarray(ts_grad * ny)

    # Make DataArrays
    rng = np.random.RandomState(seed)
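    # seeding RandomState makes the generated series reproducible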
    loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                           coords={'month': np.arange(1, 13)})
    ts_tmp = rng.randn(len(time)) * sigma_temp
    ts_tmp = xr.DataArray(ts_tmp, dims=['time'],
                          coords={'time': time})

    loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                           coords={'month': np.arange(1, 13)})
    ts_pre = utils.clip_min(rng.randn(len(time)) * sigma_prcp + 1, 0)
    ts_pre = xr.DataArray(ts_pre, dims=['time'],
                          coords={'time': time})

    # Create the time series
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    ts_pre = ts_pre.groupby('time.month') * loc_pre
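    # xarray matches the 12-value monthly climatology to each timestamp's
    # month: anomalies are additive for temperature and multiplicative
    # (a scaling factor) for precipitation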

    # done
    loc_hgt = loc_hgt[1, 1]
    loc_lon = loc_lon[1]
    loc_lat = loc_lat[1]
    assert np.isfinite(loc_hgt)

    gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                    loc_hgt, loc_lon, loc_lat,
                                    gradient=ts_grad,
                                    filesuffix=output_filesuffix,
                                    source='CRU CL2 and some randomness')
    ncclim._nc.close()
Example #12
def process_histalp_data(gdir, y0=None, y1=None, output_filesuffix=None):
    """Processes and writes the HISTALP baseline climate data for this glacier.

    Extracts the nearest timeseries and writes everything to a NetCDF file.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    y0 : int
        the starting year of the timeseries to write. The default is to take
        1850 (because the data is quite bad before that)
    y1 : int
        the ending year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str
        this adds a suffix to the output file (useful to avoid overwriting
        previous experiments)
    """

    if cfg.PATHS.get('climate_file', None):
        warnings.warn("You seem to have set a custom climate file for this "
                      "run, but are using the default HISTALP climate file "
                      "instead.")

    if cfg.PARAMS['baseline_climate'] != 'HISTALP':
        raise InvalidParamsError("cfg.PARAMS['baseline_climate'] should be "
                                 "set to HISTALP.")

    # read the time out of the pure netcdf file
    ft = get_histalp_file('tmp')
    fp = get_histalp_file('pre')
    with utils.ncDataset(ft) as nc:
        vt = nc.variables['time']
        assert vt[0] == 0
        assert vt[-1] == vt.shape[0] - 1
        t0 = vt.units.split(' since ')[1][:7]
        time_t = pd.date_range(start=t0, periods=vt.shape[0], freq='MS')
    with utils.ncDataset(fp) as nc:
        vt = nc.variables['time']
        assert vt[0] == 0.5
        assert vt[-1] == vt.shape[0] - .5
        t0 = vt.units.split(' since ')[1][:7]
        time_p = pd.date_range(start=t0, periods=vt.shape[0], freq='MS')

    # Now open with salem
    nc_ts_tmp = salem.GeoNetcdf(ft, time=time_t)
    nc_ts_pre = salem.GeoNetcdf(fp, time=time_p)

    # set temporal subset for the ts data (hydro years)
    # the reference time is given by precip, which is shorter
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts_pre.time.year
    y0 = 1850 if y0 is None else y0  # the data is unreliable before 1850
    y1 = yrs[-1] if y1 is None else y1

    nc_ts_tmp.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    nc_ts_pre.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    # Units
    assert nc_ts_tmp._nc.variables['HSURF'].units.lower() in [
        'm', 'meters', 'meter', 'metres', 'metre'
    ]
    # note: 'celcius' [sic] is kept as-is, since the assert checks the unit
    # strings as they appear in the input files
    assert nc_ts_tmp._nc.variables['T_2M'].units.lower() in [
        'degc', 'degrees', 'degrees celcius', 'degree', 'c'
    ]
    assert nc_ts_pre._nc.variables['TOT_PREC'].units.lower() in [
        'kg m-2', 'l m-2', 'mm', 'millimeters', 'millimeter'
    ]

    # geoloc
    lon = gdir.cenlon
    lat = gdir.cenlat
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # read the data
    temp = nc_ts_tmp.get_vardata('T_2M')
    prcp = nc_ts_pre.get_vardata('TOT_PREC')
    hgt = nc_ts_tmp.get_vardata('HSURF')
    ref_lon = nc_ts_tmp.get_vardata('lon')
    ref_lat = nc_ts_tmp.get_vardata('lat')
    source = nc_ts_tmp._nc.title[:7]
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    igrad = None
    if use_grad:
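        # one lapse rate per timestep, regressed over all points of the 3x3
        # subset; non-significant slopes (p >= 0.01) are set to NaN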
        igrad = np.zeros(len(time)) * np.NaN
        for t, loct in enumerate(temp):
            slope, _, _, p_val, _ = stats.linregress(hgt.flatten(),
                                                     loct.flatten())
            igrad[t] = slope if (p_val < 0.01) else np.NaN

    gdir.write_monthly_climate_file(time,
                                    prcp[:, 1, 1],
                                    temp[:, 1, 1],
                                    hgt[1, 1],
                                    ref_lon[1],
                                    ref_lat[1],
                                    gradient=igrad,
                                    filesuffix=output_filesuffix,
                                    source=source)
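
A minimal call sketch for the task above (hedged: this assumes an already
initialized OGGM run configuration and an existing GlacierDirectory `gdir`;
the exact workflow wiring is not shown in these examples):

cfg.PARAMS['baseline_climate'] = 'HISTALP'
process_histalp_data(gdir, y0=1850, output_filesuffix='_histalp')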
Example #13
def process_ccsm_data(gdir, PI_path):
    """
        First attempt at a method to process the CCSM data into temperature/precip anomalies.

    """

    #Put this in the function def (see process_cesm_data)
    filesuffix=''

    if not (('climate_file' in cfg.PATHS) and
            os.path.exists(cfg.PATHS['climate_file'])):
        raise IOError('Custom climate file not found')

    # open the dataset with raw times for precipitation
    fpath = cfg.PATHS['climate_file']
    xr_ccsm = xr.open_dataset(fpath, decode_times=False)
    # and with decoded times for temperature
    xr_ccsm_ts = xr.open_dataset(fpath)
    # same for the preindustrial (PI) control run
    xr_pi = xr.open_dataset(PI_path, decode_times=False)

    # selecting location
    lon = gdir.cenlon
    lat = gdir.cenlat
    
    # CESM files are on a 0-360 longitude grid: convert if needed
    if lon <= 0:
        lon += 360
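        # (e.g. lon = -70.25 becomes 289.75)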
    
    #"take the closest"
    #"TODO: consider GCM interpolation?"
    precp = xr_ccsm.PRECC.sel(lat=lat, lon=lon, method='nearest') + xr_ccsm.PRECL.sel(lat=lat, lon=lon, method='nearest')
    temp = xr_ccsm_ts.TS.sel(lat=lat, lon=lon, method='nearest')
    precp_pi = xr_pi.PRECC.sel(lat=lat, lon=lon, method='nearest') + xr_pi.PRECL.sel(lat=lat, lon=lon, method='nearest')
    temp_pi = xr_pi.TS.sel(lat=lat, lon=lon, method='nearest')

    # convert temp from K to C
    temp = temp - 273.15
    temp_pi = temp_pi - 273.15

    # take the anomaly of the CCSM data with the preindustrial control
    # (single-year monthly climatologies, hence exactly 12 values each)
    for i in range(12):
        temp.values[i] -= temp_pi.values[i]
        precp.values[i] -= precp_pi.values[i]

    # from calendar years to hydrological years
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    y0 = int(pd.to_datetime(str(temp.time.values[0])).strftime('%Y'))
    y1 = int(pd.to_datetime(str(temp.time.values[-1])).strftime('%Y'))
    # time index for temp/precip (hydro years)
    time = pd.period_range('{}-{:02d}'.format(y0, sm),
                           '{}-{:02d}'.format(y1, em), freq='M')

    # reorder the single year of equilibrium data and add the correct time:
    # compute the indices to concatenate from (the CCSM fields start at
    # month 2, hence the offset)
    conc_start = sm - 2
    conc_end = sm - 14
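    # e.g. for sm=10: conc_start=8 and conc_end=-4, so temp[8:] covers
    # Oct-Jan and temp[:-4] covers Feb-Sep, giving one full hydro year
    # Oct-Sep after concatenation (assuming the fields do start in February)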

    temp_hydro = xr.concat([temp[conc_start:],temp[:conc_end]],dim="time")
    precp_hydro = xr.concat([precp[conc_start:],precp[:conc_end]],dim="time")
    temp_hydro['time'] = time
    precp_hydro['time'] = time

    temp = temp_hydro
    precp = precp_hydro

    # Workaround for https://github.com/pydata/xarray/issues/1565
    temp['month'] = ('time', time.month)
    precp['month'] = ('time', time.month)
    temp['year'] = ('time', time.year)
    precp['year'] = ('time', time.year)
    ny, r = divmod(len(temp.time), 12)
    assert r == 0

    # convert precipitation from m s-1 to mm per month; pick the number of
    # days per month following the hydro-year ordering of the time index
    ndays = np.array([cfg.DAYS_IN_MONTH[m - 1] for m in time.month])
    precp = precp * ndays * (60 * 60 * 24 * 1000)
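    # (1 m s-1 = 1000 mm s-1; x 86400 s per day x days per month -> mm mth-1)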

    #"Get CRU to apply the anomaly to"
    fpath = gdir.get_filepath('climate_monthly')
    ds_cru = xr.open_dataset(fpath)

    #"Add the climate anomaly to CRU clim"
    dscru = ds_cru.sel(time=slice('1961', '1990'))

    # temperature: monthly means over the CRU reference period
    loc_temp = dscru.temp.groupby('time.month').mean()
    # the Oct-Sep (hydro year) ordering is preserved
    ts_tmp = temp.groupby(temp.month) + loc_temp

    # same for precipitation
    loc_pre = dscru.prcp.groupby('time.month').mean()
    ts_pre = precp.groupby(precp.month) + loc_pre
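    # note: additive for precipitation too, since the CCSM anomaly above was
    # computed as a difference rather than a ratio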

    # load dates into the save format
    fpath = cfg.PATHS['climate_file']
    dsindex = salem.GeoNetcdf(fpath, monthbegin=True)
    time1 = dsindex.variables['time']

    # round-trip through date2num/num2date to get the dates in the file's
    # units and calendar; only necessary for a single year of data, in
    # order to rearrange the months and get the correct year
    time_array = [datetime(temp.time.year[i], temp.time.month[i], 1)
                  for i in range(12)]
    time_nums = netCDF4.date2num(time_array, time1.units, calendar='noleap')
    time2 = netCDF4.num2date(time_nums[:], time1.units, calendar='noleap')

    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))

    #"back to -180 - 180"
    loc_lon = precp.lon if precp.lon <= 180 else precp.lon - 360

    # write to netcdf
    gdir.write_monthly_climate_file(time2, ts_pre.values, ts_tmp.values,
                                    float(dscru.ref_hgt), loc_lon,
                                    precp.lat.values,
                                    time_unit=time1.units,
                                    file_name='gcm_data',
                                    filesuffix=filesuffix)

    # close all open files, including the salem index dataset
    dsindex._nc.close()
    xr_ccsm.close()
    xr_ccsm_ts.close()
    xr_pi.close()
    ds_cru.close()
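
A minimal call sketch (hypothetical paths, purely illustrative; assumes
`gdir` already has a 'climate_monthly' file, e.g. written by a CRU
processing task):

cfg.PATHS['climate_file'] = '/path/to/ccsm.nc'  # hypothetical path
process_ccsm_data(gdir, '/path/to/pi_control.nc')  # hypothetical path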