Example #1
    def test_download_cru(self):

        tmp = cfg.PATHS['cru_dir']
        cfg.PATHS['cru_dir'] = os.path.join(TEST_DIR, 'cru_extract')

        of = utils.get_cru_file('tmp')
        self.assertTrue(os.path.exists(of))

        cfg.PATHS['cru_dir'] = tmp
Example #2
    def test_download_cru(self):

        cfg.initialize()

        tmp = cfg.PATHS['cru_dir']
        cfg.PATHS['cru_dir'] = TEST_DIR

        of = utils.get_cru_file('tmp')
        self.assertTrue(os.path.exists(of))

        cfg.PATHS['cru_dir'] = tmp
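Both tests above temporarily point cfg.PATHS['cru_dir'] to a test directory before calling the downloader. Outside of tests, the same helper is usually called with the keyword form, as the workflow examples further down do. A minimal sketch of that non-test call pattern:

from oggm import cfg, utils

cfg.initialize()
# download (or reuse a cached copy of) the CRU TS files and return the
# local paths; 'tmp' is the temperature file, 'pre' the precipitation file
tmp_file = utils.get_cru_file(var='tmp')
pre_file = utils.get_cru_file(var='pre')
print(tmp_file, pre_file)
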
# Set to True for operational runs
cfg.CONTINUE_ON_ERROR = False
cfg.PARAMS['auto_skip_task'] = False

# Test
cfg.PARAMS['mixed_min_shape'] = 0.003
cfg.PARAMS['default_parabolic_bedshape'] = 0.003
cfg.PARAMS['trapezoid_lambdas'] = 0.

# Don't use divides for now
cfg.set_divides_db()
cfg.set_intersects_db()

# Pre-download other files which will be needed later
_ = utils.get_cru_file(var='tmp')
_ = utils.get_cru_file(var='pre')

# Copy the precalibrated tstar file
# ---------------------------------

# Note that, to be exact, this procedure can only be applied if the run
# parameters don't change between the calibration and the run.
# After testing, it appears that changing the 'border' parameter does not
# affect the results much (as expected), so it is OK to change it. Changing
# anything else (e.g. smoothing, dx, prcp factor...) should imply a
# re-calibration.

mbf = 'https://dl.dropboxusercontent.com/u/20930277/ref_tstars_b60_prcpfac_25_defaults.csv'
mbf = utils.file_downloader(mbf)
shutil.copyfile(mbf, os.path.join(WORKING_DIR, 'ref_tstars.csv'))
cfg.PARAMS['min_mu_star'] = 0.0
cfg.PARAMS['inversion_fs'] = 5.7e-20
cfg.PARAMS['use_tar_shapefiles'] = False
cfg.PARAMS['use_intersects'] = True
cfg.PARAMS['use_compression'] = False
cfg.PARAMS['compress_climate_netcdf'] = False

# We use intersects
path = utils.get_rgi_intersects_region_file(rgi_region, version=rgi_version)
cfg.set_intersects_db(path)

# RGI file
rgidf = gpd.read_file(RGI_FILE)

# Pre-download other files which will be needed later
_ = utils.get_cru_file(var='tmp')
p = utils.get_cru_file(var='pre')
print('CRU file: ' + p)

# Run only lake-terminating and marine-terminating glaciers
# (i.e. exclude land-terminating glaciers, TermType 0)
glac_type = [0]
keep_glactype = [(i not in glac_type) for i in rgidf.TermType]
rgidf = rgidf.iloc[keep_glactype]

# Run only glaciers that have a weak connection to the ice sheet or are
# not connected to it at all
connection = [2]
keep_connection = [(i not in connection) for i in rgidf.Connect]
rgidf = rgidf.iloc[keep_connection]
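The boolean-list filtering above works because pandas accepts a boolean mask of the same length in iloc. An equivalent sketch using isin, which some may find more readable (same result, different spelling):

# keep everything except land-terminating glaciers (TermType 0)
rgidf = rgidf.loc[~rgidf.TermType.isin(glac_type)]
# keep glaciers that are not, or only weakly, connected to the ice sheet
rgidf = rgidf.loc[~rgidf.Connect.isin(connection)]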

# Run glaciers without errors
Example #5
def run_prepro_levels(rgi_version=None,
                      rgi_reg=None,
                      border=None,
                      output_folder='',
                      working_dir='',
                      dem_source='',
                      is_test=False,
                      demo=False,
                      test_rgidf=None,
                      test_intersects_file=None,
                      test_topofile=None,
                      test_crudir=None,
                      disable_mp=False,
                      timeout=0,
                      max_level=4,
                      logging_level='WORKFLOW'):
    """Does the actual job.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    dem_source : str
        which DEM source to use: default, SOURCE_NAME or ALL
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    demo : bool
        to run the prepro for the list of demo glaciers
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    test_crudir : str
        for testing purposes only
    disable_mp : bool
        disable multiprocessing
    timeout : int
        the entity-task timeout, passed on to cfg.PARAMS['task_timeout']
    max_level : int
        the maximum pre-processing level before stopping
    logging_level : str
        the logging level to use (DEBUG, INFO, WARNING, WORKFLOW)
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Input check
    if max_level not in [1, 2, 3, 4]:
        raise InvalidParamsError('max_level should be one of [1, 2, 3, 4]')

    # Time
    start = time.time()

    def _time_log():
        # Log util
        m, s = divmod(time.time() - start, 60)
        h, m = divmod(m, 60)
        log.workflow('OGGM prepro_levels is done! Time needed: '
                     '{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)))

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level=logging_level)

    # Local paths
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = not disable_mp

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    # Timeout
    cfg.PARAMS['task_timeout'] = timeout

    # For statistics
    climate_periods = [1920, 1960, 2000]

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    rgi_dir_name = 'RGI{}'.format(rgi_version)
    border_dir_name = 'b_{:03d}'.format(border)
    base_dir = os.path.join(output_folder, rgi_dir_name, border_dir_name)

    # Add a package version file
    utils.mkdir(base_dir)
    opath = os.path.join(base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    if demo:
        rgidf = utils.get_rgi_glacier_entities(cfg.DATA['demo_glaciers'].index)
    elif test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(
            utils.get_rgi_region_file(rgi_reg, version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Just for fun
        rgidf = rgidf.sample(4)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    # L1 - initialize working directories
    # Which DEM source?
    if dem_source.upper() == 'ALL':
        # This is the complex one, just do the job and leave
        log.workflow('Running prepro on ALL sources')
        for i, s in enumerate(utils.DEM_SOURCES):
            rs = i == 0
            rgidf['DEM_SOURCE'] = s
            log.workflow('Running prepro on sources: {}'.format(s))
            gdirs = workflow.init_glacier_regions(rgidf, reset=rs, force=rs)
            workflow.execute_entity_task(_rename_dem_folder, gdirs, source=s)

        # Compress all in output directory
        l_base_dir = os.path.join(base_dir, 'L1')
        workflow.execute_entity_task(utils.gdir_to_tar,
                                     gdirs,
                                     delete=False,
                                     base_dir=l_base_dir)
        utils.base_dir_to_tar(l_base_dir)

        _time_log()
        return

    if dem_source:
        # Force a given source
        rgidf['DEM_SOURCE'] = dem_source.upper()

    # L1 - go
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L1', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L1 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L1')
    workflow.execute_entity_task(utils.gdir_to_tar,
                                 gdirs,
                                 delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)
    if max_level == 1:
        _time_log()
        return

    # L2 - Tasks
    # Pre-download other files just in case
    if test_crudir is None:
        _ = utils.get_cru_file(var='tmp')
        _ = utils.get_cru_file(var='pre')
    else:
        cfg.PATHS['cru_dir'] = test_crudir

    workflow.execute_entity_task(tasks.process_cru_data, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L2', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L2 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L2')
    workflow.execute_entity_task(utils.gdir_to_tar,
                                 gdirs,
                                 delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)
    if max_level == 2:
        _time_log()
        return

    # L3 - Tasks
    task_list = [
        tasks.glacier_masks, tasks.compute_centerlines,
        tasks.initialize_flowlines, tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape, tasks.catchment_area,
        tasks.catchment_intersections, tasks.catchment_width_geom,
        tasks.catchment_width_correction, tasks.local_t_star,
        tasks.mu_star_calibration, tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion, tasks.filter_inversion_output,
        tasks.init_present_time_glacier
    ]
    for task in task_list:
        workflow.execute_entity_task(task, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L3', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)
    opath = os.path.join(sum_dir, 'climate_statistics_{}.csv'.format(rgi_reg))
    utils.compile_climate_statistics(gdirs,
                                     add_climate_period=climate_periods,
                                     path=opath)

    # L3 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L3')
    workflow.execute_entity_task(utils.gdir_to_tar,
                                 gdirs,
                                 delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)
    if max_level == 3:
        _time_log()
        return

    # L4 - No tasks: add some stats for consistency and make the dirs small
    sum_dir = os.path.join(base_dir, 'L4', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # Copy mini data to new dir
    base_dir = os.path.join(base_dir, 'L4')
    mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir,
                                              gdirs,
                                              base_dir=base_dir)

    # L4 OK - compress all in output directory
    workflow.execute_entity_task(utils.gdir_to_tar, mini_gdirs, delete=True)
    utils.base_dir_to_tar(base_dir)

    _time_log()
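For reference, a hedged sketch of how this entry point might be invoked for a quick test; the region, border and paths below are illustrative placeholders, not values from the source:

# illustrative call (placeholder paths and values)
run_prepro_levels(rgi_version='61', rgi_reg='11', border=80,
                  output_folder='/path/to/output',
                  working_dir='/path/to/workdir',
                  is_test=True,      # process only a small random sample
                  max_level=2,       # stop after the climate (L2) step
                  logging_level='WORKFLOW')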
Example #6
def process_cru_data(gdir):
    """Processes and writes the climate data for this glacier.

    Interpolates the CRU TS data to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.
    """

    # read the climatology
    clfile = utils.get_cru_cl_file()
    ncclim = salem.GeoNetcdf(clfile)
    # and the TS data
    nc_ts_tmp = salem.GeoNetcdf(utils.get_cru_file('tmp'), monthbegin=True)
    nc_ts_pre = salem.GeoNetcdf(utils.get_cru_file('pre'), monthbegin=True)

    # set temporal subset for the ts data (hydro years)
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
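    # e.g. with sm = 10 (hydrological year starting in October) this gives
    # em = 9, so the subset below runs from October of the first year to
    # September of the last year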
    yrs = nc_ts_pre.time.year
    y0, y1 = yrs[0], yrs[-1]
    nc_ts_tmp.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    nc_ts_pre.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    # gradient default params
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    def_grad = cfg.PARAMS['temp_default_gradient']
    g_minmax = cfg.PARAMS['temp_local_gradient_bounds']

    lon = gdir.cenlon
    lat = gdir.cenlat

    # This is guaranteed to work because I prepared the file (I hope)
    ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # get climatology data
    loc_hgt = ncclim.get_vardata('elev')
    loc_tmp = ncclim.get_vardata('temp')
    loc_pre = ncclim.get_vardata('prcp')
    loc_lon = ncclim.get_vardata('lon')
    loc_lat = ncclim.get_vardata('lat')

    # see if the center is ok
    if not np.isfinite(loc_hgt[1, 1]):
        # take another candidate where finite
        isok = np.isfinite(loc_hgt)

        # wait: some areas are entirely NaNs, make the subset larger
        _margin = 1
        while not np.any(isok):
            _margin += 1
            ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=_margin)
            loc_hgt = ncclim.get_vardata('elev')
            isok = np.isfinite(loc_hgt)
        if _margin > 1:
            log.debug('(%s) I had to look up for far climate pixels: %s',
                      gdir.rgi_id, _margin)

        # Take the first candidate (doesn't matter which)
        lon, lat = ncclim.grid.ll_coordinates
        lon = lon[isok][0]
        lat = lat[isok][0]
        # Resubset
        ncclim.set_subset()
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        loc_lon = ncclim.get_vardata('lon')
        loc_lat = ncclim.get_vardata('lat')

    assert np.isfinite(loc_hgt[1, 1])
    isok = np.isfinite(loc_hgt)
    hgt_f = loc_hgt[isok].flatten()
    assert len(hgt_f) > 0.
    ts_grad = np.zeros(12) + def_grad
    if use_grad and len(hgt_f) >= 5:
        for i in range(12):
            loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
            slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
            ts_grad[i] = slope if (p_val < 0.01) else def_grad
    # ... but don't exaggerate too much
    ts_grad = np.clip(ts_grad, g_minmax[0], g_minmax[1])
    # convert to a time series in hydro-year order
    ts_grad = ts_grad.tolist()
    ts_grad = ts_grad[em:] + ts_grad[0:em]
    ts_grad = np.asarray(ts_grad * ny)

    # maybe this will throw out of bounds warnings
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # compute monthly anomalies
    # of temp
    ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
    ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
    ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
    # of precip
    ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
    ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
    ts_pre_ano = ts_pre.groupby('time.month') - ts_pre_avg
    # scaled anomalies are the default; the standard anomalies above
    # are used later where ts_pre_avg == 0
    ts_pre = ts_pre.groupby('time.month') / ts_pre_avg
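    # (a scaled anomaly of e.g. 1.2 means 20% more precipitation than the
    # 1961-1990 monthly mean for that calendar month)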

    # interpolate to HR grid
    if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
        # Extreme case, middle pix is not valid
        # take any valid pix from the 3*3 (and hope there's one)
        found_it = False
        for idi in range(2):
            for idj in range(2):
                if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                    ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                    ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                    ts_pre_ano[:, 1, 1] = ts_pre_ano[:, idj, idi]
                    found_it = True
        if not found_it:
            msg = '({}) there is no climate data'.format(gdir.rgi_id)
            raise RuntimeError(msg)
    elif np.any(~np.isfinite(ts_tmp)):
        # maybe the side is nan, but we can do nearest
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values,
                                              nc_ts_tmp.grid,
                                              interp='nearest')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values,
                                              nc_ts_pre.grid,
                                              interp='nearest')
        ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
                                                  nc_ts_pre.grid,
                                                  interp='nearest')
    else:
        # We can do bilinear
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values,
                                              nc_ts_tmp.grid,
                                              interp='linear')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values,
                                              nc_ts_pre.grid,
                                              interp='linear')
        ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
                                                  nc_ts_pre.grid,
                                                  interp='linear')

    # take the center pixel and add it to the CRU CL clim
    # for temp
    loc_tmp = xr.DataArray(loc_tmp[:, 1, 1],
                           dims=['month'],
                           coords={'month': ts_tmp_avg.month})
    ts_tmp = xr.DataArray(ts_tmp[:, 1, 1],
                          dims=['time'],
                          coords={'time': time})
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    # for prcp
    loc_pre = xr.DataArray(loc_pre[:, 1, 1],
                           dims=['month'],
                           coords={'month': ts_pre_avg.month})
    ts_pre = xr.DataArray(ts_pre[:, 1, 1],
                          dims=['time'],
                          coords={'time': time})
    ts_pre_ano = xr.DataArray(ts_pre_ano[:, 1, 1],
                              dims=['time'],
                              coords={'time': time})
    # scaled anomalies
    ts_pre = ts_pre.groupby('time.month') * loc_pre
    # standard anomalies
    ts_pre_ano = ts_pre_ano.groupby('time.month') + loc_pre
    # Correct infinite values with standard anomalies
    ts_pre.values = np.where(np.isfinite(ts_pre.values), ts_pre.values,
                             ts_pre_ano.values)
    # The last step might create negative values (unlikely). Clip them
    ts_pre.values = ts_pre.values.clip(0)

    # done
    loc_hgt = loc_hgt[1, 1]
    loc_lon = loc_lon[1]
    loc_lat = loc_lat[1]
    assert np.isfinite(loc_hgt)
    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))
    assert np.all(np.isfinite(ts_grad))
    gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                    ts_grad, loc_hgt, loc_lon, loc_lat)
    ncclim._nc.close()
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()
    # metadata
    out = {
        'climate_source': 'CRU data',
        'hydro_yr_0': y0 + 1,
        'hydro_yr_1': y1
    }
    gdir.write_pickle(out, 'climate_info')
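process_cru_data is an entity task: it is normally dispatched over many glacier directories rather than called by hand, as the workflow examples on this page do. A minimal, self-contained sketch assuming the same (older) OGGM API used in these snippets:

import tempfile
from oggm import cfg, utils, workflow, tasks

cfg.initialize()
cfg.PATHS['working_dir'] = tempfile.mkdtemp()  # throwaway dir for the sketch

# a single glacier is enough to illustrate the call pattern
rgidf = utils.get_rgi_glacier_entities(['RGI60-11.00897'])
gdirs = workflow.init_glacier_regions(rgidf)

# make sure the CRU files are available, then process them per glacier
_ = utils.get_cru_file(var='tmp')
_ = utils.get_cru_file(var='pre')
workflow.execute_entity_task(tasks.process_cru_data, gdirs)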
Example #7
def _distribute_cru_style_nonparallel(gdirs):
    """More general solution for OGGM globally.

    It uses the CRU CL2 ten-minutes climatology as baseline
    (provided with OGGM)
    """

    # read the climatology
    clfile = utils.get_cru_cl_file()
    ncclim = salem.GeoNetcdf(clfile)
    # and the TS data
    nc_ts_tmp = salem.GeoNetcdf(utils.get_cru_file('tmp'), monthbegin=True)
    nc_ts_pre = salem.GeoNetcdf(utils.get_cru_file('pre'), monthbegin=True)

    # set temporal subset for the ts data (hydro years)
    nc_ts_tmp.set_period(t0='1901-10-01', t1='2014-09-01')
    nc_ts_pre.set_period(t0='1901-10-01', t1='2014-09-01')
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    # gradient default params
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    def_grad = cfg.PARAMS['temp_default_gradient']
    g_minmax = cfg.PARAMS['temp_local_gradient_bounds']
    prcp_scaling_factor = cfg.PARAMS['prcp_scaling_factor']

    for gdir in gdirs:
        log.info('%s: %s', gdir.rgi_id, 'distribute_cru_style')
        lon = gdir.cenlon
        lat = gdir.cenlat

        # This is guaranteed to work because I prepared the file (I hope)
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

        # get monthly gradient ...
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        isok = np.isfinite(loc_hgt)
        hgt_f = loc_hgt[isok].flatten()
        ts_grad = np.zeros(12) + def_grad
        if use_grad and len(hgt_f) >= 5:
            for i in range(12):
                loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
                slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
                ts_grad[i] = slope if (p_val < 0.01) else def_grad
        # ... but don't exaggerate too much
        ts_grad = np.clip(ts_grad, g_minmax[0], g_minmax[1])
        # convert to a time series in hydro-year order
        ts_grad = ts_grad.tolist()
        ts_grad = ts_grad[9:] + ts_grad[0:9]
        ts_grad = np.asarray(ts_grad * ny)

        # maybe this will throw out of bounds warnings
        nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

        # compute monthly anomalies
        # of temp
        ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
        ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
        ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
        ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
        # of precip
        ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
        ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
        ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
        ts_pre = ts_pre.groupby('time.month') - ts_pre_avg

        # interpolate to HR grid
        if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
            # Extreme case, middle pix is not valid
            # take any valid pix from the 3*3 (and hope there's one)
            found_it = False
            for idi in range(2):
                for idj in range(2):
                    if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                        ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                        ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                        found_it = True
            if not found_it:
                msg = '{}: OMG there is no climate data'.format(gdir.rgi_id)
                raise RuntimeError(msg)
        elif np.any(~np.isfinite(ts_tmp)):
            # maybe the side is nan, but we can do nearest
            ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values,
                                                  nc_ts_tmp.grid,
                                                  interp='nearest')
            ts_pre = ncclim.grid.map_gridded_data(ts_pre.values,
                                                  nc_ts_pre.grid,
                                                  interp='nearest')
        else:
            # We can do bilinear
            ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values,
                                                  nc_ts_tmp.grid,
                                                  interp='linear')
            ts_pre = ncclim.grid.map_gridded_data(ts_pre.values,
                                                  nc_ts_pre.grid,
                                                  interp='linear')

        # take the center pixel and add it to the CRU CL clim
        # for temp
        loc_tmp = xr.DataArray(loc_tmp[:, 1, 1],
                               dims=['month'],
                               coords={'month': ts_pre_avg.month})
        ts_tmp = xr.DataArray(ts_tmp[:, 1, 1],
                              dims=['time'],
                              coords={'time': time})
        ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
        # for prcp
        loc_pre = xr.DataArray(loc_pre[:, 1, 1],
                               dims=['month'],
                               coords={'month': ts_pre_avg.month})
        ts_pre = xr.DataArray(ts_pre[:, 1, 1],
                              dims=['time'],
                              coords={'time': time})
        ts_pre = ts_pre.groupby('time.month') + loc_pre * prcp_scaling_factor

        # done
        loc_hgt = loc_hgt[1, 1]
        assert np.isfinite(loc_hgt)
        assert np.all(np.isfinite(ts_pre.values))
        assert np.all(np.isfinite(ts_tmp.values))
        assert np.all(np.isfinite(ts_grad))
        gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                        ts_grad, loc_hgt)
Example #8
    def setup_cache(self):

        setattr(full_workflow.setup_cache, "timeout", 360)

        utils.mkdir(self.testdir, reset=True)
        self.cfg_init()

        # Pre-download other files which will be needed later
        utils.get_cru_cl_file()
        utils.get_cru_file(var='tmp')
        utils.get_cru_file(var='pre')

        # Get the RGI glaciers for the run.
        rgi_list = ['RGI60-01.10299', 'RGI60-11.00897', 'RGI60-18.02342']
        rgidf = utils.get_rgi_glacier_entities(rgi_list)

        # We use intersects
        db = utils.get_rgi_intersects_region_file(version='61',
                                                  rgi_ids=rgi_list)
        cfg.set_intersects_db(db)

        # Sort for more efficient parallel computing
        rgidf = rgidf.sort_values('Area', ascending=False)

        # Go - initialize working directories
        gdirs = workflow.init_glacier_regions(rgidf)

        # Preprocessing tasks
        task_list = [
            tasks.glacier_masks,
            tasks.compute_centerlines,
            tasks.initialize_flowlines,
            tasks.compute_downstream_line,
            tasks.compute_downstream_bedshape,
            tasks.catchment_area,
            tasks.catchment_intersections,
            tasks.catchment_width_geom,
            tasks.catchment_width_correction,
        ]
        for task in task_list:
            execute_entity_task(task, gdirs)

        # Climate tasks -- only data IO and tstar interpolation!
        execute_entity_task(tasks.process_cru_data, gdirs)
        execute_entity_task(tasks.local_mustar, gdirs)
        execute_entity_task(tasks.apparent_mb, gdirs)

        # Inversion tasks
        execute_entity_task(tasks.prepare_for_inversion, gdirs)
        # We use the default parameters for this run
        execute_entity_task(tasks.mass_conservation_inversion, gdirs)
        execute_entity_task(tasks.filter_inversion_output, gdirs)

        # Final preparation for the run
        execute_entity_task(tasks.init_present_time_glacier, gdirs)

        # Constant climate run representative of the tstar climate, without
        # bias. In an ideal world this would imply that the glaciers remain
        # stable, but it doesn't have to be so
        execute_entity_task(tasks.run_constant_climate,
                            gdirs,
                            bias=0,
                            nyears=100,
                            output_filesuffix='_tstar')

        execute_entity_task(tasks.run_constant_climate,
                            gdirs,
                            y0=1990,
                            nyears=100,
                            output_filesuffix='_pd')

        # Compile output
        utils.glacier_characteristics(gdirs)
        utils.compile_run_output(gdirs, filesuffix='_tstar')
        utils.compile_run_output(gdirs, filesuffix='_pd')
        utils.compile_climate_input(gdirs)

        return gdirs
Example #9
def run_benchmark(rgi_version=None, rgi_reg=None, border=None,
                  output_folder='', working_dir='', is_test=False,
                  test_rgidf=None, test_intersects_file=None,
                  test_topofile=None, test_crudir=None):
    """Does the actual job.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    test_crudir : str
        for testing purposes only
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Module logger
    log = logging.getLogger(__name__)

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level='WORKFLOW')

    # Local paths
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    # For statistics
    odf = pd.DataFrame()

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    base_dir = os.path.join(output_folder)

    # Add a package version file
    utils.mkdir(base_dir)
    opath = os.path.join(base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    # Read RGI
    start = time.time()
    if test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(utils.get_rgi_region_file(rgi_reg,
                                                        version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Just for fun
        rgidf = rgidf.sample(2)
    _add_time_to_df(odf, 'Read RGI', time.time()-start)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    # Initialize working directories
    start = time.time()
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)
    _add_time_to_df(odf, 'init_glacier_regions', time.time()-start)

    # Pre-download other files just in case
    if test_crudir is None:
        _ = utils.get_cru_file(var='tmp')
        _ = utils.get_cru_file(var='pre')
    else:
        cfg.PATHS['cru_dir'] = test_crudir

    # Tasks
    task_list = [
        tasks.process_cru_data,
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.local_t_star,
        tasks.mu_star_calibration,
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
        tasks.init_present_time_glacier,
    ]
    for task in task_list:
        start = time.time()
        workflow.execute_entity_task(task, gdirs)
        _add_time_to_df(odf, task.__name__, time.time()-start)

    # Runs
    start = time.time()
    workflow.execute_entity_task(tasks.run_random_climate, gdirs,
                                 nyears=250, bias=0, seed=0,
                                 output_filesuffix='_tstar')
    _add_time_to_df(odf, 'run_random_climate_tstar_250', time.time()-start)

    start = time.time()
    workflow.execute_entity_task(tasks.run_random_climate, gdirs,
                                 nyears=250, y0=1995, seed=0,
                                 output_filesuffix='_commit')
    _add_time_to_df(odf, 'run_random_climate_commit_250', time.time()-start)

    # Compile results
    start = time.time()
    utils.compile_glacier_statistics(gdirs)
    _add_time_to_df(odf, 'compile_glacier_statistics', time.time()-start)

    start = time.time()
    utils.compile_climate_statistics(gdirs,
                                     add_climate_period=[1920, 1960, 2000])
    _add_time_to_df(odf, 'compile_climate_statistics', time.time()-start)

    start = time.time()
    utils.compile_run_output(gdirs, filesuffix='_tstar')
    _add_time_to_df(odf, 'compile_run_output_tstar', time.time()-start)

    start = time.time()
    utils.compile_run_output(gdirs, filesuffix='_commit')
    _add_time_to_df(odf, 'compile_run_output_commit', time.time()-start)

    # Log
    opath = os.path.join(base_dir, 'benchmarks_b{:03d}.csv'.format(border))
    odf.index.name = 'Task'
    odf.to_csv(opath)
    log.workflow('OGGM benchmarks is done!')
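As with run_prepro_levels above, a hedged sketch of a test invocation with placeholder values; the run writes its per-task timings to benchmarks_b{border:03d}.csv in the output folder (see the last lines of the function):

# illustrative call (placeholder paths and values)
run_benchmark(rgi_version='61', rgi_reg='11', border=80,
              output_folder='/path/to/output',
              working_dir='/path/to/workdir',
              is_test=True)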
Example #10
ids_with_mb = pd.read_csv(flink)['RGI_ID'].values

# Get some tidewater glaciers in the Alaska region that we want to test and
# that are also in the GlaThiDa database
keep_ids = [
    'RGI50-01.10689', 'RGI50-01.20791', 'RGI50-01.00037', 'RGI50-01.10402',
    'RGI50-01.22193', 'RGI50-01.22699'
]
keep_indexes = [((i in keep_ids) or (i in ids_with_mb)) for i in rgidf.RGIID]
rgidf = rgidf.iloc[keep_indexes]

log.info('Number of glaciers: {}'.format(len(rgidf)))

# Download other files if needed
_ = utils.get_cru_file(var='tmp')

# Go - initialize working directories
# gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)
gdirs = workflow.init_glacier_regions(rgidf)

# Pre-pro tasks
task_list = [
    tasks.glacier_masks, tasks.compute_centerlines,
    tasks.compute_downstream_lines, tasks.catchment_area,
    tasks.initialize_flowlines, tasks.catchment_width_geom,
    tasks.catchment_width_correction
]
if RUN_GIS_PREPRO:
    for task in task_list:
        execute_entity_task(task, gdirs)
Example #11
def run_benchmark(rgi_version=None,
                  rgi_reg=None,
                  border=None,
                  output_folder='',
                  working_dir='',
                  is_test=False,
                  test_rgidf=None,
                  test_intersects_file=None,
                  test_topofile=None,
                  test_crudir=None):
    """Does the actual job.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    test_crudir : str
        for testing purposes only
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Module logger
    log = logging.getLogger(__name__)

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level='WORKFLOW')

    # Local paths
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    # For statistics
    odf = pd.DataFrame()

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    base_dir = os.path.join(output_folder)

    # Add a package version file
    utils.mkdir(base_dir)
    opath = os.path.join(base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    # Read RGI
    start = time.time()
    if test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(
            utils.get_rgi_region_file(rgi_reg, version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Just for fun
        rgidf = rgidf.sample(2)
    _add_time_to_df(odf, 'Read RGI', time.time() - start)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    # Initialize working directories
    start = time.time()
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)
    _add_time_to_df(odf, 'init_glacier_regions', time.time() - start)

    # Pre-download other files just in case
    if test_crudir is None:
        _ = utils.get_cru_file(var='tmp')
        _ = utils.get_cru_file(var='pre')
    else:
        cfg.PATHS['cru_dir'] = test_crudir

    # Tasks
    task_list = [
        tasks.process_cru_data,
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.local_t_star,
        tasks.mu_star_calibration,
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
        tasks.init_present_time_glacier,
    ]
    for task in task_list:
        start = time.time()
        workflow.execute_entity_task(task, gdirs)
        _add_time_to_df(odf, task.__name__, time.time() - start)

    # Runs
    start = time.time()
    workflow.execute_entity_task(tasks.run_random_climate,
                                 gdirs,
                                 nyears=250,
                                 bias=0,
                                 seed=0,
                                 output_filesuffix='_tstar')
    _add_time_to_df(odf, 'run_random_climate_tstar_250', time.time() - start)

    start = time.time()
    workflow.execute_entity_task(tasks.run_random_climate,
                                 gdirs,
                                 nyears=250,
                                 y0=1995,
                                 seed=0,
                                 output_filesuffix='_commit')
    _add_time_to_df(odf, 'run_random_climate_commit_250', time.time() - start)

    # Compile results
    start = time.time()
    utils.compile_glacier_statistics(gdirs)
    _add_time_to_df(odf, 'compile_glacier_statistics', time.time() - start)

    start = time.time()
    utils.compile_climate_statistics(gdirs,
                                     add_climate_period=[1920, 1960, 2000])
    _add_time_to_df(odf, 'compile_climate_statistics', time.time() - start)

    start = time.time()
    utils.compile_run_output(gdirs, filesuffix='_tstar')
    _add_time_to_df(odf, 'compile_run_output_tstar', time.time() - start)

    start = time.time()
    utils.compile_run_output(gdirs, filesuffix='_commit')
    _add_time_to_df(odf, 'compile_run_output_commit', time.time() - start)

    # Log
    opath = os.path.join(base_dir, 'benchmarks_b{:03d}.csv'.format(border))
    odf.index.name = 'Task'
    odf.to_csv(opath)
    log.workflow('OGGM benchmarks is done!')
Example #12
def _distribute_cru_style_nonparallel(gdirs):
    """More general solution for OGGM globally.

    It uses the CRU CL2 ten-minutes climatology as baseline
    (provided with OGGM)
    """

    # read the climatology
    clfile = utils.get_cru_cl_file()
    ncclim = salem.GeoNetcdf(clfile)
    # and the TS data
    nc_ts_tmp = salem.GeoNetcdf(utils.get_cru_file('tmp'), monthbegin=True)
    nc_ts_pre = salem.GeoNetcdf(utils.get_cru_file('pre'), monthbegin=True)

    # set temporal subset for the ts data (hydro years)
    nc_ts_tmp.set_period(t0='1901-10-01', t1='2014-09-01')
    nc_ts_pre.set_period(t0='1901-10-01', t1='2014-09-01')
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    # gradient default params
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    def_grad = cfg.PARAMS['temp_default_gradient']
    g_minmax = cfg.PARAMS['temp_local_gradient_bounds']
    prcp_scaling_factor = cfg.PARAMS['prcp_scaling_factor']

    for gdir in gdirs:
        log.info('%s: %s', gdir.rgi_id, 'distribute_cru_style')
        lon = gdir.cenlon
        lat = gdir.cenlat

        # This is guaranteed to work because I prepared the file (I hope)
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

        # get monthly gradient ...
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        isok = np.isfinite(loc_hgt)
        hgt_f = loc_hgt[isok].flatten()
        ts_grad = np.zeros(12) + def_grad
        if use_grad and len(hgt_f) >= 5:
            for i in range(12):
                loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
                slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
                ts_grad[i] = slope if (p_val < 0.01) else def_grad
        # ... but don't exaggerate too much
        ts_grad = np.clip(ts_grad, g_minmax[0], g_minmax[1])
        # convert to a time series in hydro-year order
        ts_grad = ts_grad.tolist()
        ts_grad = ts_grad[9:] + ts_grad[0:9]
        ts_grad = np.asarray(ts_grad * ny)

        # maybe this will throw out of bounds warnings
        nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

        # compute monthly anomalies
        # of temp
        ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
        ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
        ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
        ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
        # of precip
        ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
        ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
        ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
        ts_pre = ts_pre.groupby('time.month') - ts_pre_avg

        # interpolate to HR grid
        if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
            # Extreme case, middle pix is not valid
            # take any valid pix from the 3*3 (and hope there's one)
            found_it = False
            for idi in range(2):
                for idj in range(2):
                    if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                        ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                        ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                        found_it = True
            if not found_it:
                msg = '{}: OMG there is no climate data'.format(gdir.rgi_id)
                raise RuntimeError(msg)
        elif np.any(~np.isfinite(ts_tmp)):
            # maybe the side is nan, but we can do nearest
            ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values,
                                                  nc_ts_tmp.grid,
                                                  interp='nearest')
            ts_pre = ncclim.grid.map_gridded_data(ts_pre.values,
                                                  nc_ts_pre.grid,
                                                  interp='nearest')
        else:
            # We can do bilinear
            ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values,
                                                  nc_ts_tmp.grid,
                                                  interp='linear')
            ts_pre = ncclim.grid.map_gridded_data(ts_pre.values,
                                                  nc_ts_pre.grid,
                                                  interp='linear')

        # take the center pixel and add it to the CRU CL clim
        # for temp
        loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                               coords={'month': ts_pre_avg.month})
        ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
                              coords={'time': time})
        ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
        # for prcp
        loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                               coords={'month': ts_pre_avg.month})
        ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
                              coords={'time': time})
        ts_pre = ts_pre.groupby('time.month') + loc_pre * prcp_scaling_factor

        # done
        loc_hgt = loc_hgt[1, 1]
        assert np.isfinite(loc_hgt)
        assert np.all(np.isfinite(ts_pre.values))
        assert np.all(np.isfinite(ts_tmp.values))
        assert np.all(np.isfinite(ts_grad))
        gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                        ts_grad, loc_hgt)
Example #13
def run_prepro_levels(rgi_version=None, rgi_reg=None, border=None,
                      output_folder='', working_dir='', is_test=False,
                      demo=False, test_rgidf=None, test_intersects_file=None,
                      test_topofile=None, test_crudir=None):
    """Does the actual job.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    demo : bool
        to run the prepro for the list of demo glaciers
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    test_crudir : str
        for testing purposes only
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Module logger
    log = logging.getLogger(__name__)

    # Time
    start = time.time()

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level='WORKFLOW')

    # Local paths
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    # For statistics
    climate_periods = [1920, 1960, 2000]

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    rgi_dir_name = 'RGI{}'.format(rgi_version)
    border_dir_name = 'b_{:03d}'.format(border)
    base_dir = os.path.join(output_folder, rgi_dir_name, border_dir_name)

    # Add a package version file
    utils.mkdir(base_dir)
    opath = os.path.join(base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    if demo:
        rgidf = utils.get_rgi_glacier_entities(cfg.DEMO_GLACIERS.index)
    elif test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(utils.get_rgi_region_file(rgi_reg,
                                                        version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Just for fun
        rgidf = rgidf.sample(4)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    # L1 - initialize working directories
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L1', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L1 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L1')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)

    # L2 - Tasks
    # Pre-download other files just in case
    if test_crudir is None:
        _ = utils.get_cru_file(var='tmp')
        _ = utils.get_cru_file(var='pre')
    else:
        cfg.PATHS['cru_dir'] = test_crudir

    workflow.execute_entity_task(tasks.process_cru_data, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L2', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L2 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L2')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)

    # L3 - Tasks
    task_list = [
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.local_t_star,
        tasks.mu_star_calibration,
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
    ]
    for task in task_list:
        workflow.execute_entity_task(task, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L3', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)
    opath = os.path.join(sum_dir, 'climate_statistics_{}.csv'.format(rgi_reg))
    utils.compile_climate_statistics(gdirs, add_climate_period=climate_periods,
                                     path=opath)

    # L3 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L3')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)

    # L4 - Tasks
    workflow.execute_entity_task(tasks.init_present_time_glacier, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L4', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # Copy mini data to new dir
    base_dir = os.path.join(base_dir, 'L4')
    mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir, gdirs,
                                              base_dir=base_dir)

    # L4 OK - compress all in output directory
    workflow.execute_entity_task(utils.gdir_to_tar, mini_gdirs, delete=True)
    utils.base_dir_to_tar(base_dir)

    # Log
    m, s = divmod(time.time() - start, 60)
    h, m = divmod(m, 60)
    log.workflow('OGGM prepro_levels is done! Time needed: '
                 '{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)))
Example #14
def initialization_selection():
    # -------------
    # Initialization
    # -------------
    cfg.initialize()

    # working directories
    cfg.PATHS['working_dir'] = mbcfg.PATHS['working_dir']

    cfg.PATHS['rgi_version'] = mbcfg.PARAMS['rgi_version']

    # We are running the calibration ourselves
    cfg.PARAMS['run_mb_calibration'] = True

    # No need for intersects since this has an effect on the inversion only
    cfg.PARAMS['use_intersects'] = False

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # Set to True for operational runs
    # maybe also here?
    cfg.PARAMS['continue_on_error'] = False

    # set negative flux filtering to false. should be standard soon
    cfg.PARAMS['filter_for_neg_flux'] = False

    # Pre-download other files which will be needed later
    _ = utils.get_cru_file(var='tmp')
    _ = utils.get_cru_file(var='pre')
    rgi_dir = utils.get_rgi_dir(version=cfg.PATHS['rgi_version'])

    # Get the reference glacier ids (they are different for each RGI version)
    df, _ = utils.get_wgms_files()
    rids = df['RGI{}0_ID'.format(cfg.PATHS['rgi_version'])]

    # Make a new dataframe with those (this takes a while)
    rgidf = []
    for reg in df['RGI_REG'].unique():
        if reg == '19':
            continue  # we have no climate data in Antarctica
        if mbcfg.PARAMS['region'] is not None\
                and reg != mbcfg.PARAMS['region']:
            continue

        fn = '*' + reg + '_rgi{}0_*.shp'.format(cfg.PATHS['rgi_version'])
        fs = list(sorted(glob(os.path.join(rgi_dir, '*', fn))))[0]
        sh = gpd.read_file(fs)
        rgidf.append(sh.loc[sh.RGIId.isin(rids)])
    rgidf = pd.concat(rgidf)
    rgidf.crs = sh.crs  # for geolocalisation

    # reduce Europe to Histalp area (exclude Pyrenees, etc...)
    if mbcfg.PARAMS['histalp']:
        rgidf = rgidf.loc[(rgidf.CenLon >= 4) & (rgidf.CenLon < 20) &
                          (rgidf.CenLat >= 43) & (rgidf.CenLat < 47)]

    # We have to check which of them actually have enough mb data.
    # Let OGGM do it:
    gdirs = workflow.init_glacier_regions(rgidf)
    # We need to know which period we have data for

    if mbcfg.PARAMS['histalp']:
        cfg.PATHS['climate_file'] = mbcfg.PATHS['histalpfile']
        execute_entity_task(tasks.process_custom_climate_data, gdirs)
    else:
        execute_entity_task(tasks.process_cru_data, gdirs, print_log=False)

    gdirs = utils.get_ref_mb_glaciers(gdirs)
    # Keep only these
    rgidf = rgidf.loc[rgidf.RGIId.isin([g.rgi_id for g in gdirs])]

    # Save
    rgidf.to_file(os.path.join(cfg.PATHS['working_dir'],
                               'mb_ref_glaciers.shp'))

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    # Go - initialize working directories
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)

    return gdirs
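
The selection logic above boils down to matching the WGMS reference ids against the RGI attribute table. As a standalone sketch (the shapefile path is a placeholder and the 'RGI60_ID' column name assumes RGI version 6):

import geopandas as gpd
from oggm import cfg, utils

cfg.initialize()

# WGMS reference glaciers and their RGI ids ('RGI60_ID' assumed for RGI v6)
df, _ = utils.get_wgms_files()
rids = df['RGI60_ID']

# Hypothetical path to a single regional RGI shapefile
sh = gpd.read_file('/path/to/11_rgi60_CentralEurope.shp')
ref_df = sh.loc[sh.RGIId.isin(rids)]
print('WGMS reference glaciers in this region:', len(ref_df))
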
Beispiel #15
0
def process_cru_data(gdir):
    """Processes and writes the climate data for this glacier.

    Interpolates the CRU TS data to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.
    """

    # read the climatology
    clfile = utils.get_cru_cl_file()
    ncclim = salem.GeoNetcdf(clfile)
    # and the TS data
    nc_ts_tmp = salem.GeoNetcdf(utils.get_cru_file('tmp'), monthbegin=True)
    nc_ts_pre = salem.GeoNetcdf(utils.get_cru_file('pre'), monthbegin=True)

    # set temporal subset for the ts data (hydro years)
    yrs = nc_ts_pre.time.year
    y0, y1 = yrs[0], yrs[-1]
    nc_ts_tmp.set_period(t0='{}-10-01'.format(y0), t1='{}-09-01'.format(y1))
    nc_ts_pre.set_period(t0='{}-10-01'.format(y0), t1='{}-09-01'.format(y1))
    time = nc_ts_pre.time
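    # the subset must contain a whole number of 12-month (hydrological year) blocks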
    ny, r = divmod(len(time), 12)
    assert r == 0

    # gradient default params
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    def_grad = cfg.PARAMS['temp_default_gradient']
    g_minmax = cfg.PARAMS['temp_local_gradient_bounds']

    lon = gdir.cenlon
    lat = gdir.cenlat

    # This is guaranteed to work because I prepared the file (I hope)
    ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # get climatology data
    loc_hgt = ncclim.get_vardata('elev')
    loc_tmp = ncclim.get_vardata('temp')
    loc_pre = ncclim.get_vardata('prcp')
    loc_lon = ncclim.get_vardata('lon')
    loc_lat = ncclim.get_vardata('lat')

    # see if the center is ok
    if not np.isfinite(loc_hgt[1, 1]):
        # take another candidate where finite
        isok = np.isfinite(loc_hgt)

        # wait: some areas are entirely NaNs, make the subset larger
        _margin = 1
        while not np.any(isok):
            _margin += 1
            ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=_margin)
            loc_hgt = ncclim.get_vardata('elev')
            isok = np.isfinite(loc_hgt)
        if _margin > 1:
            log.debug('%s: I had to look up for far climate pixels: %s',
                      gdir.rgi_id, _margin)

        # Take the first candidate (doesn't matter which)
        lon, lat = ncclim.grid.ll_coordinates
        lon = lon[isok][0]
        lat = lat[isok][0]
        # Resubset
        ncclim.set_subset()
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        loc_lon = ncclim.get_vardata('lon')
        loc_lat = ncclim.get_vardata('lat')

    assert np.isfinite(loc_hgt[1, 1])
    isok = np.isfinite(loc_hgt)
    hgt_f = loc_hgt[isok].flatten()
    assert len(hgt_f) > 0
    ts_grad = np.zeros(12) + def_grad
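    # Monthly local lapse rates from a temperature-vs-elevation regression;
    # the default gradient is kept where the fit is not significant (p >= 0.01)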
    if use_grad and len(hgt_f) >= 5:
        for i in range(12):
            loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
            slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
            ts_grad[i] = slope if (p_val < 0.01) else def_grad
    # ... but don't exaggerate too much
    ts_grad = np.clip(ts_grad, g_minmax[0], g_minmax[1])
    # convert to a time series ordered by hydrological years
    ts_grad = ts_grad.tolist()
    ts_grad = ts_grad[9:] + ts_grad[0:9]
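    # e.g. the calendar order [Jan, ..., Dec] becomes [Oct, Nov, Dec, Jan, ..., Sep]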
    ts_grad = np.asarray(ts_grad * ny)

    # maybe this will throw out of bounds warnings
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # compute monthly anomalies
    # of temp
    ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
    ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
    ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
    # of precip
    ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
    ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
    ts_pre = ts_pre.groupby('time.month') - ts_pre_avg

    # interpolate to HR grid
    if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
        # Extreme case, middle pix is not valid
        # take any valid pix from the 3*3 (and hope there's one)
        found_it = False
        for idi in range(2):
            for idj in range(2):
                if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                    ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                    ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                    found_it = True
        if not found_it:
            msg = '{}: OMG there is no climate data'.format(gdir.rgi_id)
            raise RuntimeError(msg)
    elif np.any(~np.isfinite(ts_tmp)):
        # maybe the side is nan, but we can do nearest
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                               interp='nearest')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                               interp='nearest')
    else:
        # We can do bilinear
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                               interp='linear')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                               interp='linear')

    # take the center pixel and add it to the CRU CL clim
    # for temp
    loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                           coords={'month': ts_pre_avg.month})
    ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    # for prcp
    loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                           coords={'month': ts_pre_avg.month})
    ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_pre = ts_pre.groupby('time.month') + loc_pre

    # done
    loc_hgt = loc_hgt[1, 1]
    loc_lon = loc_lon[1]
    loc_lat = loc_lat[1]
    assert np.isfinite(loc_hgt)
    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))
    assert np.all(np.isfinite(ts_grad))
    gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                    ts_grad, loc_hgt, loc_lon, loc_lat)
    ncclim._nc.close()
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()
    # metadata
    out = {'climate_source': 'CRU data', 'hydro_yr_0': y0+1, 'hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')
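
The anomaly procedure used above (reduce the CRU TS series to anomalies with respect to its own 1961-1990 monthly climatology, then add a higher-resolution climatology back) can be illustrated on synthetic data. This is a sketch only; the series and the 12-value climatology are made up:

import numpy as np
import pandas as pd
import xarray as xr

# Synthetic monthly temperature series (random values, for illustration only)
time = pd.date_range('1951-01-01', '2000-12-01', freq='MS')
ts = xr.DataArray(np.random.randn(len(time)) + 5., dims=['time'],
                  coords={'time': time})

# Monthly climatology of the 1961-1990 reference period
ref = ts.sel(time=slice('1961-01-01', '1990-12-01'))
ref = ref.groupby('time.month').mean(dim='time')

# Anomalies with respect to that climatology ...
anom = ts.groupby('time.month') - ref

# ... plus a (made-up) high-resolution monthly climatology added back
hr_clim = xr.DataArray(np.linspace(-5., 6., 12), dims=['month'],
                       coords={'month': np.arange(1, 13)})
reconstructed = anom.groupby('time.month') + hr_clim
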
Beispiel #16
0
def run_prepro_levels(rgi_version=None,
                      rgi_reg=None,
                      border=None,
                      output_folder='',
                      working_dir='',
                      is_test=False,
                      demo=False,
                      test_rgidf=None,
                      test_intersects_file=None,
                      test_topofile=None,
                      test_crudir=None):
    """Does the actual job.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    demo : bool
        to run the prepro for the list of demo glaciers
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    test_crudir : str
        for testing purposes only
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Module logger
    log = logging.getLogger(__name__)

    # Time
    start = time.time()

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level='WORKFLOW')

    # Local paths
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    # For statistics
    climate_periods = [1920, 1960, 2000]

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    rgi_dir_name = 'RGI{}'.format(rgi_version)
    border_dir_name = 'b_{:03d}'.format(border)
    base_dir = os.path.join(output_folder, rgi_dir_name, border_dir_name)

    # Add a package version file
    utils.mkdir(base_dir)
    opath = os.path.join(base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    if demo:
        rgidf = utils.get_rgi_glacier_entities(cfg.DATA['demo_glaciers'].index)
    elif test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(
            utils.get_rgi_region_file(rgi_reg, version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Only keep a small random sample of glaciers for testing
        rgidf = rgidf.sample(4)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    # L1 - initialize working directories
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L1', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L1 OK - compress all in output directory
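    # (each glacier directory is tarred individually first; base_dir_to_tar then
    # bundles the per-glacier tars at the directory level)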
    l_base_dir = os.path.join(base_dir, 'L1')
    workflow.execute_entity_task(utils.gdir_to_tar,
                                 gdirs,
                                 delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)

    # L2 - Tasks
    # Pre-download other files just in case
    if test_crudir is None:
        _ = utils.get_cru_file(var='tmp')
        _ = utils.get_cru_file(var='pre')
    else:
        cfg.PATHS['cru_dir'] = test_crudir

    workflow.execute_entity_task(tasks.process_cru_data, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L2', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L2 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L2')
    workflow.execute_entity_task(utils.gdir_to_tar,
                                 gdirs,
                                 delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)

    # L3 - Tasks
    task_list = [
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.local_t_star,
        tasks.mu_star_calibration,
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
    ]
    for task in task_list:
        workflow.execute_entity_task(task, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L3', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)
    opath = os.path.join(sum_dir, 'climate_statistics_{}.csv'.format(rgi_reg))
    utils.compile_climate_statistics(gdirs,
                                     add_climate_period=climate_periods,
                                     path=opath)

    # L3 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L3')
    workflow.execute_entity_task(utils.gdir_to_tar,
                                 gdirs,
                                 delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)

    # L4 - Tasks
    workflow.execute_entity_task(tasks.init_present_time_glacier, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L4', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # Copy mini data to new dir
    base_dir = os.path.join(base_dir, 'L4')
    mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir,
                                              gdirs,
                                              base_dir=base_dir)

    # L4 OK - compress all in output directory
    workflow.execute_entity_task(utils.gdir_to_tar, mini_gdirs, delete=True)
    utils.base_dir_to_tar(base_dir)

    # Log
    m, s = divmod(time.time() - start, 60)
    h, m = divmod(m, 60)
    log.workflow('OGGM prepro_levels is done! Time needed: '
                 '{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)))
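
A minimal, hypothetical call of the function above for a quick test run could look as follows; the RGI version, region code, border value and directories are placeholders. With is_test=True only a random sample of four glaciers is processed, which keeps the run short.

run_prepro_levels(rgi_version='61', rgi_reg='11', border=80,
                  output_folder='/path/to/prepro_output',
                  working_dir='/path/to/workdir',
                  is_test=True)
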