Example #1
def get_ecmwf_file(dataset='ERA5', var=None):
    """Returns a path to the desired ECMWF baseline climate file.

    If the file is not present, download it.

    Parameters
    ----------
    dataset : str
        'ERA5', 'ERA5L', 'CERA'
    var : str
        'inv' for invariant
        'tmp' for temperature
        'pre' for precipitation

    Returns
    -------
    str
        path to the file
    """

    # Be sure input makes sense
    if dataset not in BASENAMES.keys():
        raise InvalidParamsError('ECMWF dataset {} not '
                                 'in {}'.format(dataset, BASENAMES.keys()))
    if var not in BASENAMES[dataset].keys():
        raise InvalidParamsError('ECMWF variable {} not '
                                 'in {}'.format(var,
                                                BASENAMES[dataset].keys()))

    # File to look for
    return utils.file_downloader(ECMWF_SERVER + BASENAMES[dataset][var])
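A minimal usage sketch (assuming an installed OGGM with this function in `oggm.shop.ecmwf`, and that `cfg.initialize()` has been called so the download cache is configured):

from oggm import cfg
from oggm.shop import ecmwf  # assumed location of get_ecmwf_file

cfg.initialize()

# downloads the file on first use, then returns the cached local path
path = ecmwf.get_ecmwf_file(dataset='ERA5', var='tmp')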
Example #2
def calendardate_to_hydrodate(y, m, start_month=None):
    """Converts a calendar (year, month) pair to a hydrological date.

    Parameters
    ----------
    y : int
        the year
    m : int
        the month
    start_month : int
        the first month of the hydrological year
    """

    if start_month is None:
        raise InvalidParamsError('In order to avoid confusion, we now force '
                                 'callers of this function to specify the '
                                 'hydrological convention they are using.')

    try:
        if m >= start_month:
            out_y = y + 1
            out_m = m - start_month + 1
        else:
            out_y = y
            out_m = m + 13 - start_month
    except (TypeError, ValueError):
        # TODO: inefficient but no time right now
        out_y = np.zeros(len(y), np.int64)
        out_m = np.zeros(len(y), np.int64)
        for i, (_y, _m) in enumerate(zip(y, m)):
            _y, _m = calendardate_to_hydrodate(_y, _m, start_month=start_month)
            out_y[i] = _y
            out_m[i] = _m
    return out_y, out_m
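With the northern-hemisphere convention `start_month=10`, October is the first month of the next hydrological year. A quick sketch of the expected behaviour of the function above:

# start_month=10: the hydrological year starts in October
calendardate_to_hydrodate(2002, 10, start_month=10)  # -> (2003, 1)
calendardate_to_hydrodate(2002, 9, start_month=10)   # -> (2002, 12)

# array input takes the (slower) element-wise fallback branch
import numpy as np
ys, ms = calendardate_to_hydrodate(np.array([2002, 2002]),
                                   np.array([10, 9]), start_month=10)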
Example #3
    def prcp_fac(self, new_prcp_fac):
        # just to check that no invalid prcp_fac values are used
        if new_prcp_fac <= 0:
            raise InvalidParamsError('prcp_fac has to be above zero!')
        self.prcp *= new_prcp_fac / self._prcp_fac
        # update the old prcp_fac so that it can be updated again later
        self._prcp_fac = new_prcp_fac
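This setter is meant to be paired with a `@property` getter: scaling by the ratio `new / old` undoes the previous factor before applying the new one, so `prcp_fac` can be changed repeatedly without re-reading the data. A minimal sketch of the surrounding class (illustrative only, not the actual OGGM class):

from oggm.exceptions import InvalidParamsError

class MassBalanceSketch:
    def __init__(self, prcp, prcp_fac=1.0):
        self.prcp = prcp * prcp_fac
        self._prcp_fac = prcp_fac

    @property
    def prcp_fac(self):
        return self._prcp_fac

    @prcp_fac.setter
    def prcp_fac(self, new_prcp_fac):
        if new_prcp_fac <= 0:
            raise InvalidParamsError('prcp_fac has to be above zero!')
        # dividing by the old factor undoes the previous scaling
        self.prcp *= new_prcp_fac / self._prcp_fac
        self._prcp_fac = new_prcp_fac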
Example #4
def get_histalp_file(var=None):
    """Returns a path to the desired HISTALP baseline climate file.

    If the file is not present, download it.

    Parameters
    ----------
    var : str
        'tmp' for temperature
        'pre' for precipitation

    Returns
    -------
    str
        path to the HISTALP file
    """

    # Be sure input makes sense
    if var not in ['tmp', 'pre']:
        raise InvalidParamsError('HISTALP variable {} '
                                 'does not exist!'.format(var))

    # File to look for
    if var == 'tmp':
        bname = 'HISTALP_temperature_1780-2014.nc'
    else:
        bname = 'HISTALP_precipitation_all_abs_1801-2014.nc'

    h_url = HISTALP_SERVER + bname + '.bz2'
    return utils.file_extractor(utils.file_downloader(h_url))
Example #5
def merge_glacier_tasks(gdirs, main_rgi_id=None, return_all=False, buffer=None,
                        **kwargs):
    """Shortcut function: run all tasks to merge tributaries to a main glacier

    Parameters
    ----------
    gdirs : list of :py:class:`oggm.GlacierDirectory`
        all glaciers, main and tributary. Preprocessed and initialised
    main_rgi_id: str
        RGI ID of the main glacier of interest. If None is provided, merging
        will start based upon the largest glacier
    return_all : bool
        if main_rgi_id is given and return_all = False: only the main glacier
        is returned
        if main_rgi_id is given and return_all = True: the main glacier and
        every remaining glacier from the initial gdirs list is returned,
        possibly merged as well.
    buffer : float
        buffer around a flowline, used first to better find an overlap with
        another flowline, and second to assure some distance between the
        lines at a junction. Will default to `cfg.PARAMS['kbuffer']`.
    kwargs: keyword arguments for the recursive merging

    Returns
    -------
    merged_gdirs: list of all merged :py:class:`oggm.GlacierDirectory`
    """

    if len(gdirs) > 100:
        raise InvalidParamsError('this could take time! I should include an '
                                 'optional parameter to ignore this.')

    # sort all glaciers descending by area
    gdirs.sort(key=lambda x: x.rgi_area_m2, reverse=True)

    # if main glacier is asked, put it in first position
    if main_rgi_id is not None:
        gdir_main = [gd for gd in gdirs if gd.rgi_id == main_rgi_id][0]
        gdirs.remove(gdir_main)
        gdirs = [gdir_main] + gdirs

    merged_gdirs = []
    while len(gdirs) > 1:
        # main glacier is always the first: either given or the largest one
        gdir_main = gdirs.pop(0)
        gdir_merged, gdirs = _recursive_merging(gdirs, gdir_main, **kwargs)
        merged_gdirs.append(gdir_merged)

    # now we have gdirs which contain all the necessary flowlines,
    # time to clean them up
    for gdir in merged_gdirs:
        flowline.clean_merged_flowlines(gdir, buffer=buffer)

    if main_rgi_id is not None and return_all is False:
        return [gd for gd in merged_gdirs if main_rgi_id in gd.rgi_id][0]

    # add the remaining glacier to the final list
    merged_gdirs = merged_gdirs + gdirs

    return merged_gdirs
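A usage sketch (assuming the function lives in `oggm.workflow` and `gdirs` is a list of preprocessed glacier directories; the RGI ID is a placeholder):

from oggm import workflow

# merge all tributaries into the main glacier and return only that one
merged = workflow.merge_glacier_tasks(gdirs,
                                      main_rgi_id='RGI60-11.00897',
                                      return_all=False)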
Example #6
def get_cru_file(var=None):
    """Returns a path to the desired CRU baseline climate file.

    If the file is not present, download it.

    Parameters
    ----------
    var : str
        'tmp' for temperature
        'pre' for precipitation

    Returns
    -------
    str
        path to the CRU file
    """

    # Be sure input makes sense
    if var not in ['tmp', 'pre']:
        raise InvalidParamsError('CRU variable {} does not exist!'.format(var))

    # Download
    cru_filename = CRU_BASE.format(var)
    cru_url = CRU_SERVER + '{}/'.format(var) + cru_filename + '.gz'
    return utils.file_extractor(utils.file_downloader(cru_url))
Example #7
    def _get_2d_monthly_climate(self, heights, year=None):
        # first get the climate data
        warnings.warn('Attention: this has not been tested enough to be '
                      'sure that it works')
        if self.mb_type == 'mb_real_daily':
            return self._get_climate(heights, 'monthly', year=year)
        else:
            raise InvalidParamsError('_get_2d_monthly_climate works only '
                                     'with mb_real_daily as mb_type!')
Example #8
File: cfg.py (project: ehultee/oggm)
    def __setitem__(self, key, value):
        # Overrides the original dic to expand the path
        try:
            value = os.path.expanduser(value)
        except AttributeError:
            raise InvalidParamsError('The value you are trying to set does '
                                     'not seem to be a valid path: '
                                     '{}'.format(value))

        ResettingOrderedDict.__setitem__(self, key, value)
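In effect, string values are run through `os.path.expanduser` at assignment time, and non-string values (which have no `expanduser`) raise. A sketch of the expected behaviour, assuming `cfg.PATHS` is an instance of this dict subclass:

from oggm import cfg

cfg.initialize()
cfg.PATHS['working_dir'] = '~/OGGM/wd'  # stored expanded: /home/<user>/OGGM/wd
cfg.PATHS['working_dir'] = 42           # raises InvalidParamsError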
Example #9
def compute_ref_t_stars(gdirs):
    """ Detects the best t* for the reference glaciers and writes them to disk

    This task will be needed for mass balance calibration of custom climate
    data. For CRU and HISTALP baseline climate a precalibrated list is
    available and should be used instead.

    Parameters
    ----------
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
        will be filtered for reference glaciers
    """

    if not cfg.PARAMS['run_mb_calibration']:
        raise InvalidParamsError('Are you sure you want to calibrate the '
                                 'reference t*? There is a pre-calibrated '
                                 'version available. If you know what you are '
                                 'doing and still want to calibrate, set the '
                                 '`run_mb_calibration` parameter to `True`.')

    log.info('Compute the reference t* and mu* for WGMS glaciers')

    # Reference glaciers only if in the list and period is good
    ref_gdirs = utils.get_ref_mb_glaciers(gdirs)

    # Run
    from oggm.workflow import execute_entity_task
    out = execute_entity_task(t_star_from_refmb, ref_gdirs)

    # Loop write
    df = pd.DataFrame()
    for gdir, res in zip(ref_gdirs, out):
        # list of mus compatibles with refmb
        rid = gdir.rgi_id
        df.loc[rid, 'lon'] = gdir.cenlon
        df.loc[rid, 'lat'] = gdir.cenlat
        df.loc[rid, 'n_mb_years'] = len(gdir.get_ref_mb_data())
        df.loc[rid, 'tstar'] = res['t_star']
        df.loc[rid, 'bias'] = res['bias']

    # Write out
    df['tstar'] = df['tstar'].astype(int)
    df['n_mb_years'] = df['n_mb_years'].astype(int)
    file = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
    df.sort_index().to_csv(file)
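A minimal calibration sketch, assuming the function lives in `oggm.core.climate` and `gdirs` already have processed climate data:

from oggm import cfg
from oggm.core.climate import compute_ref_t_stars

cfg.PARAMS['run_mb_calibration'] = True  # opt in explicitly
compute_ref_t_stars(gdirs)  # writes ref_tstars.csv to the working dir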
Example #10
    def _log_param_change(self, key, value):

        prev = self.get(key)
        if prev is None:
            if key in ['baseline_y0', 'baseline_y1']:
                raise InvalidParamsError('The `baseline_y0` and `baseline_y1` '
                                         'parameters have been removed. '
                                         'You now have to set them explicitly '
                                         'in your call to '
                                         '`process_climate_data`.')

            log.workflow('WARNING: adding an unknown parameter '
                         '`{}`:`{}` to PARAMS.'.format(key, value))
            return

        if prev == value:
            return

        if key == 'use_multiprocessing':
            msg = 'ON' if value else 'OFF'
            log.workflow('Multiprocessing switched {} '.format(msg) +
                         'after user settings.')
            return

        if key == 'mp_processes':
            if value == -1:
                import multiprocessing
                value = multiprocessing.cpu_count()
                if PARAMS.get('use_multiprocessing', False):
                    log.workflow('Multiprocessing: using all available '
                                 'processors (N={})'.format(value))
            else:
                if PARAMS.get('use_multiprocessing', False):
                    log.workflow('Multiprocessing: using the requested number '
                                 'of processors (N={})'.format(value))
            return

        log.workflow("PARAMS['{}'] changed from `{}` to `{}`.".format(key,
                                                                      prev,
                                                                      value))
Example #11
def run_prepro_levels(rgi_version=None,
                      rgi_reg=None,
                      border=None,
                      output_folder='',
                      working_dir='',
                      dem_source='',
                      is_test=False,
                      demo=False,
                      test_rgidf=None,
                      test_intersects_file=None,
                      test_topofile=None,
                      test_crudir=None,
                      disable_mp=False,
                      timeout=0,
                      max_level=4,
                      logging_level='WORKFLOW'):
    """Does the actual job.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    dem_source : str
        which DEM source to use: default, SOURCE_NAME or ALL
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    demo : bool
        to run the prepro for the list of demo glaciers
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    test_crudir : str
        for testing purposes only
    disable_mp : bool
        disable multiprocessing
    timeout : int
        apply a timeout to the entity tasks (in seconds)
    max_level : int
        the maximum pre-processing level before stopping
    logging_level : str
        the logging level to use (DEBUG, INFO, WARNING, WORKFLOW)
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Input check
    if max_level not in [1, 2, 3, 4]:
        raise InvalidParamsError('max_level should be one of [1, 2, 3, 4]')

    # Time
    start = time.time()

    def _time_log():
        # Log util
        m, s = divmod(time.time() - start, 60)
        h, m = divmod(m, 60)
        log.workflow('OGGM prepro_levels is done! Time needed: '
                     '{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)))

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level=logging_level)

    # Local paths
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = not disable_mp

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    # Timeout
    cfg.PARAMS['task_timeout'] = timeout

    # For statistics
    climate_periods = [1920, 1960, 2000]

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    rgi_dir_name = 'RGI{}'.format(rgi_version)
    border_dir_name = 'b_{:03d}'.format(border)
    base_dir = os.path.join(output_folder, rgi_dir_name, border_dir_name)

    # Add a package version file
    utils.mkdir(base_dir)
    opath = os.path.join(base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    if demo:
        rgidf = utils.get_rgi_glacier_entities(cfg.DATA['demo_glaciers'].index)
    elif test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(
            utils.get_rgi_region_file(rgi_reg, version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Just for fun
        rgidf = rgidf.sample(4)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    # L1 - initialize working directories
    # Which DEM source?
    if dem_source.upper() == 'ALL':
        # This is the complex one, just do the job and leave
        log.workflow('Running prepro on ALL sources')
        for i, s in enumerate(utils.DEM_SOURCES):
            rs = i == 0
            rgidf['DEM_SOURCE'] = s
            log.workflow('Running prepro on sources: {}'.format(s))
            gdirs = workflow.init_glacier_regions(rgidf, reset=rs, force=rs)
            workflow.execute_entity_task(_rename_dem_folder, gdirs, source=s)

        # Compress all in output directory
        l_base_dir = os.path.join(base_dir, 'L1')
        workflow.execute_entity_task(utils.gdir_to_tar,
                                     gdirs,
                                     delete=False,
                                     base_dir=l_base_dir)
        utils.base_dir_to_tar(l_base_dir)

        _time_log()
        return

    if dem_source:
        # Force a given source
        rgidf['DEM_SOURCE'] = dem_source.upper()

    # L1 - go
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L1', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L1 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L1')
    workflow.execute_entity_task(utils.gdir_to_tar,
                                 gdirs,
                                 delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)
    if max_level == 1:
        _time_log()
        return

    # L2 - Tasks
    # Pre-download other files just in case
    if test_crudir is None:
        _ = utils.get_cru_file(var='tmp')
        _ = utils.get_cru_file(var='pre')
    else:
        cfg.PATHS['cru_dir'] = test_crudir

    workflow.execute_entity_task(tasks.process_cru_data, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L2', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L2 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L2')
    workflow.execute_entity_task(utils.gdir_to_tar,
                                 gdirs,
                                 delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)
    if max_level == 2:
        _time_log()
        return

    # L3 - Tasks
    task_list = [
        tasks.glacier_masks, tasks.compute_centerlines,
        tasks.initialize_flowlines, tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape, tasks.catchment_area,
        tasks.catchment_intersections, tasks.catchment_width_geom,
        tasks.catchment_width_correction, tasks.local_t_star,
        tasks.mu_star_calibration, tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion, tasks.filter_inversion_output,
        tasks.init_present_time_glacier
    ]
    for task in task_list:
        workflow.execute_entity_task(task, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L3', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)
    opath = os.path.join(sum_dir, 'climate_statistics_{}.csv'.format(rgi_reg))
    utils.compile_climate_statistics(gdirs,
                                     add_climate_period=climate_periods,
                                     path=opath)

    # L3 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L3')
    workflow.execute_entity_task(utils.gdir_to_tar,
                                 gdirs,
                                 delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)
    if max_level == 3:
        _time_log()
        return

    # L4 - No tasks: add some stats for consistency and make the dirs small
    sum_dir = os.path.join(base_dir, 'L4', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # Copy mini data to new dir
    base_dir = os.path.join(base_dir, 'L4')
    mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir,
                                              gdirs,
                                              base_dir=base_dir)

    # L4 OK - compress all in output directory
    workflow.execute_entity_task(utils.gdir_to_tar, mini_gdirs, delete=True)
    utils.base_dir_to_tar(base_dir)

    _time_log()
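A sketch of a direct call (this function is normally driven by the CLI wrapper shown in a later example; paths and values are placeholders):

run_prepro_levels(rgi_reg='11', border=80,
                  output_folder='/path/to/output',
                  working_dir='/path/to/workdir',
                  max_level=2, disable_mp=True)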
Example #12
def write_climate_file(gdir,
                       time,
                       prcp,
                       temp,
                       ref_pix_hgt,
                       ref_pix_lon,
                       ref_pix_lat,
                       gradient=None,
                       temp_std=None,
                       time_unit=None,
                       calendar=None,
                       source=None,
                       file_name='climate_historical',
                       filesuffix=''):
    """Creates a netCDF4 file with climate data timeseries.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        where to write the data
    time : ndarray
        the time array, in a format understood by netCDF4
    prcp : ndarray
        the precipitation array (unit: 'kg m-2 month-1')
    temp : ndarray
        the temperature array (unit: 'degC')
    ref_pix_hgt : float
        the elevation of the dataset's reference altitude
        (for correction). In practice it is the same altitude as the
        baseline climate.
    ref_pix_lon : float
        the location of the gridded data's grid point
    ref_pix_lat : float
        the location of the gridded data's grid point
    gradient : ndarray, optional
        a time-varying temperature gradient timeseries (written to the
        file if provided)
    temp_std : ndarray, optional
        the daily standard deviation of temperature (useful for PyGEM)
    time_unit : str
        the reference time unit for your time array. This should be chosen
        depending on the length of your data. The default is to choose
        it ourselves based on the starting year.
    calendar : str
        If you use an exotic calendar (e.g. 'noleap')
    source : str
        the climate data source (required)
    file_name : str
        How to name the file
    filesuffix : str
        Apply a suffix to the file
    """

    if source == 'ERA5_daily' and filesuffix == '':
        raise InvalidParamsError('filesuffix should be "_daily", as the '
                                 'file_name climate_historical normally '
                                 'contains monthly data')
    # overwrite is default
    fpath = gdir.get_filepath(file_name, filesuffix=filesuffix)
    if os.path.exists(fpath):
        os.remove(fpath)

    if source is None:
        raise InvalidParamsError('`source` kwarg is required')

    zlib = cfg.PARAMS['compress_climate_netcdf']

    try:
        y0 = time[0].year
        y1 = time[-1].year
    except AttributeError:
        time = pd.DatetimeIndex(time)
        y0 = time[0].year
        y1 = time[-1].year

    if time_unit is None:
        # http://pandas.pydata.org/pandas-docs/stable/timeseries.html
        # #timestamp-limitations
        if y0 > 1800:
            time_unit = 'days since 1801-01-01 00:00:00'
        elif y0 >= 0:
            time_unit = ('days since {:04d}-01-01 '
                         '00:00:00'.format(time[0].year))
        else:
            raise InvalidParamsError('Time format not supported')

    with ncDataset(fpath, 'w', format='NETCDF4') as nc:
        nc.ref_hgt = ref_pix_hgt
        nc.ref_pix_lon = ref_pix_lon
        nc.ref_pix_lat = ref_pix_lat
        nc.ref_pix_dis = haversine(gdir.cenlon, gdir.cenlat, ref_pix_lon,
                                   ref_pix_lat)
        nc.climate_source = source
        if time[0].month == 1:
            nc.hydro_yr_0 = y0
        else:
            nc.hydro_yr_0 = y0 + 1
        nc.hydro_yr_1 = y1

        nc.createDimension('time', None)

        nc.author = 'OGGM'
        nc.author_info = 'Open Global Glacier Model'

        timev = nc.createVariable('time', 'i4', ('time', ))

        tatts = {'units': time_unit}
        if calendar is None:
            calendar = 'standard'

        tatts['calendar'] = calendar
        try:
            numdate = netCDF4.date2num([t for t in time],
                                       time_unit,
                                       calendar=calendar)
        except TypeError:
            # numpy's broken datetime only works for us precision
            time = time.astype('M8[us]').astype(datetime.datetime)
            numdate = netCDF4.date2num(time, time_unit, calendar=calendar)

        timev.setncatts(tatts)
        timev[:] = numdate

        v = nc.createVariable('prcp', 'f4', ('time', ), zlib=zlib)
        v.units = 'kg m-2'
        # this could be made more beautiful
        # just rough estimate
        if len(prcp) > (nc.hydro_yr_1 - nc.hydro_yr_0 + 1) * 30 * 12:
            v.long_name = ("total daily precipitation amount, "
                           "assumed same for each day of month")
        elif len(prcp) == (nc.hydro_yr_1 - nc.hydro_yr_0 + 1) * 12:
            v.long_name = 'total monthly precipitation amount'
        else:
            v.long_name = 'total monthly precipitation amount'
            warnings.warn("there might be a conflict in the prcp timeseries, "
                          "please check!")

        v[:] = prcp

        v = nc.createVariable('temp', 'f4', ('time', ), zlib=zlib)
        v.units = 'degC'
        if source == 'ERA5_daily' and len(temp) > (y1 - y0) * 30 * 12:
            v.long_name = '2m daily temperature at height ref_hgt'
        elif source == 'ERA5_daily' and len(temp) <= (y1 - y0) * 30 * 12:
            raise InvalidParamsError('if the climate dataset (here source) '
                                     'is ERA5_daily, temperatures should be '
                                     'in daily resolution, please check or '
                                     'set source to another climate dataset')
        else:
            v.long_name = '2m monthly temperature at height ref_hgt'

        v[:] = temp

        if gradient is not None:
            v = nc.createVariable('gradient', 'f4', ('time', ), zlib=zlib)
            v.units = 'degC m-1'
            v.long_name = ('temperature gradient from local regression or '
                           'lapse rates')
            v[:] = gradient

        if temp_std is not None:
            v = nc.createVariable('temp_std', 'f4', ('time', ), zlib=zlib)
            v.units = 'degC'
            v.long_name = 'standard deviation of daily temperatures'
            v[:] = temp_std
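A minimal sketch with synthetic monthly data (two full calendar years starting in January; `gdir` is a placeholder GlacierDirectory):

import numpy as np
import pandas as pd

time = pd.date_range('2000-01-01', periods=24, freq='MS')
prcp = np.random.uniform(0, 200, 24)  # kg m-2 month-1
temp = np.random.uniform(-15, 5, 24)  # degC

write_climate_file(gdir, time, prcp, temp,
                   ref_pix_hgt=3000., ref_pix_lon=10.7, ref_pix_lat=46.8,
                   source='CUSTOM')  # `source` is required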
Example #13
def parse_args(args):
    """Check input arguments and env variables"""

    # CLI args
    description = ('Generate the preprocessed OGGM glacier directories for '
                   'this OGGM version.')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--map-border',
                        type=int,
                        help='the size of the map border. Is required if '
                        '$OGGM_MAP_BORDER is not set.')
    parser.add_argument('--rgi-reg',
                        type=str,
                        help='the rgi region to process. Is required if '
                        '$OGGM_RGI_REG is not set.')
    parser.add_argument('--rgi-version',
                        type=str,
                        help='the RGI version to use. Defaults to the OGGM '
                        'default.')
    parser.add_argument('--max-level',
                        type=int,
                        default=4,
                        help='the maximum level you want to run the '
                        'pre-processing for (1, 2, 3 or 4).')
    parser.add_argument('--working-dir',
                        type=str,
                        help='path to the directory where to write the '
                        'output. Defaults to current directory or '
                        '$OGGM_WORKDIR.')
    parser.add_argument('--output',
                        type=str,
                        help='path to the directory where to write the '
                        'output. Defaults to current directory or '
                        '$OGGM_OUTDIR.')
    parser.add_argument('--dem-source',
                        type=str,
                        default='',
                        help='which DEM source to use. Possible options are '
                        'the name of a specific DEM (e.g. RAMP, SRTM...) '
                        'or ALL, in which case all available DEMs will '
                        'be processed and adjoined with a suffix at the '
                        'end of the file name. The ALL option is only '
                        'compatible with level 1 folders, after which '
                        'the processing will stop. The default is to use '
                        'the default OGGM DEM.')
    parser.add_argument('--disable-mp',
                        nargs='?',
                        const=True,
                        default=False,
                        help='if you want to disable multiprocessing.')
    parser.add_argument('--timeout',
                        type=int,
                        default=0,
                        help='apply a timeout to the entity tasks '
                        '(in seconds).')
    parser.add_argument('--demo',
                        nargs='?',
                        const=True,
                        default=False,
                        help='if you want to run the prepro for the '
                        'list of demo glaciers.')
    parser.add_argument('--test',
                        nargs='?',
                        const=True,
                        default=False,
                        help='if you want to do a test on a couple of '
                        'glaciers first.')
    parser.add_argument('--logging-level',
                        type=str,
                        default='WORKFLOW',
                        help='the logging level to use (DEBUG, INFO, WARNING, '
                        'WORKFLOW).')
    args = parser.parse_args(args)

    # Check input
    rgi_reg = args.rgi_reg
    if args.demo:
        rgi_reg = 0
    if not rgi_reg and not args.demo:
        rgi_reg = os.environ.get('OGGM_RGI_REG', None)
        if rgi_reg is None:
            raise InvalidParamsError('--rgi-reg is required!')
    rgi_reg = '{:02}'.format(int(rgi_reg))

    rgi_version = args.rgi_version

    border = args.map_border
    if not border:
        border = os.environ.get('OGGM_MAP_BORDER', None)
        if border is None:
            raise InvalidParamsError('--map-border is required!')

    working_dir = args.working_dir
    if not working_dir:
        working_dir = os.environ.get('OGGM_WORKDIR', '')

    output_folder = args.output
    if not output_folder:
        output_folder = os.environ.get('OGGM_OUTDIR', '')

    border = int(border)
    output_folder = os.path.abspath(output_folder)
    working_dir = os.path.abspath(working_dir)

    # All good
    return dict(rgi_version=rgi_version,
                rgi_reg=rgi_reg,
                border=border,
                output_folder=output_folder,
                working_dir=working_dir,
                is_test=args.test,
                demo=args.demo,
                dem_source=args.dem_source,
                max_level=args.max_level,
                timeout=args.timeout,
                disable_mp=args.disable_mp,
                logging_level=args.logging_level)
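The parser can be exercised directly with a synthetic argument list, e.g. in tests:

kwargs = parse_args(['--rgi-reg', '11', '--map-border', '80'])
run_prepro_levels(**kwargs)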
Example #14
    def step(self, dt):
        """Advance one step."""

        # Just a check to avoid useless computations
        if dt <= 0:
            raise InvalidParamsError('dt needs to be strictly positive')

        # Guarantee a precise arrival on a specific date if asked
        min_dt = dt if dt < self.min_dt else self.min_dt
        dt = utils.clip_scalar(dt, min_dt, self.max_dt)

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=RuntimeWarning)

            fl = self.fls[0]
            dx = fl.dx_meter

            # Switch to the notation from the MUSCL_1D example
            # This is useful to ensure that the MUSCL-SuperBee code
            # is working as it has been benchmarked many times

            # mass balance
            m_dot = self.get_mb(fl.surface_h, self.yr, fl_id=id(fl))
            # get in the surface elevation
            S = fl.surface_h
            # get the bed
            B = fl.bed_h
            # define Glen's law here
            N = self.glen_n
            # this is the correct Gamma !!
            Gamma = 2. * self.glen_a * (self.rho * G)**N / (N + 2.)
            # time stepping
            c_stab = 0.165

            # define the finite difference indices
            k = np.arange(0, fl.nx)
            kp = np.hstack([np.arange(1, fl.nx), fl.nx - 1])
            kpp = np.hstack([np.arange(2, fl.nx), fl.nx - 1, fl.nx - 1])
            km = np.hstack([0, np.arange(0, fl.nx - 1)])
            kmm = np.hstack([0, 0, np.arange(0, fl.nx - 2)])

            # I'm gonna introduce another level of adaptive time stepping here,
            # which is probably not necessary. However I keep it to be
            # consistent with my benchmarked and tested code.
            # If the OGGM time stepping is correctly working, this loop
            # should never run more than once
            # Fabi: actually no, it is your job to choose the right time step
            # but you can let OGGM decide whether a new time step is needed
            # or not -> to be improved one day
            stab_t = 0.
            while stab_t < dt:
                H = S - B

                # MUSCL scheme up. "up" denotes here the k+1/2 flux boundary
                r_up_m = (H[k] - H[km]) / (H[kp] - H[k])  # Eq. 27
                # Eq. 23
                H_up_m = H[k] + 0.5 * self.phi(r_up_m) * (H[kp] - H[k])
                # Eq. 27, k+1 is used instead of k
                r_up_p = (H[kp] - H[k]) / (H[kpp] - H[kp])
                # Eq. 24
                H_up_p = H[kp] - 0.5 * self.phi(r_up_p) * (H[kpp] - H[kp])

                # surface slope gradient
                s_grad_up = ((S[kp] - S[k])**2. / dx**2.)**((N - 1.) / 2.)
                # like Eq. 30, now using Eq. 23 instead of Eq. 24
                D_up_m = Gamma * H_up_m**(N + 2.) * s_grad_up
                D_up_p = Gamma * H_up_p**(N + 2.) * s_grad_up  # Eq. 30

                # Eq. 31
                D_up_min = np.minimum(D_up_m, D_up_p)
                # Eq. 32
                D_up_max = np.maximum(D_up_m, D_up_p)
                D_up = np.zeros(fl.nx)

                # Eq. 33
                cond = (S[kp] <= S[k]) & (H_up_m <= H_up_p)
                D_up[cond] = D_up_min[cond]
                cond = (S[kp] <= S[k]) & (H_up_m > H_up_p)
                D_up[cond] = D_up_max[cond]
                cond = (S[kp] > S[k]) & (H_up_m <= H_up_p)
                D_up[cond] = D_up_max[cond]
                cond = (S[kp] > S[k]) & (H_up_m > H_up_p)
                D_up[cond] = D_up_min[cond]

                # MUSCL scheme down. "down" denotes the k-1/2 flux boundary
                r_dn_m = (H[km] - H[kmm]) / (H[k] - H[km])
                H_dn_m = H[km] + 0.5 * self.phi(r_dn_m) * (H[k] - H[km])
                r_dn_p = (H[k] - H[km]) / (H[kp] - H[k])
                H_dn_p = H[k] - 0.5 * self.phi(r_dn_p) * (H[kp] - H[k])

                # calculate the slope gradient
                s_grad_dn = ((S[k] - S[km])**2. / dx**2.)**((N - 1.) / 2.)
                D_dn_m = Gamma * H_dn_m**(N + 2.) * s_grad_dn
                D_dn_p = Gamma * H_dn_p**(N + 2.) * s_grad_dn

                D_dn_min = np.minimum(D_dn_m, D_dn_p)
                D_dn_max = np.maximum(D_dn_m, D_dn_p)
                D_dn = np.zeros(fl.nx)

                cond = (S[k] <= S[km]) & (H_dn_m <= H_dn_p)
                D_dn[cond] = D_dn_min[cond]
                cond = (S[k] <= S[km]) & (H_dn_m > H_dn_p)
                D_dn[cond] = D_dn_max[cond]
                cond = (S[k] > S[km]) & (H_dn_m <= H_dn_p)
                D_dn[cond] = D_dn_max[cond]
                cond = (S[k] > S[km]) & (H_dn_m > H_dn_p)
                D_dn[cond] = D_dn_min[cond]

                # Eq. 37
                dt_stab = c_stab * dx**2. / max(max(abs(D_up)), max(abs(D_dn)))
                dt_use = min(dt_stab, dt - stab_t)
                stab_t = stab_t + dt_use

                # explicit time stepping scheme, Eq. 36
                div_q = (D_up * (S[kp] - S[k]) / dx - D_dn *
                         (S[k] - S[km]) / dx) / dx
                # Eq. 35
                S = S[k] + (m_dot + div_q) * dt_use

                # Eq. 7
                S = np.maximum(S, B)

        # Done with the loop, prepare output
        fl.thick = S - B

        # Next step
        self.t += dt
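The loop relies on `self.phi`, a flux limiter that is not part of this excerpt. A common choice for MUSCL-superbee schemes is the superbee limiter; a sketch under that assumption:

import numpy as np

def phi(r):
    # superbee limiter: phi(r) = max(0, min(2r, 1), min(r, 2)),
    # applied element-wise
    return np.maximum(0, np.maximum(np.minimum(2 * r, 1),
                                    np.minimum(r, 2)))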
Example #15
File: cfg.py (project: ehultee/oggm)
def oggm_static_paths():
    """Initialise the OGGM paths from the config file."""

    global PATHS, PARAMS

    # See if the file is there, if not create it
    if not os.path.exists(CONFIG_FILE):
        dldir = os.path.join(os.path.expanduser('~'), 'OGGM')
        config = ConfigObj()
        config['dl_cache_dir'] = os.path.join(dldir, 'download_cache')
        config['dl_cache_readonly'] = False
        config['tmp_dir'] = os.path.join(dldir, 'tmp')
        config['rgi_dir'] = os.path.join(dldir, 'rgi')
        config['test_dir'] = os.path.join(dldir, 'tests')
        config['has_internet'] = True
        config.filename = CONFIG_FILE
        config.write()

    # OK, read in the file
    try:
        config = ConfigObj(CONFIG_FILE, file_error=True)
    except (ConfigObjError, IOError) as e:
        log.critical('Config file could not be parsed (%s): %s', CONFIG_FILE,
                     e)
        sys.exit()

    # Check that all keys are here
    for k in [
            'dl_cache_dir', 'dl_cache_readonly', 'tmp_dir', 'rgi_dir',
            'test_dir', 'has_internet'
    ]:
        if k not in config:
            raise InvalidParamsError('The oggm config file ({}) should have '
                                     'an entry for {}.'.format(CONFIG_FILE, k))

    # Override defaults with env variables if available
    if os.environ.get('OGGM_DOWNLOAD_CACHE_RO') is not None:
        ro = bool(strtobool(os.environ.get('OGGM_DOWNLOAD_CACHE_RO')))
        config['dl_cache_readonly'] = ro
    if os.environ.get('OGGM_DOWNLOAD_CACHE') is not None:
        config['dl_cache_dir'] = os.environ.get('OGGM_DOWNLOAD_CACHE')
    if os.environ.get('OGGM_EXTRACT_DIR') is not None:
        # This is for the directories where OGGM needs to extract things
        # On the cluster it might be useful to do it on a fast disc
        edir = os.path.abspath(os.environ.get('OGGM_EXTRACT_DIR'))
        config['tmp_dir'] = os.path.join(edir, 'tmp')
        config['rgi_dir'] = os.path.join(edir, 'rgi')

    # Fill the PATH dict
    for k, v in config.iteritems():
        if not k.endswith('_dir'):
            continue
        PATHS[k] = os.path.abspath(os.path.expanduser(v))

    # Other
    PARAMS.do_log = False
    PARAMS['has_internet'] = config.as_bool('has_internet')
    PARAMS['dl_cache_readonly'] = config.as_bool('dl_cache_readonly')
    PARAMS.do_log = True

    # Create cache dir if possible
    if not os.path.exists(PATHS['dl_cache_dir']):
        if not PARAMS['dl_cache_readonly']:
            os.makedirs(PATHS['dl_cache_dir'])
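For reference, the file created above (and read back by ConfigObj) is a plain INI file; with <home> standing in for the user's home directory, its content would be:

dl_cache_dir = <home>/OGGM/download_cache
dl_cache_readonly = False
tmp_dir = <home>/OGGM/tmp
rgi_dir = <home>/OGGM/rgi
test_dir = <home>/OGGM/tests
has_internet = True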
Example #16
def sia_thickness(slope,
                  width,
                  flux,
                  shape='rectangular',
                  glen_a=None,
                  fs=None,
                  shape_factor=None):
    """Computes the ice thickness from mass-conservation.

    This is a utility function tested against the true OGGM inversion
    function. Useful for teaching and inversion with calving.

    Parameters
    ----------
    slope : -np.gradient(hgt, dx)
    width : section width in m
    flux : mass flux in m3 s-1
    shape : 'rectangular' or 'parabolic'
    glen_a : Glen A, defaults to PARAMS
    fs : sliding, defaults to PARAMS
    shape_factor: for lateral drag

    Returns
    -------
    the ice thickness (in m)
    """

    if glen_a is None:
        glen_a = cfg.PARAMS['inversion_glen_a']
    if fs is None:
        fs = cfg.PARAMS['inversion_fs']
    if shape not in ['parabolic', 'rectangular']:
        raise InvalidParamsError('shape must be `parabolic` or `rectangular`, '
                                 'not: {}'.format(shape))

    _inv_function = _inversion_simple if fs == 0 else _inversion_poly

    # Ice flow params
    fd = 2. / (cfg.PARAMS['glen_n'] + 2) * glen_a
    rho = cfg.PARAMS['ice_density']

    # Clip the slope, in degrees
    clip_angle = cfg.PARAMS['min_slope']

    # Clip slope to avoid negative and small slopes
    slope = utils.clip_array(slope, np.deg2rad(clip_angle), np.pi / 2.)

    # Convert the flux to m2 s-1 (averaged to represent the sections center)
    flux_a0 = 1 if shape == 'rectangular' else 1.5
    flux_a0 *= flux / width

    # Polynomial factors (a5 = 1)
    a0 = -flux_a0 / ((rho * cfg.G * slope)**3 * fd)
    a3 = fs / fd

    # Inversion with shape factors?
    sf_func = None
    if shape_factor == 'Adhikari' or shape_factor == 'Nye':
        sf_func = utils.shape_factor_adhikari
    elif shape_factor == 'Huss':
        sf_func = utils.shape_factor_huss

    sf = np.ones(slope.shape)  # Default shape factor is 1
    if sf_func is not None:

        # Start iteration for shape factor with first guess of 1
        i = 0
        sf_diff = np.ones(slope.shape)

        # Some hard-coded factors here
        sf_tol = 1e-2
        max_sf_iter = 20

        while i < max_sf_iter and np.any(sf_diff > sf_tol):
            out_thick = _compute_thick(a0, a3, flux_a0, sf, _inv_function)
            is_rectangular = np.repeat(shape == 'rectangular', len(width))
            sf_diff[:] = sf[:]
            sf = sf_func(width, out_thick, is_rectangular)
            sf_diff = sf_diff - sf
            i += 1

        log.info('Shape factor {:s} used, took {:d} iterations for '
                 'convergence.'.format(shape_factor, i))

    return _compute_thick(a0, a3, flux_a0, sf, _inv_function)
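A usage sketch with synthetic inputs (assuming the function lives in `oggm.core.inversion`; values are illustrative):

import numpy as np
from oggm import cfg
from oggm.core.inversion import sia_thickness

cfg.initialize()  # provides the default glen_a, fs, etc.
slope = np.deg2rad([10., 15., 20.])   # rad
width = np.array([500., 400., 300.])  # m
flux = np.array([10., 8., 5.])        # m3 s-1
thick = sia_thickness(slope, width, flux, shape='parabolic')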
Example #17
def process_gcm_data(gdir,
                     filesuffix='',
                     prcp=None,
                     temp=None,
                     year_range=('1961', '1990'),
                     scale_stddev=True,
                     time_unit=None,
                     calendar=None,
                     source=''):
    """ Applies the anomaly method to GCM climate data

    This function can be applied to any GCM data, if it is provided in a
    suitable :py:class:`xarray.DataArray`. See Parameter description for
    format details.

    For CESM-LME a specific function :py:func:`tasks.process_cesm_data` is
    available which does the preprocessing of the data and subsequently calls
    this function.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        where to write the data
    filesuffix : str
        append a suffix to the filename (useful for ensemble experiments).
    prcp : :py:class:`xarray.DataArray`
        | monthly total precipitation [mm month-1]
        | Coordinates:
        | lat float64
        | lon float64
        | time: cftime object
    temp : :py:class:`xarray.DataArray`
        | monthly temperature [K]
        | Coordinates:
        | lat float64
        | lon float64
        | time cftime object
    year_range : tuple of str
        the year range for which you want to compute the anomalies. Default
        is `('1961', '1990')`
    scale_stddev : bool
        whether or not to scale the temperature standard deviation as well
    time_unit : str
        The unit conversion for NetCDF files. It must be adapted to the
        length of the time series. The default is to choose
        it ourselves based on the starting year.
        For example: 'days since 0850-01-01 00:00:00'
    calendar : str
        If you use an exotic calendar (e.g. 'noleap')
    source : str
        For metadata: the source of the climate data
    """

    # Standard sanity checks
    months = temp['time.month']
    if months[0] != 1:
        raise ValueError('We expect the files to start in January!')
    if months[-1] < 10:
        raise ValueError('We expect the files to end in December!')

    if (np.abs(temp['lon']) > 180) or (np.abs(prcp['lon']) > 180):
        raise ValueError('We expect the longitude coordinates to be within '
                         '[-180, 180].')

    # from normal years to hydrological years
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    if sm != 1:
        prcp = prcp[sm - 1:sm - 13].load()
        temp = temp[sm - 1:sm - 13].load()

    assert len(prcp) // 12 == len(prcp) / 12, "Somehow we didn't get full years"
    assert len(temp) // 12 == len(temp) / 12, "Somehow we didn't get full years"

    # Get the reference data to apply the anomaly to
    fpath = gdir.get_filepath('climate_historical')
    with xr.open_dataset(fpath) as ds_ref:

        ds_ref = ds_ref.sel(time=slice(*year_range))

        # compute monthly anomalies
        # of temp
        if scale_stddev:
            # This is a bit more arithmetic
            ts_tmp_sel = temp.sel(time=slice(*year_range))
            if len(ts_tmp_sel) // 12 != len(ts_tmp_sel) / 12:
                raise InvalidParamsError('year_range cannot contain the first '
                                         'or last calendar year in the series')
            if ((len(ts_tmp_sel) // 12) % 2) == 1:
                raise InvalidParamsError('We need an even number of years '
                                         'for this to work')
            ts_tmp_std = ts_tmp_sel.groupby('time.month').std(dim='time')
            std_fac = ds_ref.temp.groupby('time.month').std(
                dim='time') / ts_tmp_std
            if sm != 1:
                # Just to avoid useless roll
                std_fac = std_fac.roll(month=13 - sm, roll_coords=True)
            std_fac = np.tile(std_fac.data, len(temp) // 12)
            # We need an even number of years for this to work
            win_size = len(ts_tmp_sel) + 1

            def roll_func(x, axis=None):
                x = x[:, ::12]
                n = len(x[0, :]) // 2
                xm = np.nanmean(x, axis=axis)
                return xm + (x[:, n] - xm) * std_fac

            temp = temp.rolling(time=win_size, center=True,
                                min_periods=1).reduce(roll_func)

        ts_tmp_sel = temp.sel(time=slice(*year_range))
        if len(ts_tmp_sel.time) != len(ds_ref.time):
            raise InvalidParamsError('The reference climate period and the '
                                     'GCM period after window selection do '
                                     'not match.')
        ts_tmp_avg = ts_tmp_sel.groupby('time.month').mean(dim='time')
        ts_tmp = temp.groupby('time.month') - ts_tmp_avg
        # of precip -- scaled anomalies
        ts_pre_avg = prcp.sel(time=slice(*year_range))
        ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
        ts_pre_ano = prcp.groupby('time.month') - ts_pre_avg
        # scaled anomalies is the default. Standard anomalies above
        # are used later for where ts_pre_avg == 0
        ts_pre = prcp.groupby('time.month') / ts_pre_avg

        # for temp
        loc_tmp = ds_ref.temp.groupby('time.month').mean()
        ts_tmp = ts_tmp.groupby('time.month') + loc_tmp

        # for prcp
        loc_pre = ds_ref.prcp.groupby('time.month').mean()
        # scaled anomalies
        ts_pre = ts_pre.groupby('time.month') * loc_pre
        # standard anomalies
        ts_pre_ano = ts_pre_ano.groupby('time.month') + loc_pre
        # Correct infinite values with standard anomalies
        ts_pre.values = np.where(np.isfinite(ts_pre.values), ts_pre.values,
                                 ts_pre_ano.values)
        # The previous step might create negative values (unlikely). Clip them
        ts_pre.values = utils.clip_min(ts_pre.values, 0)

        assert np.all(np.isfinite(ts_pre.values))
        assert np.all(np.isfinite(ts_tmp.values))

        gdir.write_monthly_climate_file(temp.time.values,
                                        ts_pre.values,
                                        ts_tmp.values,
                                        float(ds_ref.ref_hgt),
                                        prcp.lon.values,
                                        prcp.lat.values,
                                        time_unit=time_unit,
                                        calendar=calendar,
                                        file_name='gcm_data',
                                        source=source,
                                        filesuffix=filesuffix)
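A hedged sketch of a call with pre-loaded GCM data (`gcm_file.nc` and the variable names are placeholders; `prcp` must be in mm month-1 and `temp` in K, with scalar lon/lat coordinates):

import xarray as xr

with xr.open_dataset('gcm_file.nc') as ds:
    process_gcm_data(gdir,
                     prcp=ds.prcp, temp=ds.temp,
                     year_range=('1961', '1990'),
                     filesuffix='_CESM',
                     source='CESM-LME')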
Example #18
def parse_args(args):
    """Check input arguments and env variables"""

    # CLI args
    description = ('Generate the preprocessed OGGM glacier directories for '
                   'this OGGM version.')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--map-border',
                        type=int,
                        help='the size of the map border. Is required if '
                        '$OGGM_MAP_BORDER is not set.')
    parser.add_argument('--rgi-reg',
                        type=str,
                        help='the rgi region to process. Is required if '
                        '$OGGM_RGI_REG is not set.')
    parser.add_argument('--rgi-version',
                        type=str,
                        help='the RGI version to use. Defaults to the OGGM '
                        'default.')
    parser.add_argument('--start-level',
                        type=int,
                        default=0,
                        help='the pre-processed level to start from (default '
                        'is to start from 0). If set, you will need to '
                        'indicate --start-base-url as well.')
    parser.add_argument('--start-base-url',
                        type=str,
                        help='the pre-processed base-url to fetch the data '
                        'from when starting from level > 0.')
    parser.add_argument('--max-level',
                        type=int,
                        default=5,
                        help='the maximum level you want to run the '
                        'pre-processing for (1, 2, 3, 4 or 5).')
    parser.add_argument('--working-dir',
                        type=str,
                        help='path to the directory where to write the '
                        'output. Defaults to current directory or '
                        '$OGGM_WORKDIR.')
    parser.add_argument('--params-file',
                        type=str,
                        help='path to the OGGM parameter file to use in place '
                        'of the default one.')
    parser.add_argument('--ref-tstars-base-url',
                        type=str,
                        help='the url where to find the pre-calibrated '
                        'reference tstar list. Required as of v1.4.')
    parser.add_argument('--output',
                        type=str,
                        help='path to the directory where to write the '
                        'output. Defaults to current directory or '
                        '$OGGM_OUTDIR.')
    parser.add_argument('--logging-level',
                        type=str,
                        default='WORKFLOW',
                        help='the logging level to use (DEBUG, INFO, WARNING, '
                        'WORKFLOW).')
    parser.add_argument('--elev-bands',
                        nargs='?',
                        const=True,
                        default=False,
                        help='compute the flowlines based on the Huss & Hock '
                        '2015 method instead of the OGGM default, which is '
                        'a mix of elev_bands and centerlines.')
    parser.add_argument('--centerlines-only',
                        nargs='?',
                        const=True,
                        default=False,
                        help='compute the flowlines based on the OGGM '
                        'centerline(s) method instead of the OGGM '
                        'default, which is a mix of elev_bands and '
                        'centerlines.')
    parser.add_argument('--match-regional-geodetic-mb',
                        type=str,
                        default='',
                        help='match regional SMB values to geodetic estimates '
                        '(currently hugonnet: Hugonnet et al., 2020, or '
                        'zemp: Zemp et al, 2019) '
                        'by shifting the SMB residual.')
    parser.add_argument('--match-geodetic-mb-per-glacier',
                        type=str,
                        default='',
                        help='match SMB values to geodetic estimates '
                        '(currently hugonnet: Hugonnet et al., '
                        '2020, only).')
    parser.add_argument('--evolution-model',
                        type=str,
                        default='fl_sia',
                        help='which geometry evolution model to use: '
                        '`fl_sia` (default), or `massredis` (mass '
                        'redistribution curve).')
    parser.add_argument('--dem-source',
                        type=str,
                        default='',
                        help='which DEM source to use. Possible options are '
                        'the name of a specific DEM (e.g. RAMP, SRTM...) '
                        'or ALL, in which case all available DEMs will '
                        'be processed and adjoined with a suffix at the '
                        'end of the file name. The ALL option is only '
                        'compatible with level 1 folders, after which '
                        'the processing will stop. The default is to use '
                        'the default OGGM DEM.')
    parser.add_argument('--add-consensus',
                        nargs='?',
                        const=True,
                        default=False,
                        help='adds (reprojects) the consensus estimates '
                        'thickness to the glacier directories. '
                        'With --elev-bands, the data will also be '
                        'binned.')
    parser.add_argument('--demo',
                        nargs='?',
                        const=True,
                        default=False,
                        help='if you want to run the prepro for the '
                        'list of demo glaciers.')
    parser.add_argument('--test',
                        nargs='?',
                        const=True,
                        default=False,
                        help='if you want to do a test on a couple of '
                        'glaciers first.')
    parser.add_argument('--test-ids',
                        nargs='+',
                        help='if --test, specify the RGI ids to run separated '
                        'by a space (default: 4 randomly selected).')
    parser.add_argument('--disable-dl-verify',
                        nargs='?',
                        const=True,
                        default=False,
                        help='if used OGGM downloads will not be verified '
                        'against a hash sum.')
    parser.add_argument('--disable-mp',
                        nargs='?',
                        const=True,
                        default=False,
                        help='if you want to disable multiprocessing.')
    parser.add_argument('--dynamic_spinup',
                        type=str,
                        default='',
                        help="include a dynamic spinup for matching 'area' OR "
                        "'volume' at the RGI-date")

    args = parser.parse_args(args)

    # Check input
    rgi_reg = args.rgi_reg
    if args.demo:
        rgi_reg = 0
    if not rgi_reg and not args.demo:
        rgi_reg = os.environ.get('OGGM_RGI_REG', None)
        if rgi_reg is None:
            raise InvalidParamsError('--rgi-reg is required!')
    rgi_reg = '{:02}'.format(int(rgi_reg))
    ok_regs = ['{:02}'.format(int(r)) for r in range(1, 20)]
    if not args.demo and rgi_reg not in ok_regs:
        raise InvalidParamsError('--rgi-reg should range from 01 to 19!')

    rgi_version = args.rgi_version

    border = args.map_border
    if not border:
        border = os.environ.get('OGGM_MAP_BORDER', None)
        if border is None:
            raise InvalidParamsError('--map-border is required!')

    working_dir = args.working_dir
    if not working_dir:
        working_dir = os.environ.get('OGGM_WORKDIR', '')

    output_folder = args.output
    if not output_folder:
        output_folder = os.environ.get('OGGM_OUTDIR', '')

    border = int(border)
    output_folder = os.path.abspath(output_folder)
    working_dir = os.path.abspath(working_dir)

    # All good
    return dict(
        rgi_version=rgi_version,
        rgi_reg=rgi_reg,
        border=border,
        output_folder=output_folder,
        working_dir=working_dir,
        params_file=args.params_file,
        is_test=args.test,
        test_ids=args.test_ids,
        demo=args.demo,
        dem_source=args.dem_source,
        start_level=args.start_level,
        start_base_url=args.start_base_url,
        max_level=args.max_level,
        disable_mp=args.disable_mp,
        logging_level=args.logging_level,
        elev_bands=args.elev_bands,
        centerlines_only=args.centerlines_only,
        match_regional_geodetic_mb=args.match_regional_geodetic_mb,
        match_geodetic_mb_per_glacier=args.match_geodetic_mb_per_glacier,
        add_consensus=args.add_consensus,
        disable_dl_verify=args.disable_dl_verify,
        ref_tstars_base_url=args.ref_tstars_base_url,
        evolution_model=args.evolution_model,
        dynamic_spinup=False
        if args.dynamic_spinup == '' else args.dynamic_spinup,
    )
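
A quick way to check the precedence logic above (CLI flags first, environment variables as fallback) is to call the parser both ways. This is a hedged usage sketch, assuming all other flags defined earlier in the parser carry defaults; OGGM_RGI_REG and OGGM_MAP_BORDER are the variable names checked in the code above:

import os

# explicit flags
kwargs = parse_args(['--rgi-reg', '11', '--map-border', '80'])
assert kwargs['rgi_reg'] == '11' and kwargs['border'] == 80

# same result via the environment fallback
os.environ['OGGM_RGI_REG'] = '11'
os.environ['OGGM_MAP_BORDER'] = '80'
kwargs = parse_args([])
assert kwargs['rgi_reg'] == '11' and kwargs['border'] == 80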
Example No. 19
def parse_args(args):
    """Check input arguments and env variables"""

    # CLI args
    description = ('Run an OGGM benchmark on a selected RGI Region. '
                   'This writes a benchmark_{border}.txt file where '
                   'the results are summarized')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--map-border',
                        type=int,
                        help='the size of the map border. Is required if '
                        '$OGGM_MAP_BORDER is not set.')
    parser.add_argument('--rgi-reg',
                        type=str,
                        help='the rgi region to process. Is required if '
                        '$OGGM_RGI_REG is not set.')
    parser.add_argument('--rgi-version',
                        type=str,
                        help='the RGI version to use. Defaults to the OGGM '
                        'default.')
    parser.add_argument('--working-dir',
                        type=str,
                        help='path to the directory where to write the '
                        'output. Defaults to current directory or '
                        '$OGGM_WORKDIR.')
    parser.add_argument('--output',
                        type=str,
                        help='path to the directory where to write the '
                        'output. Defaults to current directory or '
                        '$OGGM_OUTDIR.')
    parser.add_argument('--test',
                        nargs='?',
                        const=True,
                        default=False,
                        help='if you want to do a test on a couple of '
                        'glaciers first.')
    args = parser.parse_args(args)

    # Check input
    rgi_reg = args.rgi_reg
    if not rgi_reg:
        rgi_reg = os.environ.get('OGGM_RGI_REG', None)
        if rgi_reg is None:
            raise InvalidParamsError('--rgi-reg is required!')
    rgi_reg = '{:02}'.format(int(rgi_reg))

    rgi_version = args.rgi_version

    border = args.map_border
    if not border:
        border = os.environ.get('OGGM_MAP_BORDER', None)
        if border is None:
            raise InvalidParamsError('--map-border is required!')

    working_dir = args.working_dir
    if not working_dir:
        working_dir = os.environ.get('OGGM_WORKDIR', '')

    output_folder = args.output
    if not output_folder:
        output_folder = os.environ.get('OGGM_OUTDIR', '')

    border = int(border)
    output_folder = os.path.abspath(output_folder)
    working_dir = os.path.abspath(working_dir)

    # All good
    return dict(rgi_version=rgi_version,
                rgi_reg=rgi_reg,
                border=border,
                output_folder=output_folder,
                working_dir=working_dir,
                is_test=args.test)
Example No. 20
    def _get_tempformelt(self, temp, pok):
        """ Helper function to compute tempformelt to avoid code duplication
        in get_monthly_climate() and _get2d_annual_climate()

        If using this again outside of this class, need to remove the "self",
        such as for 'mb_climate_on_height' in climate.py, that has no self....
        (would need to change temp, t_melt ,temp_std, mb_type, N, loop)

        Input: stuff that is different for the different methods
            temp: temperature time series
            pok: indices of time series

        Returns
        -------
        (tempformelt)
        """

        tempformelt_without_std = temp - self.t_melt

        # computations change only if mb_type is 'mb_daily'!
        if self.mb_type == 'mb_monthly' or self.mb_type == 'mb_real_daily':
            tempformelt = tempformelt_without_std
        elif self.mb_type == 'mb_daily':

            itemp_std = self.temp_std[pok]

            tempformelt_with_std = np.full(np.shape(tempformelt_without_std),
                                           np.NaN)
            # matrix with N values that are distributed around 0
            # showing how much fake 'daily' values vary from the mean
            z_scores_mean = stats.norm.ppf(
                np.arange(1 / self.N - 1 / (2 * self.N), 1, 1 / self.N))

            z_std = np.matmul(
                np.atleast_2d(z_scores_mean).T, np.atleast_2d(itemp_std))

            # two equivalent implementations; avoiding the loop is
            # usually, though not dramatically, faster
            if self.loop is False:
                # vectorized version (no explicit loop over heights)
                tempformelt_daily = np.atleast_3d(tempformelt_without_std).T + \
                                    np.atleast_3d(z_std)
                clip_min(tempformelt_daily, 0, out=tempformelt_daily)
                tempformelt_with_std = tempformelt_daily.mean(axis=0).T
            else:
                shape_tfm = np.shape(tempformelt_without_std)
                tempformelt_with_std = np.full(shape_tfm, np.NaN)
                # z_std was already computed above, no need to recompute it
                for h in np.arange(0, np.shape(tempformelt_without_std)[0]):
                    h_tfm_daily_ = np.atleast_2d(tempformelt_without_std[h, :])
                    h_tempformelt_daily = h_tfm_daily_ + z_std
                    clip_min(h_tempformelt_daily, 0, out=h_tempformelt_daily)
                    h_tempformelt_monthly = h_tempformelt_daily.mean(axis=0)
                    tempformelt_with_std[h, :] = h_tempformelt_monthly
            tempformelt = tempformelt_with_std

        else:
            raise InvalidParamsError('mb_type can only be "mb_monthly", '
                                     '"mb_daily" or "mb_real_daily"')
        # replace all values below zero with zero
        clip_min(tempformelt, 0, out=tempformelt)

        return tempformelt
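
The core of the 'mb_daily' branch above is to expand each monthly mean into N normally distributed pseudo-daily values via the percentiles of a standard normal, clip at the melt threshold, and average back. A minimal standalone sketch of that idea (N, t_melt and the monthly values below are illustrative, not OGGM defaults):

import numpy as np
from scipy import stats

N = 100                                # number of percentiles ("fake" days)
t_melt = 0.0                           # melt threshold (deg C)
temp_monthly = np.array([-2.0, 1.5])   # monthly mean temperatures
temp_std = np.array([3.0, 2.5])        # daily std within each month

# z-scores of N evenly spaced percentiles of a standard normal
z = stats.norm.ppf(np.arange(1 / N - 1 / (2 * N), 1, 1 / N))
# (N, n_months) matrix of pseudo-daily temperatures
daily = temp_monthly + np.outer(z, temp_std)
# melt temperature: clip at the threshold before averaging back to monthly
tempformelt = np.clip(daily - t_melt, 0, None).mean(axis=0)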
Example No. 21
    def __init__(self,
                 gdir,
                 mu_star,
                 bias=0,
                 mb_type='mb_daily',
                 N=10000,
                 loop=False,
                 grad_type='cte',
                 filename='climate_historical',
                 input_filesuffix='',
                 repeat=False,
                 ys=None,
                 ye=None,
                 t_solid=0,
                 t_liq=2,
                 t_melt=0,
                 prcp_fac=2.5,
                 default_grad=-0.0065,
                 temp_local_gradient_bounds=[-0.009, -0.003],
                 SEC_IN_YEAR=SEC_IN_YEAR,
                 SEC_IN_MONTH=SEC_IN_MONTH):
        """ Initialize.
        Parameters
        ----------
        gdir : GlacierDirectory
            the glacier directory
        mu_star : float
            monthly temperature sensitivity (kg /m² /mth /K),
            needs to be prescribed, e.g. such that
            |mean(MODEL_MB)-mean(REF_MB)| --> 0
        bias : float, optional
            mass balance bias to apply [mm we yr-1] (default: zero bias).
            Note that this bias is *subtracted* from the computed MB. Indeed:
            BIAS = MODEL_MB - REFERENCE_MB.
        mb_type: str
            three types: 'mb_daily' (default: use temp_std and N percentiles),
            'mb_monthly' (same as default OGGM mass balance),
            'mb_real_daily' (use daily temperature values).
            a given mb_type only works if the baseline_climate of gdir matches
        N : int
            number of percentiles used to generate gaussian-like daily
            temperatures from daily std and mean monthly temp
        loop : bool
            whether to do the matrix multiplication in a loop instead of
            with np.matmul (default: False);
            only applied if mb_type is 'mb_daily'.
            The vectorized np.matmul version is usually the faster one.
        grad_type : str
            three types of applying the temperature gradient:
            'cte' (default, constant lapse rate, set to default_grad,
                   same as in default OGGM)
            'var_an_cycle' (varies spatially and over annual cycle,
                            but constant over the years)
            'var' (varies spatially & temporally as in the climate files)
        filename : str, optional
            set to a different BASENAME if you want to use alternative climate
            data, default is climate_historical
        input_filesuffix : str,
            the file suffix of the input climate file, default is '',
            if ERA5_daily with daily temperatures, it is set to _daily
        repeat : bool
            Whether the climate period given by [ys, ye] should be repeated
            indefinitely in a circular way
        ys : int
            The start of the climate period where the MB model is valid
            (default: the period with available data)
        ye : int
            The end of the climate period where the MB model is valid
            (default: the period with available data)
        t_solid : float
            temperature threshold for solid precipitation
            (degree Celsius, default 0)
        t_liq: float
            temperature threshold for liquid precipitation
            (degree Celsius, default 2)
        t_melt : float
            temperature threshold where snow/ice melts
            (degree Celsius, default 0)
        prcp_fac : float, >0
            multiplicative precipitation correction factor (default 2.5)
        default_grad : float
            constant lapse rate (temperature gradient, default: -0.0065 K/m)
            if grad_type != cte, this value is not used;
            the varying lapse rate from the climate datasets is used instead
        temp_local_gradient_bounds : [float, float]
            if grad_type != cte and the lapse rate lies outside this range,
            clip it to these minimum / maximum gradients
            (default: [-0.009, -0.003] K/m)
        SEC_IN_YEAR : float
            seconds in a year (default: 31536000s)
        SEC_IN_MONTH : float
            seconds in a month (default: 2628000s); note that this is an
            average, as not every month has the same number of seconds
            (for February the difference can be up to ~8%)

        Attributes
        ----------
        temp_bias : float, default 0
            Add a temperature bias to the time series
        prcp_bias : float, default 1
            Precipitation factor applied to the time series (called bias for
            consistency with `temp_bias`)
        """

        self.mu_star = mu_star
        self.bias = bias

        # Parameters (from cfg.PARAMS in OGGM default)
        self.t_solid = t_solid
        self.t_liq = t_liq
        self.t_melt = t_melt
        self.N = N
        self.mb_type = mb_type
        self.loop = loop
        self.grad_type = grad_type
        # default rho is 900  kg/m3
        self.rho = cfg.PARAMS['ice_density']

        # Public attrs
        self.hemisphere = gdir.hemisphere
        self.temp_bias = 0.
        self.prcp_bias = 1.
        self.repeat = repeat

        self.SEC_IN_YEAR = SEC_IN_YEAR
        self.SEC_IN_MONTH = SEC_IN_MONTH

        # check if the right climate is used for the right mb_type
        # these checks might be changed if there are more climate datasets
        # available!!!
        # only have daily temperatures for 'ERA5_daily'
        baseline_climate = gdir.get_climate_info()['baseline_climate_source']
        if (self.mb_type == 'mb_real_daily'
                and baseline_climate != 'ERA5_daily'):
            text = ('wrong climate for mb_real_daily, need to do e.g. '
                    'process_era5_daily_data(gd) to enable ERA5_daily')
            raise InvalidParamsError(text)
        # mb_monthly does not work when daily temperatures are used
        if self.mb_type == 'mb_monthly' and baseline_climate == 'ERA5_daily':
            text = ('wrong climate for mb_monthly, need to do e.g. '
                    'oggm.shop.ecmwf.process_ecmwf_data(gd, dataset="ERA5dr")')
            raise InvalidParamsError(text)
        # mb_daily needs temp_std
        if self.mb_type == 'mb_daily' and baseline_climate == 'ERA5_daily':
            text = ('wrong climate for mb_daily, need to do e.g. '
                    'oggm.shop.ecmwf.process_ecmwf_data(gd, dataset="ERA5dr")')
            raise InvalidParamsError(text)

        if baseline_climate == 'ERA5_daily':
            input_filesuffix = '_daily'

        # Read climate file
        fpath = gdir.get_filepath(filename, filesuffix=input_filesuffix)

        # we use xarray instead of netCDF4 here (possibly slower, but simpler)
        with xr.open_dataset(fpath) as xr_nc:
            if self.mb_type == 'mb_real_daily' or self.mb_type == 'mb_monthly':
                # even if there is temp_std inside the dataset, we won't use
                # it for these mb_types
                self.temp_std = np.NaN
            else:
                try:
                    self.temp_std = xr_nc['temp_std'].values
                except KeyError:
                    text = ('The applied climate has no temp_std, do e.g. '
                            'oggm.shop.ecmwf.process_ecmwf_data'
                            '(gd, dataset="ERA5dr")')

                    raise InvalidParamsError(text)

            # goal is to get self.years/self.months in hydro_years
            if self.mb_type != 'mb_real_daily':
                time = xr_nc.time
                ny, r = divmod(len(time), 12)
                if r != 0:
                    raise ValueError('Climate data should be N full years')
                # This is where we switch to hydro float year format
                # Last year gives the tone of the hydro year
                self.years = np.repeat(
                    np.arange(xr_nc.time[-1].dt.year - ny + 1,
                              xr_nc.time[-1].dt.year + 1), 12)
                self.months = np.tile(np.arange(1, 13), ny)

            elif self.mb_type == 'mb_real_daily':
                # use pandas to convert month/year to hydro_years
                # this has to be done differently than above because not
                # every month, year has the same amount of days
                pd_test = pd.DataFrame(xr_nc.time.to_series().dt.year.values,
                                       columns=['year'])
                pd_test.index = xr_nc.time.to_series().values
                pd_test['month'] = xr_nc.time.to_series().dt.month.values
                pd_test['hydro_year'] = np.NaN
                # get the month where the hydrological month starts
                # as chosen from the gdir climate file
                # default 10 for 'nh', 4 for 'sh'
                hydro_month_start = int(xr_nc.time[0].dt.month.values)
                if hydro_month_start == 1:
                    # hydro_year corresponds to normal year
                    pd_test.loc[pd_test.index.month >= hydro_month_start,
                                'hydro_year'] = pd_test['year']
                else:
                    pd_test.loc[pd_test.index.month < hydro_month_start,
                                'hydro_year'] = pd_test['year']
                    # otherwise, those days with a month>=hydro_month_start
                    # belong to the next hydro_year
                    pd_test.loc[pd_test.index.month >= hydro_month_start,
                                'hydro_year'] = pd_test['year'] + 1
                # month_hydro is 1 if it is hydro_month_start
                month_hydro = pd_test['month'].values + (12 -
                                                         hydro_month_start + 1)
                month_hydro[month_hydro > 12] += -12
                pd_test['hydro_month'] = month_hydro
                pd_test = pd_test.astype('int')
                self.years = pd_test['hydro_year'].values
                ny = self.years[-1] - self.years[0] + 1
                self.months = pd_test['hydro_month'].values
            # Read timeseries
            self.temp = xr_nc['temp'].values
            self.prcp = xr_nc['prcp'].values * prcp_fac

            # lapse rate (temperature gradient)
            if self.grad_type == 'var' or self.grad_type == 'var_an_cycle':
                try:
                    grad = xr_nc['gradient'].values
                    # Security for stuff that can happen with local gradients
                    g_minmax = temp_local_gradient_bounds

                    # if gradient is not a number, or positive/negative
                    # infinity, use the default gradient
                    grad = np.where(~np.isfinite(grad), default_grad, grad)

                    # if outside boundaries of default -0.009 and above
                    # -0.003 -> use the boundaries instead
                    grad = clip_array(grad, g_minmax[0], g_minmax[1])

                    if self.grad_type == 'var_an_cycle':
                        # if we want constant lapse rates over the years
                        # that change over the annual cycle, but not over time
                        if self.mb_type == 'mb_real_daily':
                            grad_gb = xr_nc['gradient'].groupby('time.month')
                            grad = grad_gb.mean().values
                            g_minmax = temp_local_gradient_bounds

                            # if gradient is not a number, or positive/negative
                            # infinity, use the default gradient
                            grad = np.where(~np.isfinite(grad), default_grad,
                                            grad)

                            # if outside boundaries of default -0.009 and above
                            # -0.003 -> use the boundaries instead
                            grad = clip_array(grad, g_minmax[0], g_minmax[1])

                            stack_grad = grad.reshape(-1, 12)
                            grad = np.tile(stack_grad.mean(axis=0), ny)
                            reps_day1 = xr_nc.time[xr_nc.time.dt.day == 1]
                            reps = reps_day1.dt.daysinmonth
                            grad = np.repeat(grad, reps)

                        else:
                            stack_grad = grad.reshape(-1, 12)
                            grad = np.tile(stack_grad.mean(axis=0), ny)
                except KeyError:
                    text = ('there is no gradient available in the chosen '
                            'climate file, try instead e.g. ERA5_daily or '
                            'ERA5dr via oggm.shop.ecmwf.process_ecmwf_data'
                            '(gd, dataset="ERA5dr")')

                    raise InvalidParamsError(text)

            elif self.grad_type == 'cte':
                # if grad_type is chosen cte, we use the default_grad!
                grad = self.prcp * 0 + default_grad
            else:
                raise InvalidParamsError('grad_type can be either cte, '
                                         'var or var_an_cycle')
            self.grad = grad
            self.ref_hgt = xr_nc.ref_hgt
            self.ys = self.years[0] if ys is None else ys
            self.ye = self.years[-1] if ye is None else ye
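
The pandas-based hydro-date mapping in the 'mb_real_daily' branch above can be condensed; this standalone sketch reproduces the same day-to-hydrological-date mapping with vectorized numpy (hydro start month 10 as for the northern hemisphere; the dates are illustrative):

import numpy as np
import pandas as pd

time = pd.date_range('1999-10-01', '2000-09-30', freq='D')
hydro_month_start = 10

# days on/after the start month belong to the *next* hydro year
hydro_year = np.where(time.month >= hydro_month_start,
                      time.year + 1, time.year)
# month 10 -> hydro month 1, ..., month 9 -> hydro month 12
hydro_month = (time.month - hydro_month_start) % 12 + 1
assert (hydro_year == 2000).all()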
Example No. 22
def init_glacier_regions(rgidf=None, *, reset=False, force=False,
                         from_prepro_level=None, prepro_border=None,
                         prepro_rgi_version=None, prepro_base_url=None,
                         from_tar=False, delete_tar=False,
                         use_demo_glaciers=None):
    """DEPRECATED: Initializes the list of Glacier Directories for this run.

    This is the very first task to do (always). If the directories are already
    available in the working directory, use them. If not, create new ones.

    Parameters
    ----------
    rgidf : GeoDataFrame or list of ids, optional for pre-computed runs
        the RGI glacier outlines. If unavailable, OGGM will parse the
        information from the glacier directories found in the working
        directory. It is required for new runs.
    reset : bool
        delete the existing glacier directories if found.
    force : bool
        setting `reset=True` will trigger a yes/no question to the user. Set
        `force=True` to avoid this.
    from_prepro_level : int
        get the gdir data from the official pre-processed pool. See the
        documentation for more information
    prepro_border : int
        for `from_prepro_level` only: if you want to override the default
        behavior which is to use `cfg.PARAMS['border']`
    prepro_rgi_version : str
        for `from_prepro_level` only: if you want to override the default
        behavior which is to use `cfg.PARAMS['rgi_version']`
    prepro_base_url : str
        for `from_prepro_level` only: if you want to override the default
        URL from which to download the gdirs. Default currently is
        https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.1/
    use_demo_glaciers : bool
        whether to check the demo glaciers for download (faster than the
        standard prepro downloads). The default is to decide whether or
        not to check based on simple criteria such as glacier list size.
    from_tar : bool, default=False
        extract the gdir data from a tar file. If set to `True`,
        will check for a tar file at the expected location in `base_dir`.
    delete_tar : bool, default=False
        delete the original tar file after extraction.

    Returns
    -------
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
        the initialised glacier directories

    Notes
    -----
    This task is deprecated in favor of the more explicit
    init_glacier_directories. Indeed, init_glacier_directories is very
    similar to init_glacier_regions, but it does not process the DEMs:
    a glacier directory is valid also without DEM.
    """

    _check_duplicates(rgidf)

    if reset and not force:
        reset = utils.query_yes_no('Delete all glacier directories?')

    if prepro_border is None:
        prepro_border = int(cfg.PARAMS['border'])

    if from_prepro_level and prepro_border not in [10, 80, 160, 250]:
        if 'test' not in utils._downloads.GDIR_URL:
            raise InvalidParamsError("prepro_border or cfg.PARAMS['border'] "
                                     "should be one of: 10, 80, 160, 250.")

    # if reset delete also the log directory
    if reset:
        fpath = os.path.join(cfg.PATHS['working_dir'], 'log')
        if os.path.exists(fpath):
            rmtree(fpath)

    gdirs = []
    new_gdirs = []
    if rgidf is None:
        if reset:
            raise ValueError('Cannot use reset without setting rgidf')
        log.workflow('init_glacier_regions by parsing available folders '
                     '(can be slow).')
        # The dirs should be there already
        gl_dir = os.path.join(cfg.PATHS['working_dir'], 'per_glacier')
        for root, _, files in os.walk(gl_dir):
            if files and ('dem.tif' in files):
                gdirs.append(oggm.GlacierDirectory(os.path.basename(root)))
    else:

        # Check if dataframe or list of strs
        try:
            entities = []
            for _, entity in rgidf.iterrows():
                entities.append(entity)
        except AttributeError:
            entities = utils.tolist(rgidf)

        # Check demo
        if use_demo_glaciers is None:
            use_demo_glaciers = len(entities) < 100

        if from_prepro_level is not None:
            log.workflow('init_glacier_regions from prepro level {} on '
                         '{} glaciers.'.format(from_prepro_level,
                                               len(entities)))
            # Read the hash dictionary before we use multiproc
            if cfg.PARAMS['dl_verify']:
                utils.get_dl_verify_data('cluster.klima.uni-bremen.de')
            gdirs = execute_entity_task(gdir_from_prepro, entities,
                                        from_prepro_level=from_prepro_level,
                                        prepro_border=prepro_border,
                                        prepro_rgi_version=prepro_rgi_version,
                                        check_demo_glacier=use_demo_glaciers,
                                        base_url=prepro_base_url)
        else:
            # We can set the intersects file automatically here
            if (cfg.PARAMS['use_intersects'] and
                    len(cfg.PARAMS['intersects_gdf']) == 0):
                rgi_ids = np.unique(np.sort([entity.RGIId for entity in
                                             entities]))
                rgi_version = rgi_ids[0].split('-')[0][-2:]
                fp = utils.get_rgi_intersects_entities(rgi_ids,
                                                       version=rgi_version)
                cfg.set_intersects_db(fp)

            gdirs = execute_entity_task(utils.GlacierDirectory, entities,
                                        reset=reset,
                                        from_tar=from_tar,
                                        delete_tar=delete_tar)

            for gdir in gdirs:
                if not os.path.exists(gdir.get_filepath('dem')):
                    new_gdirs.append(gdir)

    if len(new_gdirs) > 0:
        # If not initialized, run the task in parallel
        execute_entity_task(tasks.define_glacier_region, new_gdirs)

    return gdirs
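
Since the task above is deprecated, here is a hedged migration sketch (rgidf is an assumed, pre-loaded RGI GeoDataFrame; the explicit DEM task reproduces the extra processing that init_glacier_regions used to do implicitly):

from oggm import tasks, workflow

# 'rgidf' is an assumed, pre-loaded RGI GeoDataFrame
gdirs = workflow.init_glacier_directories(rgidf)  # no DEM processing here
# re-add the DEM step that init_glacier_regions performed implicitly
workflow.execute_entity_task(tasks.define_glacier_region, gdirs)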
Example No. 23
def sia_thickness_via_optim(slope,
                            width,
                            flux,
                            shape='rectangular',
                            glen_a=None,
                            fs=None,
                            t_lambda=None):
    """Compute the thickness numerically instead of analytically.

    It's the only way that works for trapezoid shapes.

    Parameters
    ----------
    slope : -np.gradient(hgt, dx)
    width : section width in m
    flux : mass flux in m3 s-1
    shape : 'rectangular', 'trapezoid' or 'parabolic'
    glen_a : Glen A, defaults to PARAMS
    fs : sliding, defaults to PARAMS
    t_lambda: the trapezoid lambda, defaults to PARAMS

    Returns
    -------
    the ice thickness (in m)
    """

    if len(np.atleast_1d(slope)) > 1:
        shape = utils.tolist(shape, len(slope))
        t_lambda = utils.tolist(t_lambda, len(slope))
        out = []
        for sl, w, f, s, t in zip(slope, width, flux, shape, t_lambda):
            out.append(
                sia_thickness_via_optim(sl,
                                        w,
                                        f,
                                        shape=s,
                                        glen_a=glen_a,
                                        fs=fs,
                                        t_lambda=t))
        return np.asarray(out)

    # Sanity
    if flux <= 0:
        return 0
    if width <= MIN_WIDTH_FOR_INV:
        return 0

    if glen_a is None:
        glen_a = cfg.PARAMS['inversion_glen_a']
    if fs is None:
        fs = cfg.PARAMS['inversion_fs']
    if t_lambda is None:
        t_lambda = cfg.PARAMS['trapezoid_lambdas']
    if shape not in ['parabolic', 'rectangular', 'trapezoid']:
        raise InvalidParamsError('shape must be `parabolic`, `trapezoid` '
                                 'or `rectangular`, not: {}'.format(shape))

    # Ice flow params
    n = cfg.PARAMS['glen_n']
    fd = 2 / (n + 2) * glen_a
    rho = cfg.PARAMS['ice_density']
    rhogh = (rho * cfg.G * slope)**n

    # To avoid geometrical inconsistencies
    max_h = width / t_lambda if shape == 'trapezoid' else 1e4

    def to_minimize(h):
        u = (h**(n + 1)) * fd * rhogh + (h**(n - 1)) * fs * rhogh
        if shape == 'parabolic':
            sect = 2. / 3. * width * h
        elif shape == 'trapezoid':
            w0m = width - t_lambda * h
            sect = (width + w0m) / 2 * h
        else:
            sect = width * h
        return sect * u - flux

    out_h, r = optimize.brentq(to_minimize, 0, max_h, full_output=True)
    return out_h
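
In effect, the optimizer solves flux = S(h) * u(h) for the thickness h, with the section S and the SIA velocity u = fd * (rho g alpha)^n * h^(n+1) + fs * (rho g alpha)^n * h^(n-1), as coded in to_minimize. A hedged usage sketch with illustrative numbers (OGGM defaults for glen_a, fs and ice density are assumed to be loaded via cfg.initialize()):

import numpy as np
import oggm.cfg as cfg

cfg.initialize()
slope = np.tan(np.deg2rad(10.))  # dimensionless along-flow slope
h = sia_thickness_via_optim(slope, width=300., flux=0.01,
                            shape='rectangular')
print('thickness: {:.1f} m'.format(h))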
Example No. 24
def process_histalp_data(gdir):
    """Processes and writes the HISTALP baseline climate data for this glacier.

    Extracts the nearest timeseries and writes everything to a NetCDF file.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    """

    if cfg.PATHS.get('climate_file', None):
        warnings.warn("You seem to have set a custom climate file for this "
                      "run, but are using the default HISTALP climate file "
                      "instead.")

    if cfg.PARAMS['baseline_climate'] != 'HISTALP':
        raise InvalidParamsError("cfg.PARAMS['baseline_climate'] should be "
                                 "set to HISTALP.")

    # read the time out of the pure netcdf file
    ft = get_histalp_file('tmp')
    fp = get_histalp_file('pre')
    with utils.ncDataset(ft) as nc:
        vt = nc.variables['time']
        assert vt[0] == 0
        assert vt[-1] == vt.shape[0] - 1
        t0 = vt.units.split(' since ')[1][:7]
        time_t = pd.date_range(start=t0, periods=vt.shape[0], freq='MS')
    with utils.ncDataset(fp) as nc:
        vt = nc.variables['time']
        assert vt[0] == 0.5
        assert vt[-1] == vt.shape[0] - .5
        t0 = vt.units.split(' since ')[1][:7]
        time_p = pd.date_range(start=t0, periods=vt.shape[0], freq='MS')

    # Now open with salem
    nc_ts_tmp = salem.GeoNetcdf(ft, time=time_t)
    nc_ts_pre = salem.GeoNetcdf(fp, time=time_p)

    # set temporal subset for the ts data (hydro years)
    # the reference time is given by precip, which is shorter
    sm = cfg.PARAMS['hydro_month_nh']
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts_pre.time.year
    y0, y1 = yrs[0], yrs[-1]
    if cfg.PARAMS['baseline_y0'] != 0:
        y0 = cfg.PARAMS['baseline_y0']
    if cfg.PARAMS['baseline_y1'] != 0:
        y1 = cfg.PARAMS['baseline_y1']

    nc_ts_tmp.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    nc_ts_pre.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    # Units
    assert nc_ts_tmp._nc.variables['HSURF'].units.lower() in [
        'm', 'meters', 'meter', 'metres', 'metre'
    ]
    assert nc_ts_tmp._nc.variables['T_2M'].units.lower() in [
        'degc', 'degrees', 'degrees celcius', 'degree', 'c'
    ]
    assert nc_ts_pre._nc.variables['TOT_PREC'].units.lower() in [
        'kg m-2', 'l m-2', 'mm', 'millimeters', 'millimeter'
    ]

    # geoloc
    lon = gdir.cenlon
    lat = gdir.cenlat
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # read the data
    temp = nc_ts_tmp.get_vardata('T_2M')
    prcp = nc_ts_pre.get_vardata('TOT_PREC')
    hgt = nc_ts_tmp.get_vardata('HSURF')
    ref_lon = nc_ts_tmp.get_vardata('lon')
    ref_lat = nc_ts_tmp.get_vardata('lat')
    source = nc_ts_tmp._nc.title[:7]
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    igrad = None
    if use_grad:
        igrad = np.zeros(len(time)) * np.NaN
        for t, loct in enumerate(temp):
            slope, _, _, p_val, _ = stats.linregress(hgt.flatten(),
                                                     loct.flatten())
            igrad[t] = slope if (p_val < 0.01) else np.NaN

    gdir.write_monthly_climate_file(time,
                                    prcp[:, 1, 1],
                                    temp[:, 1, 1],
                                    hgt[1, 1],
                                    ref_lon[1],
                                    ref_lat[1],
                                    gradient=igrad)
    # metadata
    out = {
        'baseline_climate_source': source,
        'baseline_hydro_yr_0': y0 + 1,
        'baseline_hydro_yr_1': y1
    }
    gdir.write_json(out, 'climate_info')
Example No. 25
def process_histalp_data(gdir, y0=None, y1=None, output_filesuffix=None):
    """Processes and writes the HISTALP baseline climate data for this glacier.

    Extracts the nearest timeseries and writes everything to a NetCDF file.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    y0 : int
        the starting year of the timeseries to write. The default is to take
        1850 (because the data is quite bad before that)
    y1 : int
        the last year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str
        this adds a suffix to the output file (useful to avoid overwriting
        previous experiments)
    """

    if cfg.PATHS.get('climate_file', None):
        warnings.warn("You seem to have set a custom climate file for this "
                      "run, but are using the default HISTALP climate file "
                      "instead.")

    if cfg.PARAMS['baseline_climate'] != 'HISTALP':
        raise InvalidParamsError("cfg.PARAMS['baseline_climate'] should be "
                                 "set to HISTALP.")

    # read the time out of the pure netcdf file
    ft = get_histalp_file('tmp')
    fp = get_histalp_file('pre')
    with utils.ncDataset(ft) as nc:
        vt = nc.variables['time']
        assert vt[0] == 0
        assert vt[-1] == vt.shape[0] - 1
        t0 = vt.units.split(' since ')[1][:7]
        time_t = pd.date_range(start=t0, periods=vt.shape[0], freq='MS')
    with utils.ncDataset(fp) as nc:
        vt = nc.variables['time']
        assert vt[0] == 0.5
        assert vt[-1] == vt.shape[0] - .5
        t0 = vt.units.split(' since ')[1][:7]
        time_p = pd.date_range(start=t0, periods=vt.shape[0], freq='MS')

    # Now open with salem
    nc_ts_tmp = salem.GeoNetcdf(ft, time=time_t)
    nc_ts_pre = salem.GeoNetcdf(fp, time=time_p)

    # Some default
    if y0 is None:
        y0 = 1850

    # set temporal subset for the ts data (hydro years)
    # the reference time is given by precip, which is shorter
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts_pre.time.year
    y1 = yrs[-1] if y1 is None else y1

    nc_ts_tmp.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    nc_ts_pre.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    # Units
    assert nc_ts_tmp._nc.variables['HSURF'].units.lower() in [
        'm', 'meters', 'meter', 'metres', 'metre'
    ]
    assert nc_ts_tmp._nc.variables['T_2M'].units.lower() in [
        'degc', 'degrees', 'degrees celcius', 'degree', 'c'
    ]
    assert nc_ts_pre._nc.variables['TOT_PREC'].units.lower() in [
        'kg m-2', 'l m-2', 'mm', 'millimeters', 'millimeter'
    ]

    # geoloc
    lon = gdir.cenlon
    lat = gdir.cenlat
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # read the data
    temp = nc_ts_tmp.get_vardata('T_2M')
    prcp = nc_ts_pre.get_vardata('TOT_PREC')
    hgt = nc_ts_tmp.get_vardata('HSURF')
    ref_lon = nc_ts_tmp.get_vardata('lon')
    ref_lat = nc_ts_tmp.get_vardata('lat')
    source = nc_ts_tmp._nc.title[:7]
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    igrad = None
    if use_grad:
        igrad = np.zeros(len(time)) * np.NaN
        for t, loct in enumerate(temp):
            slope, _, _, p_val, _ = stats.linregress(hgt.flatten(),
                                                     loct.flatten())
            igrad[t] = slope if (p_val < 0.01) else np.NaN

    gdir.write_monthly_climate_file(time,
                                    prcp[:, 1, 1],
                                    temp[:, 1, 1],
                                    hgt[1, 1],
                                    ref_lon[1],
                                    ref_lat[1],
                                    gradient=igrad,
                                    filesuffix=output_filesuffix,
                                    source=source)
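
A hedged usage sketch for the task above (gdir is an assumed, already-initialized oggm.GlacierDirectory located in the HISTALP domain, i.e. the greater Alpine region):

import oggm.cfg as cfg

cfg.initialize()
cfg.PARAMS['baseline_climate'] = 'HISTALP'
# 'gdir' is an assumed, already-initialized GlacierDirectory in the Alps
process_histalp_data(gdir, y0=1870, y1=2014, output_filesuffix='_histalp')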
Example No. 26
def process_cru_data(gdir, tmp_file=None, pre_file=None, y0=None, y1=None,
                     output_filesuffix=None):
    """Processes and writes the CRU baseline climate data for this glacier.

    Interpolates the CRU TS data to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    tmp_file : str
        path to the CRU temperature file (defaults to the current OGGM chosen
        CRU version)
    pre_file : str
        path to the CRU precip file (defaults to the current OGGM chosen
        CRU version)
    y0 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    y1 : int
        the last year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str
        this adds a suffix to the output file (useful to avoid overwriting
        previous experiments)
    """

    if cfg.PATHS.get('climate_file', None):
        warnings.warn("You seem to have set a custom climate file for this "
                      "run, but are using the default CRU climate "
                      "file instead.")

    if cfg.PARAMS['baseline_climate'] != 'CRU':
        raise InvalidParamsError("cfg.PARAMS['baseline_climate'] should be "
                                 "set to CRU")

    # read the climatology
    ncclim = salem.GeoNetcdf(get_cru_cl_file())

    # and the TS data
    if tmp_file is None:
        tmp_file = get_cru_file('tmp')
    if pre_file is None:
        pre_file = get_cru_file('pre')
    nc_ts_tmp = salem.GeoNetcdf(tmp_file, monthbegin=True)
    nc_ts_pre = salem.GeoNetcdf(pre_file, monthbegin=True)

    # set temporal subset for the ts data (hydro years)
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts_pre.time.year
    y0 = yrs[0] if y0 is None else y0
    y1 = yrs[-1] if y1 is None else y1

    nc_ts_tmp.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    nc_ts_pre.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    lon = gdir.cenlon
    lat = gdir.cenlat

    # This is guaranteed to work because I prepared the file (I hope)
    ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # get climatology data
    loc_hgt = ncclim.get_vardata('elev')
    loc_tmp = ncclim.get_vardata('temp')
    loc_pre = ncclim.get_vardata('prcp')
    loc_lon = ncclim.get_vardata('lon')
    loc_lat = ncclim.get_vardata('lat')

    # see if the center is ok
    if not np.isfinite(loc_hgt[1, 1]):
        # take another candidate where finite
        isok = np.isfinite(loc_hgt)

        # wait: some areas are entirely NaNs, make the subset larger
        _margin = 1
        while not np.any(isok):
            _margin += 1
            ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=_margin)
            loc_hgt = ncclim.get_vardata('elev')
            isok = np.isfinite(loc_hgt)
        if _margin > 1:
            log.debug('(%s) I had to look up for far climate pixels: %s',
                      gdir.rgi_id, _margin)

        # Take the first candidate (doesn't matter which)
        lon, lat = ncclim.grid.ll_coordinates
        lon = lon[isok][0]
        lat = lat[isok][0]
        # Resubset
        ncclim.set_subset()
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        loc_lon = ncclim.get_vardata('lon')
        loc_lat = ncclim.get_vardata('lat')

    assert np.isfinite(loc_hgt[1, 1])
    isok = np.isfinite(loc_hgt)
    hgt_f = loc_hgt[isok].flatten()
    assert len(hgt_f) > 0.

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    ts_grad = None
    if use_grad and len(hgt_f) >= 5:
        ts_grad = np.zeros(12) * np.NaN
        for i in range(12):
            loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
            slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
            ts_grad[i] = slope if (p_val < 0.01) else np.NaN
        # convert to a timeseries and hydrological years
        ts_grad = ts_grad.tolist()
        ts_grad = ts_grad[em:] + ts_grad[0:em]
        ts_grad = np.asarray(ts_grad * ny)

    # maybe this will throw out of bounds warnings
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # compute monthly anomalies
    # of temp
    ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
    ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
    ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
    # of precip
    ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
    ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
    ts_pre_ano = ts_pre.groupby('time.month') - ts_pre_avg
    # scaled anomalies is the default. Standard anomalies above
    # are used later for where ts_pre_avg == 0
    ts_pre = ts_pre.groupby('time.month') / ts_pre_avg

    # interpolate to HR grid
    if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
        # Extreme case, middle pix is not valid
        # take any valid pix from the 3*3 (and hope there's one)
        found_it = False
        for idi in range(2):
            for idj in range(2):
                if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                    ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                    ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                    ts_pre_ano[:, 1, 1] = ts_pre_ano[:, idj, idi]
                    found_it = True
        if not found_it:
            msg = '({}) there is no climate data'.format(gdir.rgi_id)
            raise MassBalanceCalibrationError(msg)
    elif np.any(~np.isfinite(ts_tmp)):
        # maybe the side is nan, but we can do nearest
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='nearest')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='nearest')
        ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
                                                  nc_ts_pre.grid,
                                                  interp='nearest')
    else:
        # We can do bilinear
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='linear')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='linear')
        ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
                                                  nc_ts_pre.grid,
                                                  interp='linear')

    # take the center pixel and add it to the CRU CL clim
    # for temp
    loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                           coords={'month': ts_tmp_avg.month})
    ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    # for prcp
    loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                           coords={'month': ts_pre_avg.month})
    ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_pre_ano = xr.DataArray(ts_pre_ano[:, 1, 1], dims=['time'],
                              coords={'time': time})
    # scaled anomalies
    ts_pre = ts_pre.groupby('time.month') * loc_pre
    # standard anomalies
    ts_pre_ano = ts_pre_ano.groupby('time.month') + loc_pre
    # Correct infinite values with standard anomalies
    ts_pre.values = np.where(np.isfinite(ts_pre.values),
                             ts_pre.values,
                             ts_pre_ano.values)
    # The last step might create negative values (unlikely). Clip them
    ts_pre.values = utils.clip_min(ts_pre.values, 0)

    # done
    loc_hgt = loc_hgt[1, 1]
    loc_lon = loc_lon[1]
    loc_lat = loc_lat[1]
    assert np.isfinite(loc_hgt)
    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))

    gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                    loc_hgt, loc_lon, loc_lat,
                                    filesuffix=output_filesuffix,
                                    gradient=ts_grad,
                                    source=nc_ts_tmp._nc.title[:10])

    ncclim._nc.close()
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()
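
The anomaly logic above (scaled, i.e. ratio, anomalies by default; standard, i.e. additive, anomalies as a fallback where the climatology is zero) can be illustrated in isolation. A minimal sketch with synthetic data:

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range('1961-01-01', '1990-12-01', freq='MS')
rng = np.random.default_rng(0)
pre = xr.DataArray(rng.gamma(2., 30., len(time)),
                   dims=['time'], coords={'time': time})
clim = pre.groupby('time.month').mean(dim='time')

ano_scaled = pre.groupby('time.month') / clim  # ratio anomalies
ano_std = pre.groupby('time.month') - clim     # standard (additive) anomalies

# re-apply to a (hypothetical) high-resolution local climatology
loc_pre = clim * 1.2
pre_hr = ano_scaled.groupby('time.month') * loc_pre
# fall back to standard anomalies where the ratio is not finite (clim == 0)
fallback = ano_std.groupby('time.month') + loc_pre
pre_hr = xr.where(np.isfinite(pre_hr), pre_hr, fallback)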
Example No. 27
def init_glacier_directories(rgidf=None, *, reset=False, force=False,
                             from_prepro_level=None, prepro_border=None,
                             prepro_rgi_version=None, prepro_base_url=None,
                             from_tar=False, delete_tar=False,
                             use_demo_glaciers=None):
    """Initializes the list of Glacier Directories for this run.

    This is the very first task to do (always). If the directories are already
    available in the working directory, use them. If not, create new ones.

    Parameters
    ----------
    rgidf : GeoDataFrame or list of ids, optional for pre-computed runs
        the RGI glacier outlines. If unavailable, OGGM will parse the
        information from the glacier directories found in the working
        directory. It is required for new runs.
    reset : bool
        delete the existing glacier directories if found.
    force : bool
        setting `reset=True` will trigger a yes/no question to the user. Set
        `force=True` to avoid this.
    from_prepro_level : int
        get the gdir data from the official pre-processed pool. See the
        documentation for more information
    prepro_border : int
        for `from_prepro_level` only: if you want to override the default
        behavior which is to use `cfg.PARAMS['border']`
    prepro_rgi_version : str
        for `from_prepro_level` only: if you want to override the default
        behavior which is to use `cfg.PARAMS['rgi_version']`
    prepro_base_url : str
        for `from_prepro_level` only: if you want to override the default
        URL from which to download the gdirs. Default currently is
        https://cluster.klima.uni-bremen.de/~fmaussion/gdirs/oggm_v1.1/
    use_demo_glaciers : bool
        whether to check the demo glaciers for download (faster than the
        standard prepro downloads). The default is to decide whether or
        not to check based on simple criteria such as glacier list size.
    from_tar : bool, default=False
        extract the gdir data from a tar file. If set to `True`,
        will check for a tar file at the expected location in `base_dir`.
    delete_tar : bool, default=False
        delete the original tar file after extraction.

    Returns
    -------
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
        the initialised glacier directories

    Notes
    -----
    This task is very similar to init_glacier_regions, with one main
    difference: it does not process the DEMs for this glacier.
    Eventually, init_glacier_regions will be deprecated and removed from the
    codebase.
    """

    _check_duplicates(rgidf)

    if reset and not force:
        reset = utils.query_yes_no('Delete all glacier directories?')

    if from_prepro_level:
        url = utils.get_prepro_base_url(base_url=prepro_base_url,
                                        border=prepro_border,
                                        prepro_level=from_prepro_level,
                                        rgi_version=prepro_rgi_version)
        if cfg.PARAMS['has_internet'] and not utils.url_exists(url):
            raise InvalidParamsError("base url seems unreachable with these "
                                     "parameters: {}".format(url))

    # if reset delete also the log directory
    if reset:
        fpath = os.path.join(cfg.PATHS['working_dir'], 'log')
        if os.path.exists(fpath):
            rmtree(fpath)

    if rgidf is None:
        # Infer the glacier directories from folders available in working dir
        if reset:
            raise ValueError('Cannot use reset without setting rgidf')
        log.workflow('init_glacier_directories by parsing all available '
                     'folders (this takes time: if possible, provide rgidf '
                     'instead).')
        # The dirs should be there already
        gl_dir = os.path.join(cfg.PATHS['working_dir'], 'per_glacier')
        gdirs = []
        for root, _, files in os.walk(gl_dir):
            if files and ('outlines.shp' in files or
                          'outlines.tar.gz' in files):
                gdirs.append(oggm.GlacierDirectory(os.path.basename(root)))
    else:
        # Create glacier directories from input
        # Check if dataframe or list of str
        try:
            entities = []
            for _, entity in rgidf.iterrows():
                entities.append(entity)
        except AttributeError:
            entities = utils.tolist(rgidf)

        # Check demo
        if use_demo_glaciers is None:
            use_demo_glaciers = len(entities) < 100

        if from_prepro_level is not None:
            log.workflow('init_glacier_directories from prepro level {} on '
                         '{} glaciers.'.format(from_prepro_level,
                                               len(entities)))
            # Read the hash dictionary before we use multiproc
            if cfg.PARAMS['dl_verify']:
                utils.get_dl_verify_data('cluster.klima.uni-bremen.de')
            gdirs = execute_entity_task(gdir_from_prepro, entities,
                                        from_prepro_level=from_prepro_level,
                                        prepro_border=prepro_border,
                                        prepro_rgi_version=prepro_rgi_version,
                                        check_demo_glacier=use_demo_glaciers,
                                        base_url=prepro_base_url)
        else:
            # We can set the intersects file automatically here
            if (cfg.PARAMS['use_intersects'] and
                    len(cfg.PARAMS['intersects_gdf']) == 0):
                try:
                    rgi_ids = np.unique(np.sort([entity.RGIId for entity in
                                                 entities]))
                    rgi_version = rgi_ids[0].split('-')[0][-2:]
                    fp = utils.get_rgi_intersects_entities(rgi_ids,
                                                           version=rgi_version)
                    cfg.set_intersects_db(fp)
                except AttributeError:
                    # List of str
                    pass

            gdirs = execute_entity_task(utils.GlacierDirectory, entities,
                                        reset=reset,
                                        from_tar=from_tar,
                                        delete_tar=delete_tar)

    return gdirs
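
A hedged usage sketch for the task above, pulling a glacier from the official pre-processed pool (the RGI id and prepro settings are illustrative; the matching base URL depends on the OGGM version):

import oggm.cfg as cfg
from oggm import utils, workflow

cfg.initialize()
cfg.PATHS['working_dir'] = utils.gettempdir('oggm_gdirs')
rgi_ids = ['RGI60-11.00897']  # Hintereisferner, for example
gdirs = workflow.init_glacier_directories(rgi_ids, from_prepro_level=3,
                                          prepro_border=80)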
Example No. 28
def run_prepro_levels(rgi_version=None,
                      rgi_reg=None,
                      border=None,
                      output_folder='',
                      working_dir='',
                      dem_source='',
                      is_test=False,
                      test_ids=None,
                      demo=False,
                      test_rgidf=None,
                      test_intersects_file=None,
                      test_topofile=None,
                      disable_mp=False,
                      params_file=None,
                      elev_bands=False,
                      match_regional_geodetic_mb=False,
                      match_geodetic_mb_per_glacier=False,
                      evolution_model='fl_sia',
                      centerlines_only=False,
                      override_params=None,
                      add_consensus=False,
                      start_level=None,
                      start_base_url=None,
                      max_level=5,
                      ref_tstars_base_url='',
                      logging_level='WORKFLOW',
                      disable_dl_verify=False,
                      dynamic_spinup=False,
                      continue_on_error=True):
    """Generate the preprocessed OGGM glacier directories for this OGGM version

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    dem_source : str
        which DEM source to use: default, SOURCE_NAME or ALL
    working_dir : str
        path to the OGGM working directory
    ref_tstars_base_url : str
        url where to find the pre-calibrated reference tstar list.
        Required as of v1.4.
    params_file : str
        path to the OGGM parameter file (to override defaults)
    is_test : bool
        to test on a couple of glaciers only!
    test_ids : list
        if is_test: list of ids to process
    demo : bool
        to run the prepro for the list of demo glaciers
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    disable_mp : bool
        disable multiprocessing
    elev_bands : bool
        compute all flowlines based on the Huss&Hock 2015 method instead
        of the OGGM default, which is a mix of elev_bands and centerlines.
    centerlines_only : bool
        compute all flowlines based on the OGGM centerline(s) method instead
        of the OGGM default, which is a mix of elev_bands and centerlines.
    match_regional_geodetic_mb : str
        match the geodetic mass-balance estimates at the regional level
        ('hugonnet': Hugonnet et al., 2020 or 'zemp': Zemp et al., 2019).
    match_geodetic_mb_per_glacier : str
        match the mass-balance estimates at the glacier level
        (currently only 'hugonnet': Hugonnet et al., 2020).
    evolution_model : str
        which geometry evolution model to use: `fl_sia` (default),
        or `massredis` (mass redistribution curve).
    add_consensus : bool
        adds (reprojects) the consensus thickness estimates to the glacier
        directories. With elev_bands=True, the data will also be binned.
    start_level : int
        the pre-processed level to start from (default is to start from
        scratch). If set, you'll need to indicate start_base_url as well.
    start_base_url : str
        the pre-processed base-url to fetch the data from.
    max_level : int
        the maximum pre-processing level before stopping
    logging_level : str
        the logging level to use (DEBUG, INFO, WARNING, WORKFLOW)
    override_params : dict
        a dict of parameters to override.
    disable_dl_verify : bool
        disable the hash verification of OGGM downloads
    dynamic_spinup : str
        include a dynamic spinup matching 'area' OR 'volume' at the RGI date
    """

    # Input check
    if max_level not in [0, 1, 2, 3, 4, 5]:
        raise InvalidParamsError('max_level should be one of '
                                 '[0, 1, 2, 3, 4, 5]')

    if start_level is not None:
        if start_level not in [0, 1, 2]:
            raise InvalidParamsError('start_level should be one of [0, 1, 2]')
        if start_level > 0 and start_base_url is None:
            raise InvalidParamsError('With start_level, please also indicate '
                                     'start_base_url')
    else:
        start_level = 0

    if match_regional_geodetic_mb and match_geodetic_mb_per_glacier:
        raise InvalidParamsError(
            'match_regional_geodetic_mb incompatible with '
            'match_geodetic_mb_per_glacier!')

    if match_geodetic_mb_per_glacier and match_geodetic_mb_per_glacier != 'hugonnet':
        raise InvalidParamsError('Currently only `hugonnet` is available for '
                                 'match_geodetic_mb_per_glacier.')

    if evolution_model not in ['fl_sia', 'massredis']:
        raise InvalidParamsError('evolution_model should be one of '
                                 "['fl_sia', 'massredis'].")

    if dynamic_spinup and dynamic_spinup not in ['area', 'volume']:
        raise InvalidParamsError(f"Dynamic spinup option '{dynamic_spinup}' "
                                 "not supported")

    if dynamic_spinup and evolution_model == 'massredis':
        raise InvalidParamsError("Dynamic spinup is not working/tested"
                                 "with massredis!")

    # Time
    start = time.time()

    def _time_log():
        # Log util
        m, s = divmod(time.time() - start, 60)
        h, m = divmod(m, 60)
        log.workflow('OGGM prepro_levels is done! Time needed: '
                     '{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)))

    # Local paths
    if override_params is None:
        override_params = {}

    utils.mkdir(working_dir)
    override_params['working_dir'] = working_dir

    # Initialize OGGM and set up the run parameters
    cfg.initialize(file=params_file,
                   params=override_params,
                   logging_level=logging_level,
                   future=True)

    if match_geodetic_mb_per_glacier and (cfg.PARAMS['hydro_month_nh'] != 1 or
                                          cfg.PARAMS['hydro_month_sh'] != 1):
        raise InvalidParamsError('hydro_month_nh and hydro_month_sh must be '
                                 'set to 1 for the geodetic MB calibration '
                                 'per glacier.')

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = not disable_mp

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = continue_on_error

    # Check for the integrity of the files OGGM downloads at run time
    # For large files (e.g. a DEM shipped as one single tif, like ALASKA),
    # calculating the hash takes a long time, so deactivating this can
    # make sense
    cfg.PARAMS['dl_verify'] = not disable_dl_verify

    # Other things that make sense
    cfg.PARAMS['store_model_geometry'] = True

    # Log the parameters
    msg = '# OGGM Run parameters:'
    for k, v in cfg.PARAMS.items():
        if type(v) in [pd.DataFrame, dict]:
            continue
        msg += '\n    {}: {}'.format(k, v)
    log.workflow(msg)

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    output_base_dir = os.path.join(output_folder, 'RGI{}'.format(rgi_version),
                                   'b_{:03d}'.format(border))

    # Add a package version file
    utils.mkdir(output_base_dir)
    opath = os.path.join(output_base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    if demo:
        rgidf = utils.get_rgi_glacier_entities(cfg.DATA['demo_glaciers'].index)
    elif test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(
            utils.get_rgi_region_file(rgi_reg, version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)

        # Some RGI input quality checks - this is based on visual checks
        # of large glaciers in the RGI
        ids_to_ice_cap = [
            'RGI60-05.10315',  # huge Greenland ice cap
            'RGI60-03.01466',  # strange thing next to Devon
            'RGI60-09.00918',  # Academy of Sciences Ice Cap
            'RGI60-09.00969',
            'RGI60-09.00958',
            'RGI60-09.00957',
        ]
        rgidf.loc[rgidf.RGIId.isin(ids_to_ice_cap), 'Form'] = '1'

        # In Antarctica almost all large ice bodies are actually ice caps
        if rgi_reg == '19':
            rgidf.loc[rgidf.Area > 100, 'Form'] = '1'

        # For Greenland we omit connectivity level 2
        if rgi_reg == '05':
            rgidf = rgidf.loc[rgidf['Connect'] != 2]
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        if test_ids is not None:
            rgidf = rgidf.loc[rgidf.RGIId.isin(test_ids)]
        else:
            rgidf = rgidf.sample(4)

        if max_level > 2:
            # Also use ref tstars
            utils.apply_test_ref_tstars()

    if max_level > 2 and ref_tstars_base_url:
        workflow.download_ref_tstars(base_url=ref_tstars_base_url)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # L0 - go
    if start_level == 0:
        gdirs = workflow.init_glacier_directories(rgidf,
                                                  reset=True,
                                                  force=True)

        # Glacier stats
        sum_dir = os.path.join(output_base_dir, 'L0', 'summary')
        utils.mkdir(sum_dir)
        opath = os.path.join(sum_dir,
                             'glacier_statistics_{}.csv'.format(rgi_reg))
        utils.compile_glacier_statistics(gdirs, path=opath)

        # L0 OK - compress all in output directory
        log.workflow('L0 done. Writing to tar...')
        level_base_dir = os.path.join(output_base_dir, 'L0')
        workflow.execute_entity_task(utils.gdir_to_tar,
                                     gdirs,
                                     delete=False,
                                     base_dir=level_base_dir)
        utils.base_dir_to_tar(level_base_dir)
        if max_level == 0:
            _time_log()
            return
    else:
        gdirs = workflow.init_glacier_directories(
            rgidf,
            reset=True,
            force=True,
            from_prepro_level=start_level,
            prepro_border=border,
            prepro_rgi_version=rgi_version,
            prepro_base_url=start_base_url)

    # L1 - Add DEM files
    if start_level == 0:
        if test_topofile:
            cfg.PATHS['dem_file'] = test_topofile

        # Which DEM source?
        if dem_source.upper() == 'ALL':
            # This is the complex one, just do the job and leave
            log.workflow('Running prepro on ALL sources')
            for i, s in enumerate(utils.DEM_SOURCES):
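                # reset/force the directories only for the first DEM source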
                rs = i == 0
                log.workflow('Running prepro on source: {}'.format(s))
                gdirs = workflow.init_glacier_directories(rgidf,
                                                          reset=rs,
                                                          force=rs)
                workflow.execute_entity_task(tasks.define_glacier_region,
                                             gdirs,
                                             source=s)
                workflow.execute_entity_task(_rename_dem_folder,
                                             gdirs,
                                             source=s)

            # make a GeoTiff mask of the glacier, choose any source
            workflow.execute_entity_task(gis.rasterio_glacier_mask,
                                         gdirs,
                                         source='ALL')

            # Compress all in output directory
            level_base_dir = os.path.join(output_base_dir, 'L1')
            workflow.execute_entity_task(utils.gdir_to_tar,
                                         gdirs,
                                         delete=False,
                                         base_dir=level_base_dir)
            utils.base_dir_to_tar(level_base_dir)

            _time_log()
            return

        # Force a given source
        source = dem_source.upper() if dem_source else None

        # L1 - go
        workflow.execute_entity_task(tasks.define_glacier_region,
                                     gdirs,
                                     source=source)

        # Glacier stats
        sum_dir = os.path.join(output_base_dir, 'L1', 'summary')
        utils.mkdir(sum_dir)
        opath = os.path.join(sum_dir,
                             'glacier_statistics_{}.csv'.format(rgi_reg))
        utils.compile_glacier_statistics(gdirs, path=opath)

        # L1 OK - compress all in output directory
        log.workflow('L1 done. Writing to tar...')
        level_base_dir = os.path.join(output_base_dir, 'L1')
        workflow.execute_entity_task(utils.gdir_to_tar,
                                     gdirs,
                                     delete=False,
                                     base_dir=level_base_dir)
        utils.base_dir_to_tar(level_base_dir)
        if max_level == 1:
            _time_log()
            return

    # L2 - Tasks
    if start_level <= 1:
        # Check which glaciers will be processed as what
        if elev_bands:
            gdirs_band = gdirs
            gdirs_cent = []
        elif centerlines_only:
            gdirs_band = []
            gdirs_cent = gdirs
        else:
            # The default is currently centerlines only, but it used to be
            # a mix (e.g. elevation bands for ice caps). We keep this
            # branch in case we want to mix again.
            gdirs_band = []
            gdirs_cent = gdirs

        log.workflow('Start flowline processing with: '
                     'N centerline type: {}, '
                     'N elev bands type: {}.'
                     ''.format(len(gdirs_cent), len(gdirs_band)))

        # HH2015 method
        workflow.execute_entity_task(tasks.simple_glacier_masks, gdirs_band)

        # Centerlines OGGM
        workflow.execute_entity_task(tasks.glacier_masks, gdirs_cent)

        if add_consensus:
            from oggm.shop.bedtopo import add_consensus_thickness
            workflow.execute_entity_task(add_consensus_thickness, gdirs_band)
            workflow.execute_entity_task(add_consensus_thickness, gdirs_cent)

            # Elev bands with var data
            vn = 'consensus_ice_thickness'
            workflow.execute_entity_task(tasks.elevation_band_flowline,
                                         gdirs_band,
                                         bin_variables=vn)
            workflow.execute_entity_task(
                tasks.fixed_dx_elevation_band_flowline,
                gdirs_band,
                bin_variables=vn)
        else:
            # HH2015 method without the consensus thickness
            task_list = [
                tasks.elevation_band_flowline,
                tasks.fixed_dx_elevation_band_flowline,
            ]
            for task in task_list:
                workflow.execute_entity_task(task, gdirs_band)

        # Centerlines OGGM
        task_list = [
            tasks.compute_centerlines,
            tasks.initialize_flowlines,
            tasks.catchment_area,
            tasks.catchment_intersections,
            tasks.catchment_width_geom,
            tasks.catchment_width_correction,
        ]
        for task in task_list:
            workflow.execute_entity_task(task, gdirs_cent)

        # Same for all glaciers
        if border >= 20:
            task_list = [
                tasks.compute_downstream_line,
                tasks.compute_downstream_bedshape,
            ]
            for task in task_list:
                workflow.execute_entity_task(task, gdirs)
        else:
            log.workflow("L2: for map border values < 20, won't compute "
                         "downstream lines.")

        # Glacier stats
        sum_dir = os.path.join(output_base_dir, 'L2', 'summary')
        utils.mkdir(sum_dir)
        opath = os.path.join(sum_dir,
                             'glacier_statistics_{}.csv'.format(rgi_reg))
        utils.compile_glacier_statistics(gdirs, path=opath)

        # And for level 2: shapes
        if len(gdirs_cent) > 0:
            opath = os.path.join(sum_dir, 'centerlines_{}.shp'.format(rgi_reg))
            utils.write_centerlines_to_shape(gdirs_cent,
                                             to_tar=True,
                                             path=opath)

        # L2 OK - compress all in output directory
        log.workflow('L2 done. Writing to tar...')
        level_base_dir = os.path.join(output_base_dir, 'L2')
        workflow.execute_entity_task(utils.gdir_to_tar,
                                     gdirs,
                                     delete=False,
                                     base_dir=level_base_dir)
        utils.base_dir_to_tar(level_base_dir)
        if max_level == 2:
            _time_log()
            return

    # L3 - Tasks
    sum_dir = os.path.join(output_base_dir, 'L3', 'summary')
    utils.mkdir(sum_dir)

    # Climate
    workflow.execute_entity_task(tasks.process_climate_data, gdirs)

    if cfg.PARAMS['climate_qc_months'] > 0:
        workflow.execute_entity_task(tasks.historical_climate_qc, gdirs)

    if match_geodetic_mb_per_glacier:
        # small optimisation: fetch once to avoid concurrent downloads
        utils.get_geodetic_mb_dataframe()
        workflow.execute_entity_task(
            tasks.mu_star_calibration_from_geodetic_mb, gdirs)
        workflow.execute_entity_task(tasks.apparent_mb_from_any_mb, gdirs)
    else:
        workflow.execute_entity_task(tasks.local_t_star, gdirs)
        workflow.execute_entity_task(tasks.mu_star_calibration, gdirs)

    # Inversion: we match the consensus
    do_filter = border >= 20  # avoid shadowing the built-in filter()
    workflow.calibrate_inversion_from_consensus(
        gdirs,
        apply_fs_on_mismatch=True,
        error_on_mismatch=False,
        filter_inversion_output=do_filter)

    # Do we want to match geodetic estimates?
    # This affects only the bias so we can actually do this *after*
    # the inversion, but we really want to take calving into account here
    if match_regional_geodetic_mb:
        opath = os.path.join(
            sum_dir, 'fixed_geometry_mass_balance_'
            'before_match_{}.csv'.format(rgi_reg))
        utils.compile_fixed_geometry_mass_balance(gdirs, path=opath)
        workflow.match_regional_geodetic_mb(gdirs,
                                            rgi_reg=rgi_reg,
                                            dataset=match_regional_geodetic_mb)

    # We get ready for modelling
    if border >= 20:
        workflow.execute_entity_task(tasks.init_present_time_glacier, gdirs)
    else:
        log.workflow(
            "L3: for map border values < 20, won't initialize glaciers "
            "for the run.")
    # Glacier stats
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)
    opath = os.path.join(sum_dir, 'climate_statistics_{}.csv'.format(rgi_reg))
    utils.compile_climate_statistics(gdirs, path=opath)
    opath = os.path.join(sum_dir,
                         'fixed_geometry_mass_balance_{}.csv'.format(rgi_reg))
    utils.compile_fixed_geometry_mass_balance(gdirs, path=opath)

    # L3 OK - compress all in output directory
    log.workflow('L3 done. Writing to tar...')
    level_base_dir = os.path.join(output_base_dir, 'L3')
    workflow.execute_entity_task(utils.gdir_to_tar,
                                 gdirs,
                                 delete=False,
                                 base_dir=level_base_dir)
    utils.base_dir_to_tar(level_base_dir)
    if max_level == 3:
        _time_log()
        return
    if border < 20:
        log.workflow("L3: for map border values < 20, won't compute "
                     "L4 and L5.")
        _time_log()
        return

    # L4 - No tasks: add some stats for consistency and make the dirs small
    sum_dir_L3 = sum_dir
    sum_dir = os.path.join(output_base_dir, 'L4', 'summary')
    utils.mkdir(sum_dir)

    # Copy L3 files for consistency
    for bn in [
            'glacier_statistics', 'climate_statistics',
            'fixed_geometry_mass_balance'
    ]:
        ipath = os.path.join(sum_dir_L3, bn + '_{}.csv'.format(rgi_reg))
        opath = os.path.join(sum_dir, bn + '_{}.csv'.format(rgi_reg))
        shutil.copyfile(ipath, opath)

    # Copy mini data to new dir
    mini_base_dir = os.path.join(working_dir, 'mini_perglacier',
                                 'RGI{}'.format(rgi_version),
                                 'b_{:03d}'.format(border))
    mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir,
                                              gdirs,
                                              base_dir=mini_base_dir)

    # L4 OK - compress all in output directory
    log.workflow('L4 done. Writing to tar...')
    level_base_dir = os.path.join(output_base_dir, 'L4')
    workflow.execute_entity_task(utils.gdir_to_tar,
                                 mini_gdirs,
                                 delete=False,
                                 base_dir=level_base_dir)
    utils.base_dir_to_tar(level_base_dir)
    if max_level == 4:
        _time_log()
        return

    # L5 - spinup run in mini gdirs
    gdirs = mini_gdirs

    # Get end date. The first gdir might have blown up, try some others
    i = 0
    while True:
        if i >= len(gdirs):
            raise RuntimeError('Found no valid glaciers!')
        try:
            y0 = gdirs[i].get_climate_info()['baseline_hydro_yr_0']
            # add 1 because the run ends at the end of the year
            ye = gdirs[i].get_climate_info()['baseline_hydro_yr_1'] + 1
            break
        except BaseException:
            i += 1

    # Which model?
    if evolution_model == 'massredis':
        from oggm.core.flowline import MassRedistributionCurveModel
        evolution_model = MassRedistributionCurveModel
    else:
        from oggm.core.flowline import FluxBasedModel
        evolution_model = FluxBasedModel

    # OK - run
    if dynamic_spinup:
        workflow.execute_entity_task(
            tasks.run_dynamic_spinup,
            gdirs,
            evolution_model=evolution_model,
            minimise_for=dynamic_spinup,
            precision_percent=1,
            output_filesuffix='_dynamic_spinup',
        )
        workflow.execute_entity_task(tasks.run_from_climate_data,
                                     gdirs,
                                     min_ys=y0,
                                     ye=ye,
                                     evolution_model=evolution_model,
                                     init_model_filesuffix='_dynamic_spinup',
                                     output_filesuffix='_hist_spin')
        workflow.execute_entity_task(tasks.merge_consecutive_run_outputs,
                                     gdirs,
                                     input_filesuffix_1='_dynamic_spinup',
                                     input_filesuffix_2='_hist_spin',
                                     output_filesuffix='_historical_spinup',
                                     delete_input=True)

    workflow.execute_entity_task(tasks.run_from_climate_data,
                                 gdirs,
                                 min_ys=y0,
                                 ye=ye,
                                 evolution_model=evolution_model,
                                 output_filesuffix='_historical')

    # Now compile the output
    sum_dir = os.path.join(output_base_dir, 'L5', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, f'historical_run_output_{rgi_reg}.nc')
    utils.compile_run_output(gdirs, path=opath, input_filesuffix='_historical')

    if dynamic_spinup:
        opath = os.path.join(sum_dir,
                             f'historical_spinup_run_output_{rgi_reg}.nc')
        utils.compile_run_output(gdirs,
                                 path=opath,
                                 input_filesuffix='_historical_spinup')

    # Glacier statistics: recomputed here for error analysis
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # Other stats for consistency
    for bn in ['climate_statistics', 'fixed_geometry_mass_balance']:
        ipath = os.path.join(sum_dir_L3, bn + '_{}.csv'.format(rgi_reg))
        opath = os.path.join(sum_dir, bn + '_{}.csv'.format(rgi_reg))
        shutil.copyfile(ipath, opath)

    # Add the extended files
    pf = os.path.join(sum_dir, 'historical_run_output_{}.nc'.format(rgi_reg))
    mf = os.path.join(sum_dir,
                      'fixed_geometry_mass_balance_{}.csv'.format(rgi_reg))
    # This is crucial - extending with calving is only possible with L3 data!
    sf = os.path.join(sum_dir_L3, 'glacier_statistics_{}.csv'.format(rgi_reg))
    opath = os.path.join(
        sum_dir, 'historical_run_output_extended_{}.nc'.format(rgi_reg))
    utils.extend_past_climate_run(past_run_file=pf,
                                  fixed_geometry_mb_file=mf,
                                  glacier_statistics_file=sf,
                                  path=opath)

    # L5 OK - compress all in output directory
    log.workflow('L5 done. Writing to tar...')
    level_base_dir = os.path.join(output_base_dir, 'L5')
    workflow.execute_entity_task(utils.gdir_to_tar,
                                 gdirs,
                                 delete=False,
                                 base_dir=level_base_dir)
    utils.base_dir_to_tar(level_base_dir)

    _time_log()
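
For orientation, here is a minimal, hypothetical call of run_prepro_levels. All argument values below (region, border, paths) are made-up assumptions for illustration, not values prescribed by the function above:

# Hypothetical example call (all values are made up): process a small
# random test sample of RGI region 11 up to level 2.
run_prepro_levels(rgi_version='62',
                  rgi_reg='11',
                  border=80,
                  output_folder='/tmp/prepro_out',
                  working_dir='/tmp/prepro_wd',
                  is_test=True,      # run on a 4-glacier random sample
                  elev_bands=True,   # HH2015 elevation-band flowlines
                  max_level=2)       # stop after the flowline level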
Example #29
    def _reproject_and_write_shapefile(self, entity, filesuffix=''):

        # Make a local glacier map
        params = dict(name='tmerc',
                      lat_0=0.,
                      lon_0=self.cenlon,
                      k=0.9996,
                      x_0=0,
                      y_0=0,
                      datum='WGS84')
        proj4_str = "+proj={name} +lat_0={lat_0} +lon_0={lon_0} +k={k} " \
                    "+x_0={x_0} +y_0={y_0} +datum={datum}".format(**params)

        # Reproject
        proj_in = pyproj.Proj("epsg:4326", preserve_units=True)
        proj_out = pyproj.Proj(proj4_str, preserve_units=True)

        # transform geometry to map
        project = partial(transform_proj, proj_in, proj_out)
        geometry = shp_trafo(project, entity['geometry'])
        geometry = multipolygon_to_polygon(geometry, gdir=self)

        # Save transformed geometry to disk
        entity = entity.copy()
        entity['geometry'] = geometry

        # Do we want to use the RGI area or ours?
        if not cfg.PARAMS['use_rgi_area']:
            # Update Area
            area = geometry.area * 1e-6
            entity['Area'] = area

        # Avoid fiona bug: https://github.com/Toblerity/Fiona/issues/365
        for k, s in entity.items():  # iteritems() was removed in pandas 2.0
            if type(s) in [np.int32, np.int64]:
                entity[k] = int(s)
        towrite = gpd.GeoDataFrame(entity).T
        towrite.crs = proj4_str

        # Write shapefile
        self.write_shapefile(towrite, 'outlines', filesuffix=filesuffix)

        # Also transform the intersects if necessary
        gdf = cfg.PARAMS['intersects_gdf']
        if len(gdf) > 0:
            gdf = gdf.loc[((gdf.RGIId_1 == self.rgi_id) |
                           (gdf.RGIId_2 == self.rgi_id))]
            if len(gdf) > 0:
                gdf = salem.transform_geopandas(gdf, to_crs=proj_out)
                if hasattr(gdf.crs, 'srs'):
                    # salem uses pyproj
                    gdf.crs = gdf.crs.srs
                self.write_shapefile(gdf, 'intersects')
        else:
            # Sanity check
            if cfg.PARAMS['use_intersects']:
                raise InvalidParamsError(
                    'You seem to have forgotten to set the '
                    'intersects file for this run. OGGM '
                    'works better with such a file. If you '
                    'know what you are doing, set '
                    "cfg.PARAMS['use_intersects'] = False to "
                    "suppress this error.")
Example #30
def process_era5_daily_data(gdir,
                            y0=None,
                            y1=None,
                            output_filesuffix='_daily',
                            cluster=False):
    """Processes and writes the era5 daily baseline climate data for a glacier.
    into climate_historical_daily.nc

    Extracts the nearest timeseries and writes everything to a NetCDF file.
    This uses only the ERA5 daily temperatures. The precipitation, lapse
    rate and standard deviations are taken from ERA5dr.

    TODO: see _verified_download_helper; no known hash for
    era5_daily_t2m_1979-2018_flat.nc and era5_glacier_invariant_flat

    Parameters
    ----------
    y0 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    y1 : int
        the ending year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str
        this adds a suffix to the output file (useful to avoid overwriting
        previous experiments)
    cluster : bool
        default is False; set to True when running on the cluster, where
        the files are already present and do not need to be downloaded

    """

    # era5daily only for temperature
    dataset = 'ERA5_daily'
    # for the other variables use the data of ERA5dr
    dataset_othervars = 'ERA5dr'

    # get the central longitude/latitude of the glacier
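    # (the ERA5 files use the [0, 360] longitude convention)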
    lon = gdir.cenlon + 360 if gdir.cenlon < 0 else gdir.cenlon
    lat = gdir.cenlat

    cluster_path = '/home/www/oggm/climate/'

    if cluster:
        path = cluster_path + BASENAMES[dataset]['tmp']
    else:
        path = get_ecmwf_file(dataset, 'tmp')

    # Use xarray to read the data
    # (reading with plain netCDF4 would be faster)
    with xr.open_dataset(path) as ds:
        assert ds.longitude.min() >= 0

        # set temporal subset for the ts data (hydro years)
        if gdir.hemisphere == 'nh':
            sm = cfg.PARAMS['hydro_month_nh']
        elif gdir.hemisphere == 'sh':
            sm = cfg.PARAMS['hydro_month_sh']

        em = sm - 1 if (sm > 1) else 12
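        # the hydro year ends one month before it starts
        # (e.g. sm=10 for October -> em=9, September)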

        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1

        if y1 > 2018 or y0 < 1979:
            text = ('The climate files only go from 1979 to 2018; '
                    'choose another y0 and y1')
            raise InvalidParamsError(text)
        # if default settings: this is the last day in March or September
        time_f = '{}-{:02d}'.format(y1, em)
        end_day = int(ds.sel(time=time_f).time.dt.daysinmonth[-1].values)

        # this was also tested for hydro_month = 1
        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-{}'.format(y1, em, end_day)))

        try:
            # flattened dataset: compute all distances and take the
            # nearest grid point
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())
        except ValueError:
            # fallback for a non-flattened file; with the flattened
            # dataset this should not occur
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')

        # temperature should be in degrees Celsius in the glacier climate files
        temp = ds['t2m'].data - 273.15
        time = ds.time.data

        ref_lon = float(ds['longitude'])
        ref_lat = float(ds['latitude'])

        ref_lon = ref_lon - 360 if ref_lon > 180 else ref_lon

    # precipitation is processed as in the ERA5dr dataset
    with xr.open_dataset(get_ecmwf_file(dataset_othervars, 'pre')) as ds:
        assert ds.longitude.min() >= 0

        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1
        # Note: we take the same y0 and y1 as given by the daily
        # temperature dataset (which runs until the end of 2018)

        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-01'.format(y1, em)))
        try:
            # prcp is not flattened, so this should normally work
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
        except ValueError:
            # fallback in case of a flattened precipitation file
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())

        # the prcp dataset needs to be restructured to have one value per day
        prcp = ds['tp'].data * 1000
        # assume that precipitation is the same on every day of the month:
        prcp = np.repeat(prcp, ds['time.daysinmonth'])
        # Note: the unit is now prcp per day, not per month as in the
        # OGGM default (which would be:
        # prcp = ds['tp'].data * 1000 * ds['time.daysinmonth'])

    if cluster:
        path_inv = cluster_path + BASENAMES[dataset]['inv']
    else:
        path_inv = get_ecmwf_file(dataset, 'inv')
    with xr.open_dataset(path_inv) as ds:
        assert ds.longitude.min() >= 0
        ds = ds.isel(time=0)
        try:
            # Flattened ERA5_invariant (only possibility at the moment)
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())
        except ValueError:
            # this should not occur
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')

        G = cfg.G  # 9.80665
        hgt = ds['z'].data / G

    gradient = None
    temp_std = None
    path_lapserates = get_ecmwf_file(dataset_othervars, 'lapserates')
    with xr.open_dataset(path_lapserates) as ds:
        assert ds.longitude.min() >= 0

        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1
        # Note: we take the same y0 and y1 as given by the daily
        # temperature dataset (which runs until the end of 2018)

        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-01'.format(y1, em)))

        # no flattening done for the ERA5dr gradient dataset
        ds = ds.sel(longitude=lon, latitude=lat, method='nearest')

        # get the monthly gradient values
        gradient = ds['lapserate'].data

        # gradient needs to be restructured to have values for each day
        gradient = np.repeat(gradient, ds['time.daysinmonth'])
        # assume same gradient for each day

    # OK, ready to write
    write_climate_file(gdir,
                       time,
                       prcp,
                       temp,
                       hgt,
                       ref_lon,
                       ref_lat,
                       filesuffix=output_filesuffix,
                       gradient=gradient,
                       temp_std=temp_std,
                       source=dataset,
                       file_name='climate_historical')
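
The monthly-to-daily expansion used above for precipitation and lapse rates boils down to a single np.repeat call. A tiny self-contained sketch with made-up numbers:

import numpy as np

# mean daily precipitation for each month (mm/day, made-up values) ...
monthly_mean = np.array([1.0, 1.5, 2.0])
# ... and the length of each month in days
days_in_month = np.array([31, 28, 31])

# np.repeat turns the monthly series into a daily one by repeating each
# monthly value once per day of that month (constant within the month)
daily = np.repeat(monthly_mean, days_in_month)

assert daily.size == days_in_month.sum()         # 90 daily values
assert np.allclose(daily[:31], monthly_mean[0])  # January is constant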