Example #1
File: climate.py Project: OGGM/oggm
def _get_ref_glaciers(gdirs):
    """Get the list of glaciers we have valid data for."""

    flink, mbdatadir = utils.get_wgms_files()
    dfids = pd.read_csv(flink)['RGI_ID'].values

    # TODO: we removed marine glaciers here. Is it ok?
    ref_gdirs = []
    for g in gdirs:
        if g.rgi_id not in dfids or g.terminus_type != 'Land-terminating':
            continue
        mbdf = g.get_ref_mb_data()
        if len(mbdf) >= 5:
            ref_gdirs.append(g)
    return ref_gdirs
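A minimal usage sketch (not part of the source): it assumes a recent OGGM installation, the working directory is a temporary placeholder, and the RGI id must match the version expected by the links file.

from oggm import cfg, utils, workflow

cfg.initialize()
cfg.PATHS['working_dir'] = utils.gettempdir('wgms_ref_demo')

# Placeholder id (Hintereisferner); any ids with WGMS data would do
gdirs = workflow.init_glacier_directories(['RGI60-11.00897'],
                                          from_prepro_level=3)
ref_gdirs = _get_ref_glaciers(gdirs)
print('%d of %d glaciers have enough reference MB data'
      % (len(ref_gdirs), len(gdirs)))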
Example #2
def _get_ref_glaciers(gdirs):
    """Get the list of glaciers we have valid data for."""

    flink, _ = utils.get_wgms_files()
    dfids = pd.read_csv(flink)[gdirs[0].rgi_version + '_ID'].values

    # TODO: we removed marine glaciers here. Is it ok?
    ref_gdirs = []
    for g in gdirs:
        if g.rgi_id not in dfids or g.terminus_type != 'Land-terminating':
            continue
        mbdf = g.get_ref_mb_data()
        if len(mbdf) >= 5:
            ref_gdirs.append(g)
    return ref_gdirs
Example #3
    def test_download_demo_files(self):

        f = utils.get_demo_file('Hintereisferner.shp')
        self.assertTrue(os.path.exists(f))

        sh = salem.read_shapefile(f)
        self.assertTrue(hasattr(sh, 'geometry'))

        # Data files
        cfg.initialize()

        lf, df = utils.get_wgms_files()
        self.assertTrue(os.path.exists(lf))

        lf = utils.get_glathida_file()
        self.assertTrue(os.path.exists(lf))
Example #4
    def test_download_demo_files(self):

        f = utils.get_demo_file('Hintereisferner.shp')
        self.assertTrue(os.path.exists(f))

        sh = salem.read_shapefile(f)
        self.assertTrue(hasattr(sh, 'geometry'))

        # Data files
        cfg.initialize()

        lf, df = utils.get_wgms_files()
        self.assertTrue(os.path.exists(df))

        lf = utils.get_glathida_file()
        self.assertTrue(os.path.exists(lf))
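Examples #3 and #4 differ only in which of the two returned paths they check. Assuming, as both tests do, that the two return values are existing paths, the checks can be combined in one sketch:

        lf, df = utils.get_wgms_files()
        for p in (lf, df):
            self.assertTrue(os.path.exists(p))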
Example #5
File: itmix.py Project: anton-ub/oggm
def get_rgi_df(reset=False):
    """This function prepares a kind of `fake` RGI file, with the updated
    geometries for ITMIX.
    """

    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers
    RGI_DIR = utils.get_rgi_dir()

    df_rgi_file = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_shp.pkl')
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_links.pkl')
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        divides = []
        rgidf = []
        _rgi_ids_for_overwrite = []
        for i, row in df_itmix.iterrows():

            log.info('Prepare RGI df for ' + row.name)

            # read the rgi region
            rgi_shp = find_path(RGI_DIR, row['rgi_reg'] + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row.T['rgi_parts_ids']
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()

            # use the ITMIX shape where possible
            if row.name in [
                    'Hellstugubreen', 'Freya', 'Aqqutikitsoq', 'Brewster',
                    'Kesselwandferner', 'NorthGlacier', 'SouthGlacier',
                    'Tasman', 'Unteraar', 'Washmawapta', 'Columbia'
            ]:
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp = salem.read_shapefile(shf)
                if row.name == 'Unteraar':
                    shp = shp.iloc[[-1]]
                if 'LineString' == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0],
                            'geometry'] = shpg.Polygon(shp.iloc[0].geometry)
                if shp.iloc[0].geometry.type == 'MultiLineString':
                    # Columbia
                    geometry = shp.iloc[0].geometry
                    parts = list(geometry)
                    for p in parts:
                        assert p.type == 'LineString'
                    exterior = shpg.Polygon(parts[0])
                    # let's assume that all other polygons are in fact interiors
                    interiors = []
                    for p in parts[1:]:
                        assert exterior.contains(p)
                        interiors.append(p)
                    geometry = shpg.Polygon(parts[0], interiors)
                    assert 'Polygon' in geometry.type
                    shp.loc[shp.index[0], 'geometry'] = geometry

                assert len(shp) == 1
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], 'geometry'] = shp
                sel.loc[sel.index[0], 'Area'] = area_km2
            elif row.name == 'Urumqi':
                # ITMIX Urumqi is in fact two glaciers
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp2 = salem.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k],
                                   'geometry'].contains(shp.centroid)
                    sel.loc[sel.index[k], 'geometry'] = shp
                    sel.loc[sel.index[k], 'Area'] = area_km2
                assert len(sel) == 2
            elif len(rgi_parts) > 1:
                # Ice-caps. Make divides
                # First we gather all the parts:
                sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
                # Make the multipolygon for the record
                multi = shpg.MultiPolygon([g for g in sel.geometry])
                # update the RGI attributes. We take a dummy rgi ID
                new_area = np.sum(sel.Area)
                found = False
                for i in range(len(sel)):
                    tsel = sel.iloc[[i]].copy()
                    if 'Multi' in tsel.loc[tsel.index[0], 'geometry'].type:
                        continue
                    else:
                        found = True
                        sel = tsel
                        break
                if not found:
                    raise RuntimeError()

                inif = 0.
                add = 1e-5
                if row.name == 'Devon':
                    inif = 0.001
                    add = 1e-4
                while True:
                    buff = multi.buffer(inif)
                    if 'Multi' in buff.type:
                        inif += add
                    else:
                        break
                x, y = multi.centroid.xy
                if 'Multi' in buff.type:
                    raise RuntimeError
                sel.loc[sel.index[0], 'geometry'] = buff
                sel.loc[sel.index[0], 'Area'] = new_area
                sel.loc[sel.index[0], 'CenLon'] = np.asarray(x)[0]
                sel.loc[sel.index[0], 'CenLat'] = np.asarray(y)[0]

                # Divides db
                div_sel = dict()
                for k, v in sel.iloc[0].iteritems():
                    if k == 'geometry':
                        div_sel[k] = multi
                    elif k == 'RGIId':
                        div_sel['RGIID'] = v
                    else:
                        div_sel[k] = v
                divides.append(div_sel)
            else:
                pass

            # add glacier name to the entity
            name = ['I:' + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'W-' + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

            # Add divides to the original one
            adf = pd.DataFrame(divides)
            adf.to_pickle(cfg.PATHS['itmix_divs'])

        log.info('N glaciers ITMIX: {}'.format(len(rgidf)))

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        wgms_df = wgms_df.loc[~wgms_df.RGI_ID.isin(_rgi_ids_for_overwrite)]

        log.info('N glaciers WGMS: {}'.format(len(wgms_df)))
        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = (row.NAME.replace('/', 'or')
                    .replace('.', '').replace(' ', '-'))
            name = ['W:' + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            for n in name:
                if len(n) > 48:
                    raise ValueError('Glacier name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        _rgi_ids_for_overwrite.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        gtd_df = gtd_df.loc[~gtd_df.RGI_ID.isin(_rgi_ids_for_overwrite)]
        log.info('N glaciers GTD: {}'.format(len(gtd_df)))

        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = (row.NAME.replace('/', 'or')
                        .replace('.', '').replace(' ', '-'))
            name = ['G:' + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise ValueError('Glacier name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)

    return rgidf
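A usage sketch for the function above (it assumes DATA_DIR, the ITMIX pickles and the RGI files are in place); the cached pickle is reused unless reset=True:

rgidf = get_rgi_df(reset=False)
print('Total glaciers in the ITMIX + WGMS + GTD dataframe:', len(rgidf))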
Example #6
File: climate.py Project: TimoRoth/oggm
def compute_ref_t_stars(gdirs):
    """ Detects the best t* for the reference glaciers.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """

    log.info('Compute the reference t* and mu* for WGMS glaciers')

    # Get ref glaciers (all glaciers with MB)
    flink, mbdatadir = utils.get_wgms_files()
    dfids = pd.read_csv(flink)['RGI_ID'].values

    # Reference glaciers only if in the list
    # TODO: we removed marine glaciers here. Is it ok?
    ref_gdirs = [g for g in gdirs if (g.rgi_id in dfids and
                                      g.terminus_type == 'Land-terminating')]

    # Loop
    only_one = []  # start to store the glaciers with just one t*
    per_glacier = dict()
    for gdir in ref_gdirs:
        # all possible mus
        mu_candidates(gdir)
        # list of mus compatibles with refmb
        reff = os.path.join(mbdatadir, 'mbdata_' + gdir.rgi_id + '.csv')
        mbdf = pd.read_csv(reff).set_index('YEAR')
        t_star, res_bias = t_star_from_refmb(gdir, mbdf['ANNUAL_BALANCE'])

        # if we have just one candidate this is good
        if len(t_star) == 1:
            only_one.append(gdir.rgi_id)
        # this might be more than one, we'll have to select them later
        per_glacier[gdir.rgi_id] = (gdir, t_star, res_bias)

    # At least one of the X glaciers should have a single t*, otherwise we
    # don't know how to start
    if len(only_one) == 0:
        if os.path.basename(os.path.dirname(flink)) == 'test-workflow':
            # TODO: hardcoded stuff here, for the test workflow
            only_one.append('RGI40-11.00887')
            gdir, t_star, res_bias = per_glacier['RGI40-11.00887']
            per_glacier['RGI40-11.00887'] = (gdir, [t_star[-1]], [res_bias[-1]])
        else:
            raise RuntimeError("Didn't expect to be here.")


    log.info('%d out of %d have only one possible t*. Start from here',
             len(only_one), len(ref_gdirs))

    # Ok. now loop over the nearest glaciers until all have a unique t*
    while True:
        ids_left = [id for id in per_glacier.keys() if id not in only_one]
        if len(ids_left) == 0:
            break

        # Compute the summed distance to all glaciers with one t*
        distances = []
        for id in ids_left:
            gdir, t_star, res_bias = per_glacier[id]
            lon, lat = gdir.cenlon, gdir.cenlat
            ldis = 0.
            for id_o in only_one:
                ogdir, _, _ = per_glacier[id_o]
                ldis += utils.haversine(lon, lat, ogdir.cenlon, ogdir.cenlat)
            distances.append(ldis)

        # Take the shortest and choose the best t*
        gdir, t_star, res_bias = per_glacier[ids_left[np.argmin(distances)]]
        distances = []
        for tt in t_star:
            ldis = 0.
            for id_o in only_one:
                _, ot_star, _ = per_glacier[id_o]
                ldis += np.abs(tt - ot_star)
            distances.append(ldis)
        amin = np.argmin(distances)
        per_glacier[gdir.rgi_id] = (gdir, [t_star[amin]], [res_bias[amin]])
        only_one.append(gdir.rgi_id)

    # Write out the data
    rgis_ids, t_stars, biases, lons, lats = [], [], [], [], []
    for id, (gdir, t_star, res_bias) in per_glacier.items():
        rgis_ids.append(id)
        t_stars.append(t_star[0])
        biases.append(res_bias[0])
        lats.append(gdir.cenlat)
        lons.append(gdir.cenlon)
    df = pd.DataFrame(index=rgis_ids)
    df['tstar'] = t_stars
    df['bias'] = biases
    df['lon'] = lons
    df['lat'] = lats
    file = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
    df.sort_index().to_csv(file)
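Once compute_ref_t_stars(gdirs) has run, the calibrated values can be read back from the CSV it writes; a short sketch, with path and columns exactly as written by the function above:

import os

import pandas as pd
from oggm import cfg

fpath = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
ref_df = pd.read_csv(fpath, index_col=0)  # index: the reference RGI ids
print(ref_df[['tstar', 'bias', 'lon', 'lat']].head())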
Example #7
File: itmix.py Project: JohannesUIBK/oggm
def get_rgi_df(reset=False):
    """This function prepares a kind of `fake` RGI file, with the updated
    geometries for ITMIX.
    """

    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers
    RGI_DIR = utils.get_rgi_dir()

    df_rgi_file = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_shp.pkl')
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_links.pkl')
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        divides = []
        rgidf = []
        _rgi_ids_for_overwrite = []
        for i, row in df_itmix.iterrows():

            log.info('Prepare RGI df for ' + row.name)

            # read the rgi region
            rgi_shp = find_path(RGI_DIR, row['rgi_reg'] + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row.T['rgi_parts_ids']
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()

            # use the ITMIX shape where possible
            if row.name in ['Hellstugubreen', 'Freya', 'Aqqutikitsoq',
                            'Brewster', 'Kesselwandferner', 'NorthGlacier',
                            'SouthGlacier', 'Tasman', 'Unteraar',
                            'Washmawapta', 'Columbia']:
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp = salem.read_shapefile(shf)
                if row.name == 'Unteraar':
                    shp = shp.iloc[[-1]]
                if 'LineString' == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0], 'geometry'] = shpg.Polygon(shp.iloc[0].geometry)
                if shp.iloc[0].geometry.type == 'MultiLineString':
                    # Columbia
                    geometry = shp.iloc[0].geometry
                    parts = list(geometry)
                    for p in parts:
                        assert p.type == 'LineString'
                    exterior = shpg.Polygon(parts[0])
                    # let's assume that all other polygons are in fact interiors
                    interiors = []
                    for p in parts[1:]:
                        assert exterior.contains(p)
                        interiors.append(p)
                    geometry = shpg.Polygon(parts[0], interiors)
                    assert 'Polygon' in geometry.type
                    shp.loc[shp.index[0], 'geometry'] = geometry

                assert len(shp) == 1
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], 'geometry'] = shp
                sel.loc[sel.index[0], 'Area'] = area_km2
            elif row.name == 'Urumqi':
                # ITMIX Urumqi is in fact two glaciers
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp2 = salem.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k], 'geometry'].contains(shp.centroid)
                    sel.loc[sel.index[k], 'geometry'] = shp
                    sel.loc[sel.index[k], 'Area'] = area_km2
                assert len(sel) == 2
            elif len(rgi_parts) > 1:
                # Ice-caps. Make divides
                # First we gather all the parts:
                sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
                # Make the multipolygon for the record
                multi = shpg.MultiPolygon([g for g in sel.geometry])
                # update the RGI attributes. We take a dummy rgi ID
                new_area = np.sum(sel.Area)
                found = False
                for i in range(len(sel)):
                    tsel = sel.iloc[[i]].copy()
                    if 'Multi' in tsel.loc[tsel.index[0], 'geometry'].type:
                        continue
                    else:
                        found = True
                        sel = tsel
                        break
                if not found:
                    raise RuntimeError()

                inif = 0.
                add = 1e-5
                if row.name == 'Devon':
                    inif = 0.001
                    add = 1e-4
                while True:
                    buff = multi.buffer(inif)
                    if 'Multi' in buff.type:
                        inif += add
                    else:
                        break
                x, y = multi.centroid.xy
                if 'Multi' in buff.type:
                    raise RuntimeError
                sel.loc[sel.index[0], 'geometry'] = buff
                sel.loc[sel.index[0], 'Area'] = new_area
                sel.loc[sel.index[0], 'CenLon'] = np.asarray(x)[0]
                sel.loc[sel.index[0], 'CenLat'] = np.asarray(y)[0]

                # Divides db
                div_sel = dict()
                for k, v in sel.iloc[0].iteritems():
                    if k == 'geometry':
                        div_sel[k] = multi
                    elif k == 'RGIId':
                        div_sel['RGIID'] = v
                    else:
                        div_sel[k] = v
                divides.append(div_sel)
            else:
                pass

            # add glacier name to the entity
            name = ['I:' + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'W-' + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

            # Add divides to the original one
            adf = pd.DataFrame(divides)
            adf.to_pickle(cfg.PATHS['itmix_divs'])

        log.info('N glaciers ITMIX: {}'.format(len(rgidf)))

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        wgms_df = wgms_df.loc[~ wgms_df.RGI_ID.isin(_rgi_ids_for_overwrite)]

        log.info('N glaciers WGMS: {}'.format(len(wgms_df)))
        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['W:' + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            for n in name:
                if len(n) > 48:
                    raise ValueError('Glacier name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        _rgi_ids_for_overwrite.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        gtd_df = gtd_df.loc[~ gtd_df.RGI_ID.isin(_rgi_ids_for_overwrite)]
        log.info('N glaciers GTD: {}'.format(len(gtd_df)))

        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['G:' + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise ValueError('Glacier name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)

    return rgidf
Example #8
# Use multiprocessing?
cfg.PARAMS['use_multiprocessing'] = True

# Set to True for operational runs - here we want all glaciers to run
cfg.PARAMS['continue_on_error'] = False

if baseline == 'HISTALP':
    # Other params: see https://oggm.org/2018/08/10/histalp-parameters/
    cfg.PARAMS['baseline_y0'] = 1850
    cfg.PARAMS['prcp_scaling_factor'] = 1.75
    cfg.PARAMS['temp_melt'] = -1.75

# Get the reference glacier ids (they are different for each RGI version)
rgi_dir = utils.get_rgi_dir(version=rgi_version)
df, _ = utils.get_wgms_files()
rids = df['RGI{}0_ID'.format(rgi_version[0])]

# We can't do Antarctica
rids = [rid for rid in rids if not ('-19.' in rid)]

# For HISTALP only RGI reg 11
if baseline == 'HISTALP':
    rids = [rid for rid in rids if '-11.' in rid]

# Make a new dataframe with those (this takes a while)
log.info('Reading the RGI shapefiles...')
rgidf = utils.get_rgi_glacier_entities(rids, version=rgi_version)
log.info('For RGIV{} we have {} candidate reference '
         'glaciers.'.format(rgi_version, len(rgidf)))
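This fragment relies on globals defined earlier in the script; a minimal setup sketch (the values are placeholders):

import logging

from oggm import cfg, utils

cfg.initialize(logging_level='WORKFLOW')
log = logging.getLogger(__name__)

rgi_version = '61'  # placeholder; rgi_version[0] selects the WGMS id column
baseline = 'CRU'    # or 'HISTALP'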
Example #9
# No need for intersects since this has an effect on the inversion only
cfg.PARAMS['use_intersects'] = False

# Use multiprocessing?
cfg.PARAMS['use_multiprocessing'] = True

# Set to True for operational runs
cfg.PARAMS['continue_on_error'] = False

# Pre-download other files which will be needed later
_ = utils.get_cru_file(var='tmp')
_ = utils.get_cru_file(var='pre')
rgi_dir = utils.get_rgi_dir(version=rgi_version)

# Get the reference glacier ids (they are different for each RGI version)
df, _ = utils.get_wgms_files(version=rgi_version)
rids = df['RGI{}0_ID'.format(rgi_version)]

# Make a new dataframe with those (this takes a while)
log.info('Reading the RGI shapefiles...')
rgidf = []
for reg in df['RGI_REG'].unique():
    if reg == '19':
        continue  # we have no climate data in Antarctica
    fn = '*' + reg + '_rgi{}0_*.shp'.format(rgi_version)
    fs = list(sorted(glob(path.join(rgi_dir, '*', fn))))[0]
    sh = gpd.read_file(fs)
    rgidf.append(sh.loc[sh.RGIId.isin(rids)])
rgidf = pd.concat(rgidf)
rgidf.crs = sh.crs  # for geolocation
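Imports assumed by the fragment above (a sketch matching the names exactly as used):

import logging

from glob import glob
from os import path

import geopandas as gpd
import pandas as pd
from oggm import cfg, utils

log = logging.getLogger(__name__)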
Example #10
File: climate.py Project: MachineAi/oggm
def compute_ref_t_stars(gdirs):
    """ Detects the best t* for the reference glaciers.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """

    log.info('Compute the reference t* and mu* for WGMS glaciers')

    # Get ref glaciers (all glaciers with MB)
    flink, mbdatadir = utils.get_wgms_files()
    dfids = pd.read_csv(flink)['RGI_ID'].values

    # Reference glaciers only if in the list
    # TODO: we removed marine glaciers here. Is it ok?
    ref_gdirs = [
        g for g in gdirs
        if (g.rgi_id in dfids and g.terminus_type == 'Land-terminating')
    ]

    # Loop
    only_one = []  # start to store the glaciers with just one t*
    per_glacier = dict()
    for gdir in ref_gdirs:
        # all possible mus
        mu_candidates(gdir)
        # list of mus compatibles with refmb
        reff = os.path.join(mbdatadir, 'mbdata_' + gdir.rgi_id + '.csv')
        mbdf = pd.read_csv(reff).set_index('YEAR')
        t_star, res_bias = t_star_from_refmb(gdir, mbdf['ANNUAL_BALANCE'])

        # if we have just one candidate this is good
        if len(t_star) == 1:
            only_one.append(gdir.rgi_id)
        # this might be more than one, we'll have to select them later
        per_glacier[gdir.rgi_id] = (gdir, t_star, res_bias)

    # At least one of the X glaciers should have a single t*, otherwise we
    # don't know how to start
    if len(only_one) == 0:
        if os.path.basename(os.path.dirname(flink)) == 'test-workflow':
            # TODO: hardcoded stuff here, for the test workflow
            only_one.append('RGI40-11.00887')
            gdir, t_star, res_bias = per_glacier['RGI40-11.00887']
            per_glacier['RGI40-11.00887'] = (gdir, [t_star[-1]],
                                             [res_bias[-1]])
        else:
            raise RuntimeError("Didn't expect to be here.")

    log.info('%d out of %d have only one possible t*. Start from here',
             len(only_one), len(ref_gdirs))

    # Ok. now loop over the nearest glaciers until all have a unique t*
    while True:
        ids_left = [id for id in per_glacier.keys() if id not in only_one]
        if len(ids_left) == 0:
            break

        # Compute the summed distance to all glaciers with one t*
        distances = []
        for id in ids_left:
            gdir, t_star, res_bias = per_glacier[id]
            lon, lat = gdir.cenlon, gdir.cenlat
            ldis = 0.
            for id_o in only_one:
                ogdir, _, _ = per_glacier[id_o]
                ldis += utils.haversine(lon, lat, ogdir.cenlon, ogdir.cenlat)
            distances.append(ldis)

        # Take the shortest and choose the best t*
        gdir, t_star, res_bias = per_glacier[ids_left[np.argmin(distances)]]
        distances = []
        for tt in t_star:
            ldis = 0.
            for id_o in only_one:
                _, ot_star, _ = per_glacier[id_o]
                ldis += np.abs(tt - ot_star)
            distances.append(ldis)
        amin = np.argmin(distances)
        per_glacier[gdir.rgi_id] = (gdir, [t_star[amin]], [res_bias[amin]])
        only_one.append(gdir.rgi_id)

    # Write out the data
    rgis_ids, t_stars, biases, lons, lats = [], [], [], [], []
    for id, (gdir, t_star, res_bias) in per_glacier.items():
        rgis_ids.append(id)
        t_stars.append(t_star[0])
        biases.append(res_bias[0])
        lats.append(gdir.cenlat)
        lons.append(gdir.cenlon)
    df = pd.DataFrame(index=rgis_ids)
    df['tstar'] = t_stars
    df['bias'] = biases
    df['lon'] = lons
    df['lat'] = lats
    file = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
    df.sort_index().to_csv(file)
Example #11
def mb_calibration(rgi_version, baseline):
    """Run the mass balance calibration for the VAS model. RGI version and
    baseline climate must be given.

    Parameters
    ----------
    rgi_version : str
        Version (and subversion) of the RGI, e.g., '62'
    baseline : str
        'HISTALP' or 'CRU', name of the baseline climate

    """

    # initialize OGGM and set up the run parameters
    vascaling.initialize(logging_level='WORKFLOW')

    # LOCAL paths (where to write the OGGM run output)
    # dirname = 'VAS_ref_mb_{}_RGIV{}'.format(baseline, rgi_version)
    # wdir = utils.gettempdir(dirname, home=True, reset=True)
    # utils.mkdir(wdir, reset=True)
    # cfg.PATHS['working_dir'] = wdir

    # CLUSTER paths
    wdir = os.environ.get('WORKDIR', '')
    cfg.PATHS['working_dir'] = wdir

    # we are running the calibration ourselves
    cfg.PARAMS['run_mb_calibration'] = True
    # which baseline data are we using?
    cfg.PARAMS['baseline_climate'] = baseline
    # no need for intersects since this has an effect on the inversion only
    cfg.PARAMS['use_intersects'] = False
    # use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True
    # set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True
    # 10 is only for OGGM-VAS, OGGM needs 80 to run
    cfg.PARAMS['border'] = 80

    if baseline == 'HISTALP':
        # OGGM HISTALP PARAMETERS from Matthias Dusch
        # see https://oggm.org/2018/08/10/histalp-parameters/
        # cfg.PARAMS['prcp_scaling_factor'] = 1.75
        # cfg.PARAMS['temp_melt'] = -1.75
        # cfg.PARAMS['temp_all_solid'] = 0
        # cfg.PARAMS['prcp_default_gradient'] = 0

        # VAS HISTALP PARAMETERS from x-validation
        cfg.PARAMS['prcp_scaling_factor'] = 2.5
        cfg.PARAMS['temp_melt'] = -0.5
        cfg.PARAMS['temp_all_solid'] = 0
        cfg.PARAMS['prcp_default_gradient'] = 0

    elif baseline == 'CRU':
        # using the parameters from Marzeion et al. (2012)
        # cfg.PARAMS['prcp_scaling_factor'] = 2.5
        # cfg.PARAMS['temp_melt'] = 1
        # cfg.PARAMS['temp_all_solid'] = 3
        # cfg.PARAMS['prcp_default_gradient'] = 3e-4

        # using the parameters from Malles and Marzeion 2020
        cfg.PARAMS['prcp_scaling_factor'] = 3
        cfg.PARAMS['temp_melt'] = 0
        cfg.PARAMS['temp_all_solid'] = 4
        cfg.PARAMS['prcp_default_gradient'] = 4e-4

    # the next step is to get all the reference glaciers,
    # i.e. glaciers with mass balance measurements.

    # get the reference glacier ids (they are different for each RGI version)
    df, _ = utils.get_wgms_files()
    rids = df['RGI{}0_ID'.format(rgi_version[0])]

    # we can't do Antarctica
    rids = [rid for rid in rids if not ('-19.' in rid)]

    # For HISTALP only RGI reg 11.01 (ALPS)
    if baseline == 'HISTALP':
        rids = [rid for rid in rids if '-11' in rid]

    # initialize the glacier regions
    base_url = "https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.4/" \
               "L3-L5_files/CRU/elev_bands/qc3/pcp2.5/match_geod"
    # Go - get the pre-processed glacier directories
    gdirs = workflow.init_glacier_directories(rids, from_prepro_level=3,
                                              prepro_base_url=base_url,
                                              prepro_rgi_version=rgi_version)

    # Some glaciers in RGI Region 11 are not inside the HISTALP domain
    if baseline == 'HISTALP':
        gdirs = [gdir for gdir in gdirs if gdir.rgi_subregion == '11-01']

    # get reference glaciers with mass balance measurements
    gdirs = utils.get_ref_mb_glaciers(gdirs)

    # make a new dataframe with those (this takes a while)
    print('For RGIV{} we have {} candidate reference '
          'glaciers.'.format(rgi_version, len(gdirs)))

    # run climate tasks
    vascaling.compute_ref_t_stars(gdirs)
    # execute_entity_task(vascaling.local_t_star, gdirs)

    # we store the associated params
    mb_calib = gdirs[0].read_pickle('climate_info')['mb_calib_params']
    with open(os.path.join(wdir, 'mb_calib_params.json'), 'w') as fp:
        json.dump(mb_calib, fp)
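A hypothetical invocation of the function above, with argument values taken from its docstring:

if __name__ == '__main__':
    mb_calibration('62', 'CRU')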
Example #12
File: itmix.py Project: Enaith/oggm
def get_rgi_df(reset=False):
    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers

    df_rgi_file = os.path.expanduser('~/itmix_rgi_shp.pkl')
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_links.pkl')
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        rgidf = []
        _rgi_ids = []
        for i, row in df_itmix.iterrows():
            # read the rgi region
            rgi_shp = os.path.join(RGI_DIR, "*",
                                   row['rgi_reg'] + '_rgi50_*.shp')
            rgi_shp = list(glob.glob(rgi_shp))[0]
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row.T['rgi_parts_ids']
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
            _rgi_ids.extend(rgi_parts)

            # use the ITMIX shape where possible
            if row.name in ['Hellstugubreen', 'Freya', 'Aqqutikitsoq',
                            'Brewster', 'Kesselwandferner', 'NorthGlacier',
                            'SouthGlacier', 'Tasman', 'Unteraar',
                            'Washmawapta']:
                for shf in glob.glob(itmix_cfg.itmix_data_dir + '*/*/*_' +
                                     row.name + '*.shp'):
                    pass
                shp = salem.utils.read_shapefile(shf)
                if row.name == 'Unteraar':
                    shp = shp.iloc[[-1]]
                if 'LineString' == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0], 'geometry'] = shpg.Polygon(shp.iloc[0].geometry)
                assert len(shp) == 1
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], 'geometry'] = shp
                sel.loc[sel.index[0], 'Area'] = area_km2
            elif row.name == 'Urumqi':
                # ITMIX Urumqi is in fact two glaciers
                for shf in glob.glob(itmix_cfg.itmix_data_dir + '*/*/*_' +
                                     row.name + '*.shp'):
                    pass
                shp2 = salem.utils.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k], 'geometry'].contains(shp.centroid)
                    sel.loc[sel.index[k], 'geometry'] = shp
                    sel.loc[sel.index[k], 'Area'] = area_km2
                assert len(sel) == 2
            else:
                pass

            # add glacier name to the entity
            name = ['I:' + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'W-' + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        print('N WGMS before: {}'.format(len(wgms_df)))
        wgms_df = wgms_df.loc[~ wgms_df.RGI_ID.isin(_rgi_ids)]
        print('N WGMS after: {}'.format(len(wgms_df)))

        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = os.path.join(RGI_DIR, "*",
                                   reg + '_rgi50_*.shp')
            rgi_shp = list(glob.glob(rgi_shp))[0]
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['W:' + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            for n in name:
                if len(n) > 48:
                    raise ValueError('Glacier name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        _rgi_ids.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        print('N GTD before: {}'.format(len(gtd_df)))
        gtd_df = gtd_df.loc[~ gtd_df.RGI_ID.isin(_rgi_ids)]
        print('N GTD after: {}'.format(len(gtd_df)))

        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = os.path.join(RGI_DIR, "*",
                                   reg + '_rgi50_*.shp')
            rgi_shp = list(glob.glob(rgi_shp))[0]
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['G:' + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise ValueError('Glacier name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)

    return rgidf
Example #13
# Pre-download other files which will be needed later
utils.get_cru_cl_file()
utils.get_cru_file(var='tmp')
utils.get_cru_file(var='pre')

# Some globals for more control on what to run
RUN_GIS_mask = False
RUN_GIS_PREPRO = False  # run GIS pre-processing tasks (before climate)
RUN_CLIMATE_PREPRO = False  # run climate pre-processing tasks
RUN_INVERSION = False  # run bed inversion

# Read RGI file
rgidf = salem.read_shapefile(RGI_FILE, cached=True)

# get WGMS glaciers
flink, mbdatadir = utils.get_wgms_files()
ids_with_mb = pd.read_csv(flink)['RGI50_ID'].values

if PC:
    # Keep id's of glaciers in WGMS and GlathiDa V2
    keep_ids = ['RGI50-01.02228', 'RGI50-01.00037', 'RGI50-01.16316',
                'RGI50-01.00570', 'RGI50-01.22699']

    # Glaciers in the McNabb data base
    terminus_data_ids = ['RGI50-01.10689', 'RGI50-01.23642']

    keep_indexes = [((i in keep_ids) or (i in ids_with_mb) or
                     (i in terminus_data_ids)) for i in rgidf.RGIID]

    rgidf = rgidf.iloc[keep_indexes]
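Globals assumed by this fragment; the values below are placeholders for illustration only:

PC = True  # hypothetical switch: subset the glaciers for a local run
RGI_FILE = '/path/to/01_rgi50_Alaska.shp'  # placeholder path to the RGI shapefile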
Example #14
def mb_calibration(rgi_version, baseline):
    """ Run the mass balance calibration for the VAS model. RGI version and
    baseline cliamte must be given.

    :param rgi_version: int, RGI version
    :param baseline: str, baseline climate 'HISTALP' or 'CRU'
    """

    # initialize OGGM and set up the run parameters
    vascaling.initialize(logging_level='WORKFLOW')

    # local paths (where to write the OGGM run output)
    # dirname = 'VAS_ref_mb_{}_RGIV{}'.format(baseline, rgi_version)
    # wdir = utils.gettempdir(dirname, home=True, reset=True)
    # utils.mkdir(wdir, reset=True)
    wdir = os.environ['WORKDIR']
    cfg.PATHS['working_dir'] = wdir

    # we are running the calibration ourselves
    cfg.PARAMS['run_mb_calibration'] = True
    # which baseline data are we using?
    cfg.PARAMS['baseline_climate'] = baseline
    # no need for intersects since this has an effect on the inversion only
    cfg.PARAMS['use_intersects'] = False
    # use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True
    # set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    if baseline == 'HISTALP':
        # other params: see https://oggm.org/2018/08/10/histalp-parameters/
        # cfg.PARAMS['prcp_scaling_factor'] = 1.75
        # cfg.PARAMS['temp_melt'] = -1.75
        cfg.PARAMS['prcp_scaling_factor'] = 2.5
        cfg.PARAMS['temp_melt'] = -0.5
    elif baseline == 'CRU':
        # using the parameters from Marzeion et al. (2012)
        cfg.PARAMS['prcp_scaling_factor'] = 2.5
        cfg.PARAMS['temp_melt'] = 1
        cfg.PARAMS['temp_all_solid'] = 3

    # the next step is to get all the reference glaciers,
    # i.e. glaciers with mass balance measurements.

    # get the reference glacier ids (they are different for each RGI version)
    rgi_dir = utils.get_rgi_dir(version=rgi_version)
    df, _ = utils.get_wgms_files()
    rids = df['RGI{}0_ID'.format(rgi_version[0])]

    # we can't do Antarctica
    rids = [rid for rid in rids if not ('-19.' in rid)]

    # For HISTALP only RGI reg 11.01 (ALPS)
    if baseline == 'HISTALP' or True:
        rids = [rid for rid in rids if '-11' in rid]

    debug = False
    if debug:
        print("==================================\n"
              + "DEBUG MODE: only RGI60-11.00897\n"
              + "==================================")
        rids = [rid for rid in rids if '-11.00897' in rid]
        cfg.PARAMS['use_multiprocessing'] = False

    # make a new dataframe with those (this takes a while)
    print('Reading the RGI shapefiles...')
    rgidf = utils.get_rgi_glacier_entities(rids, version=rgi_version)
    print('For RGIV{} we have {} candidate reference '
          'glaciers.'.format(rgi_version, len(rgidf)))

    # initialize the glacier regions
    gdirs = workflow.init_glacier_directories(rgidf, reset=False, force=True)
    workflow.execute_entity_task(gis.define_glacier_region, gdirs)
    workflow.execute_entity_task(gis.glacier_masks, gdirs)

    # we need to know which period we have data for
    print('Process the climate data...')
    if baseline == 'CRU':
        execute_entity_task(tasks.process_cru_data, gdirs, print_log=False)
    elif baseline == 'HISTALP':
        # Some glaciers are not in Alps
        gdirs = [gdir for gdir in gdirs if gdir.rgi_subregion == '11-01']
        # cfg.PARAMS['continue_on_error'] = True
        execute_entity_task(tasks.process_histalp_data, gdirs, print_log=False,
                            y0=1850)
        # cfg.PARAMS['continue_on_error'] = False
    else:
        execute_entity_task(tasks.process_custom_climate_data,
                            gdirs, print_log=False)

    # get reference glaciers with mass balance measurements
    gdirs = utils.get_ref_mb_glaciers(gdirs)

    # keep only these glaciers
    rgidf = rgidf.loc[rgidf.RGIId.isin([g.rgi_id for g in gdirs])]

    # save to file
    rgidf.to_file(os.path.join(wdir, 'mb_ref_glaciers.shp'))
    print('For RGIV{} and {} we have {} reference glaciers'.format(rgi_version,
                                                                   baseline,
                                                                   len(rgidf)))

    # sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    # newly initialize glacier directories
    gdirs = workflow.init_glacier_directories(rgidf, reset=False, force=True)
    workflow.execute_entity_task(gis.define_glacier_region, gdirs)
    workflow.execute_entity_task(gis.glacier_masks, gdirs)

    # run climate tasks
    vascaling.compute_ref_t_stars(gdirs)
    execute_entity_task(vascaling.local_t_star, gdirs)

    # we store the associated params
    mb_calib = gdirs[0].read_pickle('climate_info')['mb_calib_params']
    with open(os.path.join(wdir, 'mb_calib_params.json'), 'w') as fp:
        json.dump(mb_calib, fp)
Example #15
cfg.PARAMS['optimize_inversion_params'] = False
cfg.PARAMS['invert_with_sliding'] = False
cfg.PARAMS['bed_shape'] = 'parabolic'

# Some globals for more control on what to run
RUN_GIS_PREPRO = False  # run GIS preprocessing tasks (before climate)
RUN_CLIMATE_PREPRO = False  # run climate preprocessing tasks
RUN_INVERSION = False  # run bed inversion
RUN_DYNAMICS = False  # run dynamics

# Read RGI file
rgidf = salem.utils.read_shapefile(RGI_FILE, cached=True)

# Select some glaciers
# Get ref glaciers (all glaciers with MB)
flink, mbdatadir = utils.get_wgms_files()
ids_with_mb = pd.read_csv(flink)['RGI_ID'].values
# get some tw-glaciers that we want to test inside alaska region
keep_ids = ['RGI50-01.20791', 'RGI50-01.00037', 'RGI50-01.10402']
keep_indexes = [((i in keep_ids) or (i in ids_with_mb)) for i in rgidf.RGIID]
rgidf = rgidf.iloc[keep_indexes]

# keep_ids = ['RGI50-01.20791']
# keep_indexes = [(i in keep_ids) for i in rgidf.RGIID]
# rgidf = rgidf.iloc[keep_indexes]

log.info('Number of glaciers: {}'.format(len(rgidf)))

# Download other files if needed
_ = utils.get_cru_file(var='tmp')
Example #16
def initialization_selection():
    # -------------
    # Initialization
    # -------------
    cfg.initialize()

    # working directories
    cfg.PATHS['working_dir'] = mbcfg.PATHS['working_dir']

    cfg.PATHS['rgi_version'] = mbcfg.PARAMS['rgi_version']

    # We are running the calibration ourselves
    cfg.PARAMS['run_mb_calibration'] = True

    # No need for intersects since this has an effect on the inversion only
    cfg.PARAMS['use_intersects'] = False

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # Set to True for operational runs
    # maybe also here?
    cfg.PARAMS['continue_on_error'] = False

    # set negative flux filtering to false. should be standard soon
    cfg.PARAMS['filter_for_neg_flux'] = False

    # Pre-download other files which will be needed later
    _ = utils.get_cru_file(var='tmp')
    _ = utils.get_cru_file(var='pre')
    rgi_dir = utils.get_rgi_dir(version=cfg.PATHS['rgi_version'])

    # Get the reference glacier ids (they are different for each RGI version)
    df, _ = utils.get_wgms_files()
    rids = df['RGI{}0_ID'.format(cfg.PATHS['rgi_version'])]

    # Make a new dataframe with those (this takes a while)
    rgidf = []
    for reg in df['RGI_REG'].unique():
        if reg == '19':
            continue  # we have no climate data in Antarctica
        if (mbcfg.PARAMS['region'] is not None
                and reg != mbcfg.PARAMS['region']):
            continue

        fn = '*' + reg + '_rgi{}0_*.shp'.format(cfg.PATHS['rgi_version'])
        fs = list(sorted(glob(os.path.join(rgi_dir, '*', fn))))[0]
        sh = gpd.read_file(fs)
        rgidf.append(sh.loc[sh.RGIId.isin(rids)])
    rgidf = pd.concat(rgidf)
    rgidf.crs = sh.crs  # for geolocation

    # reduce Europe to Histalp area (exclude Pyrenees, etc...)
    if mbcfg.PARAMS['histalp']:
        rgidf = rgidf.loc[(rgidf.CenLon >= 4) & (rgidf.CenLon < 20) &
                          (rgidf.CenLat >= 43) & (rgidf.CenLat < 47)]

    # We have to check which of them actually have enough mb data.
    # Let OGGM do it:
    gdirs = workflow.init_glacier_regions(rgidf)
    # We need to know which period we have data for

    if mbcfg.PARAMS['histalp']:
        cfg.PATHS['climate_file'] = mbcfg.PATHS['histalpfile']
        execute_entity_task(tasks.process_custom_climate_data, gdirs)
    else:
        execute_entity_task(tasks.process_cru_data, gdirs, print_log=False)

    gdirs = utils.get_ref_mb_glaciers(gdirs)
    # Keep only these
    rgidf = rgidf.loc[rgidf.RGIId.isin([g.rgi_id for g in gdirs])]

    # Save
    rgidf.to_file(os.path.join(cfg.PATHS['working_dir'],
                               'mb_ref_glaciers.shp'))

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    # Go - initialize working directories
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)

    return gdirs
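Usage sketch: the function returns the initialized reference glacier directories, ready for the calibration tasks that follow (it assumes mbcfg has been configured beforehand):

gdirs = initialization_selection()
print('Prepared {} reference glacier directories'.format(len(gdirs)))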
Example #18
def mb_calibration(rgi_version, baseline):
    """ Run the mass balance calibration for the VAS model. RGI version and
    baseline cliamte must be given.

    :param rgi_version: int, RGI version
    :param baseline: str, baseline climate 'HISTALP' or 'CRU'
    """

    # initialize OGGM and set up the run parameters
    vascaling.initialize(logging_level='WORKFLOW')

    # LOCAL paths (where to write the OGGM run output)
    # dirname = 'VAS_ref_mb_{}_RGIV{}'.format(baseline, rgi_version)
    # wdir = utils.gettempdir(dirname, home=True, reset=True)
    # utils.mkdir(wdir, reset=True)
    # cfg.PATHS['working_dir'] = wdir

    # CLUSTER paths
    wdir = os.environ.get('WORKDIR', '')
    cfg.PATHS['working_dir'] = wdir

    # we are running the calibration ourselves
    cfg.PARAMS['run_mb_calibration'] = True
    # which baseline data are we using?
    cfg.PARAMS['baseline_climate'] = baseline
    # no need for intersects since this has an effect on the inversion only
    cfg.PARAMS['use_intersects'] = False
    # use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True
    # set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True
    # 10 is only for OGGM-VAS, OGGM needs 80 to run
    cfg.PARAMS['border'] = 80

    if baseline == 'HISTALP':
        # other params: see https://oggm.org/2018/08/10/histalp-parameters/
        # cfg.PARAMS['prcp_scaling_factor'] = 1.75
        # cfg.PARAMS['temp_melt'] = -1.75
        cfg.PARAMS['prcp_scaling_factor'] = 2.5
        cfg.PARAMS['temp_melt'] = -0.5
    elif baseline == 'CRU':
        # using the parameters from Marzeion et al. (2012)
        # cfg.PARAMS['prcp_scaling_factor'] = 2.5
        # cfg.PARAMS['temp_melt'] = 1
        # cfg.PARAMS['temp_all_solid'] = 3
        # using the parameters from Malles and Marzeion 2020
        cfg.PARAMS['prcp_scaling_factor'] = 3
        cfg.PARAMS['temp_melt'] = 0
        cfg.PARAMS['temp_all_solid'] = 4
        # cfg.PARAMS['prcp_gradient'] = 4

    # the next step is to get all the reference glaciers,
    # i.e. glaciers with mass balance measurements.

    # get the reference glacier ids (they are different for each RGI version)
    df, _ = utils.get_wgms_files()
    rids = df['RGI{}0_ID'.format(rgi_version[0])]

    # we can't do Antarctica
    rids = [rid for rid in rids if not ('-19.' in rid)]

    # For HISTALP only RGI reg 11.01 (ALPS)
    if baseline == 'HISTALP':
        rids = [rid for rid in rids if '-11' in rid]

    # make a new dataframe with those (this takes a while)
    print('Reading the RGI shapefiles...')
    rgidf = utils.get_rgi_glacier_entities(rids, version=rgi_version)
    print('For RGIV{} we have {} candidate reference '
          'glaciers.'.format(rgi_version, len(rgidf)))

    # initialize the glacier regions
    base_url = 'https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.4/' \
               'L3-L5_files/RGIV62_fleb_qc3_CRU_pcp2.5'
    # Go - get the pre-processed glacier directories
    gdirs = workflow.init_glacier_directories(rids,
                                              from_prepro_level=3,
                                              prepro_base_url=base_url,
                                              prepro_rgi_version=rgi_version)

    # Some glaciers in RGI Region 11 are not inside the HISTALP domain
    if baseline == 'HISTALP':
        gdirs = [gdir for gdir in gdirs if gdir.rgi_subregion == '11-01']

    # get reference glaciers with mass balance measurements
    gdirs = utils.get_ref_mb_glaciers(gdirs)

    # keep only these glaciers
    rgidf = rgidf.loc[rgidf.RGIId.isin([g.rgi_id for g in gdirs])]

    # save to file
    rgidf.to_file(os.path.join(wdir, 'mb_ref_glaciers.shp'))
    print('For RGIV{} and {} we have {} reference glaciers'.format(
        rgi_version, baseline, len(rgidf)))

    # sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    # newly initialize glacier directories
    gdirs = workflow.init_glacier_directories(rgidf, reset=False, force=True)
    workflow.execute_entity_task(gis.define_glacier_region, gdirs)
    workflow.execute_entity_task(gis.glacier_masks, gdirs)

    # run climate tasks
    vascaling.compute_ref_t_stars(gdirs)
    execute_entity_task(vascaling.local_t_star, gdirs)

    # we store the associated params
    mb_calib = gdirs[0].read_pickle('climate_info')['mb_calib_params']
    with open(os.path.join(wdir, 'mb_calib_params.json'), 'w') as fp:
        json.dump(mb_calib, fp)
Example #19
File: climate.py Project: OGGM/oggm
def compute_ref_t_stars(gdirs):
    """ Detects the best t* for the reference glaciers.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """

    log.info('Compute the reference t* and mu* for WGMS glaciers')

    # Reference glaciers only if in the list and period is good
    ref_gdirs = _get_ref_glaciers(gdirs)

    # Loop
    only_one = []  # start to store the glaciers with just one t*
    per_glacier = dict()
    for gdir in ref_gdirs:
        # all possible mus
        mu_candidates(gdir)
        # list of mus compatibles with refmb
        mbdf = gdir.get_ref_mb_data()['ANNUAL_BALANCE']
        t_star, res_bias, prcp_fac = t_star_from_refmb(gdir, mbdf)
        # store the mb (could be useful later)
        gdir.write_pickle(mbdf, 'ref_massbalance')

        # if we have just one candidate this is good
        if len(t_star) == 1:
            only_one.append(gdir.rgi_id)
        # this might be more than one, we'll have to select them later
        per_glacier[gdir.rgi_id] = (gdir, t_star, res_bias, prcp_fac)

    # At least one of the X glaciers should have a single t*, otherwise we
    # don't know how to start
    if len(only_one) == 0:
        flink, mbdatadir = utils.get_wgms_files()
        if os.path.basename(os.path.dirname(flink)) == 'test-workflow':
            # TODO: hardcoded stuff here, for the test workflow
            only_one.append('RGI40-11.00897')
            gdir, t_star, res_bias, prcp_fac = per_glacier['RGI40-11.00897']
            per_glacier['RGI40-11.00897'] = (gdir, [t_star[-1]],
                                             [res_bias[-1]], prcp_fac)
        else:
            raise RuntimeError('We need at least one glacier with one '
                               'tstar only.')

    log.info('%d out of %d have only one possible t*. Start from here',
             len(only_one), len(ref_gdirs))

    # Ok. now loop over the nearest glaciers until all have a unique t*
    while True:
        ids_left = [id for id in per_glacier.keys() if id not in only_one]
        if len(ids_left) == 0:
            break

        # Compute the summed distance to all glaciers with one t*
        distances = []
        for id in ids_left:
            gdir = per_glacier[id][0]
            lon, lat = gdir.cenlon, gdir.cenlat
            ldis = 0.
            for id_o in only_one:
                ogdir = per_glacier[id_o][0]
                ldis += utils.haversine(lon, lat, ogdir.cenlon, ogdir.cenlat)
            distances.append(ldis)

        # Take the shortest and choose the best t*
        pg = per_glacier[ids_left[np.argmin(distances)]]
        gdir, t_star, res_bias, prcp_fac = pg
        distances = []
        for tt in t_star:
            ldis = 0.
            for id_o in only_one:
                _, ot_star, _, _ = per_glacier[id_o]
                ldis += np.abs(tt - ot_star)
            distances.append(ldis)
        amin = np.argmin(distances)
        per_glacier[gdir.rgi_id] = (gdir, [t_star[amin]], [res_bias[amin]],
                                    prcp_fac)
        only_one.append(gdir.rgi_id)

    # Write out the data
    rgis_ids, t_stars, prcp_facs, biases, lons, lats = [], [], [], [], [], []
    for id, (gdir, t_star, res_bias, prcp_fac) in per_glacier.items():
        rgis_ids.append(id)
        t_stars.append(t_star[0])
        prcp_facs.append(prcp_fac)
        biases.append(res_bias[0])
        lats.append(gdir.cenlat)
        lons.append(gdir.cenlon)
    df = pd.DataFrame(index=rgis_ids)
    df['lon'] = lons
    df['lat'] = lats
    df['tstar'] = t_stars
    df['prcp_fac'] = prcp_facs
    df['bias'] = biases
    file = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
    df.sort_index().to_csv(file)
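The while-loop above is the core of the routine: each glacier with several t* candidates inherits the candidate that best matches the t* of the glaciers already resolved, starting with its nearest neighbours. Below is a self-contained sketch of that selection logic on dummy data; the names and the plain haversine helper are illustrative, not OGGM API:

import numpy as np

def haversine(lon1, lat1, lon2, lat2):
    """Great-circle distance in km (spherical Earth)."""
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
    dlon, dlat = lon2 - lon1, lat2 - lat1
    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    return 6371. * 2 * np.arcsin(np.sqrt(a))

# resolved glaciers: rgi_id -> (lon, lat, t_star)
resolved = {'A': (10.0, 46.0, 1982), 'B': (10.5, 46.2, 1985)}
# unresolved glaciers: rgi_id -> (lon, lat, list of t* candidates)
unresolved = {'C': (10.2, 46.1, [1950, 1983, 2001]),
              'D': (12.0, 47.0, [1960, 1990])}

while unresolved:
    # take the unresolved glacier with the smallest summed distance
    # to all glaciers that already have a unique t*
    def summed_dist(item):
        lon, lat = item[1][0], item[1][1]
        return sum(haversine(lon, lat, olon, olat)
                   for olon, olat, _ in resolved.values())
    rid, (lon, lat, cands) = min(unresolved.items(), key=summed_dist)
    # choose the candidate closest (in summed years) to the resolved t*
    best = min(cands, key=lambda tt: sum(abs(tt - ot)
                                         for _, _, ot in resolved.values()))
    resolved[rid] = (lon, lat, best)
    del unresolved[rid]

print(resolved)  # 'C' resolves to 1983 first, then 'D' to 1990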
Example #20
def prepare_divides(rgi_f):
    """Processes the rgi file and writes the intersects to OUTDIR"""

    rgi_reg = os.path.basename(rgi_f).split('_')[0]

    print('Start RGI reg ' + rgi_reg + ' ...')
    start_time = time.time()

    wgms, _ = get_wgms_files()
    f = glob(INDIR_DIVIDES + '*/*-' + rgi_reg + '.shp')[0]

    df = gpd.read_file(f)
    rdf = gpd.read_file(rgi_f)

    # Read glacier attrs
    key2 = {'0': 'Land-terminating',
            '1': 'Marine-terminating',
            '2': 'Lake-terminating',
            '3': 'Dry calving',
            '4': 'Regenerated',
            '5': 'Shelf-terminating',
            '9': 'Not assigned',
            }
    TerminusType = [key2[gtype[1]] for gtype in df.GlacType]
    IsTidewater = np.array([ttype in ['Marine-terminating', 'Lake-terminating']
                            for ttype in TerminusType])

    # Plots
    # dfref = df.loc[df.RGIId.isin(wgms.RGI50_ID)]
    # for gid in np.unique(dfref.GLIMSId):
    #     dfs = dfref.loc[dfref.GLIMSId == gid]
    #     dfs.plot(cmap='Set3', linestyle='-', linewidth=5);

    # Filter
    df = df.loc[~IsTidewater]
    df = df.loc[~df.RGIId.isin(wgms.RGI50_ID)]

    df['CenLon'] = pd.to_numeric(df['CenLon'])
    df['CenLat'] = pd.to_numeric(df['CenLat'])
    df['Area'] = pd.to_numeric(df['Area'])

    # Correct areas and related attributes
    n_gl_before = len(df)
    divided_ids = []
    for rid in np.unique(df.RGIId):
        sdf = df.loc[df.RGIId == rid].copy()
        srdf = rdf.loc[rdf.RGIId == rid]

        # Correct Area
        sdf.Area = np.array([float(a) for a in sdf.Area])

        geo_is_ok = []
        new_geo = []
        for g, a in zip(sdf.geometry, sdf.Area):
            if a < 0.01 * 1e6:  # skip divides smaller than 0.01 km2 (areas are in m2)
                geo_is_ok.append(False)
                continue
            try:
                new_geo.append(multi_to_poly(g))
                geo_is_ok.append(True)
            except Exception:
                geo_is_ok.append(False)

        sdf = sdf.loc[geo_is_ok]
        if len(sdf) < 2:
            # print(rid + ' is too small or has no valid divide...')
            df = df[df.RGIId != rid]
            continue

        area_km = sdf.Area * 1e-6  # m2 -> km2

        cor_factor = srdf.Area.values / np.sum(area_km)
        if cor_factor > 1.2 or cor_factor < 0.8:
            # print(rid + ' is not OK...')
            df = df[df.RGIId != rid]
            continue
        area_km = cor_factor * area_km

        # Correct Centroid
        cenlon = [g.centroid.xy[0][0] for g in sdf.geometry]
        cenlat = [g.centroid.xy[1][0] for g in sdf.geometry]

        # ID
        new_id = [rid + '_d{:02}'.format(i + 1) for i in range(len(sdf))]

        # Write
        df.loc[sdf.index, 'Area'] = area_km
        df.loc[sdf.index, 'CenLon'] = cenlon
        df.loc[sdf.index, 'CenLat'] = cenlat
        df.loc[sdf.index, 'RGIId'] = new_id
        df.loc[sdf.index, 'geometry'] = new_geo

        divided_ids.append(rid)

    n_gl_after = len(df)

    # We make three data dirs: divides only, divides into rgi, divides + RGI
    bn = os.path.basename(rgi_f)
    bd = os.path.basename(os.path.dirname(rgi_f))
    base_dir_1 = OUTDIR_DIVIDES + '/RGIV5_DividesOnly/' + bd
    base_dir_2 = OUTDIR_DIVIDES + '/RGIV5_Corrected/' + bd
    base_dir_3 = OUTDIR_DIVIDES + '/RGIV5_OrigAndDivides/' + bd
    mkdir(base_dir_1, reset=True)
    mkdir(base_dir_2, reset=True)
    mkdir(base_dir_3, reset=True)

    df.to_file(os.path.join(base_dir_1, bn))

    dfa = pd.concat([df, rdf]).sort_values('RGIId')
    dfa.to_file(os.path.join(base_dir_3, bn))

    dfa = dfa.loc[~dfa.RGIId.isin(divided_ids)]
    dfa.to_file(os.path.join(base_dir_2, bn))

    print('RGI reg ' + rgi_reg +
          ' took {:.2f} seconds. We had to remove '
          '{} divides'.format(time.time() - start_time,
                              n_gl_before - n_gl_after))
    return
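`multi_to_poly` is used above but not defined in this snippet. A plausible stand-in, under the assumption that it keeps the largest part of a MultiPolygon, passes Polygons through unchanged, and raises on anything else so the caller can flag the divide as invalid (the actual OGGM helper may differ):

import shapely.geometry as shpg

def multi_to_poly(geometry):
    """Reduce a geometry to a single Polygon, or raise (sketch only)."""
    if geometry.type == 'Polygon':
        return geometry
    if geometry.type == 'MultiPolygon':
        # assumption: keep the largest part and drop the slivers
        return max(geometry.geoms, key=lambda p: p.area)
    raise ValueError('Cannot make a Polygon out of ' + geometry.type)

square = shpg.Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
assert multi_to_poly(square) is square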
Example #21
File: itmix.py Project: alexjarosch/oggm
def get_rgi_df(reset=False):
    """This function prepares a kind of `fake` RGI file, with the updated
    geometries for ITMIX.
    """

    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers
    RGI_DIR = utils.get_rgi_dir()

    df_rgi_file = os.path.join(DATA_DIR, "itmix", "itmix_rgi_shp.pkl")
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, "itmix", "itmix_rgi_links.pkl")
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        divides = []
        rgidf = []
        _rgi_ids_for_overwrite = []
        for i, row in df_itmix.iterrows():

            log.info("Prepare RGI df for " + row.name)

            # read the rgi region
            rgi_shp = find_path(RGI_DIR, row["rgi_reg"] + "_rgi50_*.shp")
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row.T["rgi_parts_ids"]
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()

            # use the ITMIX shape where possible
            if row.name in [
                "Hellstugubreen",
                "Freya",
                "Aqqutikitsoq",
                "Brewster",
                "Kesselwandferner",
                "NorthGlacier",
                "SouthGlacier",
                "Tasman",
                "Unteraar",
                "Washmawapta",
                "Columbia",
            ]:
                shf = find_path(SEARCHD, "*_" + row.name + "*.shp")
                shp = salem.utils.read_shapefile(shf)
                if row.name == "Unteraar":
                    shp = shp.iloc[[-1]]
                if "LineString" == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0], "geometry"] = shpg.Polygon(shp.iloc[0].geometry)
                if shp.iloc[0].geometry.type == "MultiLineString":
                    # Columbia
                    geometry = shp.iloc[0].geometry
                    parts = list(geometry)
                    for p in parts:
                        assert p.type == "LineString"
                    exterior = shpg.Polygon(parts[0])
                    # let's assume that all other polygons are in fact interiors
                    interiors = []
                    for p in parts[1:]:
                        assert exterior.contains(p)
                        interiors.append(p)
                    geometry = shpg.Polygon(parts[0], interiors)
                    assert "Polygon" in geometry.type
                    shp.loc[shp.index[0], "geometry"] = geometry

                assert len(shp) == 1
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], "geometry"] = shp
                sel.loc[sel.index[0], "Area"] = area_km2
            elif row.name == "Urumqi":
                # ITMIX Urumqi is in fact two glaciers
                shf = find_path(SEARCHD, "*_" + row.name + "*.shp")
                shp2 = salem.utils.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k], "geometry"].contains(shp.centroid)
                    sel.loc[sel.index[k], "geometry"] = shp
                    sel.loc[sel.index[k], "Area"] = area_km2
                assert len(sel) == 2
            elif len(rgi_parts) > 1:
                # Ice-caps. Make divides
                # First we gather all the parts:
                sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
                # Make the multipolygon for the record
                multi = shpg.MultiPolygon([g for g in sel.geometry])
                # update the RGI attributes. We take a dummy rgi ID
                new_area = np.sum(sel.Area)
                found = False
                for i in range(len(sel)):
                    tsel = sel.iloc[[i]].copy()
                    if "Multi" in tsel.loc[tsel.index[0], "geometry"].type:
                        continue
                    else:
                        found = True
                        sel = tsel
                        break
                if not found:
                    raise RuntimeError()

                inif = 0.0
                add = 1e-5
                if row.name == "Devon":
                    inif = 0.001
                    add = 1e-4
                while True:
                    buff = multi.buffer(inif)
                    if "Multi" in buff.type:
                        inif += add
                    else:
                        break
                x, y = multi.centroid.xy
                if "Multi" in buff.type:
                    raise RuntimeError
                sel.loc[sel.index[0], "geometry"] = buff
                sel.loc[sel.index[0], "Area"] = new_area
                sel.loc[sel.index[0], "CenLon"] = np.asarray(x)[0]
                sel.loc[sel.index[0], "CenLat"] = np.asarray(y)[0]

                # Divides db
                div_sel = dict()
                for k, v in sel.iloc[0].iteritems():
                    if k == "geometry":
                        div_sel[k] = multi
                    elif k == "RGIId":
                        div_sel["RGIID"] = v
                    else:
                        div_sel[k] = v
                divides.append(div_sel)
            else:
                pass

            # add glacier name to the entity
            name = ["I:" + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = "W-" + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = "G-" + name[z]
            sel.loc[:, "Name"] = name
            rgidf.append(sel)

            # Add divides to the original one
            adf = pd.DataFrame(divides)
            adf.to_pickle(cfg.PATHS["itmix_divs"])

        log.info("N glaciers ITMIX: {}".format(len(rgidf)))

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        wgms_df = wgms_df.loc[~wgms_df.RGI_ID.isin(_rgi_ids_for_overwrite)]

        log.info("N glaciers WGMS: {}".format(len(wgms_df)))
        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split("-")[1].split(".")[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + "_rgi50_*.shp")
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = row.NAME.replace("/", "or").replace(".", "").replace(" ", "-")
            name = ["W:" + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = "G-" + name[z]
            for n in name:
                if len(n) > 48:
                    raise RuntimeError("Name too long: " + n)
            sel.loc[:, "Name"] = name
            rgidf.append(sel)

        _rgi_ids_for_overwrite.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        gtd_df = gtd_df.loc[~gtd_df.RGI_ID.isin(_rgi_ids_for_overwrite)]
        log.info("N glaciers GTD: {}".format(len(gtd_df)))

        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split("-")[1].split(".")[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + "_rgi50_*.shp")
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = row.NAME.replace("/", "or").replace(".", "").replace(" ", "-")
            name = ["G:" + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise RuntimeError("Name too long: " + n)
            sel.loc[:, "Name"] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)

    return rgidf
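Typical usage, assuming DATA_DIR, the ITMIX link files and the RGI / WGMS / GlaThiDa data are all in place (the first call builds itmix_rgi_shp.pkl, later calls reuse it):

rgidf = get_rgi_df()            # cached read after the first run
rgidf = get_rgi_df(reset=True)  # force a rebuild of itmix_rgi_shp.pkl
print(len(rgidf))
print(rgidf.Name.head())  # prefixed names such as 'I:...', 'W:...', 'G-I:...'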