Example #1
    def test_download_rgi(self):

        tmp = cfg.PATHS['rgi_dir']
        cfg.PATHS['rgi_dir'] = os.path.join(TEST_DIR, 'rgi_extract')

        of = utils.get_rgi_dir()
        of = os.path.join(of, '01_rgi50_Alaska', '01_rgi50_Alaska.shp')
        self.assertTrue(os.path.exists(of))

        cfg.PATHS['rgi_dir'] = tmp
Example #2
    def test_download_rgi(self):

        cfg.initialize()

        tmp = cfg.PATHS['rgi_dir']
        cfg.PATHS['rgi_dir'] = TEST_DIR

        of = utils.get_rgi_dir()
        of = os.path.join(of, '01_rgi50_Alaska', '01_rgi50_Alaska.shp')
        self.assertTrue(os.path.exists(of))

        cfg.PATHS['rgi_dir'] = tmp
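
Both versions of this test restore `cfg.PATHS['rgi_dir']` manually, so a failing assertion would leave the global config pointing at the test directory. A minimal sketch of the same test using `try`/`finally` (all names taken from the snippets above):

    def test_download_rgi(self):

        cfg.initialize()

        tmp = cfg.PATHS['rgi_dir']
        cfg.PATHS['rgi_dir'] = os.path.join(TEST_DIR, 'rgi_extract')
        try:
            of = utils.get_rgi_dir()
            of = os.path.join(of, '01_rgi50_Alaska', '01_rgi50_Alaska.shp')
            self.assertTrue(os.path.exists(of))
        finally:
            # Restore the global path even if the assertion fails
            cfg.PATHS['rgi_dir'] = tmp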
Example #3
File: itmix.py  Project: JohannesUIBK/oggm
def get_rgi_df(reset=False):
    """This function prepares a kind of `fake` RGI file, with the updated
    geometries for ITMIX.
    """

    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers
    RGI_DIR = utils.get_rgi_dir()

    df_rgi_file = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_shp.pkl')
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_links.pkl')
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        divides = []
        rgidf = []
        _rgi_ids_for_overwrite = []
        for i, row in df_itmix.iterrows():

            log.info('Prepare RGI df for ' + row.name)

            # read the rgi region
            rgi_shp = find_path(RGI_DIR, row['rgi_reg'] + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row['rgi_parts_ids']
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()

            # use the ITMIX shape where possible
            if row.name in ['Hellstugubreen', 'Freya', 'Aqqutikitsoq',
                            'Brewster', 'Kesselwandferner', 'NorthGlacier',
                            'SouthGlacier', 'Tasman', 'Unteraar',
                            'Washmawapta', 'Columbia']:
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp = salem.read_shapefile(shf)
                if row.name == 'Unteraar':
                    shp = shp.iloc[[-1]]
                if 'LineString' == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0], 'geometry'] = shpg.Polygon(shp.iloc[0].geometry)
                if shp.iloc[0].geometry.type == 'MultiLineString':
                    # Columbia
                    geometry = shp.iloc[0].geometry
                    parts = list(geometry)
                    for p in parts:
                        assert p.type == 'LineString'
                    exterior = shpg.Polygon(parts[0])
                    # let's assume that all other polygons are in fact interiors
                    interiors = []
                    for p in parts[1:]:
                        assert exterior.contains(p)
                        interiors.append(p)
                    geometry = shpg.Polygon(parts[0], interiors)
                    assert 'Polygon' in geometry.type
                    shp.loc[shp.index[0], 'geometry'] = geometry

                assert len(shp) == 1
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], 'geometry'] = shp
                sel.loc[sel.index[0], 'Area'] = area_km2
            elif row.name == 'Urumqi':
                # ITMIX Urumqi is in fact two glaciers
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp2 = salem.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k], 'geometry'].contains(shp.centroid)
                    sel.loc[sel.index[k], 'geometry'] = shp
                    sel.loc[sel.index[k], 'Area'] = area_km2
                assert len(sel) == 2
            elif len(rgi_parts) > 1:
                # Ice-caps. Make divides
                # First we gather all the parts:
                sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
                # Make the multipolygon for the record
                multi = shpg.MultiPolygon([g for g in sel.geometry])
                # update the RGI attributes. We take a dummy rgi ID
                new_area = np.sum(sel.Area)
                found = False
                for i in range(len(sel)):
                    tsel = sel.iloc[[i]].copy()
                    if 'Multi' in tsel.loc[tsel.index[0], 'geometry'].type:
                        continue
                    else:
                        found = True
                        sel = tsel
                        break
                if not found:
                    raise RuntimeError()

                inif = 0.
                add = 1e-5
                if row.name == 'Devon':
                    inif = 0.001
                    add = 1e-4
                while True:
                    buff = multi.buffer(inif)
                    if 'Multi' in buff.type:
                        inif += add
                    else:
                        break
                x, y = multi.centroid.xy
                if 'Multi' in buff.type:
                    raise RuntimeError
                sel.loc[sel.index[0], 'geometry'] = buff
                sel.loc[sel.index[0], 'Area'] = new_area
                sel.loc[sel.index[0], 'CenLon'] = np.asarray(x)[0]
                sel.loc[sel.index[0], 'CenLat'] = np.asarray(y)[0]

                # Divides db
                div_sel = dict()
                for k, v in sel.iloc[0].items():
                    if k == 'geometry':
                        div_sel[k] = multi
                    elif k == 'RGIId':
                        div_sel['RGIID'] = v
                    else:
                        div_sel[k] = v
                divides.append(div_sel)
            else:
                pass

            # add glacier name to the entity
            name = ['I:' + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'W-' + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

            # Add divides to the original one
            adf = pd.DataFrame(divides)
            adf.to_pickle(cfg.PATHS['itmix_divs'])

        log.info('N glaciers ITMIX: {}'.format(len(rgidf)))

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        wgms_df = wgms_df.loc[~ wgms_df.RGI_ID.isin(_rgi_ids_for_overwrite)]

        log.info('N glaciers WGMS: {}'.format(len(wgms_df)))
        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['W:' + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        _rgi_ids_for_overwrite.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        gtd_df = gtd_df.loc[~ gtd_df.RGI_ID.isin(_rgi_ids_for_overwrite)]
        log.info('N glaciers GTD: {}'.format(len(gtd_df)))

        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['G:' + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)

    return rgidf
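
A hedged usage sketch for the function above (it assumes `DATA_DIR` and the ITMIX pickle files are in place, as the snippet requires):

# The first call builds the combined ITMIX/WGMS/GlaThiDa dataframe and
# pickles it; subsequent calls simply reload the pickle.
rgidf = get_rgi_df()
log.info('Total glaciers in the combined dataframe: {}'.format(len(rgidf)))

# Force a rebuild after the input link files change.
rgidf = get_rgi_df(reset=True)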
Example #4
File: run_rgi_region.py  Project: OGGM/oggm
utils.mkdir(cfg.PATHS['topo_dir'])
utils.mkdir(cfg.PATHS['cru_dir'])
utils.mkdir(cfg.PATHS['rgi_dir'])

# Use multiprocessing?
cfg.PARAMS['use_multiprocessing'] = True
cfg.CONTINUE_ON_ERROR = True

# Other params
cfg.PARAMS['border'] = 80
cfg.PARAMS['temp_use_local_gradient'] = False
cfg.PARAMS['invert_with_sliding'] = False
cfg.PARAMS['bed_shape'] = 'mixed'
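
# Assumption (not in the original excerpt): `rgi_reg` used below is the
# zero-padded RGI region code selected for this run, e.g. Alaska.
rgi_reg = '01'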

# Download RGI files
rgi_dir = utils.get_rgi_dir()
rgi_shp = list(glob(os.path.join(rgi_dir, '*', rgi_reg + '_rgi50_*.shp')))
assert len(rgi_shp) == 1
rgidf = gpd.read_file(rgi_shp[0])

log.info('Number of glaciers: {}'.format(len(rgidf)))

# Download other files if needed
_ = utils.get_cru_file(var='tmp')
_ = utils.get_cru_file(var='pre')
_ = utils.get_demo_file('Hintereisferner.shp')

# Go - initialize working directories
# gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)
gdirs = workflow.init_glacier_regions(rgidf)
Example #5
utils.mkdir(cfg.PATHS['cru_dir'])
utils.mkdir(cfg.PATHS['rgi_dir'])
utils.mkdir(cfg.PATHS['tmp_dir'])

# Use multiprocessing?
cfg.PARAMS['use_multiprocessing'] = True
cfg.CONTINUE_ON_ERROR = False

# Read in the Benchmark RGI file
rgi_pkl_path = utils.aws_file_download('rgi_benchmark.pkl')
rgidf = pd.read_pickle(rgi_pkl_path)

# Remove glaciers causing issues
rgidf = rgidf.loc[~rgidf.RGIId.isin(['RGI50-11.00291', 'RGI50-03.02479'])]

_ = utils.get_rgi_dir()  # make sure the RGI files are downloaded

log.info('Number of glaciers: {}'.format(len(rgidf)))

# Go - initialize working directories
# gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)
gdirs = workflow.init_glacier_regions(rgidf)

# Prepro tasks
task_list = [
    tasks.glacier_masks,
    tasks.compute_centerlines,
    tasks.compute_downstream_lines,
    tasks.catchment_area,
    tasks.initialize_flowlines,
    tasks.catchment_width_geom,
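
The task list in this excerpt is cut off. Once assembled, such a list is typically executed over every glacier directory; a minimal sketch, following the `workflow.execute_entity_task` pattern used in the other examples on this page:

for task in task_list:
    workflow.execute_entity_task(task, gdirs)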
Example #6
cfg.PARAMS['baseline_climate'] = baseline

# Use multiprocessing?
cfg.PARAMS['use_multiprocessing'] = True

# Set to True for operational runs - here we want all glaciers to run
cfg.PARAMS['continue_on_error'] = False

if baseline == 'HISTALP':
    # Other params: see https://oggm.org/2018/08/10/histalp-parameters/
    cfg.PARAMS['baseline_y0'] = 1850
    cfg.PARAMS['prcp_scaling_factor'] = 1.75
    cfg.PARAMS['temp_melt'] = -1.75

# Get the reference glacier ids (they are different for each RGI version)
rgi_dir = utils.get_rgi_dir(version=rgi_version)
df, _ = utils.get_wgms_files()
rids = df['RGI{}0_ID'.format(rgi_version[0])]

# We can't do Antarctica
rids = [rid for rid in rids if not ('-19.' in rid)]

# For HISTALP only RGI reg 11
if baseline == 'HISTALP':
    rids = [rid for rid in rids if '-11.' in rid]

# Make a new dataframe with those (this takes a while)
log.info('Reading the RGI shapefiles...')
rgidf = utils.get_rgi_glacier_entities(rids, version=rgi_version)
log.info('For RGIV{} we have {} candidate reference '
         'glaciers.'.format(rgi_version, len(rgidf)))
Example #7
cfg.PARAMS['filter_for_neg_flux'] = False

# Use multiprocessing?
cfg.PARAMS['use_multiprocessing'] = True

# Set to True for operational runs
cfg.PARAMS['continue_on_error'] = False

if baseline == 'HISTALP':
    # Other params: see https://oggm.org/2018/08/10/histalp-parameters/
    cfg.PARAMS['baseline_y0'] = 1850
    cfg.PARAMS['prcp_scaling_factor'] = 1.75
    cfg.PARAMS['temp_melt'] = -1.75

# Get the reference glacier ids (they are different for each RGI version)
rgi_dir = utils.get_rgi_dir(version=rgi_version)
df, _ = utils.get_wgms_files()
rids = df['RGI{}0_ID'.format(rgi_version[0])]

# We can't do Antarctica
rids = [rid for rid in rids if not ('-19.' in rid)]

# For HISTALP only RGI reg 11
if baseline == 'HISTALP':
    rids = [rid for rid in rids if '-11.' in rid]

# Make a new dataframe with those (this takes a while)
log.info('Reading the RGI shapefiles...')
rgidf = utils.get_rgi_glacier_entities(rids, version=rgi_version)
log.info('For RGIV{} we have {} candidate reference glaciers.'.format(
    rgi_version, len(rgidf)))
Example #8
def initialization_selection():
    # -------------
    # Initialization
    # -------------
    cfg.initialize()

    # working directories
    cfg.PATHS['working_dir'] = mbcfg.PATHS['working_dir']

    cfg.PATHS['rgi_version'] = mbcfg.PARAMS['rgi_version']

    # We are running the calibration ourselves
    cfg.PARAMS['run_mb_calibration'] = True

    # No need for intersects since this has an effect on the inversion only
    cfg.PARAMS['use_intersects'] = False

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # Set to True for operational runs
    # maybe also here?
    cfg.PARAMS['continue_on_error'] = True

    # set negative flux filtering to false. should be standard soon
    cfg.PARAMS['filter_for_neg_flux'] = False

    # correct negative fluxes with flowline mus
    cfg.PARAMS['correct_for_neg_flux'] = True

    # use glacier-wide mu_star in order to find t_star: it's faster!
    cfg.PARAMS['tstar_search_glacierwide'] = True

    # Pre-download other files which will be needed later
    # _ = cru.get_cru_file(var='tmp')
    # _ = cru.get_cru_file(var='pre')
    rgi_dir = utils.get_rgi_dir(version=cfg.PATHS['rgi_version'])

    # Get the reference glacier ids (they are different for each RGI version)
    df, _ = utils.get_wgms_files()
    rids = df['RGI{}0_ID'.format(cfg.PATHS['rgi_version'])]

    # Make a new dataframe with those (this takes a while)
    rgidf = []
    for reg in df['RGI_REG'].unique():
        if reg == '19':
            continue  # we have no climate data in Antarctica
        if mbcfg.PARAMS['region'] is not None \
                and reg != mbcfg.PARAMS['region']:
            continue

        fn = '*' + reg + '_rgi{}0_*.shp'.format(cfg.PATHS['rgi_version'])
        fs = sorted(glob(os.path.join(rgi_dir, '*', fn)))[0]
        sh = gpd.read_file(fs)
        rgidf.append(sh.loc[sh.RGIId.isin(rids)])
    rgidf = pd.concat(rgidf)
    rgidf.crs = sh.crs  # for geolocalisation

    # reduce Europe to Histalp area (exclude Pyrenees, etc...)
    rgidf = rgidf.loc[(rgidf.CenLon >= 4) & (rgidf.CenLon < 20) &
                      (rgidf.CenLat >= 43) & (rgidf.CenLat < 47)]

    # and set standard histalp values
    cfg.PARAMS['prcp_scaling_factor'] = 1.75
    cfg.PARAMS['temp_all_liq'] = 2.0
    cfg.PARAMS['temp_melt'] = -1.75
    cfg.PARAMS['temp_default_gradient'] = -0.0065

    # We have to check which of them actually have enough mb data.
    # Let OGGM do it:
    gdirs = workflow.init_glacier_regions(rgidf)
    # We need to know which period we have data for
    cfg.PARAMS['baseline_climate'] = 'HISTALP'
    execute_entity_task(tasks.process_histalp_data, gdirs)

    gdirs = utils.get_ref_mb_glaciers(gdirs)
    # Keep only these
    rgidf = rgidf.loc[rgidf.RGIId.isin([g.rgi_id for g in gdirs])]

    # Save
    rgidf.to_file(os.path.join(cfg.PATHS['working_dir'],
                               'mb_ref_glaciers.shp'))

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    # Go - initialize working directories
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)

    return gdirs
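
A hedged usage sketch (it assumes `mbcfg` was configured beforehand, as the function requires):

# Returns glacier directories for the reference glaciers with enough MB data.
gdirs = initialization_selection()
log.info('Number of reference glaciers: {}'.format(len(gdirs)))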
Example #9
File: itmix.py  Project: MachineAi/oggm
def get_rgi_df(reset=False):
    """This function prepares a kind of `fake` RGI file, with the updated
    geometries for ITMIX.
    """

    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers
    RGI_DIR = utils.get_rgi_dir()

    df_rgi_file = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_shp.pkl')
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_links.pkl')
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        divides = []
        rgidf = []
        _rgi_ids_for_overwrite = []
        for i, row in df_itmix.iterrows():

            log.info('Prepare RGI df for ' + row.name)

            # read the rgi region
            rgi_shp = find_path(RGI_DIR, row['rgi_reg'] + '_rgi50_*.shp')
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row['rgi_parts_ids']
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()

            # use the ITMIX shape where possible
            if row.name in [
                    'Hellstugubreen', 'Freya', 'Aqqutikitsoq', 'Brewster',
                    'Kesselwandferner', 'NorthGlacier', 'SouthGlacier',
                    'Tasman', 'Unteraar', 'Washmawapta', 'Columbia'
            ]:
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp = salem.utils.read_shapefile(shf)
                if row.name == 'Unteraar':
                    shp = shp.iloc[[-1]]
                if 'LineString' == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0],
                            'geometry'] = shpg.Polygon(shp.iloc[0].geometry)
                if shp.iloc[0].geometry.type == 'MultiLineString':
                    # Columbia
                    geometry = shp.iloc[0].geometry
                    parts = list(geometry)
                    for p in parts:
                        assert p.type == 'LineString'
                    exterior = shpg.Polygon(parts[0])
                    # let's assume that all other polygons are in fact interiors
                    interiors = []
                    for p in parts[1:]:
                        assert exterior.contains(p)
                        interiors.append(p)
                    geometry = shpg.Polygon(parts[0], interiors)
                    assert 'Polygon' in geometry.type
                    shp.loc[shp.index[0], 'geometry'] = geometry

                assert len(shp) == 1
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], 'geometry'] = shp
                sel.loc[sel.index[0], 'Area'] = area_km2
            elif row.name == 'Urumqi':
                # ITMIX Urumqi is in fact two glaciers
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp2 = salem.utils.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k],
                                   'geometry'].contains(shp.centroid)
                    sel.loc[sel.index[k], 'geometry'] = shp
                    sel.loc[sel.index[k], 'Area'] = area_km2
                assert len(sel) == 2
            elif len(rgi_parts) > 1:
                # Ice-caps. Make divides
                # First we gather all the parts:
                sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
                # Make the multipolygon for the record
                multi = shpg.MultiPolygon([g for g in sel.geometry])
                # update the RGI attributes. We take a dummy rgi ID
                new_area = np.sum(sel.Area)
                found = False
                for i in range(len(sel)):
                    tsel = sel.iloc[[i]].copy()
                    if 'Multi' in tsel.loc[tsel.index[0], 'geometry'].type:
                        continue
                    else:
                        found = True
                        sel = tsel
                        break
                if not found:
                    raise RuntimeError()

                inif = 0.
                add = 1e-5
                if row.name == 'Devon':
                    inif = 0.001
                    add = 1e-4
                while True:
                    buff = multi.buffer(inif)
                    if 'Multi' in buff.type:
                        inif += add
                    else:
                        break
                x, y = multi.centroid.xy
                if 'Multi' in buff.type:
                    raise RuntimeError
                sel.loc[sel.index[0], 'geometry'] = buff
                sel.loc[sel.index[0], 'Area'] = new_area
                sel.loc[sel.index[0], 'CenLon'] = np.asarray(x)[0]
                sel.loc[sel.index[0], 'CenLat'] = np.asarray(y)[0]

                # Divides db
                div_sel = dict()
                for k, v in sel.iloc[0].items():
                    if k == 'geometry':
                        div_sel[k] = multi
                    elif k == 'RGIId':
                        div_sel['RGIID'] = v
                    else:
                        div_sel[k] = v
                divides.append(div_sel)
            else:
                pass

            # add glacier name to the entity
            name = ['I:' + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'W-' + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

            # Add divides to the original one
            adf = pd.DataFrame(divides)
            adf.to_pickle(cfg.PATHS['itmix_divs'])

        log.info('N glaciers ITMIX: {}'.format(len(rgidf)))

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        wgms_df = wgms_df.loc[~wgms_df.RGI_ID.isin(_rgi_ids_for_overwrite)]

        log.info('N glaciers WGMS: {}'.format(len(wgms_df)))
        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = row.NAME.replace('/', 'or').replace('.',
                                                       '').replace(' ', '-')
            name = ['W:' + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        _rgi_ids_for_overwrite.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        gtd_df = gtd_df.loc[~gtd_df.RGI_ID.isin(_rgi_ids_for_overwrite)]
        log.info('N glaciers GTD: {}'.format(len(gtd_df)))

        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = row.NAME.replace('/',
                                        'or').replace('.',
                                                      '').replace(' ', '-')
            name = ['G:' + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)

    return rgidf
Example #10
cfg.PATHS['working_dir'] = WORKING_DIR
cfg.PATHS['topo_dir'] = os.path.join(DATA_DIR, 'topo')
cfg.PATHS['rgi_dir'] = os.path.join(DATA_DIR, 'rgi')
utils.mkdir(WORKING_DIR)
utils.mkdir(cfg.PATHS['topo_dir'])
utils.mkdir(cfg.PATHS['rgi_dir'])

# Use multiprocessing?
cfg.PARAMS['use_multiprocessing'] = False
cfg.PARAMS['border'] = 20
cfg.CONTINUE_ON_ERROR = False

# Read in the RGI file
rgisel = os.path.join(WORKING_DIR, 'rgi_selection.shp')
if not os.path.exists(rgisel):
    rgi_dir = utils.get_rgi_dir()
    regions = ['{:02d}'.format(int(p)) for p in range(1, 20)]
    files = [
        glob.glob(os.path.join(rgi_dir, '*', r + '_rgi50_*.shp'))[0]
        for r in regions
    ]
    rgidf = []
    for fs in files:
        sh = salem.read_shapefile(fs, cached=True)  # fs is already a full path
        percs = np.asarray([0, 25, 50, 75, 100])
        idppercs = np.round(percs * 0.01 * (len(sh) - 1)).astype(int)

        rgidf.append(sh.sort_values(by='Area').iloc[idppercs])
        rgidf.append(sh.sort_values(by='CenLon').iloc[idppercs])
        rgidf.append(sh.sort_values(by='CenLat').iloc[idppercs])
    rgidf = gpd.GeoDataFrame(pd.concat(rgidf))
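    # Hedged continuation (not in the excerpt): presumably the selection is
    # written to `rgisel` so that later runs can reuse it.
    rgidf.to_file(rgisel)
else:
    rgidf = gpd.read_file(rgisel)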
Example #11
def mb_calibration(rgi_version, baseline):
    """ Run the mass balance calibration for the VAS model. RGI version and
    baseline climate must be given.

    :param rgi_version: str, RGI version
    :param baseline: str, baseline climate 'HISTALP' or 'CRU'
    """

    # initialize OGGM and set up the run parameters
    vascaling.initialize(logging_level='WORKFLOW')

    # local paths (where to write the OGGM run output)
    # dirname = 'VAS_ref_mb_{}_RGIV{}'.format(baseline, rgi_version)
    # wdir = utils.gettempdir(dirname, home=True, reset=True)
    # utils.mkdir(wdir, reset=True)
    wdir = os.environ['WORKDIR']
    cfg.PATHS['working_dir'] = wdir

    # we are running the calibration ourselves
    cfg.PARAMS['run_mb_calibration'] = True
    # we are using which baseline data?
    cfg.PARAMS['baseline_climate'] = baseline
    # no need for intersects since this has an effect on the inversion only
    cfg.PARAMS['use_intersects'] = False
    # use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True
    # set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    if baseline == 'HISTALP':
        # other params: see https://oggm.org/2018/08/10/histalp-parameters/
        # cfg.PARAMS['prcp_scaling_factor'] = 1.75
        # cfg.PARAMS['temp_melt'] = -1.75
        cfg.PARAMS['prcp_scaling_factor'] = 2.5
        cfg.PARAMS['temp_melt'] = -0.5
    elif baseline == 'CRU':
        # using the parameters from Marzeion et al. (2012)
        cfg.PARAMS['prcp_scaling_factor'] = 2.5
        cfg.PARAMS['temp_melt'] = 1
        cfg.PARAMS['temp_all_solid'] = 3

    # the next step is to get all the reference glaciers,
    # i.e. glaciers with mass balance measurements.

    # get the reference glacier ids (they are different for each RGI version)
    rgi_dir = utils.get_rgi_dir(version=rgi_version)
    df, _ = utils.get_wgms_files()
    rids = df['RGI{}0_ID'.format(rgi_version[0])]

    # we can't do Antarctica
    rids = [rid for rid in rids if not ('-19.' in rid)]

    # For HISTALP only RGI reg 11.01 (Alps); note that the `or True`
    # currently forces this restriction for all baselines
    if baseline == 'HISTALP' or True:
        rids = [rid for rid in rids if '-11' in rid]

    debug = False
    if debug:
        print("==================================\n"
              + "DEBUG MODE: only RGI60-11.00897\n"
              + "==================================")
        rids = [rid for rid in rids if '-11.00897' in rid]
        cfg.PARAMS['use_multiprocessing'] = False

    # make a new dataframe with those (this takes a while)
    print('Reading the RGI shapefiles...')
    rgidf = utils.get_rgi_glacier_entities(rids, version=rgi_version)
    print('For RGIV{} we have {} candidate reference '
          'glaciers.'.format(rgi_version, len(rgidf)))

    # initialize the glacier regions
    gdirs = workflow.init_glacier_directories(rgidf, reset=False, force=True)
    workflow.execute_entity_task(gis.define_glacier_region, gdirs)
    workflow.execute_entity_task(gis.glacier_masks, gdirs)

    # we need to know which period we have data for
    print('Process the climate data...')
    if baseline == 'CRU':
        execute_entity_task(tasks.process_cru_data, gdirs, print_log=False)
    elif baseline == 'HISTALP':
        # Some glaciers are not in Alps
        gdirs = [gdir for gdir in gdirs if gdir.rgi_subregion == '11-01']
        # cfg.PARAMS['continue_on_error'] = True
        execute_entity_task(tasks.process_histalp_data, gdirs, print_log=False,
                            y0=1850)
        # cfg.PARAMS['continue_on_error'] = False
    else:
        execute_entity_task(tasks.process_custom_climate_data,
                            gdirs, print_log=False)

    # get reference glaciers with mass balance measurements
    gdirs = utils.get_ref_mb_glaciers(gdirs)

    # keep only these glaciers
    rgidf = rgidf.loc[rgidf.RGIId.isin([g.rgi_id for g in gdirs])]

    # save to file
    rgidf.to_file(os.path.join(wdir, 'mb_ref_glaciers.shp'))
    print('For RGIV{} and {} we have {} reference glaciers'.format(rgi_version,
                                                                   baseline,
                                                                   len(rgidf)))

    # sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    # newly initialize glacier directories
    gdirs = workflow.init_glacier_directories(rgidf, reset=False, force=True)
    workflow.execute_entity_task(gis.define_glacier_region, gdirs)
    workflow.execute_entity_task(gis.glacier_masks, gdirs)

    # run climate tasks
    vascaling.compute_ref_t_stars(gdirs)
    execute_entity_task(vascaling.local_t_star, gdirs)

    # we store the associated params
    mb_calib = gdirs[0].read_pickle('climate_info')['mb_calib_params']
    with open(os.path.join(wdir, 'mb_calib_params.json'), 'w') as fp:
        json.dump(mb_calib, fp)
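
A hedged usage sketch (the `WORKDIR` path and the version string are assumptions; `rgi_version` must be a string, since the function indexes its first character):

os.environ.setdefault('WORKDIR', '/tmp/vas_mb_calib')  # hypothetical path
mb_calibration(rgi_version='61', baseline='HISTALP')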
Example #12
File: itmix.py  Project: alexjarosch/oggm
def get_rgi_df(reset=False):
    """This function prepares a kind of `fake` RGI file, with the updated
    geometries for ITMIX.
    """

    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers
    RGI_DIR = utils.get_rgi_dir()

    df_rgi_file = os.path.join(DATA_DIR, "itmix", "itmix_rgi_shp.pkl")
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, "itmix", "itmix_rgi_links.pkl")
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        divides = []
        rgidf = []
        _rgi_ids_for_overwrite = []
        for i, row in df_itmix.iterrows():

            log.info("Prepare RGI df for " + row.name)

            # read the rgi region
            rgi_shp = find_path(RGI_DIR, row["rgi_reg"] + "_rgi50_*.shp")
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row["rgi_parts_ids"]
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()

            # use the ITMIX shape where possible
            if row.name in [
                "Hellstugubreen",
                "Freya",
                "Aqqutikitsoq",
                "Brewster",
                "Kesselwandferner",
                "NorthGlacier",
                "SouthGlacier",
                "Tasman",
                "Unteraar",
                "Washmawapta",
                "Columbia",
            ]:
                shf = find_path(SEARCHD, "*_" + row.name + "*.shp")
                shp = salem.utils.read_shapefile(shf)
                if row.name == "Unteraar":
                    shp = shp.iloc[[-1]]
                if "LineString" == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0], "geometry"] = shpg.Polygon(shp.iloc[0].geometry)
                if shp.iloc[0].geometry.type == "MultiLineString":
                    # Columbia
                    geometry = shp.iloc[0].geometry
                    parts = list(geometry)
                    for p in parts:
                        assert p.type == "LineString"
                    exterior = shpg.Polygon(parts[0])
                    # let's assume that all other polygons are in fact interiors
                    interiors = []
                    for p in parts[1:]:
                        assert exterior.contains(p)
                        interiors.append(p)
                    geometry = shpg.Polygon(parts[0], interiors)
                    assert "Polygon" in geometry.type
                    shp.loc[shp.index[0], "geometry"] = geometry

                assert len(shp) == 1
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], "geometry"] = shp
                sel.loc[sel.index[0], "Area"] = area_km2
            elif row.name == "Urumqi":
                # ITMIX Urumqi is in fact two glaciers
                shf = find_path(SEARCHD, "*_" + row.name + "*.shp")
                shp2 = salem.utils.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k], "geometry"].contains(shp.centroid)
                    sel.loc[sel.index[k], "geometry"] = shp
                    sel.loc[sel.index[k], "Area"] = area_km2
                assert len(sel) == 2
            elif len(rgi_parts) > 1:
                # Ice-caps. Make divides
                # First we gather all the parts:
                sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
                # Make the multipolygon for the record
                multi = shpg.MultiPolygon([g for g in sel.geometry])
                # update the RGI attributes. We take a dummy rgi ID
                new_area = np.sum(sel.Area)
                found = False
                for i in range(len(sel)):
                    tsel = sel.iloc[[i]].copy()
                    if "Multi" in tsel.loc[tsel.index[0], "geometry"].type:
                        continue
                    else:
                        found = True
                        sel = tsel
                        break
                if not found:
                    raise RuntimeError()

                inif = 0.0
                add = 1e-5
                if row.name == "Devon":
                    inif = 0.001
                    add = 1e-4
                while True:
                    buff = multi.buffer(inif)
                    if "Multi" in buff.type:
                        inif += add
                    else:
                        break
                x, y = multi.centroid.xy
                if "Multi" in buff.type:
                    raise RuntimeError
                sel.loc[sel.index[0], "geometry"] = buff
                sel.loc[sel.index[0], "Area"] = new_area
                sel.loc[sel.index[0], "CenLon"] = np.asarray(x)[0]
                sel.loc[sel.index[0], "CenLat"] = np.asarray(y)[0]

                # Divides db
                div_sel = dict()
                for k, v in sel.iloc[0].items():
                    if k == "geometry":
                        div_sel[k] = multi
                    elif k == "RGIId":
                        div_sel["RGIID"] = v
                    else:
                        div_sel[k] = v
                divides.append(div_sel)
            else:
                pass

            # add glacier name to the entity
            name = ["I:" + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = "W-" + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = "G-" + name[z]
            sel.loc[:, "Name"] = name
            rgidf.append(sel)

            # Add divides to the original one
            adf = pd.DataFrame(divides)
            adf.to_pickle(cfg.PATHS["itmix_divs"])

        log.info("N glaciers ITMIX: {}".format(len(rgidf)))

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        wgms_df = wgms_df.loc[~wgms_df.RGI_ID.isin(_rgi_ids_for_overwrite)]

        log.info("N glaciers WGMS: {}".format(len(wgms_df)))
        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split("-")[1].split(".")[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + "_rgi50_*.shp")
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = row.NAME.replace("/", "or").replace(".", "").replace(" ", "-")
            name = ["W:" + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = "G-" + name[z]
            for n in name:
                if len(n) > 48:
                    raise RuntimeError("Name too long: " + n)
            sel.loc[:, "Name"] = name
            rgidf.append(sel)

        _rgi_ids_for_overwrite.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        gtd_df = gtd_df.loc[~gtd_df.RGI_ID.isin(_rgi_ids_for_overwrite)]
        log.info("N glaciers GTD: {}".format(len(gtd_df)))

        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split("-")[1].split(".")[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + "_rgi50_*.shp")
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = row.NAME.replace("/", "or").replace(".", "").replace(" ", "-")
            name = ["G:" + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise RuntimeError("Name too long: " + n)
            sel.loc[:, "Name"] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)

    return rgidf