Example #1
0
def _area_select(gdf: gpd.GeoDataFrame, min_area: float,
                 max_area: float) -> gpd.GeoDataFrame:
    """Select basins from lower to higher levels that are between min_area and max_area in area.

    Start by working out if any basins at e.g. level 1 are selected.
    Then move on to higher levels (smaller basins).
    At each level, only add basins if the basin at the level below has not been added.
    e.g. level 3 basins 411 to 419 will not be added if at level 2 basin 41 was added.

    Both bounds are inclusive: a basin whose `SUB_AREA` equals `min_area` or
    `max_area` is eligible for selection, matching the "just right" count that
    is logged for each level.

    Can also be used as a method on a `gpd.GeoDataFrame`:
    `gdf.area_select(min_area, max_area)`

    :param gdf: hydrobasins geodataframe to traverse; must provide `LEVEL`, `SUB_AREA`
        and `PFAF_ID` columns and the `find_next_level_larger` method
    :param min_area: minimum area of basin (inclusive)
    :param max_area: maximum area of basin (inclusive)
    :return: filtered geodataframe from any level (favouring lower levels) with area between min and max,
        rows kept in the same order as in `gdf`
    """
    # An empty frame has no levels to walk; LEVEL.min() would be NaN and
    # range() below would raise.
    if len(gdf) == 0:
        return gdf.iloc[:0]

    # Index labels of every in-range basin seen so far, whether or not it was
    # selected. A basin is blocked when its next-larger basin is in this set, so
    # recording unselected basins too propagates the block down the hierarchy.
    all_good_index: Set[int] = set()
    # Index labels of the basins that are actually returned.
    good_index: Set[int] = set()

    for level in range(gdf.LEVEL.min(), gdf.LEVEL.max() + 1):
        gdf_lev = gdf[gdf.LEVEL == level]

        # Build the masks from the level subset itself so that they are aligned
        # with the frame they index (no implicit reindexing of the mask).
        area = gdf_lev.SUB_AREA
        gdf_too_large = gdf_lev[area > max_area]
        gdf_just_right = gdf_lev[(area <= max_area) & (area >= min_area)]
        gdf_too_small = gdf_lev[area < min_area]

        logger.debug(f'Level {level}')
        logger.debug(f'  too large: {len(gdf_too_large)}')
        logger.debug(f'  just right: {len(gdf_just_right)}')
        logger.debug(f'  too small: {len(gdf_too_small)}')

        if gdf_just_right.empty:
            logger.debug(f'Skipping level: {level}')
            continue

        # Only in-range basins can be selected, so iterate those directly.
        for row_index, pfaf_id in gdf_just_right.PFAF_ID.items():
            larger_basin = gdf.find_next_level_larger(pfaf_id)
            # Select the basin when it has no larger basin at all, or when that
            # larger basin was not itself in range (and hence not recorded).
            if (len(larger_basin) == 0
                    or larger_basin.index[0] not in all_good_index):
                good_index.add(row_index)
            all_good_index.add(row_index)

    # A boolean mask keeps the rows in their original order and avoids passing
    # a set to `.loc`, which pandas >= 2.0 rejects.
    return gdf.loc[gdf.index.isin(good_index)]