def create_bbox(df):
    """Create a single bounding-box polygon around all geometries in ``df``.

    Args:
        df: object exposing a ``geometry`` attribute that
            ``pygeos.total_bounds`` accepts (presumably a dataframe holding
            pygeos geometries -- confirm against callers).

    Returns:
        pygeos geometry: rectangular polygon spanning the total bounds
        ``(xmin, ymin, xmax, ymax)`` of ``df.geometry``.
    """
    # total_bounds scans every geometry, so compute it once and unpack
    # instead of recomputing it for each of the four coordinates.
    xmin, ymin, xmax, ymax = pygeos.total_bounds(df.geometry)
    return pygeos.creation.box(xmin, ymin, xmax, ymax)
def get_slr(self):
    """Extract SLR results for the geometries that intersect the SLR bounds.

    Returns
    -------
    dict or None
        {"slr_acres": <acres>, "slr": [<slr_0ft>, <slr_1ft>, ..., <slr_6ft>]}
        None if no geometry intersects the SLR bounds or if the extraction
        returns None.
    """
    slr_df = gp.read_feather(slr_bounds_filename)
    slr_bounds = slr_df.geometry.values.data[0]

    overlaps = pg.intersects(self.geometry, slr_bounds)
    if overlaps.sum() == 0:
        # nothing intersects the area where SLR is available
        return None

    # restrict the extraction to the overlapping shapes and their bounds
    overlapping_shapes = self.shapes[overlaps]
    overlapping_bounds = pg.total_bounds(self.geometry[overlaps])
    slr_results = extract_slr_by_geometry(
        overlapping_shapes, bounds=overlapping_bounds
    )

    # None only if no shape mask
    if slr_results is None:
        return None

    # results are keyed by the integers 0 through 6
    depths = []
    for depth in range(7):
        depths.append(slr_results[depth])

    return {"slr_acres": slr_results["shape_mask"], "slr": depths}
def summarize_by_huc12(geometries):
    """Calculate current and projected urbanization for each decade from 2020
    to 2100, and write the results to ``results_filename`` as a feather file.

    Geometries for which ``extract_by_geometry`` returns None are omitted.
    If no geometry produces results, nothing is written.

    Parameters
    ----------
    geometries : Series of pygeos geometries, indexed by HUC12 id
    """
    index = []
    results = []
    # Series.items() replaces Series.iteritems(), which was removed in
    # pandas 2.0; items() is available in older versions as well.
    for huc12, geometry in Bar(
        "Calculating Urbanization counts for HUC12", max=len(geometries)
    ).iter(geometries.items()):
        zone_results = extract_by_geometry(
            [to_dict(geometry)], bounds=pg.total_bounds(geometry)
        )
        if zone_results is None:
            continue

        index.append(huc12)
        results.append(zone_results)

    if not results:
        # an empty frame has none of the expected columns; selecting them
        # below would raise KeyError
        return

    cols = ["shape_mask", "urban"] + URBAN_YEARS
    df = pd.DataFrame(results, index=index)[cols]
    df = df.reset_index().rename(columns={"index": "id"}).round()
    # feather requires string column names
    df.columns = [str(c) for c in df.columns]
    df.to_feather(results_filename)
def summarize_raster_by_geometry(geometries, extract_func, outfilename, progress_label="", bounds=None, **kwargs):
    """Summarize values of input dataset by geometry and writes results to
    a feather file, with one column for shape_mask and one for each raster value.

    Nothing is written if no geometries remain after the optional bounds
    filter, or if no geometry produces results.

    Parameters
    ----------
    geometries : Series of pygeos geometries, indexed by HUC12 / marine block
    extract_func : function that extracts results for each geometry
        called as ``extract_func([<geometry dict>], bounds=<bounds>, **kwargs)``;
        a None return value skips that geometry
    outfilename : str
    progress_label : str
    bounds : sequence of 4 numbers, optional (default: None)
        if provided, only geometries returned by a spatial-index query
        against the box built from these bounds are processed
    kwargs : any
        forwarded to ``extract_func``
    """
    if bounds is not None:
        # select only those areas that overlap input area
        tree = pg.STRtree(geometries)
        ix = tree.query(pg.box(*bounds))
        geometries = geometries.iloc[ix].copy()

    if not len(geometries):
        return

    index = []
    results = []
    # Series.items() replaces Series.iteritems(), which was removed in
    # pandas 2.0. The loop variable has its own name so it no longer
    # shadows the spatial-index result above.
    for geometry_id, geometry in Bar(progress_label, max=len(geometries)).iter(
        geometries.items()
    ):
        zone_results = extract_func(
            [to_dict(geometry)], bounds=pg.total_bounds(geometry), **kwargs
        )
        if zone_results is None:
            continue

        index.append(geometry_id)
        results.append(zone_results)

    if not results:
        return

    df = pd.DataFrame(results, index=index)

    summary = df[["shape_mask"]].copy()
    summary.index.name = "id"

    avg_cols = [c for c in df.columns if c.endswith("_avg")]

    # each remaining column is an array of counts; expand each array into
    # one column per position, named <col>_<position>
    for col in df.columns.difference(["shape_mask"] + avg_cols):
        s = df[col].apply(pd.Series).fillna(0)
        s.columns = [f"{col}_{c}" for c in s.columns]
        summary = summary.join(s)

    if avg_cols:
        # rounding is only applied when average columns are present
        summary = summary.join(df[avg_cols]).round()

    summary.reset_index().to_feather(outfilename)
def create_grid(bbox, height):
    """Create a vector-based grid of square cells covering ``bbox``.

    The grid is anchored at the top-left corner ``(xmin, ymax)`` of the total
    bounds of ``bbox``. Cells are emitted column by column (left to right),
    and top to bottom within each column. The number of rows and columns is
    rounded up, so the grid may extend past the right and bottom edges.

    Args:
        bbox: pygeos geometry (or array of geometries) whose total bounds
            define the grid extent.
        height: side length of each square cell, in the units of ``bbox``.

    Returns:
        numpy array of pygeos polygons, one per grid cell.
    """
    # compute the bounds once instead of once per coordinate
    xmin, ymin, xmax, ymax = pygeos.total_bounds(bbox)

    # estimate total rows and columns
    rows = int(numpy.ceil((ymax - ymin) / height))
    cols = int(numpy.ceil((xmax - xmin) / height))

    # x extent of the current column; advanced after each column
    x_left = xmin
    x_right = xmin + height

    res_geoms = []
    for _col in range(cols):
        # every column starts again from the top edge
        y_top = ymax
        y_bottom = ymax - height
        for _row in range(rows):
            res_geoms.append(
                (
                    (x_left, y_top),
                    (x_right, y_top),
                    (x_right, y_bottom),
                    (x_left, y_bottom),
                )
            )
            y_top = y_top - height
            y_bottom = y_bottom - height
        x_left = x_left + height
        x_right = x_right + height

    return pygeos.polygons(res_geoms)
def heatmap(pois, basemap_provider='OpenStreetMap', basemap_name='Mapnik', width='100%', height='100%', radius=10):
    """Generates a heatmap of the input POIs.

    Parameters:
        pois (GeoDataFrame): A POIs GeoDataFrame.
        basemap_provider (string): The basemap provider.
        basemap_name: The basemap itself as named by the provider.
            List and preview of available providers and their basemaps can be found in
            https://leaflet-extras.github.io/leaflet-providers/preview/
        width (integer or percentage): Width of the map in pixels or percentage (default: 100%).
        height (integer or percentage): Height of the map in pixels or percentage (default: 100%).
        radius (float): Radius of each point of the heatmap (default: 10).

    Returns:
        A Folium Map object displaying the heatmap generated from the POIs.
    """
    # Reproject to WGS84 unless the geometries are already in it.
    # NOTE(review): the return value of to_crs() is not assigned; confirm
    # that it reprojects in place, otherwise the call has no effect.
    if not (pois.geometry.crs == 'EPSG:4326'):
        pois.geometry.to_crs('EPSG:4326')

    # Center the map on the centroid of the POIs' bounding geometry
    bbox = pois.geometry.total_bounds()
    center = pg.get_coordinates(pg.centroid(bbox))[0].tolist()

    tiles, attribution, max_zoom = get_provider_info(basemap_provider, basemap_name)
    heat_map = folium.Map(
        location=center,
        tiles=tiles,
        attr=attribution,
        max_zoom=max_zoom,
        width=width,
        height=height,
    )

    # Zoom to the bounding geometry; fit_bounds receives the corners with
    # the y coordinate first
    xmin, ymin, xmax, ymax = pg.total_bounds(bbox)
    heat_map.fit_bounds(([ymin, xmin], [ymax, xmax]))

    # Coordinates of every POI, requested with invert=True
    points = pois.geometry.get_coordinates(invert=True)

    # Add the heat layer to the map
    heat_layer = HeatMap(points, radius=radius)
    heat_layer.add_to(heat_map)

    return heat_map
def __init__(self, geometry, crs, name):
    """Set up a custom area from a pygeos geometry.

    The geometry is projected from ``crs`` to DATA_CRS before its bounds and
    rasterio-style shapes are derived.

    Parameters
    ----------
    geometry : pygeos Geometry
    crs : pyproj CRS object
    name : string
        name of custom area
    """
    projected = to_crs(geometry, crs, DATA_CRS)
    self.geometry = projected
    self.bounds = pg.total_bounds(projected)

    # rasterio expects dict shapes; only the first geometry is wrapped
    first_shape = to_dict(projected[0])
    self.shapes = np.asarray([first_shape])

    self.name = name
def get_slr(self):
    """Extract SLR results where this area's geometry intersects the SLR bounds.

    Returns
    -------
    dict or None
        {"slr_acres": <shape_mask value>, "slr": [<results for keys 0..6>]},
        or None when nothing intersects the SLR bounds or the extraction
        returns None.
    """
    bounds_geometry = gp.read_feather(slr_bounds_filename).geometry.values.data[0]
    hits = pg.intersects(self.geometry, bounds_geometry)

    if hits.sum():
        # only extract SLR where there are overlaps
        extracted = extract_slr_by_geometry(
            self.shapes[hits], bounds=pg.total_bounds(self.geometry[hits])
        )
        # None only if no shape mask
        if extracted is not None:
            return {
                "slr_acres": extracted["shape_mask"],
                "slr": [extracted[level] for level in range(7)],
            }

    # no overlap, or no shape mask
    return None
def summarize_by_huc12(geometries):
    """Summarize SLR results by HUC12 and write them to ``results_filename``
    as a feather file.

    Only HUC12s that intersect the SLR bounds are analyzed, and only those
    whose result columns sum to more than 0 are written. Nothing is written
    if no HUC12 intersects the SLR bounds or none produces results.

    Parameters
    ----------
    geometries : Series of pygeos geometries, indexed by HUC12 id
    """
    # find the indexes of the geometries that overlap with SLR bounds; these are the only
    # ones that need to be analyzed for SLR impacts
    slr_bounds = gp.read_feather(slr_bounds_filename).geometry
    tree = pg.STRtree(geometries)
    ix = tree.query(slr_bounds.geometry.values.data[0], predicate="intersects")
    geometries = geometries.iloc[ix].copy()

    if not len(geometries):
        return

    results = []
    index = []
    # Series.items() replaces Series.iteritems(), which was removed in
    # pandas 2.0; items() is available in older versions as well.
    for huc12, geometry in Bar(
        "Calculating SLR counts for HUC12", max=len(geometries)
    ).iter(geometries.items()):
        zone_results = extract_by_geometry(
            [to_dict(geometry)], bounds=pg.total_bounds(geometry)
        )
        if zone_results is None:
            continue

        index.append(huc12)
        results.append(zone_results)

    if not results:
        # an empty frame has no "shape_mask" column; selecting it below
        # would raise KeyError
        return

    df = pd.DataFrame(results, index=index)

    # reorder columns so that shape_mask comes first
    df = df[["shape_mask"] + list(df.columns.difference(["shape_mask"]))]

    # extract only areas that actually had SLR pixels
    df = df[df[df.columns[1:]].sum(axis=1) > 0]

    # feather requires string column names
    df.columns = [str(c) for c in df.columns]
    df = df.reset_index().rename(columns={"index": "id"}).round()
    df.to_feather(results_filename)
def summarize_blueprint_by_geometry(geometries, outfilename, marine=False):
    """Summarize counts and zonal means by geometry and write them to a
    feather file.

    The output has an ``id`` column, ``shape_mask``, one ``<name>_<position>``
    column per entry of each counts array, and one ``<name>_avg`` column per
    mean. Geometries for which ``extract_by_geometry`` returns None are
    omitted; nothing is written if no geometry produces results.

    Parameters
    ----------
    geometries : Series of pygeos geometries
        the Series index becomes the ``id`` column
    outfilename : str
    marine : bool, optional (default: False)
        forwarded to ``extract_by_geometry``
    """
    counts = []
    means = []
    index = []

    # Series.items() replaces Series.iteritems(), which was removed in
    # pandas 2.0; items() is available in older versions as well.
    for geometry_id, geometry in Bar(
        "Calculating overlap with Blueprint, Corridors, and Indicators",
        max=len(geometries),
    ).iter(geometries.items()):
        zone_results = extract_by_geometry(
            [to_dict(geometry)],
            bounds=pg.total_bounds(geometry),
            marine=marine,
            zonal_means=True,
        )
        if zone_results is None:
            continue

        index.append(geometry_id)
        counts.append(zone_results["counts"])
        means.append(zone_results["means"])

    if not counts:
        # an empty frame has no "shape_mask" column; selecting it below
        # would raise KeyError
        return

    count_df = pd.DataFrame(counts, index=index)
    mean_df = pd.DataFrame(means, index=index).round()
    mean_df.columns = [f"{c}_avg" for c in mean_df.columns]

    results = count_df[["shape_mask"]].copy()
    results.index.name = "id"

    ### Export the Blueprint, corridors, and indicators
    # each column is an array of counts; expand each array into one column
    # per position
    for col in count_df.columns.difference(["shape_mask"]):
        s = count_df[col].apply(pd.Series).fillna(0)
        s.columns = [f"{col}_{c}" for c in s.columns]
        results = results.join(s)

    results = results.join(mean_df)
    results.reset_index().to_feather(outfilename)
def get_input_area_mask(input_area):
    """Build the raster mask, transform, and window covering an input area.

    Parameters
    ----------
    input_area : str
        one of the major input area codes, e.g., "gh", "sa", etc

    Returns
    -------
    mask, transform, window
        mask is 1 INSIDE input area, 0 outside
    """
    # raster values of every entry whose comma-delimited id list includes
    # this input area
    values = []
    for entry in INPUT_AREA_VALUES:
        if input_area in entry["id"].split(","):
            values.append(entry["value"])

    boundary = get_input_area_boundary(input_area)

    ### Get window into raster for bounds of input area
    with rasterio.open(data_dir / "input_areas.tif") as src:
        raw_window = src.window(*pg.total_bounds(boundary))

        # floor the offsets, then grow width / height so the floored
        # window still reaches the far edges of the raw window
        floored = raw_window.round_offsets(op="floor", pixel_precision=3)
        width = math.ceil(raw_window.width + raw_window.col_off - floored.col_off)
        height = math.ceil(raw_window.height + raw_window.row_off - floored.row_off)

        # clip the window to the extent of the raster
        full_extent = Window(0, 0, src.width, src.height)
        window = Window(floored.col_off, floored.row_off, width, height)
        window = window.intersection(full_extent)

        transform = src.window_transform(window)
        data = src.read(1, window=window)

    mask = np.zeros(shape=data.shape, dtype="uint8")
    for value in values:
        mask[data == value] = 1

    return mask, transform, window
def _create_mosaic(
    cls,
    features: Sequence[Dict],
    minzoom: int,
    maxzoom: int,
    quadkey_zoom: Optional[int] = None,
    accessor: Callable[[Dict], str] = default_accessor,
    asset_filter: Callable = default_filter,
    version: str = "0.0.2",
    quiet: bool = True,
    **kwargs,
):
    """Build the mosaic definition from a list of GeoJSON features.

    Attributes:
        features (list): List of GeoJSON features.
        minzoom (int): Force mosaic min-zoom.
        maxzoom (int): Force mosaic max-zoom.
        quadkey_zoom (int): Force mosaic quadkey zoom (optional).
        accessor (callable): Function called on each feature to get its identifier (default is feature["properties"]["path"]).
        asset_filter (callable): Function to filter features.
        version (str): mosaicJSON definition version (default: 0.0.2).
        quiet (bool): Mask processing steps (default is True).
        kwargs (any): Options forwarded to `asset_filter`

    Returns:
        mosaic_definition (MosaicJSON): Mosaic definition.

    Examples:
        >>> MosaicJSON._create_mosaic([], 12, 14)

    """
    quadkey_zoom = quadkey_zoom or minzoom

    if not quiet:
        click.echo(f"Get quadkey list for zoom: {quadkey_zoom}", err=True)

    # If Pygeos throws an error, fall back to non-vectorized operation
    # Ref: https://github.com/developmentseed/cogeo-mosaic/issues/81
    try:
        dataset_geoms = polygons(
            [feat["geometry"]["coordinates"][0] for feat in features]
        )
    except TypeError:
        dataset_geoms = [
            polygons(feat["geometry"]["coordinates"][0]) for feat in features
        ]

    bounds = tuple(total_bounds(dataset_geoms))

    burned = burntiles.burn(features, quadkey_zoom)
    tiles = [mercantile.Tile(*tile) for tile in burned]

    center = ((bounds[0] + bounds[2]) / 2, (bounds[1] + bounds[3]) / 2, minzoom)
    mosaic_definition: Dict[str, Any] = {
        "mosaicjson": version,
        "minzoom": minzoom,
        "maxzoom": maxzoom,
        "quadkey_zoom": quadkey_zoom,
        "bounds": bounds,
        "center": center,
        "tiles": {},
        "version": "1.0.0",
    }

    if not quiet:
        click.echo("Feed Quadkey index", err=True)

    # Create tree and find assets that overlap each tile
    tree = STRtree(dataset_geoms)

    # progress output is discarded when quiet
    fout = os.devnull if quiet else sys.stderr
    with click.progressbar(  # type: ignore
        tiles, file=fout, show_percent=True, label="Iterate over quadkeys"
    ) as bar:
        for tile in bar:
            quadkey = mercantile.quadkey(tile)
            tile_ring = mercantile.feature(tile)["geometry"]["coordinates"][0]
            tile_geom = polygons(tile_ring)

            # Find intersections from rtree
            hit_indexes = sorted(tree.query(tile_geom, predicate="intersects"))
            if not hit_indexes:
                continue

            intersect_dataset = tuple(features[idx] for idx in hit_indexes)
            intersect_geoms = tuple(dataset_geoms[idx] for idx in hit_indexes)

            dataset = asset_filter(
                tile, intersect_dataset, intersect_geoms, **kwargs
            )
            if dataset:
                mosaic_definition["tiles"][quadkey] = [accessor(f) for f in dataset]

    return cls(**mosaic_definition)
def test_total_bounds_dimensions(geom):
    """Check that total_bounds returns an array of shape (4,) for ``geom``."""
    result = pygeos.total_bounds(geom)
    assert result.shape == (4,)
def test_total_bounds(geom, expected):
    """Check that total_bounds of ``geom`` equals the ``expected`` array."""
    actual = pygeos.total_bounds(geom)
    assert_array_equal(actual, expected)
] for aoi in aois: name = aoi["name"] path = aoi["path"] print(f"Creating report for {name}...") start = time() df = read_dataframe(f"examples/{path}.shp", columns=[]) geometry = pg.make_valid(df.geometry.values.data) # dissolve geometry = np.asarray([pg.union_all(geometry)]) extent_area = ( pg.area(pg.box(*pg.total_bounds(to_crs(geometry, df.crs, DATA_CRS)))) * M2_ACRES) print("Area of extent", extent_area.round()) ### calculate results, data must be in DATA_CRS print("Calculating results...") results = CustomArea(geometry, df.crs, name=name).get_results() if results is None: print(f"AOI: {path} does not overlap Blueprint") continue out_dir = Path("/tmp/aoi") / path if not out_dir.exists(): os.makedirs(out_dir)
def _create_mosaic(
    cls,
    features: Sequence[Dict],
    minzoom: int,
    maxzoom: int,
    quadkey_zoom: Optional[int] = None,
    accessor: Callable[[Dict], str] = default_accessor,
    asset_filter: Callable = default_filter,
    version: str = "0.0.2",
    quiet: bool = True,
    **kwargs,
):
    """
    Create mosaic definition content.

    Attributes
    ----------
    features : List, required
        List of GeoJSON features.
    minzoom: int, required
        Force mosaic min-zoom.
    maxzoom: int, required
        Force mosaic max-zoom.
    quadkey_zoom: int, optional
        Force mosaic quadkey zoom (defaults to `minzoom`).
    accessor: callable, required
        Function called on each feature to get its identifier (default is feature["properties"]["path"]).
    asset_filter: callable, required
        Function to filter features.
    version: str, optional
        mosaicJSON definition version (default: 0.0.2).
    quiet: bool, optional (default: True)
        Mask processing steps.
    kwargs: any
        Options forwarded to `asset_filter`

    Returns
    -------
    mosaic_definition : MosaicJSON
        Mosaic definition.

    """
    quadkey_zoom = quadkey_zoom or minzoom

    if not quiet:
        click.echo(f"Get quadkey list for zoom: {quadkey_zoom}", err=True)

    # Find dataset geometries from the exterior ring of each feature
    rings = [feat["geometry"]["coordinates"][0] for feat in features]
    try:
        dataset_geoms = polygons(rings)
    except (TypeError, ValueError):
        # Rings with differing vertex counts cannot be stacked into one
        # array for the vectorized call; build the polygons one by one.
        # A genuinely invalid ring still raises from the per-ring call.
        # Ref: https://github.com/developmentseed/cogeo-mosaic/issues/81
        dataset_geoms = [polygons(ring) for ring in rings]

    bounds = list(total_bounds(dataset_geoms))

    tiles = burntiles.burn(features, quadkey_zoom)
    tiles = [mercantile.Tile(*tile) for tile in tiles]

    mosaic_definition: Dict[str, Any] = dict(
        mosaicjson=version,
        minzoom=minzoom,
        maxzoom=maxzoom,
        quadkey_zoom=quadkey_zoom,
        bounds=bounds,
        center=((bounds[0] + bounds[2]) / 2, (bounds[1] + bounds[3]) / 2, minzoom),
        tiles={},
        version="1.0.0",
    )

    if not quiet:
        click.echo("Feed Quadkey index", err=True)

    # Create tree and find assets that overlap each tile
    tree = STRtree(dataset_geoms)

    for tile in tiles:
        quadkey = mercantile.quadkey(tile)
        tile_geom = polygons(
            mercantile.feature(tile)["geometry"]["coordinates"][0]
        )

        # Find intersections from rtree
        intersections_idx = sorted(tree.query(tile_geom, predicate="intersects"))
        if len(intersections_idx) == 0:
            continue

        intersect_dataset, intersect_geoms = zip(
            *[(features[idx], dataset_geoms[idx]) for idx in intersections_idx]
        )

        dataset = asset_filter(tile, intersect_dataset, intersect_geoms, **kwargs)

        if dataset:
            mosaic_definition["tiles"][quadkey] = [accessor(f) for f in dataset]

    return cls(**mosaic_definition)
os.makedirs(out_dir) df = read_dataframe(f"examples/{aoi_name}.shp") geometry = pg.make_valid(df.geometry.values.data) # dissolve geometry = np.asarray([pg.union_all(geometry)]) print("Calculating results...") results = CustomArea(geometry, df.crs, name="Test").get_results() # FIXME: # results = {"indicators": []} ### Convert to WGS84 for mapping geometry = to_crs(geometry, df.crs, GEO_CRS) bounds = pg.total_bounds(geometry) # only include urban up to 2060 has_urban = "proj_urban" in results and results["proj_urban"][4] > 0 has_slr = "slr" in results has_ownership = "ownership" in results has_protection = "protection" in results print("Creating maps...") task = render_maps( bounds, geometry=geometry[0], # indicators=results["indicators"], input_ids=results["input_ids"], urban=has_urban,
def summarize_by_areas(df, state, rank_only=False):
    """Calculate acres by value and area-weighted value for each CHAT field in fields.

    Parameters
    ----------
    df : GeoDataFrame
        area(s) of interest; an unnamed index is named "index" (on the
        passed-in frame) before it is reset into a column
    state : str, one of ['ok', 'tx']
        selects the "<state>chat.feather" file and the "<state>chat" entry of INPUTS
    rank_only : bool (default False)
        if True, will only calculate areas for CHAT Rank

    Returns
    -------
    DataFrame or None
        columns for total_acres, analysis_acres, chat_acres, the area-weighted
        average for each field (column named after the field), and
        <field>_<position> columns holding the acres for each value of the field.
        None if no area has a CHAT intersection with acres > 0.
    """
    # make sure the index has a name so it can be used as the grouping key
    # after reset_index()
    if not df.index.name:
        df.index.name = "index"
    index_name = df.index.name
    df = df.reset_index()

    chat_df = gp.read_feather(chat_dir / f"{state}chat.feather")

    fields = ["chatrank"]
    if not rank_only:
        fields += [e["id"] for e in INPUTS[f"{state}chat"]["indicators"]]

    print("Intersecting with CHAT...")
    chat_df = intersection(df, chat_df)
    # acres are measured on the geometry_right column of the intersection result
    chat_df["acres"] = pg.area(chat_df.geometry_right.values.data) * M2_ACRES
    chat_df = chat_df.loc[chat_df.acres > 0].copy()

    if not len(chat_df):
        return None

    # total_acres = chat_df.groupby(index_name).geometry.first()
    # total acres of each area of interest that has any CHAT overlap
    total_acres = df.loc[df[index_name].isin(chat_df[index_name])].set_index(index_name)
    total_acres["total_acres"] = pg.area(total_acres.geometry.values.data) * M2_ACRES

    results = pd.DataFrame(
        chat_df.groupby(index_name).acres.sum().rename("chat_acres")
    ).join(total_acres[["total_acres"]], how="left")

    # intersect edge units with SE input areas to determine areas outside
    # (edge units: CHAT acres are more than 1 acre short of total acres)
    edge_df = explode(
        df.loc[
            df[index_name].isin(
                results.loc[(results.chat_acres < results.total_acres - 1)].index
            )
        ].copy()[[index_name, "geometry"]]
    )

    print("Intersecting with input areas, this may take a while...")
    input_df = gp.read_feather(input_filename).reset_index(drop=True)

    # this is inverted because input_df performs better if prepared (left side)
    # note: we don't do intersection() here because of topology errors
    left = pd.Series(input_df.geometry.values.data, index=input_df.index)
    right = pd.Series(edge_df.geometry.values.data, index=edge_df.index)
    intersects = sjoin_geometry(left, right, predicate="intersects")

    tmp = input_df.loc[intersects.index.unique()]
    # have to make valid first or fails with topology errors
    tmp.geometry = pg.make_valid(tmp.geometry.values.data)
    # clip to general area, otherwise intersection takes a way long time
    clip_box = pg.box(*pg.total_bounds(edge_df.geometry.values.data))
    tmp.geometry = pg.intersection(tmp.geometry.values.data, clip_box)

    # attach the matching edge geometries; columns that collide with
    # edge_df's get the "_right" suffix
    tmp = tmp.join(intersects, how="inner").join(
        edge_df, on="index_right", rsuffix="_right"
    )

    tmp.geometry_right = pg.intersection(
        tmp.geometry.values.data, tmp.geometry_right.values.data
    )
    tmp["acres"] = pg.area(tmp.geometry_right.values.data) * M2_ACRES

    analysis_acres = (
        tmp.groupby(index_name)
        .acres.sum()
        .round(ACRES_PRECISION)
        .rename("analysis_acres")
    )

    # join analysis acres back to results
    results = results.join(analysis_acres)
    # areas without an analysis_acres value fall back to their total acres
    results.loc[results.analysis_acres.isnull(), "analysis_acres"] = results.total_acres

    area_results = dict()
    avg_results = dict()
    for field in fields:
        # Note: values are categorical, so this will add 0 area values for each category
        grouped = (
            chat_df.groupby([index_name, field])
            .acres.sum()
            .fillna(0)
            .round(ACRES_PRECISION)
            .reset_index()
        )

        # create an array of [<acres for value 0>, <acres for value 1>,... ]
        area_results[field] = grouped.groupby(index_name).acres.apply(np.array)

        # exclude nodata to calculate area-weighted average
        values = grouped.loc[grouped[field] > 0].set_index(index_name)
        # NOTE: rebinds total_acres; the frame computed above is no longer needed here
        total_acres = values.groupby(level=0).acres.sum().rename("total")
        values = values.join(total_acres)
        values["wtd_value"] = (values.acres / values.total) * values[field].astype(
            "uint8"
        )
        avg_results[field] = values.groupby(level=0).wtd_value.sum().round(1)

    area_results = pd.DataFrame(area_results)
    avg_results = pd.DataFrame(avg_results)

    # missing values (including averages for areas with only nodata) become 0
    results = results.join(avg_results).fillna(0)

    for field in fields:
        # convert areas array to columns
        s = area_results[field].apply(pd.Series)
        s.columns = [f"{field}_{c}" for c in s.columns]
        # drop any that are all 0; these are not present
        s = s.drop(columns=s.columns[s.max() == 0].tolist())
        results = results.join(s)

    return results
def total_bounds(arr):
    """Return the WKB of the box spanning the total bounds of WKB geometries.

    ``arr`` holds WKB-encoded geometries; a LazyObj is first unwrapped by
    calling its ``values()`` method.
    """
    wkb = arr.values() if isinstance(arr, LazyObj) else arr
    geometries = pg.from_wkb(wkb)
    envelope = pg.box(*pg.total_bounds(geometries))
    return pg.to_wkb(envelope)
def _total_bounds_single(self):
    """Return a box geometry spanning the total bounds of ``self.to_pygeos()``."""
    xmin, ymin, xmax, ymax = pg.total_bounds(self.to_pygeos())
    return pg.box(xmin, ymin, xmax, ymax)
async def create_custom_report(ctx, zip_filename, dataset, layer, name=""):
    """Create a Blueprint report for a user-uploaded GIS file contained in a zip.

    Zip must contain either a shapefile or a file geodatabase.

    Progress is reported through set_progress() at each stage using
    ctx["job_id"].

    Parameters
    ----------
    ctx : job context
        must provide the "job_id" key
    zip_filename : str
        full path to zip filename
    dataset : str
        full path to dataset within zip file
    layer : str
        name of layer within dataset
    name : str, optional (default: "")
        Name of area of interest (included in output report)

    Returns
    -------
    (str, list of str)
        path to the output PDF file, and a list of non-fatal error messages
        collected while rendering maps

    Raises
    ------
    DataError
        Raised if bounds are too large or if area of interest doesn't overlap SA region
    """
    # non-fatal errors; passed to later progress updates and returned to the caller
    errors = []

    await set_progress(ctx["job_id"], 0, "Loading data")

    # read the dataset directly out of the zip file via the /vsizip/ prefix
    path = f"/vsizip/{zip_filename}/{dataset}"

    df = read_dataframe(path, layer=layer)

    geometry = pg.make_valid(df.geometry.values.data)

    await set_progress(ctx["job_id"], 5, "Preparing area of interest")

    # dissolve
    geometry = np.asarray([pg.union_all(geometry)])

    # GEO_CRS copy of the geometry is used for map bounds and rendering
    geo_geometry = to_crs(geometry, df.crs, GEO_CRS)
    bounds = pg.total_bounds(geo_geometry)

    # estimate area
    # (area of the bounding box of the geometry in DATA_CRS, not of the geometry itself)
    extent_area = (
        pg.area(pg.box(*pg.total_bounds(to_crs(geometry, df.crs, DATA_CRS))))
        * M2_ACRES)
    if extent_area >= CUSTOM_REPORT_MAX_ACRES:
        raise DataError(
            f"The bounding box of your area of interest is too large ({extent_area:,.0f} acres), it must be < {CUSTOM_REPORT_MAX_ACRES:,.0f} acres."
        )

    await set_progress(ctx["job_id"], 10, "Calculating results (this might take a while)")

    # calculate results, data must be in DATA_CRS
    print("Calculating results...")
    results = CustomArea(geometry, df.crs, name).get_results()

    if results is None:
        raise DataError(
            "area of interest does not overlap Southeast Blueprint")

    if name:
        results["name"] = name

    # urban maps are only requested when the element at index 4 of proj_urban is > 0
    has_urban = "proj_urban" in results and results["proj_urban"][4] > 0
    has_slr = "slr" in results
    has_ownership = "ownership" in results
    has_protection = "protection" in results

    # compile indicator IDs across all inputs
    indicators = []
    for input_area in results["inputs"]:
        for ecosystem in input_area.get("ecosystems", []):
            indicators.extend([i["id"] for i in ecosystem["indicators"]])

    await set_progress(ctx["job_id"], 25, "Creating maps (this might take a while)")

    print("Rendering maps...")
    maps, scale, map_errors = await render_maps(
        bounds,
        geometry=geo_geometry[0],
        input_ids=results["input_ids"],
        indicators=indicators,
        urban=has_urban,
        slr=has_slr,
        ownership=has_ownership,
        protection=has_protection,
    )

    # map errors are logged and summarized for the user; they do not abort the report
    if map_errors:
        log.error(f"Map rendering errors: {map_errors}")
        if "basemap" in map_errors:
            errors.append("Error creating basemap for all maps")

        if "aoi" in map_errors:
            errors.append("Error rendering area of interest on maps")

        # any key other than "basemap" / "aoi" is reported generically
        if set(map_errors.keys()).difference(["basemap", "aoi"]):
            errors.append("Error creating one or more maps")

    await set_progress(ctx["job_id"], 75, "Creating PDF (this might take a while)", errors=errors)

    results["scale"] = scale

    pdf = create_report(maps=maps, results=results)

    await set_progress(ctx["job_id"], 95, "Nearly done", errors=errors)

    # NOTE: rebinds `name` from the area name to the path of the temporary PDF file
    fp, name = tempfile.mkstemp(suffix=".pdf", dir=TEMP_DIR)
    # fp is the file descriptor returned by mkstemp; it is wrapped by open()
    with open(fp, "wb") as out:
        out.write(pdf)

    await set_progress(ctx["job_id"], 100, "All done!", errors=errors)

    log.debug(f"Created PDF at: {name}")

    return name, errors
huc12["acres"] = (pg.area(huc12.geometry.values.data) * M2_ACRES).round().astype("uint")

# for those that touch the edge of the region, drop any that are not >= 50% in
# raster input area.  We are not able to use polygon intersection because it
# takes too long.
tree = pg.STRtree(huc12.geometry.values.data)
ix = tree.query(bnd, predicate="contains")
# edge HUC12s are those not returned by the "contains" query against bnd
edge_df = huc12.loc[~huc12.id.isin(huc12.iloc[ix].id)].copy()
geometries = pd.Series(edge_df.geometry.values.data, index=edge_df.id)

drop_ids = []
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
# The loop variable is named huc12_id so it does not shadow the builtin id().
for huc12_id, geometry in Bar(
    "Calculating HUC12 overlap with input area", max=len(geometries)
).iter(geometries.items()):
    percent_overlap = calculate_percent_overlap(
        input_area_mask, [to_dict(geometry)], bounds=pg.total_bounds(geometry)
    )
    if percent_overlap < 50:
        drop_ids.append(huc12_id)

print(f"Dropping {len(drop_ids)} HUC12s that do not sufficiently overlap input areas")
huc12 = huc12.loc[~huc12.id.isin(drop_ids)].copy()

# extract geographic bounds
huc12_wgs84 = huc12.to_crs(GEO_CRS)
huc12 = huc12.join(huc12_wgs84.bounds)

# Save in EPSG:5070 for analysis
huc12.to_feather(analysis_dir / "huc12.feather")
write_dataframe(huc12, bnd_dir / "huc12.gpkg")