def close_gaps(df, tolerance):
    """Close gaps in LineString geometry where it should be contiguous.

    Snaps both lines to a centroid of a gap in between.
    """
    geom = df.geometry.values.data
    coords = pygeos.get_coordinates(geom)
    indices = pygeos.get_num_coordinates(geom)

    # generate a list of start and end coordinates and create point geometries
    edges = [0]
    i = 0
    for ind in indices:
        ix = i + ind
        edges.append(ix - 1)
        edges.append(ix)
        i = ix
    edges = edges[:-1]
    points = pygeos.points(np.unique(coords[edges], axis=0))

    buffered = pygeos.buffer(points, tolerance)
    dissolved = pygeos.union_all(buffered)

    exploded = [
        pygeos.get_geometry(dissolved, i)
        for i in range(pygeos.get_num_geometries(dissolved))
    ]

    centroids = pygeos.centroid(exploded)

    snapped = pygeos.snap(geom, pygeos.union_all(centroids), tolerance)

    return snapped
def close_gaps(gdf, tolerance):
    """Close gaps in LineString geometry where it should be contiguous.

    Snaps both lines to a centroid of a gap in between.

    Parameters
    ----------
    gdf : GeoDataFrame, GeoSeries
        GeoDataFrame or GeoSeries containing LineString representation of a network.
    tolerance : float
        nodes within a tolerance will be snapped together

    Returns
    -------
    GeoSeries

    See also
    --------
    momepy.extend_lines
    momepy.remove_false_nodes
    """
    geom = gdf.geometry.values.data
    coords = pygeos.get_coordinates(geom)
    indices = pygeos.get_num_coordinates(geom)

    # generate a list of start and end coordinates and create point geometries
    edges = [0]
    i = 0
    for ind in indices:
        ix = i + ind
        edges.append(ix - 1)
        edges.append(ix)
        i = ix
    edges = edges[:-1]
    points = pygeos.points(np.unique(coords[edges], axis=0))

    buffered = pygeos.buffer(points, tolerance / 2)
    dissolved = pygeos.union_all(buffered)

    exploded = [
        pygeos.get_geometry(dissolved, i)
        for i in range(pygeos.get_num_geometries(dissolved))
    ]

    centroids = pygeos.centroid(exploded)

    snapped = pygeos.snap(geom, pygeos.union_all(centroids), tolerance)

    return gpd.GeoSeries(snapped, crs=gdf.crs)
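A minimal usage sketch for close_gaps, with invented line data; it assumes a pygeos-backed GeoPandas build where .values.data exposes pygeos geometries, as the function above requires:

import geopandas as gpd
from shapely.geometry import LineString

# two lines that should be contiguous but are separated by a 0.2-unit gap
lines = gpd.GeoDataFrame(
    geometry=[
        LineString([(0, 0), (10, 0)]),
        LineString([(10.2, 0), (20, 0)]),
    ]
)
closed = close_gaps(lines, tolerance=0.5)
# both endpoints within the tolerance are snapped to the centroid of the gap,
# so the two lines now share a common node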
def get_input_area_boundary(input_area):
    """Extract and union polygons associated with input area into a single
    boundary (Multi)Polygon.

    Parameters
    ----------
    input_area : str
        id of input area

    Returns
    -------
    (Multi)Polygon
    """
    # have to make valid or we get errors during union for FL
    values = [
        e["value"] for e in INPUT_AREA_VALUES if input_area in set(e["id"].split(","))
    ]

    inputs_df = gp.read_feather(bnd_dir / "input_areas.feather")

    bnd = pg.union_all(
        pg.make_valid(inputs_df.loc[inputs_df.value.isin(values)].geometry.values.data)
    )

    return bnd
def setup(self):
    # create irregular polygons by merging overlapping point buffers
    self.left = pygeos.union_all(
        pygeos.buffer(pygeos.points(np.random.random((500, 2)) * 500), 15)
    )
    # shift this up and right
    self.right = pygeos.apply(self.left, lambda x: x + 50)
def unary_union(data):
    if compat.USE_PYGEOS:
        return _pygeos_to_shapely(pygeos.union_all(data))
    else:
        data = [g for g in data if g is not None]
        if data:
            return shapely.ops.unary_union(data)
        else:
            return None
def setup(self):
    # create irregular polygons by merging overlapping point buffers
    self.polygon = pygeos.union_all(
        pygeos.buffer(pygeos.points(np.random.random((1000, 2)) * 500), 10)
    )
    xmin = np.random.random(100) * 100
    xmax = xmin + 100
    ymin = np.random.random(100) * 100
    ymax = ymin + 100
    self.bounds = np.array([xmin, ymin, xmax, ymax]).T
    self.boxes = pygeos.box(xmin, ymin, xmax, ymax)
def _pandas(cls, column, **kwargs):
    shape = kwargs.get("shape")
    shape_format = kwargs.get("shape_format")
    column_shape_format = kwargs.get("column_shape_format")

    # Check that shape is given and given in the correct format
    if shape is not None:
        try:
            if shape_format == "wkt":
                shape_ref = geos.from_wkt(shape)
            elif shape_format == "wkb":
                shape_ref = geos.from_wkb(shape)
            elif shape_format == "geojson":
                shape_ref = geos.from_geojson(shape)
            else:
                raise NotImplementedError(
                    "Shape constructor method not implemented. Must be in WKT, WKB, or GeoJSON format."
                )
        except Exception:
            raise Exception("A valid reference shape was not given.")
    else:
        raise Exception("A shape must be provided for this method.")

    # Load the column into a pygeos Geometry vector from numpy array (Series not supported).
    if column_shape_format == "wkt":
        shape_test = geos.from_wkt(column.to_numpy(), on_invalid="ignore")
    elif column_shape_format == "wkb":
        shape_test = geos.from_wkb(column.to_numpy(), on_invalid="ignore")
    else:
        raise NotImplementedError("Column values shape format not implemented.")

    # Allow for an array of reference shapes to be provided. Return a union of
    # all the shapes in the array (Polygon or MultiPolygon).
    shape_ref = geos.union_all(shape_ref)

    # Prepare the geometries
    geos.prepare(shape_ref)
    geos.prepare(shape_test)

    column_centroids = geos.centroid(shape_test)

    return pd.Series(geos.within(column_centroids, shape_ref))
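A hypothetical standalone run of the same centroid-within check, with invented WKT inputs:

import numpy as np
import pandas as pd
import pygeos as geos

shape_ref = geos.union_all(geos.from_wkt(["POLYGON ((0 0, 4 0, 4 4, 0 4, 0 0))"]))
shape_test = geos.from_wkt(np.array(["POINT (1 1)", "POINT (9 9)"]))
geos.prepare(shape_ref)
# centroids of points are the points themselves; only the first falls inside
print(pd.Series(geos.within(geos.centroid(shape_test), shape_ref)))  # True, False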
def _pandas(cls, column, **kwargs):
    column_shape_format = kwargs.get("column_shape_format")

    # Load the column into a pygeos Geometry vector from numpy array (Series not supported).
    if column_shape_format == "wkt":
        shape_test = geos.from_wkt(column.to_numpy(), on_invalid="ignore")
    elif column_shape_format == "wkb":
        shape_test = geos.from_wkb(column.to_numpy(), on_invalid="ignore")
    elif column_shape_format == "xy":
        shape_df = pd.DataFrame(column.to_list(), columns=("x", "y"))
        shape_test = geos.points(shape_df.x, y=shape_df.y)
    else:
        raise NotImplementedError("Column values shape format not implemented.")

    shape_test = geos.union_all(shape_test)
    radius = geos.minimum_bounding_radius(shape_test)
    return radius
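A quick sanity check of minimum_bounding_radius on made-up data: three points on the unit circle should yield a radius of about 1.

import pygeos as geos

pts = geos.points([(1, 0), (-1, 0), (0, 1)])
print(geos.minimum_bounding_radius(geos.union_all(pts)))  # ~1.0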
def setup(self):
    # create irregular polygons by merging overlapping point buffers
    self.polygons = pygeos.get_parts(
        pygeos.union_all(
            pygeos.buffer(pygeos.points(np.random.random((2000, 2)) * 500), 5)
        )
    )
    self.tree = pygeos.STRtree(self.polygons)
    # initialize the tree by making a tiny query first
    self.tree.query(pygeos.points(0, 0))

    # create points that extend beyond the domain of the above polygons to ensure
    # some don't overlap
    self.points = pygeos.points((np.random.random((2000, 2)) * 750) - 125)
    self.point_tree = pygeos.STRtree(pygeos.points(np.random.random((2000, 2)) * 750))
    self.point_tree.query(pygeos.points(0, 0))

    # create points on a grid for testing equidistant nearest neighbors
    # creates 2025 points
    grid_coords = np.mgrid[:45, :45].T.reshape(-1, 2)
    self.grid_point_tree = pygeos.STRtree(pygeos.points(grid_coords))
    self.grid_points = pygeos.points(grid_coords + 0.5)
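A standalone sketch of the kind of bulk query this setup is built for, using invented sizes but the same pattern as the benchmark above:

import numpy as np
import pygeos

polygons = pygeos.get_parts(
    pygeos.union_all(pygeos.buffer(pygeos.points(np.random.random((100, 2)) * 50), 2))
)
tree = pygeos.STRtree(polygons)
points = pygeos.points(np.random.random((100, 2)) * 50)
# pairs of indices (point index, polygon index) whose geometries intersect
pt_idx, poly_idx = tree.query_bulk(points, predicate="intersects")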
async def create_custom_report(ctx, zip_filename, dataset, layer, name=""):
    """Create a Blueprint report for a user-uploaded GIS file contained in a zip.
    Zip must contain either a shapefile or a file geodatabase.

    Parameters
    ----------
    ctx : job context
    zip_filename : str
        full path to zip filename
    dataset : str
        full path to dataset within zip file
    layer : str
        name of layer within dataset
    name : str, optional (default: "")
        Name of area of interest (included in output report)

    Returns
    -------
    str
        path to output file

    Raises
    ------
    DataError
        Raised if bounds are too large or if area of interest doesn't overlap
        SA region
    """
    errors = []

    await set_progress(ctx["job_id"], 0, "Loading data")

    path = f"/vsizip/{zip_filename}/{dataset}"
    df = read_dataframe(path, layer=layer)
    geometry = pg.make_valid(df.geometry.values.data)

    await set_progress(ctx["job_id"], 5, "Preparing area of interest")

    # dissolve
    geometry = np.asarray([pg.union_all(geometry)])

    geo_geometry = to_crs(geometry, df.crs, GEO_CRS)
    bounds = pg.total_bounds(geo_geometry)

    # estimate area
    extent_area = (
        pg.area(pg.box(*pg.total_bounds(to_crs(geometry, df.crs, DATA_CRS)))) * M2_ACRES
    )
    if extent_area >= CUSTOM_REPORT_MAX_ACRES:
        raise DataError(
            f"The bounding box of your area of interest is too large ({extent_area:,.0f} acres), it must be < {CUSTOM_REPORT_MAX_ACRES:,.0f} acres."
        )

    await set_progress(ctx["job_id"], 10, "Calculating results (this might take a while)")

    # calculate results, data must be in DATA_CRS
    print("Calculating results...")
    results = CustomArea(geometry, df.crs, name).get_results()

    if results is None:
        raise DataError("area of interest does not overlap Southeast Blueprint")

    if name:
        results["name"] = name

    has_urban = "proj_urban" in results and results["proj_urban"][4] > 0
    has_slr = "slr" in results
    has_ownership = "ownership" in results
    has_protection = "protection" in results

    # compile indicator IDs across all inputs
    indicators = []
    for input_area in results["inputs"]:
        for ecosystem in input_area.get("ecosystems", []):
            indicators.extend([i["id"] for i in ecosystem["indicators"]])

    await set_progress(ctx["job_id"], 25, "Creating maps (this might take a while)")

    print("Rendering maps...")
    maps, scale, map_errors = await render_maps(
        bounds,
        geometry=geo_geometry[0],
        input_ids=results["input_ids"],
        indicators=indicators,
        urban=has_urban,
        slr=has_slr,
        ownership=has_ownership,
        protection=has_protection,
    )

    if map_errors:
        log.error(f"Map rendering errors: {map_errors}")
        if "basemap" in map_errors:
            errors.append("Error creating basemap for all maps")
        if "aoi" in map_errors:
            errors.append("Error rendering area of interest on maps")
        if set(map_errors.keys()).difference(["basemap", "aoi"]):
            errors.append("Error creating one or more maps")

    await set_progress(
        ctx["job_id"], 75, "Creating PDF (this might take a while)", errors=errors
    )

    results["scale"] = scale

    pdf = create_report(maps=maps, results=results)

    await set_progress(ctx["job_id"], 95, "Nearly done", errors=errors)

    fp, name = tempfile.mkstemp(suffix=".pdf", dir=TEMP_DIR)
    with open(fp, "wb") as out:
        out.write(pdf)

    await set_progress(ctx["job_id"], 100, "All done!", errors=errors)

    log.debug(f"Created PDF at: {name}")

    return name, errors
from pathlib import Path

import rasterio
import geopandas as gp
import pygeos as pg

from analysis.constants import DATA_CRS
from analysis.lib.raster import add_overviews

data_dir = Path("data/inputs")
src_dir = data_dir / "threats/slr"

boxes = []
for filename in (src_dir).glob("*.tif"):
    with rasterio.open(filename) as src:
        boxes.append(pg.box(*src.bounds))

# union them together into a single polygon
bnd = pg.union_all(boxes)

df = gp.GeoDataFrame({"geometry": [bnd], "index": [0]}, crs=DATA_CRS)
df.to_feather(src_dir / "slr_bounds.feather")

# For debugging
# write_dataframe(df, "/tmp/slr_bounds.gpkg", driver="GPKG")

# Create overviews for each individual file in the VRT
# Note: these have varying resolution, but this creates lower resolutions for each
print("Adding overviews to SLR files...")
for filename in src_dir.glob("*.tif"):
    print(f"Processing {filename}...")
    add_overviews(filename)
message=".*initial implementation of Parquet.*") from analysis.constants import DATA_CRS, GEO_CRS, M2_ACRES src_dir = Path("source_data") data_dir = Path("data") analysis_dir = data_dir / "inputs/summary_units" bnd_dir = data_dir / "boundaries" # GPKGs output for reference tile_dir = data_dir / "for_tiles" ### Extract the boundary sa_df = read_dataframe(src_dir / "boundaries/SABlueprint2020_Extent.shp")[["geometry"]] # boundary has self-intersections and 4 geometries, need to clean up bnd = pg.union_all(pg.make_valid(sa_df.geometry.values.data)) ### Extract HUC12 within boundary print("Reading source HUC12s...") merged = None for huc2 in [2, 3, 5, 6]: df = read_dataframe( src_dir / f"summary_units/WBD_0{huc2}_HU2_GDB/WBD_0{huc2}_HU2_GDB.gdb", layer="WBDHU12", )[["huc12", "name", "geometry"]].rename(columns={"huc12": "id"}) if merged is None: merged = df else: merged = merged.append(df, ignore_index=True)
connectors_outfilename = out_dir / "ns_connectors.tif"
tnc_outfilename = out_dir / "tnc_resilient_connected.tif"
ns_outfilename = out_dir / "ns_priority.tif"

if not out_dir.exists():
    os.makedirs(out_dir)

### Get input area mask
print("Extracting NatureScape input area mask...")
mask, transform, window = get_input_area_mask("app")

inputs_df = gp.read_feather(data_dir / "boundaries/input_areas.feather")
values = [
    e["value"] for e in INPUT_AREA_VALUES if "app" in set(e["id"].split(","))
]
bnd = pg.union_all(
    inputs_df.loc[inputs_df.value.isin(values)].geometry.values.data
)

### Warp TNC resilient and connected landscapes to match Blueprint input area
print("Reading and warping TNC resilient and connected landscapes...")
with rasterio.open(src_dir / "Resilient_and_Connected20180308.tif") as rc:
    vrt = WarpedVRT(
        rc,
        width=window.width,
        height=window.height,
        nodata=int(rc.nodata),
        transform=transform,
        crs=DATA_CRS,
        resampling=Resampling.nearest,
    )

    data = vrt.read()[0]
def union_or_combine(geometries, grid_size=None, op="union"):
    """First does a check for overlap of geometries according to STRtree
    intersects.  If any overlap, then will use union_all on all of them;
    otherwise will return as a MultiPolygon.

    If only one polygon is present, it will be returned in a MultiPolygon.

    If the coverage_union op is provided, geometries must be polygons and
    topologically related or this will produce bad output or fail outright.
    See docs for coverage_union in GEOS.

    Parameters
    ----------
    geometries : ndarray of single part polygons
    grid_size : float, optional (default: None)
        provided to union_all; otherwise no effect
    op : str, one of {'union', 'coverage_union'}

    Returns
    -------
    MultiPolygon
    """
    if not (pg.get_type_id(geometries) == 3).all():
        print("Inputs to union or combine must be single-part geometries")

    if len(geometries) == 1:
        return pg.multipolygons(geometries)

    tree = pg.STRtree(geometries)
    left, right = tree.query_bulk(geometries, predicate="intersects")

    # drop self intersections
    ix = left != right
    left = left[ix]
    right = right[ix]

    # no intersections, just combine parts
    if len(left) == 0:
        return pg.multipolygons(geometries)

    # find groups of contiguous geometries and union them together individually
    contiguous = np.sort(np.unique(np.concatenate([left, right])))
    discontiguous = np.setdiff1d(np.arange(len(geometries), dtype="uint"), contiguous)
    groups = find_adjacent_groups(left, right)

    parts = []

    if op == "coverage_union":
        for group in groups:
            parts.extend(pg.get_parts(pg.coverage_union_all(geometries[list(group)])))

    else:
        for group in groups:
            parts.extend(
                pg.get_parts(pg.union_all(geometries[list(group)], grid_size=grid_size))
            )

    parts.extend(pg.get_parts(geometries[discontiguous]))

    return pg.multipolygons(parts)
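A small sketch of the no-overlap branch, on invented boxes; note that the overlapping branch additionally relies on the find_adjacent_groups helper from the same module, which is not shown here.

import numpy as np
import pygeos as pg

# two disjoint unit squares: query_bulk finds no intersections between distinct
# geometries, so they are combined into a MultiPolygon without running a union
disjoint = pg.box(np.array([0.0, 5.0]), 0.0, np.array([1.0, 6.0]), 1.0)
print(pg.get_type_id(union_or_combine(disjoint)))  # 6 == MultiPolygon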
columns=["ls_cond"]).rename(columns=field_map).rename( columns={"chat_rank": "chatrank"})) for col in chat_fields: df[col] = df[col].astype("uint8") df = df.drop(columns=["hexagon_id"]) ### Find the CHAT units that intersect with OK / TX input areas # Use centerpoints, since input area roughly follows edges of hexes points = pg.centroid(df.geometry.values.data) tree = pg.STRtree(points) for state in ["ok", "tx"]: print(f"Processing {state} CHAT...") input_area = pg.union_all( inputs_df.loc[inputs_df.inputs == f"{state}chat"].geometry.values.data) ix = tree.query(input_area, predicate="intersects") state_df = df.iloc[ix].reset_index(drop=True) # lcon not present for TX if state == "tx": state_df = state_df.drop(columns=["lcon"]) # Reclassify chatrank to match blueprint integration rules. # First shift other values up one ix = state_df.chatrank >= 2 state_df.loc[ix, "chatrank"] = state_df.chatrank + 1 # for any that were previously a chatrank of 2 but with higher values of aquatic or # terrestrial, map them back to 2 ix = ((state_df.chatrank == 3) & state_df.arank.isin([2, 3])
# {"name": "Cave Spring, VA area", "path": "CaveSpring"}, # {"name": "South Atlantic Offshore", "path": "SAOffshore"}, # {"name": "Florida Offshore", "path": "FLOffshore"} ] for aoi in aois: name = aoi["name"] path = aoi["path"] print(f"Creating report for {name}...") start = time() df = read_dataframe(f"examples/{path}.shp", columns=[]) geometry = pg.make_valid(df.geometry.values.data) # dissolve geometry = np.asarray([pg.union_all(geometry)]) extent_area = ( pg.area(pg.box(*pg.total_bounds(to_crs(geometry, df.crs, DATA_CRS)))) * M2_ACRES) print("Area of extent", extent_area.round()) ### calculate results, data must be in DATA_CRS print("Calculating results...") results = CustomArea(geometry, df.crs, name=name).get_results() if results is None: print(f"AOI: {path} does not overlap Blueprint") continue out_dir = Path("/tmp/aoi") / path
def find_nhd_waterbody_breaks(geometries, nhd_lines):
    """Some large waterbody complexes are divided by dams; these breaks
    need to be preserved.  This is done by finding the shared edges between
    adjacent waterbodies that fall near NHD lines (which include dams) and
    buffering them by 10 meters (arbitrary, from trial and error).

    This should be skipped if nhd_lines is empty.

    Parameters
    ----------
    geometries : ndarray of pygeos waterbody polygons
    nhd_lines : ndarray of pygeos line geometries

    Returns
    -------
    MultiPolygon
        containing all buffered lines between waterbodies that are near
        NHD lines.  Returns None if no adjacent waterbodies meet these criteria
    """
    # find all nhd lines that intersect waterbodies
    # first, buffer them slightly
    nhd_lines = pg.get_parts(pg.union_all(pg.buffer(nhd_lines, 0.1)))
    tree = pg.STRtree(geometries)
    left, right = tree.query_bulk(nhd_lines, predicate="intersects")

    # add these to the return
    keep_nhd_lines = nhd_lines[np.unique(left)]

    # find connected boundaries
    boundaries = pg.polygons(pg.get_exterior_ring(geometries))
    tree = pg.STRtree(boundaries)
    left, right = tree.query_bulk(boundaries, predicate="intersects")

    # drop self intersections
    ix = left != right
    left = left[ix]
    right = right[ix]

    # extract unique pairs (dedup symmetric pairs)
    pairs = np.array([left, right]).T
    pairs = (
        pd.DataFrame({"left": pairs.min(axis=1), "right": pairs.max(axis=1)})
        .groupby(["left", "right"])
        .first()
        .reset_index()
    )

    # calculate geometric intersection
    i = pg.intersection(
        geometries.take(pairs.left.values), geometries.take(pairs.right.values)
    )

    # extract individual parts (may be geom collections)
    parts = pg.get_parts(pg.get_parts(pg.get_parts(i)))

    # extract only the lines or polygons
    t = pg.get_type_id(parts)
    parts = parts[((t == 1) | (t == 3)) & (~pg.is_empty(parts))].copy()

    # buffer and merge
    split_lines = pg.get_parts(pg.union_all(pg.buffer(parts, 10)))

    # now find the ones that are within 100m of nhd lines
    nhd_lines = pg.get_parts(nhd_lines)
    tree = pg.STRtree(nhd_lines)
    left, right = tree.nearest_all(split_lines, max_distance=100)

    split_lines = split_lines[np.unique(left)]

    if len(split_lines) or len(keep_nhd_lines):
        return pg.union_all(np.append(split_lines, keep_nhd_lines))

    return None
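The symmetric-pair dedup step above is worth isolating; a tiny self-contained example of the min/max trick:

import numpy as np
import pandas as pd

left = np.array([0, 1, 2, 1])
right = np.array([1, 0, 1, 2])
pairs = np.array([left, right]).T
deduped = (
    pd.DataFrame({"left": pairs.min(axis=1), "right": pairs.max(axis=1)})
    .groupby(["left", "right"])
    .first()
    .reset_index()
)
# (0, 1) and (1, 0) collapse to a single row, as do (1, 2) and (2, 1)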
from analysis.lib.pygeos_util import explode

src_dir = Path("source_data")
data_dir = Path("data")
out_dir = data_dir / "inputs/boundaries"  # used as inputs for other steps
tile_dir = data_dir / "for_tiles"

sa_df = read_dataframe(src_dir / "boundaries/SABlueprint2020_Extent.shp")

### Create mask by cutting SA bounds out of world bounds
print("Creating mask...")
world = pg.box(-180, -85, 180, 85)

# boundary has self-intersections and 4 geometries, need to clean up
bnd = pg.union_all(pg.make_valid(sa_df.geometry.values.data))
bnd_geo = pg.union_all(pg.make_valid(sa_df.to_crs(GEO_CRS).geometry.values.data))
mask = pg.normalize(pg.difference(world, bnd_geo))

gp.GeoDataFrame(geometry=[bnd], crs=DATA_CRS).to_feather(
    out_dir / "sa_boundary.feather"
)

write_dataframe(
    gp.GeoDataFrame({"geometry": bnd_geo}, index=[0], crs=GEO_CRS),
    tile_dir / "sa_boundary.geojson",
    driver="GeoJSONSeq",
)

write_dataframe(
    gp.GeoDataFrame({"geometry": mask}, index=[0], crs=GEO_CRS),
def test_union_all_prec(geom, grid_size, expected):
    actual = pygeos.union_all(geom, grid_size=grid_size)
    assert pygeos.equals(actual, expected)
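An illustrative, made-up case of what the grid_size keyword does here; it requires a GEOS build new enough to support precision-grid unions:

import pygeos

a = pygeos.box(0, 0, 10, 10)
b = pygeos.box(9.6, 0, 20, 10)
# vertices are snapped to multiples of 2 before the union, so x=9.6 snaps to 10
# and the two boxes dissolve into a single rectangle
print(pygeos.to_wkt(pygeos.union_all([a, b], grid_size=2)))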
def time_union_all_prec2(self):
    pygeos.union_all([self.left, self.right], grid_size=2)
def unary_union(data):
    if compat.USE_PYGEOS:
        return _pygeos_to_shapely(pygeos.union_all(data))
    else:
        return shapely.ops.unary_union(data)
def convex_hull_all(arr):
    if isinstance(arr, LazyObj):
        arr = arr.values()
    points = pg.union_all(pg.extract_unique_points(pg.from_wkb(arr)))
    return pg.to_wkb(pg.convex_hull(points))
def union_all(arr):
    if isinstance(arr, LazyObj):
        arr = arr.values()
    return pg.union_all(pg.from_wkb(arr))
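A rough usage sketch of the two WKB helpers above, with invented inputs; it assumes the LazyObj class and the pg alias for pygeos from the surrounding module are in scope:

import pygeos as pg

wkb = pg.to_wkb(pg.box([0.0, 2.0], 0.0, [1.0, 3.0], 1.0))  # two squares as WKB
hull_wkb = convex_hull_all(wkb)  # WKB of the convex hull of all vertices
merged = union_all(wkb)          # single pygeos geometry unioning both squares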
def time_union_all(self):
    pygeos.union_all([self.left, self.right])
tile_dir = data_dir / "for_tiles"

if not out_dir.exists():
    os.makedirs(out_dir)

if not tile_dir.exists():
    os.makedirs(tile_dir)

### Extract the boundary
bnd_df = read_dataframe(
    src_dir / "blueprint/SE_Blueprint_2021_Vectors.gdb",
    layer="SECAS_Boundary_2021_20211117",
)[["geometry"]]

# boundary has multiple geometries, union together and cleanup
bnd_df = gp.GeoDataFrame(
    geometry=[pg.union_all(pg.make_valid(bnd_df.geometry.values.data))],
    index=[0],
    crs=bnd_df.crs,
)
bnd_df.to_feather(out_dir / "se_boundary.feather")
write_dataframe(bnd_df, data_dir / "boundaries/se_boundary.fgb")

# create GeoJSON for tiling
bnd_geo = bnd_df.to_crs(GEO_CRS)
write_dataframe(bnd_geo, tile_dir / "se_boundary.geojson", driver="GeoJSONSeq")

### Create mask by cutting SA bounds out of world bounds
print("Creating mask...")
world = pg.box(-180, -85, 180, 85)
mask = pg.normalize(pg.difference(world, bnd_geo.geometry.values.data))
# save all states for spatial joins
state_df.to_feather(out_dir / "states.feather")

state_df = state_df.loc[state_df.id.isin(STATES.keys())].copy()
state_df.to_feather(out_dir / "region_states.feather")
write_dataframe(
    state_df[["State", "geometry"]].rename(columns={"State": "id"}),
    out_dir / "region_states.gpkg",
)

# dissolve to create outer state boundary for total analysis area and regions
bnd_df = gp.GeoDataFrame(
    [
        {"geometry": pg.union_all(state_df.geometry.values.data), "id": "total"},
    ]
    + [
        {
            "geometry": pg.union_all(
                state_df.loc[
                    state_df.id.isin(REGION_STATES[region])
                ].geometry.values.data
            ),
            "id": region,
        }
        for region in REGION_STATES
    ],
    crs=CRS,
)
write_dataframe(bnd_df, out_dir / "region_boundary.gpkg")
bnd_df.to_feather(out_dir / "region_boundary.feather")

bnd = bnd_df.geometry.values.data[0]
def remove_false_nodes(gdf):
    """
    Clean topology of existing LineString geometry by removal of nodes of degree 2.

    Parameters
    ----------
    gdf : GeoDataFrame, GeoSeries, array of pygeos geometries
        (Multi)LineString data of street network

    Returns
    -------
    gdf : GeoDataFrame, GeoSeries

    See also
    --------
    momepy.extend_lines
    momepy.close_gaps
    """
    if isinstance(gdf, (gpd.GeoDataFrame, gpd.GeoSeries)):
        # explode to avoid MultiLineStrings
        # double reset index due to the bug in GeoPandas explode
        df = gdf.reset_index(drop=True).explode().reset_index(drop=True)

        # get underlying pygeos geometry
        geom = df.geometry.values.data
    else:
        geom = gdf

    # extract array of coordinates and number per geometry
    coords = pygeos.get_coordinates(geom)
    indices = pygeos.get_num_coordinates(geom)

    # generate a list of start and end coordinates and create point geometries
    edges = [0]
    i = 0
    for ind in indices:
        ix = i + ind
        edges.append(ix - 1)
        edges.append(ix)
        i = ix
    edges = edges[:-1]
    points = pygeos.points(np.unique(coords[edges], axis=0))

    # query LineString geometry to identify points intersecting 2 geometries
    tree = pygeos.STRtree(geom)
    inp, res = tree.query_bulk(points, predicate="intersects")
    unique, counts = np.unique(inp, return_counts=True)
    merge = res[np.isin(inp, unique[counts == 2])]

    if len(merge) > 0:
        # filter duplications and create a dictionary with indication of components to
        # be merged together
        dups = [item for item, count in collections.Counter(merge).items() if count > 1]
        split = np.split(merge, len(merge) / 2)
        components = {}
        for i, a in enumerate(split):
            if a[0] in dups or a[1] in dups:
                if a[0] in components.keys():
                    i = components[a[0]]
                elif a[1] in components.keys():
                    i = components[a[1]]
            components[a[0]] = i
            components[a[1]] = i

        # iterate through components and create new geometries
        new = []
        for c in set(components.values()):
            keys = []
            for item in components.items():
                if item[1] == c:
                    keys.append(item[0])
            new.append(pygeos.line_merge(pygeos.union_all(geom[keys])))

        # remove incorrect geometries and append fixed versions
        df = df.drop(merge)
        final = gpd.GeoSeries(new).explode().reset_index(drop=True)
        if isinstance(gdf, gpd.GeoDataFrame):
            return df.append(
                gpd.GeoDataFrame({df.geometry.name: final}, geometry=df.geometry.name),
                ignore_index=True,
            )
        return df.append(final, ignore_index=True)
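A minimal sketch of remove_false_nodes on invented data, again assuming a pygeos-backed GeoPandas stack with the older append API used above:

import geopandas as gpd
from shapely.geometry import LineString

segments = gpd.GeoDataFrame(
    geometry=[
        LineString([(0, 0), (1, 0)]),
        LineString([(1, 0), (2, 0)]),  # (1, 0) and (2, 0) are degree-2 nodes
        LineString([(2, 0), (3, 0)]),
    ]
)
cleaned = remove_false_nodes(segments)
# the three segments are merged into a single LineString from (0, 0) to (3, 0)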
def enclosures(primary_barriers, limit=None, additional_barriers=None, enclosure_id="eID"):
    """
    Generate enclosures based on passed barriers.

    Enclosures are areas enclosed from all sides by at least one type of
    a barrier. Barriers are typically roads, railways, natural features like
    rivers and other water bodies or coastline. Enclosures are a result of
    polygonization of the ``primary_barriers`` and ``limit`` and its subdivision
    based on additional_barriers.

    Parameters
    ----------
    primary_barriers : GeoDataFrame, GeoSeries
        GeoDataFrame or GeoSeries containing primary barriers.
        (Multi)LineString geometry is expected.
    limit : GeoDataFrame, GeoSeries (default None)
        GeoDataFrame or GeoSeries containing external limit of enclosures,
        i.e. the area which gets partitioned. If None is passed,
        the internal area of ``primary_barriers`` will be used.
    additional_barriers : GeoDataFrame
        GeoDataFrame or GeoSeries containing additional barriers.
        (Multi)LineString geometry is expected.
    enclosure_id : str (default 'eID')
        name of the enclosure_id (to be created).

    Returns
    -------
    enclosures : GeoDataFrame
        GeoDataFrame containing enclosure geometries and enclosure_id

    Examples
    --------
    >>> enclosures = mm.enclosures(streets, admin_boundary, [railway, rivers])
    """
    if limit is not None:
        if limit.geom_type.isin(["Polygon", "MultiPolygon"]).any():
            limit = limit.boundary
        barriers = pd.concat([primary_barriers.geometry, limit.geometry])
    else:
        barriers = primary_barriers
    unioned = barriers.unary_union
    polygons = polygonize(unioned)
    enclosures = gpd.GeoSeries(list(polygons), crs=primary_barriers.crs)

    if additional_barriers is not None:
        if not isinstance(additional_barriers, list):
            raise TypeError(
                "`additional_barriers` expects a list of GeoDataFrames or GeoSeries. "
                f"Got {type(additional_barriers)}."
            )
        additional = pd.concat([gdf.geometry for gdf in additional_barriers])

        inp, res = enclosures.sindex.query_bulk(
            additional.geometry, predicate="intersects"
        )
        unique = np.unique(res)

        new = []

        for i in unique:
            poly = enclosures.values.data[i]  # get enclosure polygon
            crossing = inp[res == i]  # get relevant additional barriers
            buf = pygeos.buffer(poly, 0.01)  # to avoid floating point errors
            crossing_ins = pygeos.intersection(
                buf, additional.values.data[crossing]
            )  # keeping only parts of additional barriers within polygon
            union = pygeos.union_all(
                np.append(crossing_ins, pygeos.boundary(poly))
            )  # union
            polygons = np.array(list(polygonize(_pygeos_to_shapely(union))))  # polygonize
            within = pygeos.covered_by(
                pygeos.from_shapely(polygons), buf
            )  # keep only those within original polygon
            new += list(polygons[within])

        final_enclosures = (
            gpd.GeoSeries(enclosures)
            .drop(unique)
            .append(gpd.GeoSeries(new))
            .reset_index(drop=True)
        ).set_crs(primary_barriers.crs)

        return gpd.GeoDataFrame(
            {enclosure_id: range(len(final_enclosures))}, geometry=final_enclosures
        )

    return gpd.GeoDataFrame({enclosure_id: range(len(enclosures))}, geometry=enclosures)
marine = atl.append(gulf, ignore_index=True)
marine["id"] = marine.PROT_NUMBE.str.strip() + "-" + marine.BLOCK_NUMB.str.strip()
marine["name"] = (
    marine.PROT_NUMBE.str.strip() + ": Block " + marine.BLOCK_NUMB.str.strip()
)

# there are a couple blocks without proper names and 0 area; drop them
marine = marine[["id", "name", "geometry"]].dropna().to_crs(DATA_CRS)

# some blocks have multiple parts, merge them
grouped = marine.groupby("id")

# save as DataFrame instead of GeoDataFrame for easier processing
marine = pd.DataFrame(
    grouped.geometry.apply(lambda g: g.values.data).apply(
        lambda g: pg.union_all(g) if len(g) > 1 else g[0]
    )
).join(grouped.name.first())

# coerce all to MultiPolygons
ix = pg.get_type_id(marine.geometry.values) == 3
marine.loc[ix, "geometry"] = marine.loc[ix].geometry.apply(
    lambda g: pg.multipolygons([g])
)

marine = (
    gp.GeoDataFrame(marine, geometry="geometry", crs=DATA_CRS)
    .reset_index()
    .rename(columns={"index": "id"})
)
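A tiny check of the MultiPolygon coercion used above, on an invented square:

import pygeos as pg

poly = pg.box(0, 0, 1, 1)
print(pg.get_type_id(poly))                      # 3 == Polygon
print(pg.get_type_id(pg.multipolygons([poly])))  # 6 == MultiPolygon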