def export_duplicate_areas(dups, path):
    """Export duplicate barrier groups to a polygon dataset for QA.

    Each barrier is buffered by its own duplicate tolerance, buffers are
    dissolved per duplicate group, and one polygon per group is written out
    with comma-delimited lists of the barrier ids / SARPIDs it contains.

    Parameters
    ----------
    dups : GeoDataFrame
        must contain "geometry", "dup_group", "dup_tolerance", "id", and
        "SARPID" columns ("dup_group" indicates the group of duplicates)
    path : str or Path
        output path
    """
    print("Exporting duplicate areas")

    # work on a copy so the caller's frame is not mutated
    dups = dups.copy()
    # buffer each barrier by its own tolerance so buffers within a group
    # merge when dissolved below
    dups["geometry"] = pg.buffer(dups.geometry.values.data, dups.dup_tolerance)
    dissolved = dissolve(dups[["geometry", "dup_group"]], by="dup_group")

    # one row per dup_group: arrays of member ids / SARPIDs joined to the
    # dissolved group geometry
    groups = gp.GeoDataFrame(
        dups[["id", "SARPID", "dup_group"]]
        .groupby("dup_group")
        .agg({"SARPID": "unique", "id": "unique"})
        .join(dissolved.geometry, on="dup_group"),
        crs=dups.crs,
    )

    # flatten id arrays to comma-delimited strings so they survive export
    groups["id"] = groups.id.apply(lambda x: ", ".join(str(s) for s in x))
    groups["SARPID"] = groups.SARPID.apply(lambda x: ", ".join(str(s) for s in x))

    write_dataframe(groups, path)
# find contiguous groups for dissolve nhd_dams = nhd_dams.join(find_contiguous_groups(nhd_dams.geometry.values.data)) # fill in the isolated dams ix = nhd_dams.group.isnull() next_group = nhd_dams.group.max() + 1 nhd_dams.loc[ix, "group"] = next_group + np.arange(ix.sum()) nhd_dams.group = nhd_dams.group.astype("uint") print("Dissolving overlapping dams") nhd_dams = dissolve( explode(nhd_dams), by=["HUC2", "source", "group"], agg={ "GNIS_Name": lambda n: ", ".join({s for s in n if s}), # set missing NHD fields as 0 "FType": lambda n: ", ".join({str(s) for s in n}), "FCode": lambda n: ", ".join({str(s) for s in n}), "NHDPlusID": lambda n: ", ".join({str(s) for s in n}), }, ).reset_index(drop=True) # fill in missing values nhd_dams.GNIS_Name = nhd_dams.GNIS_Name.fillna("") nhd_dams.geometry = pg.make_valid(nhd_dams.geometry.values.data) nhd_dams["damID"] = nhd_dams.index.copy() nhd_dams.damID = nhd_dams.damID.astype("uint32") nhd_dams = nhd_dams.set_index("damID")
# combine NHD and NWI waterbodies into a single layer for this HUC2
nwi = gp.read_feather(nwi_dir / huc2 / "waterbodies.feather")
df = nhd[["geometry", "altered"]].append(nwi[["geometry", "altered"]])
# snapshot altered polygons before dissolving; used below to re-flag the
# dissolved results
altered = df.loc[df.altered].copy()

if huc2 == "03":
    # supplemental South Carolina statewide waterbodies
    # NOTE(review): columns=[] presumably reads only the geometry column —
    # confirm against gp.read_feather behavior
    sc = gp.read_feather("data/states/sc/sc_waterbodies.feather", columns=[])
    sc["altered"] = False  # unknown
    df = df.append(sc[["geometry", "altered"]])

print(f"Dissolving {len(df):,} waterbodies...")
dissolve_start = time()
# dissolve everything into one geometry (constant "tmp" key), then explode
# back into individual polygons — merges overlapping waterbodies across sources
df["tmp"] = 1
df = dissolve(df, by="tmp").drop(columns=["tmp"])
df = explode(df).reset_index(drop=True)
print(f"Now have {len(df):,} waterbodies ({time() - dissolve_start:,.2f}s)")

# assign altered if any resulting polygons intersect altered polygons;
# query_bulk returns (altered index, tree/df index) pairs
tree = pg.STRtree(df.geometry.values.data)
left, right = tree.query_bulk(altered.geometry.values.data)
df["altered"] = False
df.loc[np.unique(right), "altered"] = True

# cut at breaks from NHD: NHD lines mark boundaries between adjacent
# waterbodies that the dissolve above would otherwise merge
nhd_lines_filename = nhd_dir / huc2 / "nhd_lines.feather"
if nhd_lines_filename.exists():
    print("Checking for breaks between adjacent waterbodies")
    nhd_lines = gp.read_feather(nhd_lines_filename).geometry.values.data
    breaks = find_nhd_waterbody_breaks(nhd.geometry.values.data, nhd_lines)
predicate="intersects")
# keep only waterbodies that intersect flowlines ("left" comes from the
# query_bulk call started above this fragment)
waterbodies = waterbodies.iloc[np.unique(left)].reset_index(drop=True)
print(f"Kept {len(waterbodies):,} that intersect flowlines")

# TODO: explode, repair, dissolve, explode, reset index
waterbodies = explode(waterbodies)
# make valid: repair only the invalid geometries to avoid rebuilding all
ix = ~pg.is_valid(waterbodies.geometry.values.data)
if ix.sum():
    print(f"Repairing {ix.sum():,} invalid waterbodies")
    waterbodies.loc[ix, "geometry"] = pg.make_valid(
        waterbodies.loc[ix].geometry.values.data)

# note: nwi_code, nwi_type are discarded here since they aren't used later
print("Dissolving adjacent waterbodies")
# merge touching waterbodies, but keep altered / unaltered separate
waterbodies = dissolve(waterbodies, by=["altered"])
waterbodies = explode(waterbodies).reset_index(drop=True)
# area in km2 (CRS units presumably meters — confirm upstream projection)
waterbodies["km2"] = pg.area(waterbodies.geometry.values.data) / 1e6

waterbodies.to_feather(huc2_dir / "waterbodies.feather")
write_dataframe(waterbodies, huc2_dir / "waterbodies.gpkg")

### Process riverine
# "rivers" and "tree" are defined above this fragment; tree is presumably the
# flowline STRtree — verify against earlier lines
print(f"Extracted {len(rivers):,} NWI altered river polygons")
left, right = tree.query_bulk(rivers.geometry.values.data, predicate="intersects")
rivers = rivers.iloc[np.unique(left)].reset_index(drop=True)
print(f"Kept {len(rivers):,} that intersect flowlines")
rivers = explode(rivers)
# HUC4s that intersect but are not fully contained by the merged state
# boundary (indices computed above this fragment)
ix = np.setdiff1d(intersects_ix, contains_ix)
outer_huc4 = huc4_df.iloc[ix].copy()
# total area in km2 (CRS units presumably meters — confirm projection)
outer_huc4["km2"] = pg.area(outer_huc4.geometry.values.data) / 1e6

# calculate geometric difference, explode, and keep non-slivers:
# the part of each HUC4 that falls outside the merged states
outer_huc4["geometry"] = pg.difference(outer_huc4.geometry.values.data, state_merged)
outer_huc4 = explode(outer_huc4)
outer_huc4["clip_km2"] = pg.area(outer_huc4.geometry.values.data) / 1e6
outer_huc4["percent"] = 100 * outer_huc4.clip_km2 / outer_huc4.km2
# keep HUC4s with at least one outside part >= 100 km2, then drop parts
# smaller than 2.5 km2 (slivers)
keep_huc4 = outer_huc4.loc[outer_huc4.clip_km2 >= 100].HUC4.unique()
outer_huc4 = outer_huc4.loc[
    outer_huc4.HUC4.isin(keep_huc4) & (outer_huc4.clip_km2 >= 2.5)
].copy()
# re-merge the retained parts per HUC4
outer_huc4 = dissolve(outer_huc4, by="HUC4", agg={"HUC2": "first"}).reset_index(drop=True)

outer_huc4.to_feather(out_dir / "outer_huc4.feather")
write_dataframe(outer_huc4, out_dir / "outer_huc4.gpkg")

### Counties - within HUC4 bounds
print("Processing counties")
fips = sorted(state_df.STATEFIPS.unique())
# read county boundaries, project to the analysis CRS, and normalize names
county_df = (read_dataframe(
    county_filename,
    columns=["NAME", "GEOID", "STATEFP"],
).to_crs(CRS).rename(columns={
    "NAME": "County",
    "GEOID": "COUNTYFIPS",
    "STATEFP": "STATEFIPS"