from analysis.constants import (
    REGIONS,
    REGION_GROUPS,
    CRS,
    WATERBODY_EXCLUDE_FTYPES,
    WATERBODY_MIN_SIZE,
)
from analysis.util import append

# Source and output locations, relative to the working directory.
# NOTE(review): src_dir is presumably the per-HUC4 NHD download location and
# out_dir the destination for extracted data -- confirm against the rest of
# the script.
src_dir = Path("data/nhd/source/huc4")
out_dir = Path("data/nhd/raw")

# Start time of the whole run.
start = time()

# Only a slice of the region groups is processed per run; the items are
# materialized into a list so they can be sliced.
# useful slices are [:2], [2:4], [4:]
for region, HUC2s in list(REGION_GROUPS.items())[4:]:
    print("\n----- {} ------\n".format(region))

    # Each region gets its own output directory, created if it is missing.
    region_dir = out_dir / region
    if not os.path.exists(region_dir):
        os.makedirs(region_dir)

    # Disabled: skip regions whose output already exists.
    # if os.path.exists(region_dir / "flowline.feather"):
    #     print("Skipping existing region {}".format(region))
    #     continue

    # Start time of this region.
    region_start = time()

    # Per-region accumulators, reset to None at the start of every region.
    merged_flowlines = None
    merged_joins = None
    merged_waterbodies = None
from pgpkg import to_gpkg
from nhdnet.io import deserialize_dfs

from analysis.constants import REGION_GROUPS, CRS_WKT

data_dir = Path("data")
networks_dir = data_dir / "networks"

# Read the small-barriers "barriers_network.feather" of every region group
# through deserialize_dfs.
# NOTE(review): presumably this concatenates them into one data frame -- confirm.
df = deserialize_dfs(
    [
        networks_dir / region / "small_barriers/barriers_network.feather"
        for region in REGION_GROUPS
    ],
)

# Unique upNetID values of the records whose kind is "small_barrier"; used
# below to filter each region's networks.
networkIDs = df.loc[df.kind == "small_barrier"].upNetID.unique()

for region in list(REGION_GROUPS.keys()):
    print("\n----------------\n processing {}".format(region))

    networks = from_geofeather(networks_dir / region / "small_barriers" / "network.feather")

    # Extract only the networks associated with small barriers, the rest are dams
    # (only the networkID and geometry columns are kept).
    networks = networks.loc[networks.networkID.isin(networkIDs), ["networkID", "geometry"]]

    # Nothing to write for this region.
    if len(networks) == 0:
        print("No small barriers in this region, skipping")
        continue

    print("Writing to GPKG")
    to_gpkg(
def snap_to_flowlines(df, to_snap):
    """Snap barriers to their nearest flowline, within tolerance.

    Region groups are processed one at a time.  For each region, the barriers
    in ``to_snap`` whose ``HUC2`` belongs to that region are matched against
    the region's flowlines via ``nearest`` and moved onto the matched line.

    ``df`` is updated in place with the snapping results; ``to_snap`` is
    returned as the set of barriers still needing to be snapped after this
    operation.

    Parameters
    ----------
    df : GeoDataFrame
        master dataset, this is where all snapping gets recorded.  The columns
        "snapped", "geometry", "snap_dist", "snap_ref_id", "lineID" and
        "snap_log" are written for every snapped barrier.
    to_snap : DataFrame
        data frame containing pygeos geometries to snap ("geometry"), the
        snapping tolerance ("snap_tolerance") and the "HUC2" code used to
        select the barriers of each region.  Its index must align with the
        index of ``df``.

    Returns
    -------
    tuple of (GeoDataFrame, DataFrame)
        (df, to_snap)
    """
    for region, HUC2s in REGION_GROUPS.items():
        region_start = time()

        print("\n----- {} ------\n".format(region))

        # Select the barriers first: reading a region's flowlines is the
        # expensive step, so regions without barriers skip it entirely.
        in_region = to_snap.loc[to_snap.HUC2.isin(HUC2s)]
        if len(in_region) == 0:
            print("No barriers in region to snap")
            continue

        print("Reading flowlines...")
        flowlines = from_geofeather(
            nhd_dir / "clean" / region / "flowlines.feather"
        ).set_index("lineID")

        print(
            "Selected {:,} barriers in region to snap against {:,} flowlines".format(
                len(in_region), len(flowlines)
            )
        )

        print("Finding nearest flowlines...")
        # TODO: can use near instead of nearest, and persist list of near lineIDs per barrier
        # so that we can construct subnetworks with just those
        lines = nearest(
            in_region.geometry, flowlines.geometry, in_region.snap_tolerance
        )
        # Attach the original point (by barrier index) and the geometry of
        # the matched flowline (by lineID) to each match.
        lines = lines.join(in_region.geometry).join(
            flowlines.geometry.rename("line"), on="lineID",
        )

        # project the point to the line,
        # find out its distance on the line,
        # then interpolate its new coordinates
        lines["geometry"] = pg.line_interpolate_point(
            lines.line, pg.line_locate_point(lines.line, lines.geometry)
        )

        # Record the results in the master dataset for the matched barriers.
        ix = lines.index
        df.loc[ix, "snapped"] = True
        df.loc[ix, "geometry"] = lines.geometry
        df.loc[ix, "snap_dist"] = lines.distance
        df.loc[ix, "snap_ref_id"] = lines.lineID
        df.loc[ix, "lineID"] = lines.lineID
        df.loc[ix, "snap_log"] = ndarray_append_strings(
            "snapped: within ",
            to_snap.loc[ix].snap_tolerance,
            "m tolerance of flowline",
        )

        # Barriers matched in this region no longer need snapping.
        to_snap = to_snap.loc[~to_snap.index.isin(ix)].copy()

        print(
            "{:,} barriers snapped in region in {:.2f}s".format(
                len(ix), time() - region_start
            )
        )

    # TODO: flag those that joined to loops

    return df, to_snap
# Dam, Gate, Lock Chamber, Waterfall KEEP_FTYPES = [343, 369, 398, 487] src_dir = Path("data/nhd/source/huc4") nhd_dir = Path("data/nhd") out_dir = nhd_dir / "extra" if not os.path.exists(out_dir): os.makedirs(out_dir) start = time() merged = None for region, HUC2s in REGION_GROUPS.items(): print("\n----- {} ------\n".format(region)) for HUC2 in HUC2s: for i in REGIONS[HUC2]: HUC4 = "{0}{1:02d}".format(HUC2, i) read_start = time() print("\n\n------------------- Reading {} -------------------". format(HUC4)) gdb = src_dir / HUC4 / "NHDPLUS_H_{HUC4}_HU4_GDB.gdb".format( HUC4=HUC4) df = gp.read_file(gdb, layer="NHDLine") df.NHDPlusID = df.NHDPlusID.astype("uint64")