Ejemplo n.º 1
0
def snap_to_flowlines(df, to_snap):
    """Snap barriers onto their nearest flowline, one region group at a time.

    For every region group, the barriers in ``to_snap`` whose HUC2 belongs to
    that group are matched against the region's flowlines using their
    snapping tolerance.  Matched barriers are moved onto the flowline, the
    results are recorded in ``df``, and the matched barriers are dropped from
    ``to_snap``.

    Parameters
    ----------
    df : GeoDataFrame
        master dataset; every snapping result is written here
    to_snap : DataFrame
        barriers that still need snapping; provides pygeos geometries
        ("geometry"), snapping tolerance ("snap_tolerance") and "HUC2"

    Returns
    -------
    tuple of (GeoDataFrame, DataFrame)
        (df, to_snap), where to_snap contains only the barriers that were
        not snapped by this operation
    """

    for region, huc2_codes in REGION_GROUPS.items():
        region_start = time()

        print("\n----- {} ------\n".format(region))

        print("Reading flowlines...")
        flowlines_path = nhd_dir / "clean" / region / "flowlines.feather"
        flowlines = from_geofeather(flowlines_path).set_index("lineID")

        region_barriers = to_snap.loc[to_snap.HUC2.isin(huc2_codes)]
        print(
            "Selected {:,} barriers in region to snap against {:,} flowlines".format(
                len(region_barriers), len(flowlines)
            )
        )

        # nothing to do for this region; move on to the next one
        if not len(region_barriers):
            print("No barriers in region to snap")
            continue

        print("Finding nearest flowlines...")
        # TODO: can use near instead of nearest, and persist list of near lineIDs per barrier
        # so that we can construct subnetworks with just those
        snapped = nearest(
            region_barriers.geometry,
            flowlines.geometry,
            region_barriers.snap_tolerance,
        )
        # attach the original barrier point and the matched flowline geometry
        snapped = snapped.join(region_barriers.geometry)
        snapped = snapped.join(flowlines.geometry.rename("line"), on="lineID")

        # locate where each point projects along its line, then interpolate
        # the coordinates of that position to get the snapped point
        position_on_line = pg.line_locate_point(snapped.line, snapped.geometry)
        snapped["geometry"] = pg.line_interpolate_point(
            snapped.line, position_on_line
        )

        # record the results in the master dataset
        snapped_ix = snapped.index
        df.loc[snapped_ix, "snapped"] = True
        df.loc[snapped_ix, "geometry"] = snapped.geometry
        df.loc[snapped_ix, "snap_dist"] = snapped.distance
        df.loc[snapped_ix, "snap_ref_id"] = snapped.lineID
        df.loc[snapped_ix, "lineID"] = snapped.lineID
        df.loc[snapped_ix, "snap_log"] = ndarray_append_strings(
            "snapped: within ",
            to_snap.loc[snapped_ix].snap_tolerance,
            "m tolerance of flowline",
        )

        # keep only the barriers that were not snapped in this region
        still_unsnapped = ~to_snap.index.isin(snapped_ix)
        to_snap = to_snap.loc[still_unsnapped].copy()

        elapsed = time() - region_start
        print(
            "{:,} barriers snapped in region in {:.2f}s".format(
                len(snapped_ix), elapsed
            )
        )

    # TODO: flag those that joined to loops

    return df, to_snap
from analysis.constants import (
    REGIONS,
    REGION_GROUPS,
    CRS,
    WATERBODY_EXCLUDE_FTYPES,
    WATERBODY_MIN_SIZE,
)
from analysis.util import append

# Input (per-HUC4 source data) and output (per-region raw data) locations
src_dir = Path("data/nhd/source/huc4")
out_dir = Path("data/nhd/raw")

# Timestamp taken before any region is processed
start = time()

# Only a slice of the region groups is processed per run.
# useful slices are [:2], [2:4], [4:]
for region, HUC2s in list(REGION_GROUPS.items())[4:]:
    print("\n----- {} ------\n".format(region))

    region_dir = out_dir / region
    # exist_ok=True creates the directory if needed without a separate
    # existence check, which avoids a race between the check and the create
    os.makedirs(region_dir, exist_ok=True)

    # if os.path.exists(region_dir / "flowline.feather"):
    #     print("Skipping existing region {}".format(region))
    #     continue

    region_start = time()

    # Reset the per-region holders before processing this region.
    # NOTE(review): presumably filled in by per-HUC processing that follows
    # this point in the loop -- confirm against the rest of the script.
    merged_flowlines = None
    merged_joins = None
    merged_waterbodies = None
Ejemplo n.º 3
0
# FType codes to keep: Dam, Gate, Lock Chamber, Waterfall
KEEP_FTYPES = [343, 369, 398, 487]

# Input (per-HUC4 source data) and base NHD data locations
src_dir = Path("data/nhd/source/huc4")
nhd_dir = Path("data/nhd")

out_dir = nhd_dir / "extra"

# exist_ok=True creates the directory if needed without a separate existence
# check, which avoids a race between the check and the create
os.makedirs(out_dir, exist_ok=True)

# Timestamp taken before any region is processed
start = time()

# Holder for the combined result; starts empty
merged = None
for region, HUC2s in REGION_GROUPS.items():
    print("\n----- {} ------\n".format(region))

    for HUC2 in HUC2s:
        for i in REGIONS[HUC2]:
            HUC4 = "{0}{1:02d}".format(HUC2, i)

            read_start = time()
            print("\n\n------------------- Reading {} -------------------".
                  format(HUC4))
            gdb = src_dir / HUC4 / "NHDPLUS_H_{HUC4}_HU4_GDB.gdb".format(
                HUC4=HUC4)

            df = gp.read_file(gdb, layer="NHDLine")
            df.NHDPlusID = df.NHDPlusID.astype("uint64")