Example #1
# imports assumed by this snippet: pygeos-backed geopandas (< 0.12) and pyogrio;
# `dissolve` is a project-specific helper (see the sketch after Example #3)
import geopandas as gp
import pygeos as pg
from pyogrio import write_dataframe


def export_duplicate_areas(dups, path):
    """Export duplicate barriers for QA.

    Parameters
    ----------
    dups : GeoDataFrame
        must contain "geometry", "dup_group", "dup_tolerance", "id", and
        "SARPID" columns; "dup_group" identifies each group of duplicates
    path : str or Path
        output path
    """

    print("Exporting duplicate areas")

    dups = dups.copy()
    # buffer each barrier by its detection tolerance so duplicates overlap,
    # then merge the buffers within each duplicate group
    dups["geometry"] = pg.buffer(dups.geometry.values.data, dups.dup_tolerance)
    dissolved = dissolve(dups[["geometry", "dup_group"]], by="dup_group")
    groups = gp.GeoDataFrame(
        dups[["id", "SARPID", "dup_group"]]
        .groupby("dup_group")
        .agg({"SARPID": "unique", "id": "unique"})
        .join(dissolved.geometry, on="dup_group"),
        crs=dups.crs,
    )
    # flatten the id / SARPID arrays to comma-delimited strings for output
    groups["id"] = groups.id.apply(lambda x: ", ".join([str(s) for s in x]))
    groups["SARPID"] = groups.SARPID.apply(lambda x: ", ".join([str(s) for s in x]))
    write_dataframe(groups, path)
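
A minimal usage sketch (hypothetical data; assumes geopandas < 0.12 with the
pygeos backend so that `.values.data` yields pygeos geometries):

import geopandas as gp
from shapely.geometry import Point

# two nearby barriers flagged as duplicates of each other (values hypothetical)
dups = gp.GeoDataFrame(
    {
        "id": [1, 2],
        "SARPID": ["x1", "x2"],
        "dup_group": [0, 0],
        "dup_tolerance": [10.0, 10.0],
    },
    geometry=[Point(0, 0), Point(5, 0)],
    crs="EPSG:5070",
)
export_duplicate_areas(dups, "duplicate_areas.gpkg")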
Example #2

# assign dams that touch or overlap one another to the same group so they
# dissolve together below
nhd_dams = nhd_dams.join(find_contiguous_groups(nhd_dams.geometry.values.data))
# isolated dams (not part of any contiguous group) each get their own group
ix = nhd_dams.group.isnull()
next_group = nhd_dams.group.max() + 1
nhd_dams.loc[ix, "group"] = next_group + np.arange(ix.sum())
nhd_dams.group = nhd_dams.group.astype("uint")

print("Dissolving overlapping dams")
nhd_dams = dissolve(
    explode(nhd_dams),
    by=["HUC2", "source", "group"],
    agg={
        # combine attributes of merged dams into comma-delimited strings of
        # their unique values (GNIS_Name skips empty strings)
        "GNIS_Name": lambda n: ", ".join({s for s in n if s}),
        "FType": lambda n: ", ".join({str(s) for s in n}),
        "FCode": lambda n: ", ".join({str(s) for s in n}),
        "NHDPlusID": lambda n: ", ".join({str(s) for s in n}),
    },
).reset_index(drop=True)

# fill in missing values
nhd_dams.GNIS_Name = nhd_dams.GNIS_Name.fillna("")

nhd_dams.geometry = pg.make_valid(nhd_dams.geometry.values.data)

nhd_dams["damID"] = nhd_dams.index.copy()
nhd_dams.damID = nhd_dams.damID.astype("uint32")

nhd_dams = nhd_dams.set_index("damID")
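
find_contiguous_groups is a project-specific helper. Based on how it is used
above (the join leaves isolated dams with a null group), a plausible sketch,
not the project's actual implementation, groups geometries that intersect one
another via connected components and omits isolated ones:

import numpy as np
import pandas as pd
import pygeos as pg
from scipy.sparse import coo_matrix
from scipy.sparse.csgraph import connected_components


def find_contiguous_groups(geometries):
    """Return a "group" Series (positional index) for geometries that
    intersect at least one other geometry; isolated ones are omitted."""
    tree = pg.STRtree(geometries)
    left, right = tree.query_bulk(geometries, predicate="intersects")
    keep = left != right  # drop self-intersections
    left, right = left[keep], right[keep]
    n = len(geometries)
    graph = coo_matrix((np.ones(len(left)), (left, right)), shape=(n, n))
    _, labels = connected_components(graph, directed=False)
    groups = pd.Series(labels, name="group")
    sizes = groups.groupby(groups).transform("size")
    return groups.loc[sizes > 1]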
Example #3
    nwi = gp.read_feather(nwi_dir / huc2 / "waterbodies.feather")

    # note: DataFrame.append was removed in pandas 2.0; use pd.concat there
    df = nhd[["geometry", "altered"]].append(nwi[["geometry", "altered"]])

    altered = df.loc[df.altered].copy()

    if huc2 == "03":
        sc = gp.read_feather("data/states/sc/sc_waterbodies.feather", columns=[])
        sc["altered"] = False  # unknown
        df = df.append(sc[["geometry", "altered"]])

    print(f"Dissolving {len(df):,} waterbodies...")
    dissolve_start = time()
    df["tmp"] = 1
    df = dissolve(df, by="tmp").drop(columns=["tmp"])
    df = explode(df).reset_index(drop=True)
    print(f"Now have {len(df):,} waterbodies ({time() - dissolve_start:,.2f}s)")

    # assign altered if any resulting polygons intersect altered polygons
    tree = pg.STRtree(df.geometry.values.data)
    left, right = tree.query_bulk(altered.geometry.values.data,
                                  predicate="intersects")
    df["altered"] = False
    df.loc[np.unique(right), "altered"] = True

    # cut at breaks from NHD
    nhd_lines_filename = nhd_dir / huc2 / "nhd_lines.feather"
    if nhd_lines_filename.exists():
        print("Checking for breaks between adjacent waterbodies")
        nhd_lines = gp.read_feather(nhd_lines_filename).geometry.values.data
        breaks = find_nhd_waterbody_breaks(nhd.geometry.values.data, nhd_lines)
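
dissolve and explode here are project helpers rather than geopandas built-ins.
A minimal sketch of equivalent behavior using plain geopandas (>= 0.10), with
semantics assumed from usage (the group keys become the index, matching the
join on "dup_group" in Example #1; callers reset the index as needed):

def dissolve(df, by, agg=None):
    # union geometries within each group; aggregate other columns per `agg`
    return df.dissolve(by=by, aggfunc=agg if agg is not None else "first")


def explode(df):
    # split multi-part geometries into single parts, repeating attributes
    return df.explode(index_parts=False)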
Example #4
                                  predicate="intersects")
    waterbodies = waterbodies.iloc[np.unique(left)].reset_index(drop=True)
    print(f"Kept {len(waterbodies):,} that intersect flowlines")

    # explode, repair invalid geometries, dissolve, explode again, reset index
    waterbodies = explode(waterbodies)
    # make valid
    ix = ~pg.is_valid(waterbodies.geometry.values.data)
    if ix.sum():
        print(f"Repairing {ix.sum():,} invalid waterbodies")
        waterbodies.loc[ix, "geometry"] = pg.make_valid(
            waterbodies.loc[ix].geometry.values.data)

    # note: nwi_code, nwi_type are discarded here since they aren't used later
    print("Dissolving adjacent waterbodies")
    waterbodies = dissolve(waterbodies, by=["altered"])
    waterbodies = explode(waterbodies).reset_index(drop=True)

    waterbodies["km2"] = pg.area(waterbodies.geometry.values.data) / 1e6

    waterbodies.to_feather(huc2_dir / "waterbodies.feather")
    write_dataframe(waterbodies, huc2_dir / "waterbodies.gpkg")

    ### Process riverine
    # (`rivers` comes from an earlier, elided extraction of NWI riverine polygons)
    print(f"Extracted {len(rivers):,} NWI altered river polygons")
    left, right = tree.query_bulk(rivers.geometry.values.data,
                                  predicate="intersects")
    rivers = rivers.iloc[np.unique(left)].reset_index(drop=True)
    print(f"Kept {len(rivers):,} that intersect flowlines")

    rivers = explode(rivers)
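
pg.STRtree.query_bulk returns two parallel index arrays: the first indexes the
query geometries (the waterbodies / rivers above) and the second indexes the
tree geometries (the flowlines), which is why np.unique(left) selects the
polygons to keep. A tiny self-contained illustration with toy data:

import numpy as np
import pygeos as pg

tree = pg.STRtree(pg.points([[0, 0], [10, 10]]))  # tree geometries
queries = pg.box(
    np.array([-1.0, 9.0]),  # xmin
    np.array([-1.0, 9.0]),  # ymin
    np.array([1.0, 11.0]),  # xmax
    np.array([1.0, 11.0]),  # ymax
)
left, right = tree.query_bulk(queries, predicate="intersects")
print(left, right)  # [0 1] [0 1]: query box i intersects tree point i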
Example #5

ix = np.setdiff1d(intersects_ix, contains_ix)
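# Note: intersects_ix and contains_ix come from code elided above; a plausible
# reconstruction (an assumption, not the original) is:
#   tree = pg.STRtree(huc4_df.geometry.values.data)
#   intersects_ix = tree.query(state_merged, predicate="intersects")
#   contains_ix = tree.query(state_merged, predicate="contains")
# so `ix` selects HUC4s that cross the merged state boundary.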

outer_huc4 = huc4_df.iloc[ix].copy()
outer_huc4["km2"] = pg.area(outer_huc4.geometry.values.data) / 1e6

# calculate geometric difference, explode, and keep non-slivers
outer_huc4["geometry"] = pg.difference(outer_huc4.geometry.values.data,
                                       state_merged)
outer_huc4 = explode(outer_huc4)
outer_huc4["clip_km2"] = pg.area(outer_huc4.geometry.values.data) / 1e6
outer_huc4["percent"] = 100 * outer_huc4.clip_km2 / outer_huc4.km2
# keep HUC4s where at least one part outside the states is >= 100 km², then
# drop remaining sliver parts smaller than 2.5 km²
keep_huc4 = outer_huc4.loc[outer_huc4.clip_km2 >= 100].HUC4.unique()
outer_huc4 = outer_huc4.loc[outer_huc4.HUC4.isin(keep_huc4)
                            & (outer_huc4.clip_km2 >= 2.5)].copy()
outer_huc4 = dissolve(outer_huc4, by="HUC4", agg={
    "HUC2": "first"
}).reset_index(drop=True)
outer_huc4.to_feather(out_dir / "outer_huc4.feather")
write_dataframe(outer_huc4, out_dir / "outer_huc4.gpkg")

### Counties - within HUC4 bounds
print("Processing counties")
fips = sorted(state_df.STATEFIPS.unique())

county_df = (read_dataframe(
    county_filename,
    columns=["NAME", "GEOID", "STATEFP"],
).to_crs(CRS).rename(columns={
    "NAME": "County",
    "GEOID": "COUNTYFIPS",
    "STATEFP": "STATEFIPS"