Exemple #1
0
def is_ring(data):
    """Return a boolean array telling which geometries in ``data`` are rings.

    Polygons are currently evaluated on their exterior ring, so they can
    report True; a ``FutureWarning`` is emitted whenever ``data`` contains
    Polygons because that result is slated to become False.

    Parameters
    ----------
    data : array-like of geometries
        Entries may be None; missing entries yield False on the Shapely path.

    Returns
    -------
    numpy.ndarray of bool
    """
    if "Polygon" in geom_type(data):
        warnings.warn(
            "is_ring currently returns True for Polygons, which is not correct. "
            "This will be corrected to False in a future release.",
            FutureWarning,
            stacklevel=3,
        )
    if compat.USE_PYGEOS:
        return pygeos.is_ring(data) | pygeos.is_ring(
            pygeos.get_exterior_ring(data))
    # for polygons operates on the exterior, so can't use _unary_op()
    results = []
    for geom in data:
        # ``geom_type`` is the supported attribute name; the ``type`` alias is
        # deprecated in Shapely 2.0 and emits a warning on every access.
        kind = None if geom is None else geom.geom_type
        if kind == "Polygon":
            results.append(geom.exterior.is_ring)
        elif kind in ("LineString", "LinearRing"):
            results.append(geom.is_ring)
        else:
            # missing values and every other geometry type are never rings
            results.append(False)
    return np.array(results, dtype=bool)
    def get_ring_coords(polygon):
        """Collect the coordinate lists of every ring of ``polygon``.

        The first entry is the exterior ring, followed by one entry per
        interior ring in index order. Each entry is the nested list produced
        by ``pg.get_coordinates(...).tolist()``.

        NOTE(review): earlier comments said the rings "must be reversed", but
        no reversal is applied here; coordinates are returned in stored order.
        Confirm which winding order the caller expects.
        """
        shell = pg.get_exterior_ring(polygon)
        rings = [pg.get_coordinates(shell).tolist()]
        rings.extend(
            pg.get_coordinates(pg.get_interior_ring(polygon, hole)).tolist()
            for hole in range(pg.get_num_interior_rings(polygon))
        )
        return rings
Exemple #3
0
def test_get_rings(geom):
    """Non-empty polygons yield exactly their exterior ring; anything else yields no rings."""
    is_nonempty_polygon = (
        pygeos.get_type_id(geom) == pygeos.GeometryType.POLYGON
        and not pygeos.is_empty(geom)
    )
    rings = pygeos.get_rings(geom)

    if not is_nonempty_polygon:
        # non-polygons and empty polygons have no rings at all
        assert len(rings) == 0
        return

    assert len(rings) == 1
    assert rings[0] == pygeos.get_exterior_ring(geom)
Exemple #4
0
def is_ring(data):
    """Return a boolean array: True where a geometry's exterior is a ring.

    With PyGEOS enabled the check is delegated to ``pygeos.is_ring`` on the
    exterior rings. Otherwise each geometry is inspected individually;
    missing geometries and geometries without a usable ``exterior`` give False.
    """
    if compat.USE_PYGEOS:
        return pygeos.is_ring(pygeos.get_exterior_ring(data))

    # operates on the exterior, so can't use _unary_op()
    # XXX needed to change this because there is now a geometry collection
    # in the shapely ones that was something else before?
    def _exterior_is_ring(geom):
        if geom is None or not hasattr(geom, "exterior"):
            return False
        if geom.exterior is None:
            return False
        return geom.exterior.is_ring

    flags = [_exterior_is_ring(geom) for geom in data]
    return np.array(flags, dtype=bool)
Exemple #5
0
def test_get_rings_return_index():
    """``get_rings(..., return_index=True)`` returns every ring with its source index."""
    geom = np.array([polygon, None, empty_polygon, polygon_with_hole])

    # Build the expectation by hand: exterior first, then each hole, all
    # tagged with the position of the polygon they came from.
    expected_parts, expected_index = [], []
    for position, item in enumerate(geom):
        if item is None or pygeos.is_empty(item):
            continue
        item_rings = [pygeos.get_exterior_ring(item)]
        item_rings.extend(
            pygeos.get_interior_ring(item, hole)
            for hole in range(0, pygeos.get_num_interior_rings(item))
        )
        expected_parts.extend(item_rings)
        expected_index.extend([position] * len(item_rings))

    parts, index = pygeos.get_rings(geom, return_index=True)

    assert len(parts) == len(expected_parts)
    assert np.all(pygeos.equals_exact(parts, expected_parts))
    assert np.array_equal(index, expected_index)
Exemple #6
0
def exterior(data):
    """Return the exterior ring of each geometry in ``data``.

    Uses ``pygeos.get_exterior_ring`` when PyGEOS is enabled and falls back to
    the ``exterior`` attribute via ``_unary_geo`` otherwise.
    """
    if not compat.USE_PYGEOS:
        return _unary_geo("exterior", data)
    return pygeos.get_exterior_ring(data)
def find_dam_face_from_waterbody(waterbody, drain_pt):
    """Extract candidate dam-face segments from a waterbody outline near a drain.

    Takes the vertices of the normalized exterior ring on either side of the
    vertex nearest ``drain_pt``, simplifies them, splits them into straight
    runs, and keeps the runs that pass the length, proximity and
    interior-position filters below.

    Parameters
    ----------
    waterbody : pygeos geometry
        Geometry whose exterior ring is analysed (presumably a single
        polygon - TODO confirm against callers).
    drain_pt : pygeos geometry
        Drain location used to pick the starting vertex and to filter segments.

    Returns
    -------
    numpy.ndarray
        Array of linestrings built with ``pg.linestrings``; may be empty.
    """
    total_area = pg.area(waterbody)
    ring = pg.get_exterior_ring(pg.normalize(waterbody))
    total_length = pg.length(ring)
    num_pts = pg.get_num_points(ring) - 1  # drop closing coordinate
    vertices = pg.get_point(ring, range(num_pts))

    ### Extract line segments that are no more than 1/3 coordinates of polygon
    # starting from the vertex nearest the drain
    # note: lower numbers are to the right
    tree = pg.STRtree(vertices)
    ix = tree.nearest(drain_pt)[1][0]
    # window half-width: at most a third of the ring, capped by MAX_SIDE_PTS
    side_width = min(num_pts // 3, MAX_SIDE_PTS)
    left_ix = ix + side_width
    right_ix = ix - side_width

    # extract these as a left-to-right line;
    # the slice is clamped to the valid range, then the parts that wrap past
    # either end of the vertex array are spliced on below
    pts = vertices[max(right_ix, 0):min(num_pts, left_ix)][::-1]
    if left_ix >= num_pts:
        # window runs past the end: prepend the wrapped vertices from the start
        pts = np.append(vertices[0:left_ix - num_pts][::-1], pts)

    if right_ix < 0:
        # window runs before the start: append the wrapped vertices from the end
        pts = np.append(pts, vertices[num_pts + right_ix:num_pts][::-1])

    coords = pg.get_coordinates(pts)

    if len(coords) > 2:
        # first run a simplification process to extract the major shape and bends
        # then run the straight line algorithm
        simp_coords, simp_ix = simplify_vw(
            coords, min(MAX_SIMPLIFY_AREA, total_area / 100))

        if len(simp_coords) > 2:
            keep_coords, ix = extract_straight_segments(
                simp_coords, max_angle=MAX_STRAIGHT_ANGLE, loops=5)
            # map positions in the simplified coords back to positions in coords
            keep_ix = simp_ix.take(ix)

        else:
            keep_coords = simp_coords
            keep_ix = simp_ix

    else:
        # too few coordinates to simplify; use them as-is
        keep_coords = coords
        keep_ix = np.arange(len(coords))

    ### Calculate the length of each run and drop any that are not sufficiently long
    # runs must also be shorter than MAX_WIDTH_RATIO of the total ring length
    lengths = segment_length(keep_coords)
    ix = (lengths >= MIN_DAM_WIDTH) & (lengths / total_length <
                                       MAX_WIDTH_RATIO)

    # (start, end) index pairs into coords for each retained run
    pairs = np.dstack([keep_ix[:-1][ix], keep_ix[1:][ix]])[0]

    # since ranges are ragged, we have to do this in a loop instead of vectorized
    segments = []
    for start, end in pairs:
        segments.append(pg.linestrings(coords[start:end + 1]))

    segments = np.array(segments)

    # only keep the segments that are close to the drain
    # (i.e., that intersect the drain buffered by MAX_DRAIN_DIST)
    segments = segments[
        pg.intersects(segments, pg.buffer(drain_pt, MAX_DRAIN_DIST)), ]

    if not len(segments):
        return segments

    # only keep those where the drain is interior to the line
    # i.e., the drain projects at least MIN_INTERIOR_DIST from both endpoints
    pos = pg.line_locate_point(segments, drain_pt)
    lengths = pg.length(segments)

    ix = (pos >= MIN_INTERIOR_DIST) & (pos <= (lengths - MIN_INTERIOR_DIST))

    return segments[ix]
Exemple #8
0
def test_get_exterior_ring():
    """Exterior rings of polygons all report geometry type id 2."""
    polygons = [polygon, polygon_with_hole]
    rings = pygeos.get_exterior_ring(polygons)
    ring_type_ids = pygeos.get_type_id(rings)
    assert (ring_type_ids == 2).all()
Exemple #9
0
def test_get_exterior_ring_non_polygon(geom):
    """Asking a non-polygon for its exterior ring yields a missing value."""
    ring = pygeos.get_exterior_ring(geom)
    missing = pygeos.is_missing(ring)
    assert missing.all()
Exemple #10
0
def test_get_rings_holes():
    """A polygon with one hole yields its exterior ring followed by the hole."""
    rings = pygeos.get_rings(polygon_with_hole)
    assert len(rings) == 2

    shell, hole = rings[0], rings[1]
    assert shell == pygeos.get_exterior_ring(polygon_with_hole)
    assert hole == pygeos.get_interior_ring(polygon_with_hole, 0)
def cut_lines_by_waterbodies(flowlines, joins, waterbodies, next_lineID):
    """
    Cut lines by waterbodies.
    1. Finds all intersections between waterbodies and flowlines.
    2. For those that cross but are not completely contained by waterbodies, cut them.
    3. Evaluate the cuts, only those that have substantive cuts inside and outside are retained as cuts.
    4. Any flowlines that are not contained or crossing waterbodies are dropped from wb_joins

    Parameters
    ----------
    flowlines : GeoDataFrame
    joins : DataFrame
        flowline joins
    waterbodies : GeoDataFrame
    next_lineID : int
        next lineID; must be greater than all prior lines in region

    Returns
    -------
    tuple of (GeoDataFrame, DataFrame, DataFrame)
        (flowlines, joins, waterbody joins).  The returned flowlines carry a
        boolean "waterbody" column; the waterbody joins hold the unique
        (lineID, wbID) pairs of flowlines contained or at least 50% within a
        waterbody.

    Raises
    ------
    ValueError
        if any flowline is contained by more than one waterbody
    """

    start = time()

    ### Find flowlines that intersect waterbodies

    join_start = time()
    tree = pg.STRtree(flowlines.geometry.values.data)
    left, right = tree.query_bulk(waterbodies.geometry.values.data,
                                  predicate="intersects")
    df = pd.DataFrame({
        "lineID": flowlines.index.take(right),
        "flowline": flowlines.geometry.values.data.take(right),
        "wbID": waterbodies.index.take(left),
        "waterbody": waterbodies.geometry.values.data.take(left),
    })
    print(
        f"Found {len(df):,} waterbody / flowline joins in {time() - join_start:.2f}s"
    )

    ### Find those that are completely contained; these don't need further processing
    pg.prepare(df.waterbody.values)

    # find those that are fully contained and do not touch the edge of the waterbody (contains_properly predicate)
    # contains_properly is very fast
    contained_start = time()
    df["contains"] = pg.contains_properly(df.waterbody.values,
                                          df.flowline.values)
    print(
        f"Identified {df.contains.sum():,} flowlines fully within waterbodies in {time() - contained_start:.2f}s"
    )

    # find those that aren't fully contained by contained and touch the edge of waterbody (contains predicate)
    contained_start = time()
    ix = ~df.contains
    tmp = df.loc[ix]
    df.loc[ix, "contains"] = pg.contains(tmp.waterbody, tmp.flowline)
    print(
        f"Identified {df.loc[ix].contains.sum():,} more flowlines contained by waterbodies in {time() - contained_start:.2f}s"
    )

    # Sanity check: flowlines should only ever be contained by one waterbody
    if df.loc[df.contains].groupby("lineID").size().max() > 1:
        raise ValueError(
            "ERROR: one or more lines contained by multiple waterbodies")

    # for any that are not completely contained, find the ones that overlap
    crosses_start = time()
    df["crosses"] = False
    ix = ~df.contains
    tmp = df.loc[ix]
    df.loc[ix, "crosses"] = pg.crosses(tmp.waterbody, tmp.flowline)
    print(
        f"Identified {df.crosses.sum():,} flowlines that cross edge of waterbodies in {time() - crosses_start:.2f}s"
    )

    # discard any that only touch (ones that don't cross or are contained)
    # note that we only cut the ones that cross below; contained ones are left intact
    df = df.loc[df.contains | df.crosses].copy()

    print("Intersecting flowlines and waterbodies...")
    cut_start = time()
    ix = df.crosses
    tmp = df.loc[ix]
    df["geometry"] = df.flowline
    # use intersection to cut flowlines by waterbodies.  Note: this may produce
    # nonlinear (e.g., geom collection) results
    df.loc[ix, "geometry"] = pg.intersection(tmp.flowline, tmp.waterbody)
    df["length"] = pg.length(df.geometry)
    df["flength"] = pg.length(df.flowline)

    # Cut lines that are long enough and different enough from the original lines
    df["to_cut"] = False
    tmp = df.loc[df.crosses]
    keep = (tmp.crosses
            & (tmp.length >= CUT_TOLERANCE)
            & ((tmp.flength - tmp.length).abs() >= CUT_TOLERANCE))
    df.loc[keep[keep].index, "to_cut"] = True
    df["inside"] = (df.length / df.flength).clip(0, 1)
    print(
        f"Found {df.to_cut.sum():,} segments that need to be cut by flowlines in {time() - cut_start:.2f}s"
    )

    # save all that are completely contained or mostly contained.
    # They must be at least 50% in waterbody to be considered mostly contained.
    # Note: there are some that are mostly outside and we exclude those here.
    # We then update this after cutting
    contained = df.loc[df.inside >= 0.5, ["wbID", "lineID"]].copy()

    ### Cut lines
    if df.to_cut.sum():
        # only work with those to cut from here on out
        df = df.loc[df.to_cut,
                    ["lineID", "flowline", "wbID", "waterbody"]].reset_index(
                        drop=True)

        # save waterbody ids to re-evaluate intersection after cutting
        wbID = df.wbID.unique()

        # extract all intersecting interior rings for these waterbodies
        print("Extracting interior rings for intersected waterbodies")
        wb = waterbodies.loc[waterbodies.index.isin(wbID)]
        outer_index, inner_index, rings = get_interior_rings(
            wb.geometry.values.data)
        if len(outer_index):
            # find the pairs of waterbody rings and lines to add
            rings = np.asarray(rings)
            wb_with_rings = wb.index.values.take(outer_index)
            lines_in_wb = df.loc[df.wbID.isin(wb_with_rings)].lineID.unique()
            lines_in_wb = flowlines.loc[flowlines.index.isin(
                lines_in_wb)].geometry
            tree = pg.STRtree(rings)
            left, right = tree.query_bulk(lines_in_wb.values.data,
                                          predicate="intersects")

            tmp = pd.DataFrame({
                "lineID": lines_in_wb.index.values.take(left),
                "flowline": lines_in_wb.values.data.take(left),
                "wbID": wb_with_rings.take(right),
                "waterbody": rings.take(right),
            })
            # DataFrame.append was removed in pandas 2.0; concat is equivalent
            df = pd.concat([df, tmp], ignore_index=True, sort=False)

        # extract the outer ring for original waterbodies
        # (type id 3 == Polygon; interior rings added above are already rings)
        ix = pg.get_type_id(df.waterbody.values.data) == 3
        df.loc[ix, "waterbody"] = pg.get_exterior_ring(
            df.loc[ix].waterbody.values.data)

        # Calculate all geometric intersections between the flowlines and
        # waterbody rings and drop any that are not points
        # Note: these may be multipoints where line crosses the ring of waterbody
        # multiple times.
        # We ignore any shared edges, etc that result from the intersection; those
        # aren't helpful for cutting the lines
        print("Finding cut points...")
        df["geometry"] = pg.intersection(df.flowline.values,
                                         df.waterbody.values)
        df = explode(
            explode(
                gp.GeoDataFrame(df[["geometry", "lineID", "flowline"]],
                                crs=flowlines.crs))).reset_index()
        # type id 0 == Point
        points = (df.loc[pg.get_type_id(df.geometry.values.data) ==
                         0].set_index("lineID").geometry)

        print("cutting flowlines")
        cut_start = time()
        flowlines, joins = cut_flowlines_at_points(flowlines,
                                                   joins,
                                                   points,
                                                   next_lineID=next_lineID)
        new_flowlines = flowlines.loc[flowlines.new]

        print(
            f"{len(new_flowlines):,} new flowlines created in {time() - cut_start:,.2f}s"
        )

        if len(new_flowlines):
            # remove any flowlines no longer present (they were replaced by cut lines)
            contained = contained.loc[contained.lineID.isin(
                flowlines.loc[~flowlines.new].index.unique())].copy()

            contained_start = time()
            # recalculate overlaps with waterbodies
            print("Recalculating overlaps with waterbodies")
            wb = waterbodies.loc[wbID]
            tree = pg.STRtree(new_flowlines.geometry.values.data)
            left, right = tree.query_bulk(wb.geometry.values.data,
                                          predicate="intersects")

            df = pd.DataFrame({
                "lineID":
                new_flowlines.index.take(right),
                "flowline":
                new_flowlines.geometry.values.data.take(right),
                "wbID":
                wb.index.take(left),
                "waterbody":
                wb.geometry.values.data.take(left),
            })

            pg.prepare(df.waterbody.values)
            df["contains"] = pg.contains(df.waterbody.values,
                                         df.flowline.values)
            print(
                f"Identified {df.contains.sum():,} more flowlines contained by waterbodies in {time() - contained_start:.2f}s"
            )

            # some aren't perfectly contained, add those that are mostly in
            df["crosses"] = False
            ix = ~df.contains
            tmp = df.loc[ix]
            df.loc[ix, "crosses"] = pg.crosses(tmp.waterbody, tmp.flowline)

            # discard any that only touch (don't cross or are contained)
            df = df.loc[df.contains | df.crosses].copy()

            # rebuild the mask from the filtered frame instead of reusing the
            # pre-filter mask, so the mask and frame always share an index
            ix = df.crosses
            tmp = df.loc[ix]
            df["geometry"] = df.flowline
            # use intersection to cut flowlines by waterbodies.  Note: this may produce
            # nonlinear (e.g., geom collection) results
            df.loc[ix, "geometry"] = pg.intersection(tmp.flowline,
                                                     tmp.waterbody)
            df["length"] = pg.length(df.geometry)
            df["flength"] = pg.length(df.flowline)

            # keep any that are contained or >= 50% in waterbody
            contained = pd.concat(
                [
                    contained,
                    df.loc[df.contains | ((df.length / df.flength) >= 0.5),
                           ["wbID", "lineID"]],
                ],
                ignore_index=True,
            )

        flowlines = flowlines.drop(columns=["new"])

    # make sure that updated joins are unique
    joins = joins.drop_duplicates()

    # make sure that wb_joins is unique
    contained = contained.groupby(by=["lineID", "wbID"]).first().reset_index()

    # set flag for flowlines in waterbodies
    flowlines["waterbody"] = flowlines.index.isin(contained.lineID.unique())

    print("Done evaluating waterbody / flowline overlap in {:.2f}s".format(
        time() - start))

    return flowlines, joins, contained
Exemple #12
0
def cut_lines_by_waterbodies(flowlines, joins, waterbodies, wb_joins, out_dir):
    """
    Cut lines by waterbodies.
    1. Intersects all previously intersected flowlines with waterbodies.
    2. For those that cross but are not completely contained by waterbodies, cut them.
    3. Evaluate the cuts, only those that have substantive cuts inside and outside are retained as cuts.
    4. Any flowlines that are not contained or crossing waterbodies are dropped from joins

    Note: invalid waterbody geometries are repaired in place on the passed-in
    ``waterbodies`` frame, and a "waterbody" flag is written to ``flowlines``.

    Parameters
    ----------
    flowlines : GeoDataFrame
    joins : DataFrame
        flowline joins
    waterbodies : GeoDataFrame
    wb_joins : DataFrame
        waterbody flowline joins
    out_dir : pathlib.Path
        output directory for writing error files, if needed

    Returns
    -------
    tuple of (GeoDataFrame, DataFrame, GeoDataFrame, DataFrame)
        (flowlines, joins, waterbodies, waterbody joins)
    """

    start = time()

    fl_geom = flowlines.loc[flowlines.index.isin(wb_joins.lineID), ["geometry"]].copy()

    # Many waterbodies have interior polygons (islands); these break the analysis below for cutting lines
    # Extract a new polygon of just their outer boundary
    wb_geom = waterbodies[["geometry"]].copy()
    wb_geom["waterbody"] = pg.polygons(pg.get_exterior_ring(wb_geom.geometry))

    print("Validating waterbodies...")
    ix = ~pg.is_valid(wb_geom.waterbody)
    invalid_count = ix.sum()
    if invalid_count:
        print("{:,} invalid waterbodies found, repairing...".format(invalid_count))

        # Buffer by 0 to fix
        # TODO: may need to do this by a small fraction and simplify instead
        repair_start = time()
        wb_geom.loc[ix, "waterbody"] = pg.buffer(wb_geom.loc[ix].waterbody, 0)
        waterbodies.loc[ix, "geometry"] = wb_geom.loc[ix].waterbody
        print("Repaired geometry in {:.2f}s".format(time() - repair_start))

    # Set indices and create combined geometry object for analysis
    wb_joins = wb_joins.set_index(["lineID", "wbID"])
    geoms = wb_joins.join(fl_geom, how="inner").join(wb_geom.waterbody)

    ### Find contained geometries
    print(
        "Identifying flowlines completely within waterbodies out of {:,} flowline / waterbody combinations...".format(
            len(geoms)
        )
    )
    contained_start = time()
    geoms["inside"] = pg.contains(geoms.waterbody.values, geoms.geometry.values)

    print(
        "Identified {:,} flowlines completely contained by waterbodies in {:.2f}s".format(
            geoms.inside.sum(), time() - contained_start
        )
    )

    # Check for logic errors - no flowline should be completely contained by more than 1 waterbody
    errors = geoms.groupby(level=[0]).inside.sum().astype("uint8") > 1
    # use .any() rather than .max(): max() of an empty Series is NaN, which is truthy
    if errors.any():
        # this most likely indicates duplicate waterbodies, which should have been resolved before this
        print(
            "ERROR: major logic error - some flowlines claim to be completely contained by multiple waterbodies"
        )
        print(
            "===> error flowlines written to {}/contained_errors.feather".format(
                out_dir
            )
        )
        # select by the lineIDs flagged as errors; isin(errors) would compare
        # lineIDs against the boolean values of the Series instead
        to_geofeather(
            flowlines.loc[flowlines.index.isin(errors.loc[errors].index)],
            out_dir / "contained_errors.feather",
            crs=CRS,
        )

    ### Check those that aren't contained to see if they cross
    print("Determining which flowlines actually cross into waterbodies...")
    cross_start = time()
    geoms = geoms.loc[~geoms.inside].copy()
    geoms["crosses"] = pg.crosses(geoms.geometry, geoms.waterbody)

    outside = geoms.loc[~(geoms["crosses"] | geoms.inside)].index

    # keep the ones that cross for further processing
    geoms = geoms.loc[geoms.crosses].copy()

    print(
        "Identified {:,} flowlines completely outside waterbodies and {:,} flowlines that cross waterbody boundaries in {:.2f}s".format(
            len(outside), len(geoms), time() - cross_start
        )
    )

    # Any that do not cross and are not completely within waterbodies should be dropped now
    # Can only drop joins by BOTH lineID and wbID (the index here)
    # Also drop associated waterbodies that no longer have joins
    wb_joins = wb_joins.loc[~wb_joins.index.isin(outside)].copy()

    # FIXME: for closely adjacent waterbodies, these are important to keep
    # Need to cut them by their multiple polys, update their joins, and feed back into following analysis
    # pg.intersection_all might work here

    # check for multiple crossings - these are errors from NHD that we can drop from here
    errors = geoms.groupby(level=0).size() > 1
    if errors.any():
        print(
            "Found {:,} flowlines that cross multiple waterbodies.  These are bad data and will be dropped from waterbody intersection.".format(
                errors.sum()
            )
        )

        # only the flagged flowlines belong in the error file
        error_ids = errors.loc[errors].index
        to_geofeather(
            flowlines.loc[error_ids].reset_index(),
            out_dir / "error_crosses_multiple.feather",
            crs=CRS,
        )

        # completely remove the flowlines from intersections and drop the waterbodies
        wb_joins = wb_joins.loc[
            ~wb_joins.index.get_level_values(0).isin(error_ids)
        ].copy()
        waterbodies = waterbodies.loc[
            waterbodies.index.isin(wb_joins.index.get_level_values(1))
        ].copy()
        geoms = geoms.loc[geoms.index.isin(wb_joins.index)].copy()

    print("Calculating geometric intersection of flowlines and waterbodies...")
    int_start = time()
    geoms = geoms[["geometry", "waterbody"]].join(flowlines.length.rename("origLength"))

    # First, calculate the geometric intersection between the lines and waterbodies
    # WARNING: this intersection may return LineString, MultiLineString, Point, GeometryCollection
    geoms["intersection"] = pg.intersection(geoms.geometry, geoms.waterbody)
    types = pg.get_type_id(geoms.intersection)
    # NOTE: all the points should be captured by the above logic for crosses
    is_point = types.isin([0, 4])  # Point, MultiPoint
    is_line = types.isin([1, 5])  # LineString, MultiLineString

    others = types[~(is_point | is_line)].unique()
    # GeometryCollection indicates a mess, skip those
    if len(others):
        print(
            "WARNING: Found other types of geometric intersection: {} (n={:,}), these will be dropped".format(
                others, len(types[~(is_point | is_line)])
            )
        )

    # Any that intersect only at a point are OUTSIDE
    outside = geoms.loc[is_point].index  # TODO: confirm this works
    wb_joins = wb_joins.loc[~wb_joins.index.isin(outside)].copy()
    print("Identified {:,} more flowlines outside waterbodies".format(len(outside)))

    # Drop those that are not lines from further analysis
    geoms = geoms.loc[is_line].copy()

    # Inspect amount of overlay - if the intersected length is within 1m of final length, it is completely within
    # if it is near 0, it is completely outside
    geoms["length"] = pg.length(geoms.intersection)
    outside = geoms.length < 1
    inside = (geoms.origLength - geoms.length).abs() < 1

    print(
        "Found {:,} more completely outside, {:,} completely inside".format(
            outside.sum(), inside.sum()
        )
    )

    # drop the ones that are outside
    wb_joins = wb_joins.loc[~wb_joins.index.isin(outside[outside].index)].copy()

    # cut the ones that aren't completely inside or outside
    geoms = geoms.loc[~(inside | outside)].copy()

    print("Done evaluating intersection in {:.2f}s".format(time() - int_start))

    if len(geoms):
        print("Cutting {:,} flowlines ...".format(len(geoms)))
        cut_start = time()
        geoms = geoms[["geometry", "waterbody", "origLength"]]

        # WARNING: difference is not precise, the point of split is not exactly at the intersection between lines
        # but within some tolerance.  This will cause them to fail the contains() test below.
        boundary = pg.boundary(geoms.waterbody)
        geoms["geometry"] = pg.difference(geoms.geometry, boundary)

        errors = ~pg.is_valid(geoms.geometry)
        if errors.any():
            print("WARNING: geometry errors for {:,} cut lines".format(errors.sum()))

        length = pg.length(geoms.geometry)
        errors = (length - geoms.origLength).abs() > 1
        if errors.any():
            print(
                "WARNING: {:,} lines were not completely cut by waterbodies (maybe shared edge?).\nThese will not be cut".format(
                    errors.sum()
                )
            )
            to_geofeather(
                flowlines.loc[
                    errors.loc[errors].index.get_level_values(0).unique()
                ].reset_index(),
                out_dir / "error_incomplete_cut.feather",
                crs=CRS,
            )

            # remove these from the cut geoms and retain their originals
            geoms = geoms.loc[~errors].copy()

        # Explode the multilines into single line segments
        geoms["geometry"] = explode(geoms.geometry)
        geoms = geoms.explode("geometry")

        # mark those parts of the cut lines that are within waterbodies
        # WARNING: this is not capturing all that should be inside after cutting!
        geoms["iswithin"] = pg.contains(geoms.waterbody, geoms.geometry)

        errors = geoms.groupby(level=0).iswithin.max() == False
        if errors.any():
            print(
                "WARNING: {:,} flowlines that cross waterbodies had no parts contained within those waterbodies".format(
                    errors.sum()
                )
            )
            # only the flagged flowlines belong in the error file
            to_geofeather(
                flowlines.loc[errors.loc[errors].index].reset_index(),
                out_dir / "error_crosses_but_not_contained.feather",
                crs=CRS,
            )

            # If they cross, assume they are within
            print("Attempting to correct these based on which ones cross")
            ix = geoms.loc[
                geoms.index.get_level_values(0).isin(errors.loc[errors].index)
            ].index
            geoms.loc[ix, "iswithin"] = pg.crosses(
                geoms.loc[ix].geometry, geoms.loc[ix].waterbody
            )

            errors = geoms.groupby(level=0).iswithin.max() == False
            print("{:,} still have no part in a waterbody".format(errors.sum()))

        # calculate total length of within and outside parts
        geoms["length"] = pg.length(geoms.geometry)

        # drop any new segments that are < 1m, these are noise
        print("Dropping {:,} new segments < 1m".format((geoms.length < 1).sum()))
        geoms = geoms.loc[geoms.length >= 1].copy()

        if len(geoms) > 1:
            length = geoms.groupby(["lineID", "wbID", "iswithin"]).agg(
                {"length": "sum", "origLength": "first"}
            )

            # Anything within 1 meter of original length is considered unchanged
            # This is so that we ignore slivers
            length["unchanged"] = (length.origLength - length["length"]).abs() < 1
            unchanged = (
                length[["unchanged"]]
                .reset_index()
                .groupby(["lineID", "wbID"])
                .unchanged.max()
                .rename("max_unchanged")
            )
            unchanged = (
                length.reset_index().set_index(["lineID", "wbID"]).join(unchanged)
            )
            is_within = (
                unchanged.loc[unchanged.max_unchanged]
                .reset_index()
                .set_index(["lineID", "wbID"])
                .iswithin
            )

            # For any that are unchanged and NOT within waterbodies,
            # remove them from wb_joins
            ix = is_within.loc[~is_within].index
            wb_joins = wb_joins.loc[~wb_joins.index.isin(ix)].copy()

            # Remove any that are unchanged from intersection analysis
            geoms = geoms.loc[~geoms.index.isin(is_within.index)].copy()

            print(
                "Created {:,} new flowlines by splitting {:,} flowlines at waterbody edges in {:.2f}".format(
                    len(geoms),
                    len(geoms.index.get_level_values(0).unique()),
                    time() - cut_start,
                )
            )

            if len(geoms) > 1:
                ### These are our final new lines to add
                # remove their lineIDs from flowlines and append
                # replace their outer joins to these ones and add intermediates

                # Join in previous line information from flowlines
                new_lines = (
                    geoms[["geometry", "length", "iswithin"]]
                    .reset_index()
                    .set_index("lineID")
                    .join(flowlines.drop(columns=["geometry", "length", "sinuosity"]))
                    .reset_index()
                    .rename(columns={"lineID": "origLineID", "iswithin": "waterbody"})
                )

                error = (
                    new_lines.groupby("origLineID").wbID.unique().apply(len).max() > 1
                )
                if error:
                    # Watch for errors - if a flowline is cut by multiple waterbodies
                    # there will be problems with our logic for splicing in new lines
                    # also - our intersection logic above is wrong
                    print(
                        """\n========\n
                    MAJOR LOGIC ERROR: multiple waterbodies associated with a single flowline that as been cut.
                    \n========\n
                    """
                    )

                # recalculate length and sinuosity
                new_lines["length"] = pg.length(new_lines.geometry).astype("float32")
                new_lines["sinuosity"] = calculate_sinuosity(new_lines.geometry).astype(
                    "float32"
                )

                # calculate new IDS
                next_segment_id = int(flowlines.index.max() + 1)
                new_lines["lineID"] = next_segment_id + new_lines.index
                new_lines.lineID = new_lines.lineID.astype("uint32")

                ### Update waterbody joins
                # remove joins replaced by above
                ix = new_lines.set_index(["origLineID", "wbID"]).index
                wb_joins = wb_joins.loc[~wb_joins.index.isin(ix)].copy()

                # add new joins
                # (DataFrame.append was removed in pandas 2.0; concat is equivalent)
                wb_joins = pd.concat(
                    [
                        wb_joins.reset_index(),
                        new_lines.loc[new_lines.waterbody, ["lineID", "wbID"]],
                    ],
                    ignore_index=True,
                    sort=False,
                ).set_index(["lineID", "wbID"])

                ### Update flowline joins
                # transform new lines to create new joins
                l = new_lines.groupby("origLineID").lineID
                # the first new line per original line is the furthest upstream, so use its
                # ID as the new downstream ID for anything that had this origLineID as its downstream
                first = l.first().rename("new_downstream_id")
                # the last new line per original line is the furthest downstream...
                last = l.last().rename("new_upstream_id")

                # Update existing joins with the new lineIDs we created at the upstream or downstream
                # ends of segments we just created
                joins = update_joins(
                    joins,
                    first,
                    last,
                    downstream_col="downstream_id",
                    upstream_col="upstream_id",
                )

                ### Create new line joins for any that weren't inserted above
                # Transform all groups of new line IDs per original lineID, wbID
                # into joins structure
                pairs = lambda a: pd.Series(zip(a[:-1], a[1:]))
                new_joins = (
                    new_lines.groupby(["origLineID", "wbID"])
                    .lineID.apply(pairs)
                    .apply(pd.Series)
                    .reset_index()
                    .rename(columns={0: "upstream_id", 1: "downstream_id"})
                    .join(
                        flowlines[["NHDPlusID", "loop"]].rename(
                            columns={"NHDPlusID": "upstream"}
                        ),
                        on="origLineID",
                    )
                )
                # NHDPlusID is same for both sides
                new_joins["downstream"] = new_joins.upstream
                new_joins["type"] = "internal"
                new_joins = new_joins[
                    [
                        "upstream",
                        "downstream",
                        "upstream_id",
                        "downstream_id",
                        "type",
                        "loop",
                    ]
                ]

                joins = pd.concat(
                    [joins, new_joins], ignore_index=True, sort=False
                ).sort_values(["downstream_id", "upstream_id"])

                ### Update flowlines
                # remove originals now replaced by cut versions here
                flowlines = (
                    pd.concat(
                        [
                            flowlines.loc[
                                ~flowlines.index.isin(new_lines.origLineID)
                            ].reset_index(),
                            new_lines[
                                ["lineID"] + list(flowlines.columns) + ["waterbody"]
                            ],
                        ],
                        ignore_index=True,
                        sort=False,
                    )
                    .sort_values("lineID")
                    .set_index("lineID")
                )

                # End cut geometries

    # Update waterbody bool for other flowlines based on those that completely intersected
    # above
    flowlines.loc[
        flowlines.index.isin(wb_joins.index.get_level_values(0).unique()), "waterbody"
    ] = True
    flowlines.waterbody = flowlines.waterbody.fillna(False)

    ### Update waterbodies and calculate flowline stats
    wb_joins = wb_joins.reset_index()
    stats = (
        wb_joins.join(flowlines.length.rename("flowlineLength"), on="lineID")
        .groupby("wbID")
        .flowlineLength.sum()
        .astype("float32")
    )
    waterbodies = waterbodies.loc[waterbodies.index.isin(wb_joins.wbID)].join(stats)

    print("Done cutting flowlines by waterbodies in {:.2f}s".format(time() - start))

    return flowlines, joins, waterbodies, wb_joins
Exemple #13
0
def find_nhd_waterbody_breaks(geometries, nhd_lines):
    """Build the geometry used to keep dam-divided waterbodies separate.

    Large waterbody complexes are sometimes split by dams, and those splits
    must survive later processing.  Two sets of geometries are collected and
    unioned together:

    * the (slightly buffered, dissolved) NHD lines that intersect any
      waterbody, and
    * the shared edges / overlaps between adjacent waterbodies, buffered by
      10 (meters per the original notes; value chosen by trial and error),
      that lie within 100 of those NHD lines.

    Callers should skip this function when ``nhd_lines`` is empty.

    Parameters
    ----------
    geometries : array of pygeos geometries
        Waterbody polygons.  Must support ``.take``.
        NOTE(review): the previous docstring named this ``df`` and typed it
        as GeoDataFrame; the code treats it as an array of geometries -
        confirm against callers.
    nhd_lines : array of pygeos geometries
        NHD line features (described as including dams).

    Returns
    -------
    pygeos geometry or None
        Result of ``pg.union_all`` over the retained buffered shared edges
        and the retained NHD lines; None when both sets are empty.
    """

    # Buffer the NHD lines a little, dissolve them, and explode the result
    # back into individual parts.
    nhd_lines = pg.get_parts(pg.union_all(pg.buffer(nhd_lines, 0.1)))

    # Any dissolved NHD part touching a waterbody is always part of the output.
    waterbody_tree = pg.STRtree(geometries)
    nhd_hits, _ = waterbody_tree.query_bulk(nhd_lines, predicate="intersects")
    keep_nhd_lines = nhd_lines[np.unique(nhd_hits)]

    # Rebuild each waterbody from its exterior ring only, then find which of
    # those shells intersect each other.
    shells = pg.polygons(pg.get_exterior_ring(geometries))
    shell_tree = pg.STRtree(shells)
    first, second = shell_tree.query_bulk(shells, predicate="intersects")

    # A shell always intersects itself; discard those hits.
    not_self = first != second
    first = first[not_self]
    second = second[not_self]

    # (a, b) and (b, a) describe the same adjacency: order each pair as
    # (smaller, larger), drop the duplicates, and sort.
    pairs = (
        pd.DataFrame(
            {
                "left": np.minimum(first, second),
                "right": np.maximum(first, second),
            }
        )
        .drop_duplicates()
        .sort_values(["left", "right"])
    )

    # Geometric intersection of every adjacent pair of original waterbodies.
    shared = pg.intersection(
        geometries.take(pairs.left.values), geometries.take(pairs.right.values)
    )

    # Intersections may be (nested) collections; unnest up to three levels.
    parts = shared
    for _ in range(3):
        parts = pg.get_parts(parts)

    # Keep only non-empty LineStrings (type id 1) and Polygons (type id 3).
    type_ids = pg.get_type_id(parts)
    is_line_or_polygon = (type_ids == 1) | (type_ids == 3)
    parts = parts[is_line_or_polygon & (~pg.is_empty(parts))].copy()

    # Buffer the shared pieces, dissolve, and explode into separate parts.
    split_lines = pg.get_parts(pg.union_all(pg.buffer(parts, 10)))

    # Retain only the buffered pieces that have an NHD line within 100.
    nhd_lines = pg.get_parts(nhd_lines)
    nhd_tree = pg.STRtree(nhd_lines)
    near_ix, _ = nhd_tree.nearest_all(split_lines, max_distance=100)
    split_lines = split_lines[np.unique(near_ix)]

    if len(split_lines) == 0 and len(keep_nhd_lines) == 0:
        return None

    return pg.union_all(np.append(split_lines, keep_nhd_lines))