Example #1
0
    def _dense_point_array(self, geoms, distance, index):
        """
        geoms - array of pygeos lines
        """
        # interpolate lines to represent them as points for Voronoi
        points = np.empty((0, 2))
        ids = []

        if pygeos.get_type_id(geoms[0]) not in [1, 2, 5]:
            lines = pygeos.boundary(geoms)
        else:
            lines = geoms
        lengths = pygeos.length(lines)
        for ix, line, length in zip(index, lines, lengths):
            if length > distance:  # some polygons might have collapsed
                pts = pygeos.line_interpolate_point(
                    line,
                    np.linspace(0.1,
                                length - 0.1,
                                num=int((length - 0.1) // distance)),
                )  # .1 offset to keep a gap between two segments
                points = np.append(points, pygeos.get_coordinates(pts), axis=0)
                ids += [ix] * len(pts)

        return points, ids
Example #2
0
def boundary_distance(polygon, points):
    """
    Find the distance between a polygon's boundary and an
    array of points.

    Uses either `shapely` or `pygeos` (5-10x faster) as a backend.
    The `pygeos` path is attempted first; if it is unavailable or
    fails for any ordinary reason the pure `shapely` path is used.

    Parameters
    -------------
    polygon : shapely.geometry.Polygon
      Polygon to query
    points : (n, 2) float
      2D points

    Returns
    ------------
    distance : (n,) float
      Minimum distance from each point to polygon boundary
    """

    try:
        import pygeos
        # the pygeos way is 5-10x faster
        pg_points = pygeos.points(*points.T)
        pg_boundary = pygeos.boundary(pygeos.Geometry(polygon.wkt))
        distance = pygeos.distance(pg_boundary, pg_points)
    except Exception:
        # `Exception` rather than `BaseException` so that KeyboardInterrupt
        # and SystemExit are not swallowed by the fallback
        # in pure shapely we have to loop
        inverse = polygon.boundary
        # iterate `.geoms`: multi-part geometries are not directly
        # iterable in shapely 2.x, while `.geoms` works on 1.x and 2.x
        distance = np.array([
            inverse.distance(i) for i in MultiPoint(points).geoms])

    return distance
Example #3
0
def constructive(arr, operation, *args, **kwargs):
    """
    Apply a named constructive operation to WKB-encoded geometries.

    ``arr`` is decoded with ``pg.from_wkb``, passed to the function of the
    same name as ``operation`` on ``pg``, and the result is re-encoded with
    ``pg.to_wkb``. Positional ``args`` are forwarded only for the operations
    that take extra positional parameters; ``kwargs`` are always forwarded.

    An unsupported ``operation`` emits a warning and returns ``None``.
    """
    # operations that receive the extra positional arguments
    takes_args = (
        'buffer',
        'clip_by_rect',
        'offset_curve',
        'simplify',
        'snap',
    )
    # operations that only receive keyword arguments
    kwargs_only = (
        'boundary',
        'build_area',
        'centroid',
        'convex_hull',
        'delaunay_triangles',
        'envelope',
        'extract_unique_points',
        'make_valid',
        'normalize',
        'point_on_surface',
        'reverse',
        'voronoi_polygons',
    )

    if operation in takes_args:
        positional = args
    elif operation in kwargs_only:
        positional = ()
    else:
        warnings.warn(f'Operation {operation} not supported.')
        return None

    func = getattr(pg, operation)
    geometries = func(pg.from_wkb(arr), *positional, **kwargs)
    return pg.to_wkb(geometries)
Example #4
0
def enclosures(primary_barriers,
               limit=None,
               additional_barriers=None,
               enclosure_id="eID"):
    """
    Generate enclosures based on passed barriers.

    Enclosures are areas enclosed from all sides by at least one type of
    a barrier. Barriers are typically roads, railways, natural features
    like rivers and other water bodies or coastline. Enclosures are a
    result of polygonization of the ``primary_barriers`` and ``limit`` and
    its subdivision based on ``additional_barriers``.

    Parameters
    ----------
    primary_barriers : GeoDataFrame, GeoSeries
        GeoDataFrame or GeoSeries containing primary barriers.
        (Multi)LineString geometry is expected.
    limit : GeoDataFrame, GeoSeries (default None)
        GeoDataFrame or GeoSeries containing external limit of enclosures,
        i.e. the area which gets partitioned. If None is passed,
        the internal area of ``primary_barriers`` will be used.
    additional_barriers : list of GeoDataFrame or GeoSeries (default None)
        List of GeoDataFrames or GeoSeries containing additional barriers.
        (Multi)LineString geometry is expected.
    enclosure_id : str (default 'eID')
        name of the enclosure_id (to be created).

    Returns
    -------
    enclosures : GeoDataFrame
       GeoDataFrame containing enclosure geometries and enclosure_id

    Raises
    ------
    TypeError
        If ``additional_barriers`` is given but is not a list.

    Examples
    --------
    >>> enclosures = mm.enclosures(streets, admin_boundary, [railway, rivers])

    """
    # validate up front so a bad argument fails before any geometry work
    if additional_barriers is not None and not isinstance(
            additional_barriers, list):
        raise TypeError(
            "`additional_barriers` expects a list of GeoDataFrames or "
            f"GeoSeries. Got {type(additional_barriers)}.")

    if limit is not None:
        # polygonal limits contribute their outline as a barrier
        if limit.geom_type.isin(["Polygon", "MultiPolygon"]).any():
            limit = limit.boundary
        barriers = pd.concat([primary_barriers.geometry, limit.geometry])
    else:
        barriers = primary_barriers
    unioned = barriers.unary_union
    polygons = polygonize(unioned)
    enclosures = gpd.GeoSeries(list(polygons), crs=primary_barriers.crs)

    if additional_barriers is not None:
        additional = pd.concat([gdf.geometry for gdf in additional_barriers])

        inp, res = enclosures.sindex.query_bulk(additional.geometry,
                                                predicate="intersects")
        unique = np.unique(res)

        new = []

        for i in unique:
            poly = enclosures.values.data[i]  # get enclosure polygon
            crossing = inp[res == i]  # get relevant additional barriers
            buf = pygeos.buffer(poly, 0.01)  # to avoid floating point errors
            crossing_ins = pygeos.intersection(
                buf, additional.values.data[crossing]
            )  # keeping only parts of additional barriers within polygon
            union = pygeos.union_all(
                np.append(crossing_ins, pygeos.boundary(poly)))  # union
            polygons = np.array(list(polygonize(
                _pygeos_to_shapely(union))))  # polygonize
            within = pygeos.covered_by(
                pygeos.from_shapely(polygons),
                buf)  # keep only those within original polygon
            new += list(polygons[within])

        # replace the subdivided enclosures with their parts; pd.concat is
        # used because Series.append no longer exists in recent pandas
        subdivided = gpd.GeoSeries(new, crs=primary_barriers.crs)
        final_enclosures = pd.concat(
            [enclosures.drop(unique), subdivided]).reset_index(drop=True)
        # only (re)assign the CRS when there is one to assign
        if primary_barriers.crs is not None:
            final_enclosures = final_enclosures.set_crs(primary_barriers.crs)

        return gpd.GeoDataFrame({enclosure_id: range(len(final_enclosures))},
                                geometry=final_enclosures)

    return gpd.GeoDataFrame({enclosure_id: range(len(enclosures))},
                            geometry=enclosures)
Example #5
0
def boundary(data):
    """Return the boundary of ``data`` using whichever backend is enabled.

    Dispatches to ``pygeos.boundary`` when ``compat.USE_PYGEOS`` is truthy,
    otherwise to ``_unary_geo("boundary", data)``.
    """
    if not compat.USE_PYGEOS:
        return _unary_geo("boundary", data)
    return pygeos.boundary(data)
Example #6
0
def snap_to_large_waterbodies(df, to_snap):
    """Snap dams to the drain point of the nearest large waterbody.

    NOTE: only run this on dams that could not snap to flowlines, to avoid
    moving them far away.

    This captures large dam centerpoints that are not near enough to flowlines.

    Dams within NEAR_WB_TOLERANCE of a large waterbody boundary are matched
    to that waterbody's drain points; the closest drain point within
    WB_DRAIN_MAX_TOLERANCE is used as the snapped location.

    Updates df with snapping results, and returns to_snap as set of dams still
    needing to be snapped after this operation.

    Parameters
    ----------
    df : GeoDataFrame
        master dataset, this is where all snapping gets recorded
    to_snap : DataFrame
        data frame containing pygeos geometries to snap ("geometry")
        and snapping tolerance ("snap_tolerance")

    Returns
    -------
    tuple of (GeoDataFrame, DataFrame)
        (df, to_snap)
    """
    merged_dir = nhd_dir / "merged"

    waterbodies = from_geofeather(merged_dir / "large_waterbodies.feather")
    waterbodies = waterbodies.set_index("wbID")

    drain_points = from_geofeather(
        merged_dir / "large_waterbody_drain_points.feather"
    )
    drain_points = drain_points.rename(columns={"id": "drainID"}).set_index(
        "drainID"
    )

    # candidate waterbodies per dam, based on distance to waterbody boundary
    candidates = nearest(
        to_snap.geometry, pg.boundary(waterbodies.geometry), NEAR_WB_TOLERANCE
    )

    # drain points keyed by waterbody so they can be joined on "wbID"
    drains_by_wb = drain_points.reset_index().set_index("wbID")
    drains_by_wb = drains_by_wb[["geometry", "drainID", "lineID"]].rename(
        columns={"geometry": "drain"}
    )

    near_wb = pd.DataFrame(candidates).join(to_snap.geometry)
    near_wb = near_wb.join(drains_by_wb, on="wbID")
    # waterbodies without a drain point cannot be snapped to
    near_wb = near_wb.dropna(subset=["drain"])
    near_wb["snap_dist"] = pg.distance(near_wb.geometry, near_wb.drain)

    # drop any drain points beyond the maximum tolerance, these aren't useful
    within_tolerance = near_wb.snap_dist <= WB_DRAIN_MAX_TOLERANCE
    near_wb = near_wb.loc[within_tolerance].copy()

    # take the closest drain point per dam
    near_wb = near_wb.sort_values(by="snap_dist")
    near_wb = near_wb.groupby(level=0).first()

    ix = near_wb.index
    df.loc[ix, "snapped"] = True
    df.loc[ix, "geometry"] = near_wb.drain
    # NOTE(review): this records the "distance" column (presumably produced by
    # nearest()) rather than the "snap_dist" computed above - confirm intended
    df.loc[ix, "snap_dist"] = near_wb.distance
    df.loc[ix, "snap_ref_id"] = near_wb.drainID
    df.loc[ix, "lineID"] = near_wb.lineID
    df.loc[ix, "wbID"] = near_wb.wbID

    df.loc[ix, "snap_log"] = ndarray_append_strings(
        "snapped: within ",
        WB_DRAIN_MAX_TOLERANCE,
        "m tolerance of drain point of large waterbody that is within ",
        NEAR_WB_TOLERANCE,
        "m of dam",
    )

    # whatever was snapped here no longer needs snapping
    already_snapped = to_snap.index.isin(ix)
    to_snap = to_snap.loc[~already_snapped].copy()

    message = "Found {:,} dams within {}m of large waterbodies and within {}m of the drain point of those waterbodies".format(
        len(near_wb), NEAR_WB_TOLERANCE, WB_DRAIN_MAX_TOLERANCE
    )
    print(message)

    return df, to_snap
Example #7
0
def cut_lines_by_waterbodies(flowlines, joins, waterbodies, wb_joins, out_dir):
    """
    Cut lines by waterbodies.
    1. Intersects all previously intersected flowlines with waterbodies.
    2. For those that cross but are not completely contained by waterbodies, cut them.
    3. Evaluate the cuts, only those that have substantive cuts inside and outside are retained as cuts.
    4. Any flowlines that are not contained or crossing waterbodies are dropped from joins

    Diagnostic feather files are written to ``out_dir`` when suspicious data
    are found (flowlines contained by multiple waterbodies, flowlines crossing
    multiple waterbodies, incomplete cuts, crossings with no contained part).

    Parameters
    ----------
    flowlines : GeoDataFrame
    joins : DataFrame
        flowline joins
    waterbodies : GeoDataFrame
    wb_joins : DataFrame
        waterbody flowline joins
    out_dir : pathlib.Path
        output directory for writing error files, if needed

    Returns
    -------
    tuple of (GeoDataFrame, DataFrame, GeoDataFrame, DataFrame)
        (flowlines, joins, waterbodies, waterbody joins)
    """

    start = time()

    fl_geom = flowlines.loc[flowlines.index.isin(wb_joins.lineID), ["geometry"]].copy()

    # Many waterbodies have interior polygons (islands); these break the analysis below for cutting lines
    # Extract a new polygon of just their outer boundary
    wb_geom = waterbodies[["geometry"]].copy()
    wb_geom["waterbody"] = pg.polygons(pg.get_exterior_ring(wb_geom.geometry))

    print("Validating waterbodies...")
    ix = ~pg.is_valid(wb_geom.waterbody)
    invalid_count = ix.sum()
    if invalid_count:
        print("{:,} invalid waterbodies found, repairing...".format(invalid_count))

        # Buffer by 0 to fix
        # TODO: may need to do this by a small fraction and simplify instead
        repair_start = time()
        wb_geom.loc[ix, "waterbody"] = pg.buffer(wb_geom.loc[ix].waterbody, 0)
        waterbodies.loc[ix, "geometry"] = wb_geom.loc[ix].waterbody
        print("Repaired geometry in {:.2f}s".format(time() - repair_start))

    # Set indices and create combined geometry object for analysis
    wb_joins = wb_joins.set_index(["lineID", "wbID"])
    geoms = wb_joins.join(fl_geom, how="inner").join(wb_geom.waterbody)

    ### Find contained geometries
    print(
        "Identifying flowlines completely within waterbodies out of {:,} flowline / waterbody combinations...".format(
            len(geoms)
        )
    )
    contained_start = time()
    geoms["inside"] = pg.contains(geoms.waterbody.values, geoms.geometry.values)

    print(
        "Identified {:,} flowlines completely contained by waterbodies in {:.2f}s".format(
            geoms.inside.sum(), time() - contained_start
        )
    )

    # Check for logic errors - no flowline should be completely contained by more than 1 waterbody
    errors = geoms.groupby(level=[0]).inside.sum().astype("uint8") > 1
    if errors.max():
        # this most likely indicates duplicate waterbodies, which should have been resolved before this
        print(
            "ERROR: major logic error - some flowlines claim to be completely contained by multiple waterbodies"
        )
        print(
            "===> error flowlines written to {}/contained_errors.feather".format(
                out_dir
            )
        )
        # select by the lineIDs flagged as errors; `errors` itself only holds
        # booleans, so testing membership against it would match the wrong rows
        to_geofeather(
            flowlines.loc[flowlines.index.isin(errors.loc[errors].index)],
            out_dir / "contained_errors.feather",
            crs=CRS,
        )

    ### Check those that aren't contained to see if they cross
    print("Determining which flowlines actually cross into waterbodies...")
    cross_start = time()
    geoms = geoms.loc[~geoms.inside].copy()
    geoms["crosses"] = pg.crosses(geoms.geometry, geoms.waterbody)

    outside = geoms.loc[~(geoms["crosses"] | geoms.inside)].index

    # keep the ones that cross for further processing
    geoms = geoms.loc[geoms.crosses].copy()

    print(
        "Identified {:,} flowlines completely outside waterbodies and {:,} flowlines that cross waterbody boundaries in {:.2f}s".format(
            len(outside), len(geoms), time() - cross_start
        )
    )

    # Any that do not cross and are not completely within waterbodies should be dropped now
    # Can only drop joins by BOTH lineID and wbID (the index here)
    # Also drop associated waterbodies that no longer have joins
    wb_joins = wb_joins.loc[~wb_joins.index.isin(outside)].copy()

    # FIXME: for closely adjacent waterbodies, these are important to keep
    # Need to cut them by their multiple polys, update their joins, and feed back into following analysis
    # pg.intersection_all might work here

    # check for multiple crossings - these are errors from NHD that we can drop from here
    errors = geoms.groupby(level=0).size() > 1
    if errors.max():
        print(
            "Found {:,} flowlines that cross multiple waterbodies.  These are bad data and will be dropped from waterbody intersection.".format(
                errors.sum()
            )
        )

        # only write the flowlines actually flagged, not every crossing flowline
        to_geofeather(
            flowlines.loc[errors.loc[errors].index].reset_index(),
            out_dir / "error_crosses_multiple.feather",
            crs=CRS,
        )

        # completely remove the flowlines from intersections and drop the waterbodies
        wb_joins = wb_joins.loc[
            ~wb_joins.index.get_level_values(0).isin(errors.loc[errors].index)
        ].copy()
        waterbodies = waterbodies.loc[
            waterbodies.index.isin(wb_joins.index.get_level_values(1))
        ].copy()
        geoms = geoms.loc[geoms.index.isin(wb_joins.index)].copy()

    print("Calculating geometric intersection of flowlines and waterbodies...")
    int_start = time()
    geoms = geoms[["geometry", "waterbody"]].join(flowlines.length.rename("origLength"))

    # First, calculate the geometric intersection between the lines and waterbodies
    # WARNING: this intersection may return LineString, MultiLineString, Point, GeometryCollection
    geoms["intersection"] = pg.intersection(geoms.geometry, geoms.waterbody)
    types = pg.get_type_id(geoms.intersection)
    # NOTE: all the points should be captured by the above logic for crosses
    is_point = types.isin([0, 4])
    is_line = types.isin([1, 5])

    others = types[~(is_point | is_line)].unique()
    # GeometryCollection indicates a mess, skip those
    if len(others):
        print(
            "WARNING: Found other types of geometric intersection: {} (n={:,}), these will be dropped".format(
                others, len(types[~(is_point | is_line)])
            )
        )

    # Any that intersect only at a point are OUTSIDE
    outside = geoms.loc[is_point].index  # TODO: confirm this works
    wb_joins = wb_joins.loc[~wb_joins.index.isin(outside)].copy()
    print("Identified {:,} more flowlines outside waterbodies".format(len(outside)))

    # Drop those that are not lines from further analysis
    geoms = geoms.loc[is_line].copy()

    # Inspect amount of overlay - if the intersected length is within 1m of final length, it is completely within
    # if it is near 0, it is completely outside
    geoms["length"] = pg.length(geoms.intersection)
    outside = geoms.length < 1
    inside = (geoms.origLength - geoms.length).abs() < 1

    print(
        "Found {:,} more completely outside, {:,} completely inside".format(
            outside.sum(), inside.sum()
        )
    )

    # drop the ones that are outside
    wb_joins = wb_joins.loc[~wb_joins.index.isin(outside[outside].index)].copy()

    # cut the ones that aren't completely inside or outside
    geoms = geoms.loc[~(inside | outside)].copy()

    print("Done evaluating intersection in {:.2f}s".format(time() - int_start))

    if len(geoms):
        print("Cutting {:,} flowlines ...".format(len(geoms)))
        cut_start = time()
        geoms = geoms[["geometry", "waterbody", "origLength"]]

        # WARNING: difference is not precise, the point of split is not exactly at the intersection between lines
        # but within some tolerance.  This will cause them to fail the contains() test below.
        boundary = pg.boundary(geoms.waterbody)
        geoms["geometry"] = pg.difference(geoms.geometry, boundary)

        errors = ~pg.is_valid(geoms.geometry)
        if errors.max():
            print("WARNING: geometry errors for {:,} cut lines".format(errors.sum()))

        length = pg.length(geoms.geometry)
        errors = (length - geoms.origLength).abs() > 1
        if errors.max():
            print(
                "WARNING: {:,} lines were not completely cut by waterbodies (maybe shared edge?).\nThese will not be cut".format(
                    errors.sum()
                )
            )
            to_geofeather(
                flowlines.loc[
                    errors.loc[errors].index.get_level_values(0).unique()
                ].reset_index(),
                out_dir / "error_incomplete_cut.feather",
                crs=CRS,
            )

            # remove these from the cut geoms and retain their originals
            geoms = geoms.loc[~errors].copy()

        # Explode the multilines into single line segments
        geoms["geometry"] = explode(geoms.geometry)
        geoms = geoms.explode("geometry")

        # mark those parts of the cut lines that are within waterbodies
        # WARNING: this is not capturing all that should be inside after cutting!
        geoms["iswithin"] = pg.contains(geoms.waterbody, geoms.geometry)

        errors = geoms.groupby(level=0).iswithin.max() == False
        if errors.max():
            print(
                "WARNING: {:,} flowlines that cross waterbodies had no parts contained within those waterbodies".format(
                    errors.sum()
                )
            )
            # only write the flowlines actually flagged, not every cut flowline
            to_geofeather(
                flowlines.loc[errors.loc[errors].index].reset_index(),
                out_dir / "error_crosses_but_not_contained.feather",
                crs=CRS,
            )

            # If they cross, assume they are within
            print("Attempting to correct these based on which ones cross")
            ix = geoms.loc[
                geoms.index.get_level_values(0).isin(errors.loc[errors].index)
            ].index
            geoms.loc[ix, "iswithin"] = pg.crosses(
                geoms.loc[ix].geometry, geoms.loc[ix].waterbody
            )

            errors = geoms.groupby(level=0).iswithin.max() == False
            print("{:,} still have no part in a waterbody".format(errors.sum()))

        # calculate total length of within and outside parts
        geoms["length"] = pg.length(geoms.geometry)

        # drop any new segments that are < 1m, these are noise
        print("Dropping {:,} new segments < 1m".format((geoms.length < 1).sum()))
        geoms = geoms.loc[geoms.length >= 1].copy()

        if len(geoms) > 1:
            length = geoms.groupby(["lineID", "wbID", "iswithin"]).agg(
                {"length": "sum", "origLength": "first"}
            )

            # Anything within 1 meter of original length is considered unchanged
            # This is so that we ignore slivers
            length["unchanged"] = (length.origLength - length["length"]).abs() < 1
            unchanged = (
                length[["unchanged"]]
                .reset_index()
                .groupby(["lineID", "wbID"])
                .unchanged.max()
                .rename("max_unchanged")
            )
            unchanged = (
                length.reset_index().set_index(["lineID", "wbID"]).join(unchanged)
            )
            is_within = (
                unchanged.loc[unchanged.max_unchanged]
                .reset_index()
                .set_index(["lineID", "wbID"])
                .iswithin
            )

            # For any that are unchanged and NOT within waterbodies,
            # remove them from wb_joins
            ix = is_within.loc[~is_within].index
            wb_joins = wb_joins.loc[~wb_joins.index.isin(ix)].copy()

            # Remove any that are unchanged from intersection analysis
            geoms = geoms.loc[~geoms.index.isin(is_within.index)].copy()

            print(
                "Created {:,} new flowlines by splitting {:,} flowlines at waterbody edges in {:.2f}".format(
                    len(geoms),
                    len(geoms.index.get_level_values(0).unique()),
                    time() - cut_start,
                )
            )

            if len(geoms) > 1:
                ### These are our final new lines to add
                # remove their lineIDs from flowlines and append
                # replace their outer joins to these ones and add intermediates

                # Join in previous line information from flowlines
                new_lines = (
                    geoms[["geometry", "length", "iswithin"]]
                    .reset_index()
                    .set_index("lineID")
                    .join(flowlines.drop(columns=["geometry", "length", "sinuosity"]))
                    .reset_index()
                    .rename(columns={"lineID": "origLineID", "iswithin": "waterbody"})
                )

                error = (
                    new_lines.groupby("origLineID").wbID.unique().apply(len).max() > 1
                )
                if error:
                    # Watch for errors - if a flowline is cut by multiple waterbodies
                    # there will be problems with our logic for splicing in new lines
                    # also - our intersection logic above is wrong
                    print(
                        """\n========\n
                    MAJOR LOGIC ERROR: multiple waterbodies associated with a single flowline that as been cut.
                    \n========\n
                    """
                    )

                # recalculate length and sinuosity
                new_lines["length"] = pg.length(new_lines.geometry).astype("float32")
                new_lines["sinuosity"] = calculate_sinuosity(new_lines.geometry).astype(
                    "float32"
                )

                # calculate new IDS
                next_segment_id = int(flowlines.index.max() + 1)
                new_lines["lineID"] = next_segment_id + new_lines.index
                new_lines.lineID = new_lines.lineID.astype("uint32")

                ### Update waterbody joins
                # remove joins replaced by above
                ix = new_lines.set_index(["origLineID", "wbID"]).index
                wb_joins = wb_joins.loc[~wb_joins.index.isin(ix)].copy()

                # add new joins (pd.concat instead of DataFrame.append, which
                # is not available in recent pandas)
                wb_joins = pd.concat(
                    [
                        wb_joins.reset_index(),
                        new_lines.loc[new_lines.waterbody, ["lineID", "wbID"]],
                    ],
                    ignore_index=True,
                    sort=False,
                ).set_index(["lineID", "wbID"])

                ### Update flowline joins
                # transform new lines to create new joins
                line_ids = new_lines.groupby("origLineID").lineID
                # the first new line per original line is the furthest upstream, so use its
                # ID as the new downstream ID for anything that had this origLineID as its downstream
                first = line_ids.first().rename("new_downstream_id")
                # the last new line per original line is the furthest downstream...
                last = line_ids.last().rename("new_upstream_id")

                # Update existing joins with the new lineIDs we created at the upstream or downstream
                # ends of segments we just created
                joins = update_joins(
                    joins,
                    first,
                    last,
                    downstream_col="downstream_id",
                    upstream_col="upstream_id",
                )

                ### Create new line joins for any that weren't inserted above
                # Transform all groups of new line IDs per original lineID, wbID
                # into joins structure
                pairs = lambda a: pd.Series(zip(a[:-1], a[1:]))
                new_joins = (
                    new_lines.groupby(["origLineID", "wbID"])
                    .lineID.apply(pairs)
                    .apply(pd.Series)
                    .reset_index()
                    .rename(columns={0: "upstream_id", 1: "downstream_id"})
                    .join(
                        flowlines[["NHDPlusID", "loop"]].rename(
                            columns={"NHDPlusID": "upstream"}
                        ),
                        on="origLineID",
                    )
                )
                # NHDPlusID is same for both sides
                new_joins["downstream"] = new_joins.upstream
                new_joins["type"] = "internal"
                new_joins = new_joins[
                    [
                        "upstream",
                        "downstream",
                        "upstream_id",
                        "downstream_id",
                        "type",
                        "loop",
                    ]
                ]

                joins = pd.concat(
                    [joins, new_joins], ignore_index=True, sort=False
                ).sort_values(["downstream_id", "upstream_id"])

                ### Update flowlines
                # remove originals now replaced by cut versions here
                flowlines = (
                    pd.concat(
                        [
                            flowlines.loc[
                                ~flowlines.index.isin(new_lines.origLineID)
                            ].reset_index(),
                            new_lines[
                                ["lineID"] + list(flowlines.columns) + ["waterbody"]
                            ],
                        ],
                        ignore_index=True,
                        sort=False,
                    )
                    .sort_values("lineID")
                    .set_index("lineID")
                )

                # End cut geometries

    # Update waterbody bool for other flowlines based on those that completely intersected
    # above
    flowlines.loc[
        flowlines.index.isin(wb_joins.index.get_level_values(0).unique()), "waterbody"
    ] = True
    flowlines.waterbody = flowlines.waterbody.fillna(False)

    ### Update waterbodies and calculate flowline stats
    wb_joins = wb_joins.reset_index()
    stats = (
        wb_joins.join(flowlines.length.rename("flowlineLength"), on="lineID")
        .groupby("wbID")
        .flowlineLength.sum()
        .astype("float32")
    )
    waterbodies = waterbodies.loc[waterbodies.index.isin(wb_joins.wbID)].join(stats)

    print("Done cutting flowlines by waterbodies in {:.2f}s".format(time() - start))

    return flowlines, joins, waterbodies, wb_joins