Esempio n. 1
0
def _prepare_input(geometry, prepare):
    """Prepare without modifying inplace"""
    if prepare:
        geometry = pygeos.apply(geometry, lambda x: x)  # makes a copy
        pygeos.prepare(geometry)
        return geometry
    else:
        return geometry
Esempio n. 2
0
def test_destroy_prepared():
    """destroy_prepared resets prepared pointers, skips None, and can be repeated."""
    geoms = np.array([pygeos.points(1, 1), None, pygeos.box(0, 0, 1, 1)])

    pygeos.prepare(geoms)
    for idx in (0, 2):
        assert geoms[idx]._ptr_prepared != 0

    pygeos.destroy_prepared(geoms)
    assert geoms[0]._ptr_prepared == 0
    assert geoms[1] is None
    assert geoms[2]._ptr_prepared == 0

    # calling it again on geometries that are no longer prepared must not raise
    pygeos.destroy_prepared(geoms)
Esempio n. 3
0
def test_query_bulk_with_prepared(tree):
    """query_bulk must give the same result whether or not inputs are prepared."""
    boxes = np.array([box(0, 0, 1, 1), box(3, 3, 5, 5)])
    baseline = tree.query_bulk(boxes, predicate="intersects")

    # mixed case: only the first geometry of the array is prepared
    pygeos.prepare(boxes[0])
    partially_prepared = tree.query_bulk(boxes, predicate="intersects")
    assert_array_equal(baseline, partially_prepared)

    # every geometry of the array is prepared
    pygeos.prepare(boxes)
    fully_prepared = tree.query_bulk(boxes, predicate="intersects")
    assert_array_equal(baseline, fully_prepared)
Esempio n. 4
0
def test_prepare():
    """prepare sets the prepared pointer, skips None, and is a no-op when repeated."""
    geoms = np.array([pygeos.points(1, 1), None, pygeos.box(0, 0, 1, 1)])

    # freshly created geometries start out unprepared
    for idx in (0, 2):
        assert geoms[idx]._ptr_prepared == 0

    pygeos.prepare(geoms)
    assert geoms[0]._ptr_prepared != 0
    assert geoms[1] is None
    assert geoms[2]._ptr_prepared != 0

    # a second prepare keeps the pointer that is already there
    pointer_before = geoms[0]._ptr_prepared
    pygeos.prepare(geoms)
    assert geoms[0]._ptr_prepared == pointer_before
Esempio n. 5
0
 def __init__(self, bdy):
     """
     Construct rich "Boundary" type. Input "bdy" can either be:
         (1) GlobalSmoothBoundary type (from pybie2d)
         (2) complex form of (x, y) points of boundary

     A GSB instance (including an instance of a GSB subclass) is stored
     as-is; anything else is passed to ``GSB(c=bdy)`` to build one.

     Sets ``self.GSB``, copies its ``c``, ``x`` and ``y`` attributes onto
     this object, and stores a prepared pygeos polygon built from the
     (x, y) pairs in ``self.SH``.
     """
     # isinstance rather than an exact type comparison, so that subclasses
     # of GSB are not mistakenly re-wrapped through GSB(c=...)
     if isinstance(bdy, GSB):
         self.GSB = bdy
     else:
         self.GSB = GSB(c=bdy)
     self.c = self.GSB.c
     self.x = self.GSB.x
     self.y = self.GSB.y
     # polygon of the boundary points, prepared immediately after creation
     self.SH = pygeos.polygons([*zip(self.x, self.y)])
     pygeos.prepare(self.SH)
    def _pandas(cls, column, **kwargs):

        shape = kwargs.get("shape")
        shape_format = kwargs.get("shape_format")
        column_shape_format = kwargs.get("column_shape_format")

        # Check that shape is given and given in the correct format
        if shape is not None:
            try:
                if shape_format == "wkt":
                    shape_ref = geos.from_wkt(shape)
                elif shape_format == "wkb":
                    shape_ref = geos.from_wkb(shape)
                elif shape_format == "geojson":
                    shape_ref = geos.from_geojson(shape)
                else:
                    raise NotImplementedError(
                        "Shape constructor method not implemented. Must be in WKT, WKB, or GeoJSON format."
                    )
            except:
                raise Exception("A valid reference shape was not given.")
        else:
            raise Exception("A shape must be provided for this method.")

        # Load the column into a pygeos Geometry vector from numpy array (Series not supported).
        if column_shape_format == "wkt":
            shape_test = geos.from_wkt(column.to_numpy(), on_invalid="ignore")
        elif column_shape_format == "wkb":
            shape_test = geos.from_wkb(column.to_numpy(), on_invalid="ignore")
        else:
            raise NotImplementedError(
                "Column values shape format not implemented.")

        # Allow for an array of reference shapes to be provided. Return a union of all the shapes in the array (Polygon or Multipolygon)
        shape_ref = geos.union_all(shape_ref)

        # Prepare the geometries
        geos.prepare(shape_ref)
        geos.prepare(shape_test)
        column_centroids = geos.centroid(shape_test)

        print(column_centroids)

        return pd.Series(geos.within(column_centroids, shape_ref))
Esempio n. 7
0
def _prepare_with_copy(geometry):
    """Return a prepared copy of ``geometry``, leaving the input unprepared.

    The copy comes from running the geometries through an identity
    ``pygeos.apply``; only that copy is prepared.
    """
    duplicate = pygeos.apply(geometry, lambda geom: geom)
    pygeos.prepare(duplicate)
    return duplicate
Esempio n. 8
0
 def _sanitize(x):
     """Squeeze every axis after the first out of ``x``, prepare it, return it.

     ``x`` must expose ``shape`` and ``squeeze(axis)``. Axis 0 is always kept.
     ``prepare`` is called on the squeezed result before it is returned
     (presumably pygeos' ``prepare`` -- confirm against the file's imports).
     """
     # Go from the last axis down to axis 1: squeezing in ascending order
     # shifts the remaining axes left, so the later indices would point past
     # the end of the array as soon as x has more than two dimensions.
     for axis in reversed(range(1, len(x.shape))):
         x = x.squeeze(axis)
     prepare(x)
     return x
Esempio n. 9
0
def test_query_with_prepared(tree):
    """query must give the same result before and after the input is prepared."""
    query_geom = box(0, 0, 1, 1)
    baseline = tree.query(query_geom, predicate="intersects")

    pygeos.prepare(query_geom)
    after_prepare = tree.query(query_geom, predicate="intersects")
    assert_array_equal(baseline, after_prepare)
Esempio n. 10
0
def cut_lines_by_waterbodies(flowlines, joins, waterbodies, next_lineID):
    """
    Cut lines by waterbodies.
    1. Finds all intersections between waterbodies and flowlines.
    2. For those that cross but are not completely contained by waterbodies, cut them.
    3. Evaluate the cuts, only those that have substantive cuts inside and outside are retained as cuts.
    4. Any flowlines that are not contained or crossing waterbodies are dropped from wb_joins

    Parameters
    ----------
    flowlines : GeoDataFrame
    joins : DataFrame
        flowline joins
    waterbodies : GeoDataFrame
    next_lineID : int
        next lineID; must be greater than all prior lines in region

    Returns
    -------
    tuple of (GeoDataFrame, DataFrame, DataFrame)
        (flowlines, joins, waterbody joins). flowlines gains a boolean
        "waterbody" column flagging lines present in the waterbody joins;
        waterbody joins holds unique (lineID, wbID) pairs.

    Raises
    ------
    ValueError
        if a flowline is contained by more than one waterbody
    """

    start = time()

    ### Find flowlines that intersect waterbodies

    join_start = time()
    tree = pg.STRtree(flowlines.geometry.values.data)
    left, right = tree.query_bulk(waterbodies.geometry.values.data,
                                  predicate="intersects")
    df = pd.DataFrame({
        "lineID": flowlines.index.take(right),
        "flowline": flowlines.geometry.values.data.take(right),
        "wbID": waterbodies.index.take(left),
        "waterbody": waterbodies.geometry.values.data.take(left),
    })
    print(
        f"Found {len(df):,} waterbody / flowline joins in {time() - join_start:.2f}s"
    )

    ### Find those that are completely contained; these don't need further processing
    pg.prepare(df.waterbody.values)

    # find those that are fully contained and do not touch the edge of the waterbody (contains_properly predicate)
    # contains_properly is very fast
    contained_start = time()
    df["contains"] = pg.contains_properly(df.waterbody.values,
                                          df.flowline.values)
    print(
        f"Identified {df.contains.sum():,} flowlines fully within waterbodies in {time() - contained_start:.2f}s"
    )

    # find those that aren't fully contained by contained and touch the edge of waterbody (contains predicate)
    contained_start = time()
    ix = ~df.contains
    tmp = df.loc[ix]
    df.loc[ix, "contains"] = pg.contains(tmp.waterbody, tmp.flowline)
    print(
        f"Identified {df.loc[ix].contains.sum():,} more flowlines contained by waterbodies in {time() - contained_start:.2f}s"
    )

    # Sanity check: flowlines should only ever be contained by one waterbody
    if df.loc[df.contains].groupby("lineID").size().max() > 1:
        raise ValueError(
            "ERROR: one or more lines contained by multiple waterbodies")

    # for any that are not completely contained, find the ones that overlap
    crosses_start = time()
    df["crosses"] = False
    ix = ~df.contains
    tmp = df.loc[ix]
    df.loc[ix, "crosses"] = pg.crosses(tmp.waterbody, tmp.flowline)
    print(
        f"Identified {df.crosses.sum():,} flowlines that cross edge of waterbodies in {time() - crosses_start:.2f}s"
    )

    # discard any that only touch (ones that don't cross or are contained)
    # note that we only cut the ones that cross below; contained ones are left intact
    df = df.loc[df.contains | df.crosses].copy()

    print("Intersecting flowlines and waterbodies...")
    cut_start = time()
    ix = df.crosses
    tmp = df.loc[ix]
    df["geometry"] = df.flowline
    # use intersection to cut flowlines by waterbodies.  Note: this may produce
    # nonlinear (e.g., geom collection) results
    df.loc[ix, "geometry"] = pg.intersection(tmp.flowline, tmp.waterbody)
    df["length"] = pg.length(df.geometry)
    df["flength"] = pg.length(df.flowline)

    # Cut lines that are long enough and different enough from the original lines
    df["to_cut"] = False
    tmp = df.loc[df.crosses]
    keep = (tmp.crosses
            & (tmp.length >= CUT_TOLERANCE)
            & ((tmp.flength - tmp.length).abs() >= CUT_TOLERANCE))
    df.loc[keep[keep].index, "to_cut"] = True
    df["inside"] = (df.length / df.flength).clip(0, 1)
    print(
        f"Found {df.to_cut.sum():,} segments that need to be cut by flowlines in {time() - cut_start:.2f}s"
    )

    # save all that are completely contained or mostly contained.
    # They must be at least 50% in waterbody to be considered mostly contained.
    # Note: there are some that are mostly outside and we exclude those here.
    # We then update this after cutting
    contained = df.loc[df.inside >= 0.5, ["wbID", "lineID"]].copy()

    ### Cut lines
    if df.to_cut.sum():
        # only work with those to cut from here on out
        df = df.loc[df.to_cut,
                    ["lineID", "flowline", "wbID", "waterbody"]].reset_index(
                        drop=True)

        # save waterbody ids to re-evaluate intersection after cutting
        wbID = df.wbID.unique()

        # extract all intersecting interior rings for these waterbodies
        print("Extracting interior rings for intersected waterbodies")
        wb = waterbodies.loc[waterbodies.index.isin(wbID)]
        outer_index, inner_index, rings = get_interior_rings(
            wb.geometry.values.data)
        if len(outer_index):
            # find the pairs of waterbody rings and lines to add
            rings = np.asarray(rings)
            wb_with_rings = wb.index.values.take(outer_index)
            lines_in_wb = df.loc[df.wbID.isin(wb_with_rings)].lineID.unique()
            lines_in_wb = flowlines.loc[flowlines.index.isin(
                lines_in_wb)].geometry
            tree = pg.STRtree(rings)
            left, right = tree.query_bulk(lines_in_wb.values.data,
                                          predicate="intersects")

            tmp = pd.DataFrame({
                "lineID": lines_in_wb.index.values.take(left),
                "flowline": lines_in_wb.values.data.take(left),
                "wbID": wb_with_rings.take(right),
                "waterbody": rings.take(right),
            })
            # pd.concat instead of DataFrame.append, which was removed in
            # pandas 2.0; the result is the same on older versions
            df = pd.concat([df, tmp], ignore_index=True, sort=False)

        # extract the outer ring for original waterbodies
        ix = pg.get_type_id(df.waterbody.values.data) == 3
        df.loc[ix, "waterbody"] = pg.get_exterior_ring(
            df.loc[ix].waterbody.values.data)

        # Calculate all geometric intersections between the flowlines and
        # waterbody rings and drop any that are not points
        # Note: these may be multipoints where line crosses the ring of waterbody
        # multiple times.
        # We ignore any shared edges, etc that result from the intersection; those
        # aren't helpful for cutting the lines
        print("Finding cut points...")
        df["geometry"] = pg.intersection(df.flowline.values,
                                         df.waterbody.values)
        df = explode(
            explode(
                gp.GeoDataFrame(df[["geometry", "lineID", "flowline"]],
                                crs=flowlines.crs))).reset_index()
        points = (df.loc[pg.get_type_id(df.geometry.values.data) ==
                         0].set_index("lineID").geometry)

        print("cutting flowlines")
        cut_start = time()
        flowlines, joins = cut_flowlines_at_points(flowlines,
                                                   joins,
                                                   points,
                                                   next_lineID=next_lineID)
        new_flowlines = flowlines.loc[flowlines.new]

        print(
            f"{len(new_flowlines):,} new flowlines created in {time() - cut_start:,.2f}s"
        )

        if len(new_flowlines):
            # remove any flowlines no longer present (they were replaced by cut lines)
            contained = contained.loc[contained.lineID.isin(
                flowlines.loc[~flowlines.new].index.unique())].copy()

            contained_start = time()
            # recalculate overlaps with waterbodies
            print("Recalculating overlaps with waterbodies")
            wb = waterbodies.loc[wbID]
            tree = pg.STRtree(new_flowlines.geometry.values.data)
            left, right = tree.query_bulk(wb.geometry.values.data,
                                          predicate="intersects")

            df = pd.DataFrame({
                "lineID":
                new_flowlines.index.take(right),
                "flowline":
                new_flowlines.geometry.values.data.take(right),
                "wbID":
                wb.index.take(left),
                "waterbody":
                wb.geometry.values.data.take(left),
            })

            pg.prepare(df.waterbody.values)
            df["contains"] = pg.contains(df.waterbody.values,
                                         df.flowline.values)
            print(
                f"Identified {df.contains.sum():,} more flowlines contained by waterbodies in {time() - contained_start:.2f}s"
            )

            # some aren't perfectly contained, add those that are mostly in
            df["crosses"] = False
            ix = ~df.contains
            tmp = df.loc[ix]
            df.loc[ix, "crosses"] = pg.crosses(tmp.waterbody, tmp.flowline)

            # discard any that only touch (don't cross or are contained)
            df = df.loc[df.contains | df.crosses].copy()

            # rebuild the mask on the filtered frame (as in the first pass)
            # rather than reusing the one computed before the filter, which
            # only lined up with these rows through index alignment
            ix = df.crosses
            tmp = df.loc[ix]
            df["geometry"] = df.flowline
            # use intersection to cut flowlines by waterbodies.  Note: this may produce
            # nonlinear (e.g., geom collection) results
            df.loc[ix, "geometry"] = pg.intersection(tmp.flowline,
                                                     tmp.waterbody)
            df["length"] = pg.length(df.geometry)
            df["flength"] = pg.length(df.flowline)

            # keep any that are contained or >= 50% in waterbody
            # (pd.concat instead of the removed DataFrame.append)
            contained = pd.concat(
                [
                    contained,
                    df.loc[df.contains | ((df.length / df.flength) >= 0.5),
                           ["wbID", "lineID"]],
                ],
                ignore_index=True,
            )

        flowlines = flowlines.drop(columns=["new"])

    # make sure that updated joins are unique
    joins = joins.drop_duplicates()

    # make sure that wb_joins is unique
    contained = contained.groupby(by=["lineID", "wbID"]).first().reset_index()

    # set flag for flowlines in waterbodies
    flowlines["waterbody"] = flowlines.index.isin(contained.lineID.unique())

    print("Done evaluating waterbody / flowline overlap in {:.2f}s".format(
        time() - start))

    return flowlines, joins, contained