def cut_lines_at_multipoints(lines, points, tolerance=1e-6):
    """Cut every line using the MultiPoint stored at the same position.

    Array-oriented wrapper around cut_line_at_points: the MultiPoint at
    position ``i`` of ``points`` is split into its individual parts, and those
    parts are handed to cut_line_at_points together with ``lines[i]``.
    Points will be projected onto the line; those interior to the line will be
    used to cut the line in to new segments.

    Parameters
    ----------
    lines : ndarray of pygeos Linestrings
    points : ndarray of pygeos MultiPoints
        indexed in parallel with ``lines``
    tolerance : float, optional (default: 1e-6)
        minimum distance from endpoints to consider the points interior
        to the line.

    Returns
    -------
    ndarray of MultiLineStrings (or LineString, if unchanged)
    """
    count = len(lines)
    results = np.empty(shape=count, dtype="object")

    for position in range(count):
        line = lines[position]
        cut_at = pg.get_parts(points[position])
        results[position] = cut_line_at_points(line, cut_at, tolerance=tolerance)

    return results
def test_get_parts(geom):
    """get_parts must agree with extracting each part through get_geometry."""
    part_count = pygeos.get_num_geometries(geom)
    via_get_geometry = pygeos.get_geometry(geom, range(0, part_count))

    via_get_parts = pygeos.get_parts(geom)

    assert len(via_get_parts) == part_count
    assert np.all(pygeos.equals_exact(via_get_parts, via_get_geometry))
def transform_geometry(self, geom, rs, max_points=5):
    """Transforms a geometry embedding new points.

    In case geom is (multi)line or (multi)polygon, it adds points collinear
    to their neighbours, so that an equivalent geometry is generated. The
    number of extra points depends on the number of vertices in the geometry:
    ``min(max_points, ceil(n_vertices / 6))``.

    For multi-part geometries a single randomly chosen part is transformed
    (recursively, with the same ``max_points``) and the remaining parts are
    kept as they are.

    Arguments:
        geom (pygeos.Geometry): Geometry
        rs (numpy.RandomState): Random State
        max_points (int): Maximum value of extra points.

    Returns:
        (pygeos.Geometry)

    Raises:
        ValueError: When geometry type is not supported.
    """
    type_ = pg.get_type_id(geom)
    if type_ in (1, 3):  # LINESTRING or POLYGON
        vertices = pg.get_coordinates(geom)
        size = min(max_points, math.ceil(len(vertices) / 6))
        # each id selects the segment (vertices[idx - 1], vertices[idx])
        vert_ids = rs.randint(1, len(vertices), size)
        vert_ids.sort()

        new = []
        for idx in vert_ids:
            xa, ya = vertices[idx - 1]
            xb, yb = vertices[idx]
            if xa == xb:
                # vertical segment: the line equation below would divide by 0
                x = xa
                y = self._random_float(rs, ya, yb)
            else:
                x = self._random_float(rs, xa, xb)
                y = (yb - ya) * (x - xa) / (xb - xa) + ya
            x = _round(x, [xa, xb])
            y = _round(y, [ya, yb])
            new.append((idx, [x, y]))

        # rebuild the coordinate list, inserting every new point right
        # before the vertex that ends its segment
        offset = 0
        extended = []
        for idx, entry in new:
            extended.extend(vertices[offset:idx])
            extended.append(entry)
            offset = idx
        extended.extend(vertices[offset:])
        extended = np.array(extended)

        # NOTE(review): for polygons every coordinate returned by
        # get_coordinates is passed to pg.polygons as one ring - confirm the
        # intended behavior for polygons with interior rings.
        result = pg.linestrings(extended) if type_ == 1 else pg.polygons(extended)
    elif type_ in (5, 6):  # MULTILINESTRING or MULTIPOLYGON
        parts = pg.get_parts(geom)
        part_idx = rs.randint(0, len(parts))
        # forward max_points so the caller's limit also applies to the part
        parts[part_idx] = self.transform_geometry(parts[part_idx], rs, max_points)
        result = pg.multilinestrings(parts) if type_ == 5 else pg.multipolygons(parts)
    else:
        raise ValueError(
            'geom should be linestring, polygon, multilinestring, or multipolygon.'
        )

    return result
def test_get_parts_geometry_collection_multi():
    """A collection of Multi* geometries needs two get_parts passes.

    The first pass returns the individual Multi* geometry objects held by the
    collection; the second pass returns the singular geometry objects inside
    each of those.
    """
    collection = pygeos.geometrycollections(
        [multi_point, multi_line_string, multi_polygon]
    )
    n_members = pygeos.get_num_geometries(collection)
    members = pygeos.get_geometry(collection, range(0, n_members))

    first_pass = pygeos.get_parts(collection)
    assert len(first_pass) == n_members
    assert np.all(pygeos.equals_exact(first_pass, members))

    singles = [
        pygeos.get_geometry(member, k)
        for member in np.asarray(members)
        for k in range(0, pygeos.get_num_geometries(member))
    ]

    second_pass = pygeos.get_parts(first_pass)
    assert len(second_pass) == len(singles)
    assert np.all(pygeos.equals_exact(second_pass, singles))
def test_get_parts_array():
    """get_parts on a mixed array flattens the parts of every element in order."""
    # note: this also verifies that None is handled correctly
    # in the mix; internally it returns -1 for count of geometries
    mixed = np.array([None, empty_line_string, multi_point, point, multi_polygon])

    flattened = [
        pygeos.get_geometry(item, k)
        for item in mixed
        for k in range(0, pygeos.get_num_geometries(item))
    ]

    parts = pygeos.get_parts(mixed)
    assert len(parts) == len(flattened)
    assert np.all(pygeos.equals_exact(parts, flattened))
def test_get_parts_return_index():
    """With return_index=True each part is paired with the position of its source."""
    sources = np.array([multi_point, point, multi_polygon])

    # build (source position, part) pairs, then split them into two lists
    pairs = [
        (position, pygeos.get_geometry(source, k))
        for position, source in enumerate(sources)
        for k in range(0, pygeos.get_num_geometries(source))
    ]
    expected_index = [position for position, _ in pairs]
    expected_parts = [part for _, part in pairs]

    parts, index = pygeos.get_parts(sources, return_index=True)
    assert len(parts) == len(expected_parts)
    assert np.all(pygeos.equals_exact(parts, expected_parts))
    assert np.array_equal(index, expected_index)
def explode(df, add_position=False):
    """Split multi* geometries of a GeoDataFrame into one row per single part.

    The non-geometry columns of each source row are repeated for every part
    taken from that row.

    Note: GeometryCollections of Multi* may need to be exploded a second time.

    Parameters
    ----------
    df : GeoDataFrame
    add_position : bool, optional (default: False)
        if True, adds a "position" column holding the inner index of each part
        within its original geometry

    Returns
    -------
    GeoDataFrame
    """
    attribute_cols = [col for col in df.columns if col != "geometry"]
    crs = df.crs

    parts, source_row = pg.get_parts(df.geometry.values.data, return_index=True)
    attributes = df[attribute_cols].take(source_row)

    if not add_position:
        return gp.GeoDataFrame(attributes, geometry=parts, crs=crs)

    if len(source_row):
        # True wherever a new run of identical source rows begins
        starts = np.r_[True, source_row[:-1] != source_row[1:]]
        # number of parts in each run
        run_lengths = np.diff(np.r_[np.nonzero(starts)[0], len(source_row)])
        # running count that only advances on non-start elements ...
        position = (~starts).cumsum()
        # ... rebased so that every run counts up from 0
        position -= np.repeat(position[starts], run_lengths)
    else:
        position = []

    attributes["position"] = position
    return gp.GeoDataFrame(attributes, geometry=parts, crs=crs)
def explode(df):
    """Split multi* geometries of a GeoDataFrame into one row per single part.

    The non-geometry columns of each source row are repeated for every part
    taken from that row; the CRS of the input is carried over.

    Note: GeometryCollections of Multi* may need to be exploded a second time.

    Parameters
    ----------
    df : GeoDataFrame

    Returns
    -------
    GeoDataFrame
    """
    attribute_cols = [col for col in df.columns if col != "geometry"]
    parts, source_row = pg.get_parts(df.geometry.values.data, return_index=True)
    attributes = df[attribute_cols].take(source_row)
    return gp.GeoDataFrame(attributes, geometry=parts, crs=df.crs)
def get_angles(collection, return_indices=False):
    """
    Get the angles pertaining to each vertex of a set of polygons.
    This assumes the input are polygons.

    Arguments
    ---------
    collection : pygeos geometry array (or anything accepted by _cast)
        array of polygons/multipolygons
    return_indices : bool (Default: False)
        whether to return the indices relating each geometry to a polygon

    Returns
    -------
    angles between triples of points on each geometry, as well as the indices
    relating angles to input geometries (if requested).

    See the Notes for information on the shape of angles and indices.

    Notes
    -------
    If a geometry has n coordinates and k parts, the array will be n - k.
    If each geometry has n_i coordinates, then let N be a vector storing
    those counts (computed, for example, using
    pygeos.get_num_coordinates(ga)). Likewise, let K be a vector storing the
    number of parts each geometry has, k_i (computed, for example, using
    pygeos.get_num_geometries(ga)), where ga is the cast input.

    Then, the output is of shape (N - K).sum()
    """
    ga = _cast(collection)
    exploded = pygeos.get_parts(ga)
    coords = pygeos.get_coordinates(exploded)
    n_coords_per_geom = pygeos.get_num_coordinates(exploded)
    angles = numpy.asarray(_get_angles(coords, n_coords_per_geom))

    if not return_indices:
        return angles

    # one index entry per angle: geometry i contributes n_i - k_i angles
    n_angles_per_geom = pygeos.get_num_coordinates(ga) - pygeos.get_num_geometries(ga)
    return angles, numpy.repeat(numpy.arange(len(ga)), n_angles_per_geom)
def setup(self):
    """Create the random geometries and STRtree indexes stored on self."""
    # create irregular polygons by merging overlapping point buffers
    seed_points = pygeos.points(np.random.random((2000, 2)) * 500)
    merged = pygeos.union_all(pygeos.buffer(seed_points, 5))
    self.polygons = pygeos.get_parts(merged)
    self.tree = pygeos.STRtree(self.polygons)
    # initialize the tree by making a tiny query first
    self.tree.query(pygeos.points(0, 0))

    # create points that extend beyond the domain of the above polygons to
    # ensure some don't overlap
    query_coords = (np.random.random((2000, 2)) * 750) - 125
    self.points = pygeos.points(query_coords)

    indexed_points = pygeos.points(np.random.random((2000, 2)) * 750)
    self.point_tree = pygeos.STRtree(indexed_points)
    self.point_tree.query(pygeos.points(0, 0))

    # create points on a grid for testing equidistant nearest neighbors
    # creates 2025 points
    grid_coords = np.mgrid[:45, :45].T.reshape(-1, 2)
    self.grid_point_tree = pygeos.STRtree(pygeos.points(grid_coords))
    self.grid_points = pygeos.points(grid_coords + 0.5)
def second_areal_moment(collection):
    r"""
    Using equation listed on en.wikipedia.org/Second_Moment_of_area, the second
    moment of area is actually the cross-moment of area between the X and Y
    dimensions:

    I_xy = (1/24)\sum_{i=1}^{N} (x_iy_{i+1} - x_{i+1}y_i)
           (x_iy_{i+1} + 2*x_iy_i + 2*x_{i+1}y_{i+1} + x_{i+1}y_i)

    where x_i, y_i is the current point and x_{i+1}, y_{i+1} is the next point,
    and where x_{n+1} = x_1, y_{n+1} = y_1.

    This relation is known as the:
    - second moment of area
    - moment of inertia of plane area
    - area moment of inertia
    - second area moment

    and is *not* the mass moment of inertia, a property of the distribution of
    mass around a shape.

    Parameters
    ----------
    collection : input passed through _cast to obtain a pygeos geometry array

    Returns
    -------
    numpy array with one value per input geometry
    """
    ga = _cast(collection)
    result = numpy.zeros(len(ga))
    n_holes_per_geom = pygeos.get_num_interior_rings(ga)

    for i, geometry in enumerate(ga):
        # subtract the contribution of every hole of *this* geometry
        for hole_ix in range(n_holes_per_geom[i]):
            hole = pygeos.get_coordinates(
                pygeos.get_interior_ring(geometry, hole_ix)
            )
            result[i] -= _second_moa_ring(hole)
        # NOTE(review): each part is passed with all of its coordinates -
        # confirm how interior rings of multi-part inputs should be handled.
        for part in pygeos.get_parts(geometry):
            result[i] += _second_moa_ring(pygeos.get_coordinates(part))

    # must divide everything by 24 and flip if polygon is clockwise.
    # is_ccw cast to int indexes [-1, 1]: False -> -1, True -> 1
    signflip = numpy.array([-1, 1])[pygeos.is_ccw(ga).astype(int)]
    return result * (1 / 24) * signflip
def explode(self):
    """
    Explode multi-part geometries into multiple single geometries.

    Single rows can become multiple rows.
    This is analogous to PostGIS's ST_Dump(). The 'path' index is the
    second level of the returned MultiIndex

    Returns
    ------
    A GeoSeries with a MultiIndex. The levels of the MultiIndex are the
    original index and a zero-based integer index that counts the
    number of single geometries within a multi-part geometry.

    Examples
    --------
    >>> from shapely.geometry import MultiPoint
    >>> s = geopandas.GeoSeries(
    ...     [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)])]
    ... )
    >>> s
    0        MULTIPOINT (0.00000 0.00000, 1.00000 1.00000)
    1    MULTIPOINT (2.00000 2.00000, 3.00000 3.00000, ...
    dtype: geometry

    >>> s.explode()
    0  0    POINT (0.00000 0.00000)
       1    POINT (1.00000 1.00000)
    1  0    POINT (2.00000 2.00000)
       1    POINT (3.00000 3.00000)
       2    POINT (4.00000 4.00000)
    dtype: geometry

    See also
    --------
    GeoDataFrame.explode

    """
    if compat.USE_PYGEOS and compat.PYGEOS_GE_09:
        import pygeos  # noqa

        parts, source_pos = pygeos.get_parts(self.values.data, return_index=True)

        if len(source_pos):
            # Position of each part inside its source geometry:
            # mark where each run of equal source positions starts, measure
            # the run lengths, then rebase a cumulative count to 0 per run
            starts = np.r_[True, source_pos[:-1] != source_pos[1:]]
            run_lengths = np.diff(np.r_[np.nonzero(starts)[0], len(source_pos)])
            part_pos = (~starts).cumsum()
            part_pos -= np.repeat(part_pos[starts], run_lengths)
        else:
            part_pos = []

        # pair the original index label of every part with its inner position
        labels = zip(self.index.take(source_pos), part_pos)
    else:
        # PyGEOS is not available or version <= 0.8
        labels = []
        parts = []
        for label, geom in self.geometry.iteritems():
            is_multi = geom.type.startswith("Multi") or geom.type == "GeometryCollection"
            children = geom.geoms if is_multi else [geom]
            labels.extend((label, pos) for pos in range(len(children)))
            parts.extend(children)

    # if self.index is a MultiIndex then labels holds nested tuples; flatten
    if isinstance(self.index, MultiIndex):
        labels = [tuple(outer) + (inner,) for outer, inner in labels]

    exploded_index = MultiIndex.from_tuples(labels, names=self.index.names + [None])
    return GeoSeries(parts, index=exploded_index, crs=self.crs).__finalize__(self)
def test_get_parts_invalid_geometry(geom):
    """get_parts must raise TypeError for inputs that are not geometries."""
    expected_message = "One of the arguments is of incorrect type."
    with pytest.raises(TypeError, match=expected_message):
        pygeos.get_parts(geom)
def test_get_parts_None(geom):
    """get_parts must return no parts for the given input."""
    parts = pygeos.get_parts(geom)
    assert len(parts) == 0
def test_get_parts_non_multi(geom):
    """Non-multipart geometries should be returned identical to inputs"""
    original = np.asarray(geom)
    parts = pygeos.get_parts(geom)
    assert np.all(pygeos.equals_exact(original, parts))
def create_voronoi(
        points: Sequence[pygeos.Geometry]) -> Sequence[pygeos.Geometry]:
    """Build Voronoi cells for ``points``, clipped to their buffered convex hull.

    The points are combined into one MultiPoint, its Voronoi polygons are split
    into individual cells, and each cell is intersected with the convex hull of
    the points buffered by 2.
    """
    sites = pygeos.multipoints(points)

    diagram = pygeos.voronoi_polygons(sites)
    cells = pygeos.get_parts(diagram)

    hull = pygeos.convex_hull(sites)
    clip_area = pygeos.buffer(hull, 2)

    return pygeos.intersection(clip_area, cells)
def union_or_combine(geometries, grid_size=None, op="union"):
    """Merge polygons into one MultiPolygon, unioning only where they overlap.

    First does a check for overlap of geometries according to STRtree
    intersects. Groups of intersecting geometries are merged individually
    (union_all, or coverage_union_all when op is 'coverage_union'); geometries
    that intersect nothing else are passed through as they are. If nothing
    overlaps, or only one polygon is present, the inputs are returned in a
    MultiPolygon without any union.

    If coverage_union op is provided, geometries must be polygons and
    topologically related or this will produce bad output or fail outright.
    See docs for coverage_union in GEOS.

    A message is printed (no exception is raised) when any input is not a
    polygon (type id 3).

    Parameters
    ----------
    geometries : ndarray of single part polygons
    grid_size : [type], optional (default: None)
        provided to union_all; otherwise no effect
    op : str, one of {'union', 'coverage_union'}

    Returns
    -------
    MultiPolygon
    """
    all_polygons = (pg.get_type_id(geometries) == 3).all()
    if not all_polygons:
        print("Inputs to union or combine must be single-part geometries")

    if len(geometries) == 1:
        return pg.multipolygons(geometries)

    tree = pg.STRtree(geometries)
    left, right = tree.query_bulk(geometries, predicate="intersects")

    # every geometry intersects itself; those pairs carry no information
    not_self = left != right
    left, right = left[not_self], right[not_self]

    if len(left) == 0:
        # nothing touches anything else, just combine parts
        return pg.multipolygons(geometries)

    # positions that take part in at least one intersection vs. the rest
    touching = np.sort(np.unique(np.concatenate([left, right])))
    isolated = np.setdiff1d(np.arange(len(geometries), dtype="uint"), touching)

    # choose the merge operation once instead of testing op for each group
    if op == "coverage_union":
        def merge(members):
            return pg.coverage_union_all(members)
    else:
        def merge(members):
            return pg.union_all(members, grid_size=grid_size)

    parts = []
    for group in find_adjacent_groups(left, right):
        merged = merge(geometries[list(group)])
        parts.extend(pg.get_parts(merged))

    parts.extend(pg.get_parts(geometries[isolated]))

    return pg.multipolygons(parts)
# assign altered if any resulting polygons intersect altered polygons tree = pg.STRtree(df.geometry.values.data) left, right = tree.query_bulk(altered.geometry.values.data) df["altered"] = False df.loc[np.unique(right), "altered"] = True # cut at breaks from NHD nhd_lines_filename = nhd_dir / huc2 / "nhd_lines.feather" if nhd_lines_filename.exists(): print("Checking for breaks between adjacent waterbodies") nhd_lines = gp.read_feather(nhd_lines_filename).geometry.values.data breaks = find_nhd_waterbody_breaks(nhd.geometry.values.data, nhd_lines) if breaks is not None: breaks = pg.get_parts(breaks) write_geoms(breaks, f"/tmp/{huc2}breaks.gpkg", crs=nhd.crs) print( f"Cutting NHD waterbodies by {len(breaks):,} breaks at dams to prevent dissolving together" ) # find all pairs of waterbody and breaks, aggregate # breaks by waterbody, then calculate difference tree = pg.STRtree(df.geometry.values.data) left, right = tree.query_bulk(breaks, predicate="intersects") pairs = pd.DataFrame( {"break_geometry": breaks.take(left)}, index=df.index.take(right) ) grouped = pairs.groupby(level=0).break_geometry.apply( lambda g: pg.multipolygons(g.values.data)
def explode(self, ignore_index=False, index_parts=None):
    """
    Explode multi-part geometries into multiple single geometries.

    Single rows can become multiple rows.
    This is analogous to PostGIS's ST_Dump(). The 'path' index is the
    second level of the returned MultiIndex

    Parameters
    ----------
    ignore_index : bool, default False
        If True, the resulting index will be labelled 0, 1, …, n - 1,
        ignoring `index_parts`.
    index_parts : boolean, default True
        If True, the resulting index will be a multi-index (original
        index with an additional level indicating the multiple
        geometries: a new zero-based index for each single part geometry
        per multi-part geometry). When left as None (and `ignore_index` is
        False) a FutureWarning is emitted and True is used.

    Returns
    -------
    A GeoSeries with a MultiIndex. The levels of the MultiIndex are the
    original index and a zero-based integer index that counts the
    number of single geometries within a multi-part geometry.

    Examples
    --------
    >>> from shapely.geometry import MultiPoint
    >>> s = geopandas.GeoSeries(
    ...     [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)])]
    ... )
    >>> s
    0        MULTIPOINT (0.00000 0.00000, 1.00000 1.00000)
    1    MULTIPOINT (2.00000 2.00000, 3.00000 3.00000, ...
    dtype: geometry

    >>> s.explode(index_parts=True)
    0  0    POINT (0.00000 0.00000)
       1    POINT (1.00000 1.00000)
    1  0    POINT (2.00000 2.00000)
       1    POINT (3.00000 3.00000)
       2    POINT (4.00000 4.00000)
    dtype: geometry

    See also
    --------
    GeoDataFrame.explode

    """
    if index_parts is None and not ignore_index:
        # warn here (not in a helper) so stacklevel=2 points at the caller
        warnings.warn(
            "Currently, index_parts defaults to True, but in the future, "
            "it will default to False to be consistent with Pandas. "
            "Use `index_parts=True` to keep the current behavior and True/False "
            "to silence the warning.",
            FutureWarning,
            stacklevel=2,
        )
        index_parts = True

    if compat.USE_PYGEOS and compat.PYGEOS_GE_09:
        import pygeos  # noqa

        parts, source_pos = pygeos.get_parts(self.values.data, return_index=True)

        if len(source_pos):
            # Position of each part inside its source geometry:
            # mark where each run of equal source positions starts, measure
            # the run lengths, then rebase a cumulative count to 0 per run
            starts = np.r_[True, source_pos[:-1] != source_pos[1:]]
            run_lengths = np.diff(np.r_[np.nonzero(starts)[0], len(source_pos)])
            part_pos = (~starts).cumsum()
            part_pos -= np.repeat(part_pos[starts], run_lengths)
        else:
            part_pos = []

        # original index labels, repeated once per extracted part
        source_labels = self.index.take(source_pos)

        if ignore_index:
            new_index = range(len(parts))
        elif index_parts:
            # existing level(s) of the original index plus the inner position
            levels = [
                source_labels.get_level_values(level)
                for level in range(source_labels.nlevels)
            ]
            levels.append(part_pos)
            new_index = MultiIndex.from_arrays(
                levels, names=self.index.names + [None]
            )
        else:
            new_index = source_labels

        return GeoSeries(parts, index=new_index, crs=self.crs).__finalize__(self)

    # else PyGEOS is not available or version <= 0.8
    labels = []
    parts = []
    for label, geom in self.geometry.iteritems():
        is_multi = geom.type.startswith("Multi") or geom.type == "GeometryCollection"
        children = geom.geoms if is_multi else [geom]
        labels.extend((label, pos) for pos in range(len(children)))
        parts.extend(children)

    if ignore_index:
        new_index = range(len(parts))
    elif index_parts:
        # if self.index is a MultiIndex then labels holds nested tuples
        if isinstance(self.index, MultiIndex):
            labels = [tuple(outer) + (inner,) for outer, inner in labels]
        new_index = MultiIndex.from_tuples(labels, names=self.index.names + [None])
    else:
        new_index = [label for label, _ in labels]

    return GeoSeries(parts, index=new_index, crs=self.crs).__finalize__(self)
def test_get_parts_invalid_dimensions(geom):
    """Only 1D inputs are supported"""
    expected_message = "Array should be one dimensional"
    with pytest.raises(ValueError, match=expected_message):
        pygeos.get_parts(geom)
def time_get_parts(self):
    """Cython implementation of get_parts, run over self.multipolygons"""
    geometries = self.multipolygons
    pygeos.get_parts(geometries)
def find_nhd_waterbody_breaks(geometries, nhd_lines):
    """Find the breaks (e.g., at dams) that divide adjacent waterbodies.

    Some large waterbody complexes are divided by dams; these breaks need to be
    preserved. This is done by finding the shared edges between adjacent
    waterbodies that fall near NHD lines (which include dams) and buffering
    them by 10 meters (arbitrary, from trial and error).

    This should be skipped if nhd_lines is empty.

    Parameters
    ----------
    geometries : ndarray of pygeos geometries
        waterbodies; presumably polygons, since their exterior rings are used
    nhd_lines : ndarray of pygeos geometries
        NHD lines; buffered by 0.1 and dissolved before use

    Returns
    -------
    Geometry produced by union_all (MultiPolygon per the original author)
    containing all buffered lines between waterbodies that are near NHD lines,
    plus the buffered NHD lines that intersect waterbodies. Returns None if
    neither set has any members.
    """
    # buffer the nhd lines slightly and dissolve them into individual parts
    buffered_nhd = pg.get_parts(pg.union_all(pg.buffer(nhd_lines, 0.1)))

    # buffered nhd lines that intersect waterbodies are always returned
    tree = pg.STRtree(geometries)
    nhd_ix, _ = tree.query_bulk(buffered_nhd, predicate="intersects")
    keep_nhd_lines = buffered_nhd[np.unique(nhd_ix)]

    # find waterbodies whose outlines (holes dropped) intersect each other
    boundaries = pg.polygons(pg.get_exterior_ring(geometries))
    tree = pg.STRtree(boundaries)
    left, right = tree.query_bulk(boundaries, predicate="intersects")

    # drop self intersections
    not_self = left != right
    left, right = left[not_self], right[not_self]

    # order each pair as (smaller, larger) so symmetric pairs collapse
    pairs = (
        pd.DataFrame(
            {"left": np.minimum(left, right), "right": np.maximum(left, right)}
        )
        .groupby(["left", "right"])
        .first()
        .reset_index()
    )

    # geometric intersection of each adjacent pair
    shared = pg.intersection(
        geometries.take(pairs.left.values), geometries.take(pairs.right.values)
    )

    # results may be nested geometry collections; unwrap three levels
    parts = pg.get_parts(pg.get_parts(pg.get_parts(shared)))

    # keep only non-empty lines (1) or polygons (3)
    type_id = pg.get_type_id(parts)
    is_line_or_poly = (type_id == 1) | (type_id == 3)
    parts = parts[is_line_or_poly & (~pg.is_empty(parts))].copy()

    # buffer and merge
    split_lines = pg.get_parts(pg.union_all(pg.buffer(parts, 10)))

    # keep the ones that are within 100m of nhd lines
    buffered_nhd = pg.get_parts(buffered_nhd)
    tree = pg.STRtree(buffered_nhd)
    split_ix, _ = tree.nearest_all(split_lines, max_distance=100)
    split_lines = split_lines[np.unique(split_ix)]

    if len(split_lines) or len(keep_nhd_lines):
        return pg.union_all(np.append(split_lines, keep_nhd_lines))

    return None