def _get_matches(self, pts, S1_df, S2_df):

    def apply_matches(pt):
        # select the S1/S2 rows whose tree hits correspond to this point
        S1_slice = S1_df.iloc[Q_S1.loc[Q_S1['pt_idx'] == pt.name, 'S1_idx'], :]
        S2_slice = S2_df.iloc[Q_S2.loc[Q_S2['pt_idx'] == pt.name, 'S2_idx'], :]

        if len(S2_slice) == 0 or len(S1_slice) == 0:
            return []

        # for each S2 record, find S1 records acquired within +/- day_offset days
        S2_slice['matches'] = S2_slice.apply(
            lambda el: S1_slice.loc[
                (S1_slice['beginposition'] > (el['beginposition'] - timedelta(days=self.CONFIG['day_offset'])))
                & (S1_slice['beginposition'] < (el['beginposition'] + timedelta(days=self.CONFIG['day_offset']))),
                :,
            ].index.values,
            axis=1,
        )

        S2_slice = S2_slice.explode('matches').reset_index()
        S2_slice = S2_slice.loc[~S2_slice['matches'].isna()]

        return S2_slice[['level1cpdiidentifier', 'matches']].values.tolist()

    # S1 footprints are WKT strings; S2 intersection geometries are shapely objects
    S1_tree = pygeos.STRtree([pygeos.io.from_wkt(subshp) for subshp in S1_df['footprint'].values.tolist()])
    S2_tree = pygeos.STRtree([pygeos.io.from_shapely(subshp) for subshp in S2_df['intersection_geom'].values.tolist()])

    Q_S1 = pd.DataFrame(
        S1_tree.query_bulk(
            [pygeos.io.from_shapely(pp) for pp in pts['bbox_wgs'].values.tolist()],
            predicate='within',
        ).T,
        columns=['pt_idx', 'S1_idx'],
    )
    Q_S2 = pd.DataFrame(
        S2_tree.query_bulk(
            [pygeos.io.from_shapely(pp) for pp in pts['bbox_wgs'].values.tolist()],
            predicate='within',
        ).T,
        columns=['pt_idx', 'S2_idx'],
    )

    S2_df['beginposition'] = pd.to_datetime(S2_df['beginposition'])
    S1_df['beginposition'] = pd.to_datetime(S1_df['beginposition'])
    S1_df['endposition'] = pd.to_datetime(S1_df['endposition'])

    pts['matches'] = pts.apply(apply_matches, axis=1)

    return pts[pts['matches'].str.len() > 0]
def prepare_possible_OD(gridDF, nodes, tolerance=1):
    """Returns a list of tuples, with the first value the node ID to consider,
    and the second value the total population associated with this node.
    The tolerance is the size of the bounding box to search for nodes within.

    Args:
        gridDF (pandas.DataFrame): A dataframe with the grid centroids and their population
        nodes (pandas.DataFrame): A dataframe of the road network nodes
        tolerance (float, optional): size of the bounding box. Defaults to 1.

    Returns:
        final_possible_pop (list): a list of tuples representing the nodes and their population
    """
    sindex = pyg.STRtree(nodes['geometry'])

    pos_OD_nodes = []
    pos_tot_pop = []
    for i in gridDF.itertuples():
        ID = nearest(i.geometry, nodes, sindex, tolerance)
        # if a node was found
        if ID > -1:
            pos_OD_nodes.append(ID)
            pos_tot_pop.append(i.tot_pop)

    nodes = np.array([pos_OD_nodes])
    node_unique = np.unique(nodes)
    count = np.array([pos_tot_pop])

    # list comprehension to add total populations of recurring nodes
    final_possible_pop = [(i, count[nodes == i].sum()) for i in node_unique]

    return final_possible_pop
def poly_tree():
    # create buffers so that midpoint between two buffers intersects
    # each buffer.  NOTE: add EPS to help mitigate rounding errors at midpoint.
    geoms = pygeos.buffer(
        pygeos.points(np.arange(10), np.arange(10)), HALF_UNIT_DIAG + EPS, quadsegs=32
    )
    yield pygeos.STRtree(geoms)
def __init__(self, iso_geographies=None, conf=None):
    if not conf:
        self.CONFIG = yaml.load(
            open(os.path.join(os.getcwd(), 'conf', 'DATA_CONFIG.yaml'), 'r'),
            Loader=yaml.SafeLoader,
        )
    else:
        self.CONFIG = yaml.load(open(conf, 'r'), Loader=yaml.SafeLoader)

    self.sentinelsat_auth = json.load(open(self.CONFIG['scihub_auth'], 'r'))['scihub']

    countries = gpd.read_file(os.path.join(self.CONFIG['NE_ROOT'], 'ne_10m_countries.gpkg'))
    countries = countries[~countries['geometry'].isna()]
    if iso_geographies:
        countries = countries[countries['ISO_A2'].isin(iso_geographies)]

    self.tree = pygeos.STRtree([
        pygeos.io.from_shapely(subshp)
        for subshp in list(countries['geometry'].unary_union)
    ])

    self.S2_tiles = gpd.read_file(os.path.join(self.CONFIG['NE_ROOT'], 'S2_utm.gpkg'))

    # get datastrip_ids after Nov-2019 from GCP buckets
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = self.CONFIG['gcp_credentials_path']
    self.client = storage.Client()
    self.bucketname = 'gcp-public-data-sentinel-2'
def test_flush_geometries():
    arr = pygeos.points(np.arange(10), np.arange(10))
    tree = pygeos.STRtree(arr)
    # dereference the geometries
    arr[:] = None
    # the tree keeps its own references, so this should not segfault
    tree.query(point)
def mp_sjoin_dissolve(df_left, df_right, left_geom_column, right_geom_column, N_workers, logger, include_geometry=False):

    ### get and query tree using pygeos
    logger.info('Getting STRtree and querying')
    tree = pygeos.STRtree([pygeos.io.from_wkt(el) for el in df_right[right_geom_column].values])
    Q = tree.query_bulk([pygeos.io.from_wkt(el) for el in df_left[left_geom_column].values])

    logger.info(f'Got tree with {Q.shape[1]} candidate pairs, now verifying intersections.')

    logger.info('Assigning SharedMemory')
    left_shm, left_spec = to_shm(df_left[['unique_id', left_geom_column]], 'left')
    right_shm, right_spec = to_shm(df_right[['unique_id', right_geom_column]], 'right')

    # do multiprocess
    logger.info('Async SJoin')
    chunk = Q.shape[1] // N_workers + 1
    args = [
        (left_spec, right_spec, include_geometry, Q.T[ii * chunk:(ii + 1) * chunk, :])
        for ii in range(N_workers)
    ]

    with mp.Pool(N_workers) as pool:
        res = pool.starmap(chunk_worker, args)

    res = [item for sublist in res for item in sublist]
    logger.info(f'Obtained {len(res)} results')

    # release shared mem
    left_shm.close()
    left_shm.unlink()
    right_shm.close()
    right_shm.unlink()

    return res
def find_contiguous_groups(geometries):
    """Find all groups of contiguous (intersecting) geometries.

    Parameters
    ----------
    geometries : ndarray of pygeos geometries

    Returns
    -------
    DataFrame indexed on the integer index of geometries, with a "group"
    column identifying the contiguous group each geometry belongs to
    """
    tree = pg.STRtree(geometries)
    left, right = tree.query_bulk(geometries, predicate="intersects")

    # drop self intersections
    ix = left != right
    left = left[ix]
    right = right[ix]

    groups = DirectedGraph.from_arrays(left, right).components()
    groups = (
        pd.DataFrame(
            {i: list(g) for i, g in enumerate(groups)}.items(),
            columns=["group", "index"],
        )
        .explode("index")
        .set_index("index")
    )

    return groups
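# Hedged usage sketch for find_contiguous_groups: the geometries below are
# illustrative assumptions (two overlapping buffers plus one isolated buffer),
# not data from the original project; pd and DirectedGraph are assumed
# importable as in the module above.
import numpy as np
import pygeos as pg

buffers = pg.buffer(pg.points(np.array([(0, 0), (1, 0), (10, 10)], dtype=float)), 1)
groups = find_contiguous_groups(buffers)
# indices 0 and 1 share a group; index 2 intersects nothing, so it is absent
print(groups)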
def spatial_join(points: Sequence[pygeos.Geometry],
                 voronoi_polygons: Sequence[pygeos.Geometry]) -> Sequence[int]:
    query_result = pygeos.STRtree(voronoi_polygons).query_bulk(
        points, predicate="intersects")
    # check that the first array is a standard range, i.e. every point
    # matched exactly one polygon, in input order
    assert (query_result[0] == range(query_result[0].size)).all()
    return query_result[1]
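# Hedged usage sketch for spatial_join: the two unit boxes stand in for real
# Voronoi cells (an assumption for illustration). Each point falls in exactly
# one cell, so the in-order assertion inside spatial_join holds.
import pygeos

cells = [pygeos.box(0, 0, 1, 1), pygeos.box(1, 0, 2, 1)]
pts = [pygeos.points(0.5, 0.5), pygeos.points(1.5, 0.5)]
print(spatial_join(pts, cells))  # -> [0 1]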
def summarize_raster_by_geometry(geometries, extract_func, outfilename,
                                 progress_label="", bounds=None, **kwargs):
    """Summarize values of input dataset by geometry and write results to
    a feather file, with one column for shape_mask and one for each raster value.

    Parameters
    ----------
    geometries : Series of pygeos geometries, indexed by HUC12 / marine block
    extract_func : function that extracts results for each geometry
    outfilename : str
    progress_label : str
    bounds : tuple of (xmin, ymin, xmax, ymax), optional
        if present, only geometries that overlap these bounds are analyzed
    **kwargs : additional keyword arguments passed to extract_func
    """
    if bounds is not None:
        # select only those areas that overlap input area
        tree = pg.STRtree(geometries)
        ix = tree.query(pg.box(*bounds))
        geometries = geometries.iloc[ix].copy()

    if not len(geometries):
        return

    index = []
    results = []
    for ix, geometry in Bar(progress_label, max=len(geometries)).iter(geometries.iteritems()):
        zone_results = extract_func(
            [to_dict(geometry)], bounds=pg.total_bounds(geometry), **kwargs
        )
        if zone_results is None:
            continue

        index.append(ix)
        results.append(zone_results)

    if not len(results):
        return

    df = pd.DataFrame(results, index=index)

    results = df[["shape_mask"]].copy()
    results.index.name = "id"

    avg_cols = [c for c in df.columns if c.endswith("_avg")]

    # each remaining column holds an array of counts per raster value;
    # expand each array into one column per value
    for col in df.columns.difference(["shape_mask"] + avg_cols):
        s = df[col].apply(pd.Series).fillna(0)
        s.columns = [f"{col}_{c}" for c in s.columns]
        results = results.join(s)

    if len(avg_cols) > 0:
        results = results.join(df[avg_cols]).round()

    results.reset_index().to_feather(outfilename)
def sjoin_points_to_poly(point_df, poly_df):
    """Perform a spatial join between left and right, then remove duplicate
    entries for right by left (e.g., feature in left overlaps multiple features
    in right).  Returns the first spatial join for each point.

    Parameters
    ----------
    point_df : GeoDataFrame
    poly_df : GeoDataFrame

    Returns
    -------
    GeoDataFrame
        all columns of left plus all non-geometry columns from right
    """
    # build the tree from the larger side and query with the smaller one
    if len(point_df) > len(poly_df):
        tree = pg.STRtree(point_df.geometry.values.data)
        poly_ix, pt_ix = tree.query_bulk(poly_df.geometry.values.data, predicate="intersects")
    else:
        tree = pg.STRtree(poly_df.geometry.values.data)
        pt_ix, poly_ix = tree.query_bulk(point_df.geometry.values.data, predicate="intersects")

    # reduce to unique poly per pt
    j = pd.DataFrame(
        {"index_right": poly_df.index.values.take(poly_ix)},
        index=point_df.index.values.take(pt_ix),
    )
    grouped = j.groupby(level=0)
    if grouped.size().max() > 1:
        print("WARNING: multiple target areas returned in spatial join for a single point")

    j = grouped.first()

    return (
        point_df.join(j.index_right, how="left")
        .join(poly_df.drop(columns=["geometry"]), on="index_right")
        .drop(columns=["index_right"])
    )
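# Hedged usage sketch for sjoin_points_to_poly: the toy GeoDataFrames are
# assumptions for illustration, and .geometry.values.data presumes a
# pygeos-enabled geopandas build, as the function itself does.
import geopandas as gp
from shapely.geometry import Point, box

polys = gp.GeoDataFrame({"name": ["a", "b"]},
                        geometry=[box(0, 0, 1, 1), box(1, 0, 2, 1)])
pts = gp.GeoDataFrame({"id": [1, 2]},
                      geometry=[Point(0.5, 0.5), Point(1.5, 0.5)])
# each point picks up the name of the polygon containing it
print(sjoin_points_to_poly(pts, polys)[["id", "name"]])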
def _overlay(a, b, return_indices=False):
    """Compute geometries from overlaying a onto b"""
    tree = pygeos.STRtree(a)
    bix, aix = tree.query_bulk(b)
    overlay = pygeos.intersection(a[aix], b[bix])

    if return_indices:
        return aix, bix, overlay
    return overlay
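# Hedged usage sketch for _overlay: the two single-element arrays are
# illustrative assumptions. Overlaying two offset 2x2 boxes yields their
# shared 1x1 square.
import numpy as np
import pygeos

a = np.array([pygeos.box(0, 0, 2, 2)])
b = np.array([pygeos.box(1, 1, 3, 3)])
print(_overlay(a, b))  # one polygon: the unit square from (1, 1) to (2, 2)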
def nearest(geom, gdf):
    """Find the element of a GeoDataFrame nearest a shapely geometry"""
    sindex = pygeos.STRtree(gdf['geometry'])
    # candidates whose bounding boxes intersect the query geometry;
    # note: min() below raises ValueError if no candidates are found
    matches_idx = sindex.query(geom)
    nearest_geom = min(
        [gdf.iloc[match_idx] for match_idx in matches_idx],
        key=lambda match: pygeos.measurement.distance(match.geometry, geom),
    )
    return nearest_geom
def sjoin_geometry(left, right, predicate="intersects", how="inner"):
    """Use pygeos to do a spatial join between 2 series or ndarrays of geometries.

    Parameters
    ----------
    left : Series or ndarray
        left geometries, will form basis of index that is returned
    right : Series or ndarray
        right geometries, their indices will be returned where they meet the predicate
    predicate : str, optional (default: "intersects")
        name of pygeos predicate function (any of the pygeos predicates should
        work: intersects, contains, within, overlaps, crosses)
    how : str, optional (default: "inner")
        one of "inner" or "left"; "right" is not supported at this time.

    Returns
    -------
    Series
        indexed on index of left, containing values of right index
    """
    if not how in ("inner", "left"):
        raise NotImplementedError("Other join types not implemented")

    if isinstance(left, pd.Series):
        left_values = left.values
        left_index = left.index
    else:
        left_values = left
        left_index = np.arange(0, len(left))

    if isinstance(right, pd.Series):
        right_values = right.values
        right_index = right.index
    else:
        right_values = right
        right_index = np.arange(0, len(right))

    tree = pg.STRtree(right_values)
    # hits are 0-based indices into right
    hits = tree.query_bulk(left_values, predicate=predicate)

    if how == "inner":
        index = left_index[hits[0]]
        values = right_index[hits[1]]

    elif how == "left":
        index = left_index.copy()
        values = np.empty(shape=index.shape)
        values.fill(np.nan)
        values[hits[0]] = right_index[hits[1]]

    return pd.Series(values, index=index, name="index_right")
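# Hedged usage sketch for sjoin_geometry: the toy series are assumptions for
# illustration. Point p1 falls in the box indexed 10; p2 intersects nothing,
# so it only appears in the "left" join (as NaN).
import pandas as pd
import pygeos as pg

left = pd.Series(pg.points([(0.5, 0.5), (9.0, 9.0)]), index=["p1", "p2"])
right = pd.Series([pg.box(0, 0, 1, 1), pg.box(1, 0, 2, 1)], index=[10, 20])
print(sjoin_geometry(left, right))               # inner: p1 -> 10
print(sjoin_geometry(left, right, how="left"))   # left: p1 -> 10.0, p2 -> NaN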
def __init__(self, geometry):
    # set empty geometries to None to maintain indexing and to avoid a
    # segfault on GEOS <= 3.6, see:
    # https://github.com/pygeos/pygeos/issues/146
    # https://github.com/pygeos/pygeos/issues/147
    non_empty = geometry.copy()
    non_empty[pygeos.is_empty(non_empty)] = None
    self._tree = pygeos.STRtree(non_empty)

    # store geometries, including empty geometries, for user access
    self.geometries = geometry.copy()
def setup(self):
    # create irregular polygons by merging overlapping point buffers
    self.polygons = pygeos.get_parts(
        pygeos.union_all(
            pygeos.buffer(pygeos.points(np.random.random((2000, 2)) * 500), 5)
        )
    )
    self.tree = pygeos.STRtree(self.polygons)
    # initialize the tree by making a tiny query first
    self.tree.query(pygeos.points(0, 0))

    # create points that extend beyond the domain of the above polygons to ensure
    # some don't overlap
    self.points = pygeos.points((np.random.random((2000, 2)) * 750) - 125)
    self.point_tree = pygeos.STRtree(pygeos.points(np.random.random((2000, 2)) * 750))
    self.point_tree.query(pygeos.points(0, 0))

    # create 2025 points on a grid for testing equidistant nearest neighbors
    grid_coords = np.mgrid[:45, :45].T.reshape(-1, 2)
    self.grid_point_tree = pygeos.STRtree(pygeos.points(grid_coords))
    self.grid_points = pygeos.points(grid_coords + 0.5)
def split_edges_at_nodes_pyg(network, tolerance=1e-9):
    """Split network edges where they intersect node geometries
    """
    # initiate the spatial index up front, so we don't have to rebuild it every time
    sindex = pygeos.STRtree(network.nodes['geometry'])

    grab_all_edges = []
    for edge in tqdm(network.edges.itertuples(index=False), desc="split", total=len(network.edges)):

        hits = nodes_intersecting_pyg(edge.geometry, network.nodes['geometry'], sindex, tolerance=tolerance)

        if len(hits) < 3:
            grab_all_edges.append([[edge.osm_id], [edge.geometry], [edge.infra_type]])
            continue

        # get points and geometry as list of coordinates
        split_points = pygeos.coordinates.get_coordinates(
            pygeos.snap(hits, edge.geometry, tolerance=tolerance))
        coor_geom = pygeos.coordinates.get_coordinates(edge.geometry)

        # potentially split to multiple edges
        split_locs = np.argwhere(np.isin(coor_geom, split_points).all(axis=1))[:, 0]
        split_locs = list(zip(split_locs.tolist(), split_locs.tolist()[1:]))

        new_edges = [coor_geom[split_loc[0]:split_loc[1] + 1] for split_loc in split_locs]

        grab_all_edges.append([
            [edge.osm_id] * len(new_edges),
            [pygeos.linestrings(coords) for coords in new_edges],
            [edge.infra_type] * len(new_edges),
        ])

    # combine all new edges
    edges = pd.DataFrame(
        [item for sublist in [list(zip(x[0], x[1], x[2])) for x in grab_all_edges] for item in sublist],
        columns=['osm_id', 'geometry', 'infra_type'],
    )

    # return new network with split edges
    return Network(nodes=network.nodes, edges=edges)
def remove_singular_segments(segments, image_stack, image_transform, t_shape=None,
                             bandnames_mean=["rVH_mean", "rVV_mean", "fVH_mean", "fVV_mean"],
                             bandnames_stdev=["rVV_std", "rVH_std", "fVV_std", "fVH_std"]):
    """Remove 1-pixel segments from GeoDataFrame

    Inputs:
        segments: geopandas GeoDataFrame
            GeoDataFrame to remove 1-pixel segments from.
        image_stack: nd array
            Image to retrieve properties from.
        image_transform: Affine
            Transformation from pixel to geographic coordinates.
        t_shape: int, float or None (default=None)
            If not None, maximum Perimeter/sqrt(Area) to be considered for merger.
        bandnames_mean: list (default=["rVH_mean", "rVV_mean", "fVH_mean", "fVV_mean"])
            Column names to store image band means (same order as bands in image_stack).
        bandnames_stdev: list (default=["rVV_std", "rVH_std", "fVV_std", "fVH_std"])
            Column names to store image band st. devs. (same order as bands in image_stack).

    Outputs:
        segments: geopandas GeoDataFrame
            GeoDataFrame without 1-pixel segments.
    """
    segments["removed"] = [False] * len(segments)
    segments.reset_index(drop=True, inplace=True)
    singular_segments = segments[segments["area"] == 1]
    singular_segments_index = list(singular_segments.index)
    print("Found {} 1-pixel segments".format(len(singular_segments)))

    segments_sindex = pygeos.STRtree(segments.geometry.values.data)
    left, right = segments_sindex.query_bulk(singular_segments.geometry.values.data, predicate="touches")

    for seg_index, seg in singular_segments.iterrows():
        possible_neighbours = segments.iloc[right[left == singular_segments_index.index(seg_index)]]
        possible_neighbours = possible_neighbours[~possible_neighbours["removed"]]  # remove already-merged segments from selection
        possible_neighbours = possible_neighbours[possible_neighbours["area"] > 1]  # remove 1-pixel segments from selection
        # keep only neighbours that share an edge, not just a corner point
        neighbours = possible_neighbours[[
            type(el) is not shapely.geometry.Point
            for el in possible_neighbours.geometry.buffer(0).intersection(seg.geometry.buffer(0))
        ]]
        if len(neighbours) == 0:
            # only look at 2-connectivity if no neighbours in 1-connectivity (should not happen)
            print("No neighbours found for segment {}. Switching to 2-connectivity".format(seg_index))
            neighbours = possible_neighbours

        # difference of band means between current segment and its neighbours
        diff = abs(neighbours[bandnames_mean] - seg[bandnames_mean])
        most_similar_segment = neighbours[
            np.sum(np.array(diff), axis=1) == np.min(np.sum(np.array(diff), axis=1))
        ].iloc[0]
        seg_updated, merged = merge_similar_segments(
            most_similar_segment.copy(), seg.copy(), image_stack, image_transform,
            bandnames_mean, bandnames_stdev, t_shape)
        segments.loc[most_similar_segment.name] = seg_updated
        segments.loc[seg_index, "removed"] = True

    return segments[segments["removed"] == False].drop("removed", axis=1)
def summarize_by_huc12(geometries):
    """Summarize by HUC12

    Parameters
    ----------
    geometries : Series of pygeos geometries, indexed by HUC12 id
    """
    # find the indexes of the geometries that overlap with SLR bounds; these are
    # the only ones that need to be analyzed for SLR impacts
    slr_bounds = gp.read_feather(slr_bounds_filename).geometry
    tree = pg.STRtree(geometries)
    ix = tree.query(slr_bounds.geometry.values.data[0], predicate="intersects")
    geometries = geometries.iloc[ix].copy()

    if not len(geometries):
        return

    results = []
    index = []
    for huc12, geometry in Bar(
        "Calculating SLR counts for HUC12", max=len(geometries)
    ).iter(geometries.iteritems()):
        zone_results = extract_by_geometry(
            [to_dict(geometry)], bounds=pg.total_bounds(geometry)
        )
        if zone_results is None:
            continue

        index.append(huc12)
        results.append(zone_results)

    df = pd.DataFrame(results, index=index)

    # reorder columns
    df = df[["shape_mask"] + list(df.columns.difference(["shape_mask"]))]
    # extract only areas that actually had SLR pixels
    df = df[df[df.columns[1:]].sum(axis=1) > 0]
    df.columns = [str(c) for c in df.columns]

    df = df.reset_index().rename(columns={"index": "id"}).round()
    df.to_feather(results_filename)
def tile_segments(segments, tile_size=(5000, 5000)):
    """Split up segments in subsets

    Inputs:
        segments: geopandas GeoDataFrame
            Segments to split up.
        tile_size: tuple
            Size (x, y) of subset footprints (geographic coordinates).

    Outputs:
        tiled_segments: list
            List of segments subsets.
    """
    bounds = segments.total_bounds
    nct, nrt = np.ceil((bounds[2:] - bounds[:2]) / tile_size).astype('int')

    # calculate tile bboxes; tile_size[0] is the x size, tile_size[1] the y size
    tile_numbers = np.arange(nrt * nct)
    tile_ir = (np.floor(tile_numbers / nct)).astype(int)
    tile_ic = tile_numbers % nct
    xmin = bounds[0] + tile_ic * tile_size[0]
    ymax = bounds[3] - tile_ir * tile_size[1]
    xmax = xmin + tile_size[0]
    ymin = ymax - tile_size[1]
    tile_boxes = pygeos.creation.box(xmin, ymin, xmax, ymax)

    # spatial query on segments
    segments_sindex = pygeos.STRtree(segments.geometry.values.data)
    left, right = segments_sindex.query_bulk(tile_boxes, predicate="intersects")

    # divide segments amongst tiles; each segment goes to the first tile that claims it
    segments["taken"] = [False] * len(segments)
    tiled_segments = []
    for tile_i in tile_numbers:
        segments_subset = segments.iloc[right[left == tile_i]].copy()
        segments_subset.drop(segments_subset[segments_subset["taken"]].index, inplace=True)
        segments.loc[segments_subset.index, "taken"] = True
        segments_subset.reset_index(drop=True, inplace=True)
        tiled_segments.append(segments_subset.drop("taken", axis=1))

    return tiled_segments
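# Hedged usage sketch for tile_segments: the toy GeoDataFrame is an assumption
# for illustration, and .geometry.values.data presumes a pygeos-enabled
# geopandas build, as the function itself does. Two segments roughly 6000
# units apart land in two separate 5000-unit tiles.
import geopandas as gp
from shapely.geometry import box

segments = gp.GeoDataFrame(geometry=[box(0, 0, 10, 10), box(6000, 0, 6010, 10)])
tiles = tile_segments(segments, tile_size=(5000, 5000))
print(len(tiles), [len(t) for t in tiles])  # -> 2 [1, 1]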
def occult(lines: LineCollection, tolerance: float) -> LineCollection:
    """
    Remove occulted lines.

    The order of the geometries in 'lines' matters, see examples below.

    'tolerance' controls the distance tolerance between the first and last points
    of a geometry to consider it closed.

    Examples:

        $ vpype line 0 0 5 5 rect 2 2 1 1 occult show  # line is occulted by rect

        $ vpype rect 2 2 1 1 line 0 0 5 5 occult show  # line is NOT occulted by
                                                       # rect, as the line is drawn
                                                       # after the rectangle
    """
    line_arr = np.array(
        [pygeos.linestrings(list(zip(line.real, line.imag))) for line in lines]
    )

    for i, line in enumerate(line_arr):
        coords = pygeos.get_coordinates(line)
        # if the geometry is closed (within tolerance), use it as an occulting polygon
        if math.hypot(coords[-1, 0] - coords[0, 0], coords[-1, 1] - coords[0, 1]) < tolerance:
            tree = pygeos.STRtree(line_arr[:i])
            p = pygeos.polygons(coords)
            geom_idx = tree.query(p, predicate="intersects")
            # subtract the polygon from every earlier line it intersects
            line_arr[geom_idx] = pygeos.set_operations.difference(line_arr[geom_idx], p)

    new_lines = LineCollection()
    for geom in line_arr:
        for i in range(pygeos.get_num_geometries(geom)):
            coords = pygeos.get_coordinates(pygeos.get_geometry(geom, i))
            new_lines.append(coords[:, 0] + coords[:, 1] * 1j)

    return new_lines
def time_tree_create(self):
    tree = pygeos.STRtree(self.polygons)
    tree.query(pygeos.points(0, 0))
    factor=MASK_FACTOR,
    ignore_zero=False,
)

### Process freshwater resilience by watershed
print("Processing freshwater resilience")

bnd = get_input_area_boundary("nn")

df = read_dataframe(
    src_dir / "indicators/Freshwater_Resilience/FW_resilience_highesthigh_watersheds.shp",
    columns=["RES_CLASS"],
).to_crs(DATA_CRS)

tree = pg.STRtree(df.geometry.values.data)
ix = tree.query(bnd, predicate="intersects")
df = df.iloc[ix].copy()

# remap values so that they are 1-4; 0 is fill value, 255 nodata
df["value"] = df.RES_CLASS.map({
    "Complex: Highest Relative Resilience": 4,
    "Complex: High Relative Resilience": 3,
    "Non-Complex: Highest Relative Score": 2,
    "Non-Complex: High Relative Score": 2,
})

# dissolve by value
df = dissolve(explode(df), by=["value"])

# rasterize
def test_init_increases_refcount():
    arr = np.array([point])
    with assert_increases_refcount(point):
        _ = pygeos.STRtree(arr)
### Associate with waterbody drain points
print("Joining to waterbody drain points...")
join_start = time()

drains = gp.read_feather(
    clean_dir / huc2 / "waterbody_drain_points.feather",
    columns=["wbID", "drainID", "lineID", "geometry"],
).set_index("drainID")

# Find the nearest dam polygon for each drain point, within MAX_DRAIN_DISTANCE.
# We do it this way because the dam may intersect or affect multiple drain
# points, so we can't always take the first or nearest from the dam's perspective.
tmp_dams = dams.groupby("damID").geometry.first()
tree = pg.STRtree(tmp_dams.values.data)
drain_ix, dam_ix = tree.nearest_all(
    drains.geometry.values.data, max_distance=MAX_DRAIN_DISTANCE
)

near_drains = pd.DataFrame(
    {"drainID": drains.index.values.take(drain_ix)},
    index=pd.Series(tmp_dams.index.values.take(dam_ix), name="damID"),
).join(drains, on="drainID")
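# Hedged sketch of the STRtree.nearest_all call used above (pygeos >= 0.10),
# with toy points assumed for illustration: it returns parallel arrays of
# (query index, tree index) pairs within max_distance, including ties.
import pygeos

tree = pygeos.STRtree(pygeos.points([(0, 0), (10, 0)]))
q_ix, t_ix = tree.nearest_all(pygeos.points([(1, 0), (50, 0)]), max_distance=5)
print(q_ix, t_ix)  # -> [0] [0]; the second query point has nothing within 5 units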
def test_del_decreases_refcount():
    arr = np.array([point])
    tree = pygeos.STRtree(arr)
    with assert_decreases_refcount(point):
        del tree
def verify(g, shp, thorough=False):

    #---------------------------------------------------------------------
    logger.info(' Verify grid against coastline\n')
    #---------------------------------------------------------------------

    lon_min = g.Dataset.SCHISM_hgrid_node_x.values.min()
    lon_max = g.Dataset.SCHISM_hgrid_node_x.values.max()
    lat_min = g.Dataset.SCHISM_hgrid_node_y.values.min()
    lat_max = g.Dataset.SCHISM_hgrid_node_y.values.max()

    c = shp.cx[lon_min:lon_max, lat_min:lat_max]

    # ## Test polygons
    d = g.Dataset

    x = d.SCHISM_hgrid_node_x.values
    y = d.SCHISM_hgrid_node_y.values
    tri = d.SCHISM_hgrid_face_nodes.values

    nodes = pd.DataFrame({'lon': x, 'lat': y})
    elems = pd.DataFrame(tri, columns=['a', 'b', 'c'])

    bnodes = g.Dataset[['node', 'id', 'type']].to_dataframe()

    # ### Find the invalid nodes (that cross the coasts)
    cos = pygeos.from_shapely(c.geometry)
    cos_ = pygeos.set_operations.union_all(cos)

    gps = pygeos.points(list(nodes.values))
    gtree = pygeos.STRtree(gps)
    invs = gtree.query(cos_, predicate='contains').tolist()

    #---------------------------------------------------------------------
    logger.info('Number of nodes within the coastlines {}\n'.format(len(invs)))
    #---------------------------------------------------------------------

    nps = len(invs)
    nels = 1

    if thorough:
        # ### Find invalid elements (that cross land)

        # cells to polygons
        ap = nodes.loc[elems.a]
        bp = nodes.loc[elems.b]
        cp = nodes.loc[elems.c]

        elems['ap'] = ap.values.tolist()
        elems['bp'] = bp.values.tolist()
        elems['cp'] = cp.values.tolist()

        n = 2
        al = elems.ap + elems.bp + elems.cp + elems.ap
        coords = [[l[i:i + n] for i in range(0, len(l), n)] for l in al]
        elems['coordinates'] = coords

        jig = pygeos.polygons(coords)
        jtree = pygeos.STRtree(jig)
        jig_ = pygeos.set_operations.union_all(jig)

        cross = pygeos.set_operations.intersection(jig_, cos_)

        # #### convert to dataframe
        fd = pd.DataFrame({'overlap': pygeos.to_wkt(cross)}, index=[0])
        fd['overlap'] = fd['overlap'].apply(shapely.wkt.loads)

        gover = gp.GeoDataFrame(fd, geometry='overlap')

        # #### Reject small intrusions
        ipols = gover.explode().loc[0]
        ipols.columns = ['geometry']

        mask = ipols.area.values == 0.
        ipols = ipols[~mask].reset_index(drop=True)
        ipols = gp.GeoDataFrame(ipols)

        #---------------------------------------------------------------------
        logger.info('Number of elements intersecting the coastlines {}\n'.format(ipols.shape[0]))
        #---------------------------------------------------------------------

        nels = ipols.shape[0]

    if nps == 0 and nels == 0:
        #---------------------------------------------------------------------
        logger.info('Grid is verified against the coastline')
        #---------------------------------------------------------------------
        return True
    elif nps == 0:
        #---------------------------------------------------------------------
        logger.info('Grid is node-verified against the coastline')
        #---------------------------------------------------------------------
        return True
    else:
        #---------------------------------------------------------------------
        logger.warning('Grid is not verified against the coastline')
        #---------------------------------------------------------------------
        return False
def test_len():
    # None entries are ignored by the tree, so only the two valid points count
    arr = np.array([point, None, point])
    tree = pygeos.STRtree(arr)
    assert len(tree) == 2
def test_geometries_property():
    arr = np.array([point])
    tree = pygeos.STRtree(arr)
    assert arr is tree.geometries
# note: drop any from NABD that have duplicate NIDID
nabd = (
    read_dataframe(src_dir / "NABD_V2_beta/NABD_V2_beta.shp", columns=["NIDID"])
    .dropna(subset=["NIDID"])
    .drop_duplicates(subset=["NIDID"], keep=False)
    .to_crs(CRS)
    .set_index("NIDID")
)

### Select within outer HUC4s
huc4 = gp.read_feather(boundaries_dir / "outer_huc4.feather")

# select out NID within outer HUC4s
tree = pg.STRtree(nid.geometry.values.data)
left, right = tree.query_bulk(huc4.geometry.values.data, predicate="intersects")
ix = np.unique(right)
nid = nid.iloc[ix].copy()

# select out prev within outer HUC4s
tree = pg.STRtree(prev.geometry.values.data)
left, right = tree.query_bulk(huc4.geometry.values.data, predicate="intersects")
ix = np.unique(right)
prev = prev.iloc[ix].copy()

# select out prev NID within outer HUC4s
tree = pg.STRtree(prev_nid.geometry.values.data)
left, right = tree.query_bulk(huc4.geometry.values.data, predicate="intersects")
ix = np.unique(right)
prev_nid = prev_nid.iloc[ix].copy()
def union_or_combine(geometries, grid_size=None, op="union"):
    """First does a check for overlap of geometries according to STRtree
    intersects.  If any overlap, contiguous groups are unioned individually;
    otherwise geometries are simply combined into a MultiPolygon.

    If only one polygon is present, it will be returned in a MultiPolygon.

    If the coverage_union op is provided, geometries must be polygons and
    topologically related or this will produce bad output or fail outright.
    See docs for coverage_union in GEOS.

    Parameters
    ----------
    geometries : ndarray of single part polygons
    grid_size : float, optional (default: None)
        passed to union_all; otherwise no effect
    op : str, one of {'union', 'coverage_union'}

    Returns
    -------
    MultiPolygon
    """
    if not (pg.get_type_id(geometries) == 3).all():
        print("Inputs to union or combine must be single-part geometries")

    if len(geometries) == 1:
        return pg.multipolygons(geometries)

    tree = pg.STRtree(geometries)
    left, right = tree.query_bulk(geometries, predicate="intersects")

    # drop self intersections
    ix = left != right
    left = left[ix]
    right = right[ix]

    # no intersections, just combine parts
    if len(left) == 0:
        return pg.multipolygons(geometries)

    # find groups of contiguous geometries and union them together individually
    contiguous = np.sort(np.unique(np.concatenate([left, right])))
    discontiguous = np.setdiff1d(np.arange(len(geometries), dtype="uint"), contiguous)
    groups = find_adjacent_groups(left, right)

    parts = []

    if op == "coverage_union":
        for group in groups:
            parts.extend(pg.get_parts(pg.coverage_union_all(geometries[list(group)])))

    else:
        for group in groups:
            parts.extend(pg.get_parts(pg.union_all(geometries[list(group)], grid_size=grid_size)))

    parts.extend(pg.get_parts(geometries[discontiguous]))

    return pg.multipolygons(parts)
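# Hedged usage sketch for union_or_combine: the toy polygons are assumptions
# for illustration, and find_adjacent_groups is assumed available from the
# original module. The two overlapping boxes are unioned into one part; the
# disjoint box is kept as its own part of the resulting MultiPolygon.
import numpy as np
import pygeos as pg

geometries = np.array([
    pg.box(0, 0, 2, 2),
    pg.box(1, 1, 3, 3),      # overlaps the first box -> unioned with it
    pg.box(10, 10, 11, 11),  # disjoint -> kept as a separate part
])
result = union_or_combine(geometries)
print(pg.get_num_geometries(result))  # -> 2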