def sel_sites_poly(pts, poly, buffer_dis=0):
    """
    Select the points that lie within a polygon layer, with an optional buffer.

    All polygons in `poly` are merged into a single geometry before the
    selection, so a point is kept when it lies within the merged (and
    buffered) shape.

    Parameters
    ----------
    pts : GeoDataFrame or str
        The points layer, passed through `load_geo_data`.
    poly : GeoDataFrame or str
        The polygon layer, passed through `load_geo_data`.
    buffer_dis : int
        Buffer distance around the merged polygon, in coordinate system units.

    Returns
    -------
    GeoDataFrame
        The rows of the points layer that fall within the buffered polygon.
    """
    points = load_geo_data(pts)
    polygons = load_geo_data(poly)

    # Merge all polygons into one geometry and grow it by the buffer distance
    search_area = polygons.unary_union.buffer(buffer_dis)

    # Boolean mask of the points inside the search area
    inside = points.within(search_area)

    return points[inside]
def closest_line_to_pts(pts, lines, line_site_col, max_distance=1000):
    """
    Function to determine the line closest to each point.

    For every point the search radius starts at 50 and grows in steps of 50
    (coordinate system units) until at least one line intersects the buffered
    point or the radius reaches `max_distance`. The radius always stays
    strictly below `max_distance`; if no radius can be searched, or no line is
    found, the point is left out of the result and reported on stdout.

    Parameters
    ----------
    pts : GeoDataFrame
        The points input (passed through `load_geo_data`).
    lines : GeoDataFrame
        The lines input (passed through `load_geo_data`).
    line_site_col : str
        The site column from the 'lines' that should be retained at the output.
    max_distance : int
        The max distance from each point to search for a line. Try to use the
        shortest max_distance that will cover all of your points as a larger
        value will significantly slow down the operation.

    Returns
    -------
    GeoDataFrame
        The matched points with an added `line_site_col` column holding the
        value of the nearest line. Empty if no point found a line.
    """
    ## Load data
    gdf_pts = load_geo_data(pts)
    gdf_lines = load_geo_data(lines)

    ## Process data
    step = 50
    matched = []
    for i in gdf_pts.index:
        # Copy so the new column is not written onto a slice of gdf_pts
        pts_seg = gdf_pts.loc[[i]].copy()

        # Stays None when max_distance <= step and no search radius is tried
        lines1 = None
        dis = step
        while dis < max_distance:
            bound = pts_seg.buffer(dis).unary_union
            lines1 = gdf_lines[gdf_lines.intersects(bound)]
            if not lines1.empty:
                break
            dis += step

        if lines1 is None or lines1.empty:
            continue

        # Of the candidate lines, keep the one nearest to the point
        near1 = lines1.distance(gdf_pts.geometry[i]).idxmin()
        pts_seg[line_site_col] = lines1.loc[near1, line_site_col]
        matched.append(pts_seg)

    # Concatenate once instead of growing a frame inside the loop
    if matched:
        pts_line_seg = pd.concat(matched)
    else:
        pts_line_seg = gpd.GeoDataFrame()

    ### Determine points that did not find a line
    mis_pts = gdf_pts.loc[~gdf_pts.index.isin(pts_line_seg.index)]
    if not mis_pts.empty:
        print(mis_pts)
        print('Did not find a line segment for these sites')

    return pts_line_seg
def extract_catch(reaches, rec_catch, segment_id_col='nzsegment'):
    """
    Pull the catchment polygons belonging to a set of reaches out of the REC
    catchments layer and attach the reaches attributes to them.

    Parameters
    ----------
    reaches : DataFrame
        The output DataFrame from the find_upstream function.
    rec_catch : str path or GeoDataFrame
        The REC catchments layer, passed through `load_geo_data`.
    segment_id_col : str
        The column name of the line segment id.

    Returns
    -------
    GeoDataFrame
        One row per match between a dissolved catchment polygon and a row of
        `reaches`, carrying the crs of the catchments layer.
    """
    wanted_ids = reaches[segment_id_col].unique().astype('int32')

    all_catch = load_geo_data(rec_catch)

    # Keep only the catchments of the requested segments
    is_wanted = all_catch[segment_id_col].isin(wanted_ids)
    selected = all_catch[is_wanted].copy()

    # One polygon per segment id, keeping just the id and the geometry
    dissolved = selected.dissolve(segment_id_col).reset_index()
    polygons = dissolved[[segment_id_col, 'geometry']]

    ### Combine with original sites
    combined = polygons.merge(reaches.reset_index(), on=segment_id_col)
    combined.crs = all_catch.crs

    return combined
def pts_poly_join(pts, poly, poly_id_col):
    """
    Spatially join polygon ID attributes onto the points that lie within them.

    The polygons are first dissolved by the ID column(s), then joined to the
    points with an inner 'within' spatial join.

    Parameters
    ----------
    pts : GeoDataFrame
        The points layer, passed through `load_geo_data`.
    poly : GeoDataFrame
        The polygon layer, passed through `load_geo_data`.
    poly_id_col : str or list of str
        The names of the polygon columns to join.

    Returns
    -------
    GeoDataFrame
        The points with the polygon ID column(s) attached.
    GeoDataFrame
        The polygons dissolved by the ID column(s).
    """
    points = load_geo_data(pts)
    polygons = load_geo_data(poly)

    if isinstance(poly_id_col, str):
        poly_id_col = [poly_id_col]

    # ID columns plus the geometry are all that is needed from the polygons
    keep = poly_id_col.copy()
    keep.append('geometry')

    dissolved = polygons[keep].copy().dissolve(poly_id_col).reset_index()

    joined = sjoin(points.copy(), dissolved.copy(), how='inner', op='within')

    # Drop helper columns added by the join that exist in neither input
    allowed = set(points.columns) | set(dissolved.columns)
    extra = [col for col in joined.columns if col not in allowed]
    joined.drop(extra, axis=1, inplace=True)

    return joined, dissolved
def find_upstream(nzreach, rec_streams, segment_id_col='nzsegment', from_node_col='FROM_NODE', to_node_col='TO_NODE'):
    """
    Function to estimate all of the reaches (and nodes) upstream of specific reaches.

    Starting from each requested reach, the network is walked upstream by
    repeatedly selecting the segments whose to-node equals the from-node of
    the segments found in the previous step.

    Parameters
    ----------
    nzreach : list, ndarray, Series of int
        The NZ reach IDs
    rec_streams : str path or GeoDataFrame
        str path to the REC streams shapefile or the equivalent GeoDataFrame
        (passed through `load_geo_data`).
    segment_id_col : str
        The column name of the line segment id.
    from_node_col : str
        The from node column
    to_node_col : str
        The to node column

    Returns
    -------
    DataFrame
        The stream attributes (without geometry) of every reach found,
        indexed by 'start', the requested reach it lies upstream of. Empty
        when `nzreach` is empty.

    Raises
    ------
    TypeError
        If `nzreach` is not a list, ndarray or Series.
    """
    if not isinstance(nzreach, (list, np.ndarray, pd.Series)):
        raise TypeError('nzreach must be a list, ndarray or Series.')

    ### Load data
    rec = load_geo_data(rec_streams).drop('geometry', axis=1)

    ### Run through all nzreaches
    reaches_lst = []
    for i in nzreach:
        # Collect each upstream step and concatenate once per start reach
        parts = [rec[rec[segment_id_col] == i]]
        up1 = rec[rec[to_node_col].isin(parts[0][from_node_col])]
        while not up1.empty:
            parts.append(up1)
            up1 = rec[rec[to_node_col].isin(up1[from_node_col])]

        reach1 = pd.concat(parts)
        reach1['start'] = i
        reaches_lst.append(reach1)

    # pd.concat raises on an empty list, so return an empty frame instead
    if not reaches_lst:
        empty = rec.iloc[:0].copy()
        empty.index = pd.Index([], name='start')
        return empty

    reaches = pd.concat(reaches_lst)
    reaches.set_index('start', inplace=True)

    return reaches
def find_upstream(nzreach, rec_streams_shp, segment_id_col='NZREACH', from_node_col='NZFNODE', to_node_col='NZTNODE'):
    """
    Function to estimate all of the reaches (and nodes) upstream of specific reaches.

    Starting from each requested reach, the network is walked upstream by
    repeatedly selecting the segments whose to-node equals the from-node of
    the segments found in the previous step.

    Parameters
    ----------
    nzreach : list, ndarray, Series of int
        The NZ reach IDs
    rec_streams_shp : str path or GeoDataFrame
        str path to the REC streams shapefile or the equivalent GeoDataFrame
        (passed through `load_geo_data`).
    segment_id_col : str
        The column name of the line segment id.
    from_node_col : str
        The from node column
    to_node_col : str
        The to node column

    Returns
    -------
    DataFrame
        The stream attributes (without geometry) of every reach found,
        indexed by 'start', the requested reach it lies upstream of. Empty
        when `nzreach` is empty.

    Raises
    ------
    TypeError
        If `nzreach` is not a list, ndarray or Series.
    """
    if not isinstance(nzreach, (list, np.ndarray, pd.Series)):
        raise TypeError('nzreach must be a list, ndarray or Series.')

    ### Load data
    rec = load_geo_data(rec_streams_shp).drop('geometry', axis=1)

    ### Run through all nzreaches
    reaches_lst = []
    for i in nzreach:
        # Collect each upstream step and concatenate once per start reach;
        # the concatenated frame is a new object, so adding 'start' to it
        # never writes onto a slice of rec
        parts = [rec[rec[segment_id_col] == i]]
        up1 = rec[rec[to_node_col].isin(parts[0][from_node_col])]
        while not up1.empty:
            parts.append(up1)
            up1 = rec[rec[to_node_col].isin(up1[from_node_col])]

        reach1 = pd.concat(parts)
        reach1['start'] = i
        reaches_lst.append(reach1)

    # pd.concat raises on an empty list, so return an empty frame instead
    if not reaches_lst:
        empty = rec.iloc[:0].copy()
        empty.index = pd.Index([], name='start')
        return empty

    reaches = pd.concat(reaches_lst)
    reaches.set_index('start', inplace=True)

    return reaches
def extract_catch(reaches, rec_catch_shp, segment_id_col='NZREACH'):
    """
    Function to extract the catchment polygons from the rec catchments layer. Appends to reaches layer.

    Parameters
    ----------
    reaches : DataFrame
        The output DataFrame from the find_upstream function.
    rec_catch_shp : str path or GeoDataFrame
        str path to the REC catchment shapefile or the equivalent
        GeoDataFrame (passed through `load_geo_data`).
    segment_id_col : str
        The column name of the reach id shared by `reaches` and the
        catchments layer.

    Returns
    -------
    GeoDataFrame
        One row per match between a dissolved catchment polygon and a row of
        `reaches`, carrying the crs of the catchments layer.
    """
    sites = reaches[segment_id_col].unique().astype('int32').tolist()

    catch0 = load_geo_data(rec_catch_shp)

    # Keep only the catchments of the requested reaches, one polygon per id
    catch1 = catch0[catch0[segment_id_col].isin(sites)]
    catch2 = catch1.dissolve(segment_id_col).reset_index()[[segment_id_col, 'geometry']]

    ### Combine with original sites
    catch3 = catch2.merge(reaches.reset_index(), on=segment_id_col)
    catch3.crs = catch0.crs

    return catch3
# Base names of the example datasets
rec_catch_shp = 'rec_catch_pareora'
catch_shp = 'catchment_pareora'

# Resolve the dataset names to paths with get_path.
# NOTE(review): sites_shp and rec_streams_shp are not assigned in this part
# of the file - presumably defined above; confirm.
sites_shp_path = get_path(sites_shp)
rec_streams_shp_path = get_path(rec_streams_shp)
rec_catch_shp_path = get_path(rec_catch_shp)
catch_shp_path = get_path(catch_shp)

# Column names used by the examples below
sites_col_name = 'SITENUMBER'
poly_col_name = 'Catchmen_1'
line_site_col = 'NZREACH'

#######################################
### Examples

# Load the sites and simplify their geometries with a tolerance of 1
pts = util.load_geo_data(sites_shp_path)
pts['geometry'] = pts.geometry.simplify(1)

## Selecting points from within a polygon (buffered by 10 units)
pts1 = vector.sel_sites_poly(sites_shp_path, rec_catch_shp_path, buffer_dis=10)

## Spatial join attributes of polygons to points
pts2, poly2 = vector.pts_poly_join(sites_shp_path, catch_shp_path, poly_col_name)

## Create a GeoDataFrame from x and y data
# Build a table holding the site column plus x and y, with the geometry removed
pts_df = pts[[sites_col_name, 'geometry']].copy()
pts_df['x'] = pts_df.geometry.x
pts_df['y'] = pts_df.geometry.y
pts_df.drop('geometry', axis=1, inplace=True)

pts3 = vector.xy_to_gpd(sites_col_name, 'x', 'y', pts_df)
#id_col = 'Management' #id_col = 'SITENUMBER' #gdf_from = pts.copy() #max_distance=500 #waterway_name=True # #n0 = ways1[0]['nodes'] #n1 = 1213916012 # #osm_nodes_from = res1.copy() ####################################### ### Tests gdf_from = util.load_geo_data(sites_shp_path) gdf_from['geometry'] = gdf_from.geometry.simplify(1) def test_get_nearest(): pts1, no_node_ids = osm.get_nearest_waterways(gdf_from, id_col) assert (len(pts1) == 2) & isinstance(pts1, gpd.GeoDataFrame) pts1, no_node_ids = osm.get_nearest_waterways(gdf_from, id_col) def test_get_waterways(): waterways, nodes = osm.get_waterways(pts1)
def catch_delineate(sites, rec_streams, rec_catch, segment_id_col='nzsegment', from_node_col='FROM_NODE', to_node_col='TO_NODE', ignore_order=1, stream_order_col='StreamOrde', max_distance=np.inf, site_delineate='all', returns='catch'):
    """
    Delineate the catchments above a set of sites using the REC streams and
    catchments layers.

    Each site is matched to its nearest stream segment, every reach upstream
    of that segment is collected, and the catchment polygons of those reaches
    are aggregated per site.

    Parameters
    ----------
    sites : str path or GeoDataFrame
        Points layer of the sites along the streams (passed through
        `load_geo_data`).
    rec_streams : str path or GeoDataFrame
        The REC streams layer (passed through `load_geo_data`).
    rec_catch : str path or GeoDataFrame
        The REC catchments layer (passed through `load_geo_data`).
    segment_id_col : str
        The column name of the line segment id.
    from_node_col : str
        The from node column
    to_node_col : str
        The to node column
    ignore_order : int
        Stream orders from 1 up to and including this value are excluded
        when matching sites to stream segments.
    stream_order_col : str
        The stream order column.
    max_distance : non-negative float, optional
        Maximum distance handed to the nearest-neighbour search. When it is
        infinite, the streams considered are still limited to a bounding box
        100000 units around the sites.
    site_delineate : 'all' or 'between'
        With 'between', the reaches belonging to another site further
        upstream are removed from each site's set of reaches. Any other
        value keeps every upstream reach.
    returns : 'catch' or 'all'
        Return only the catchment polygons ('catch'); any other value returns
        the catchments, reaches, and sites.

    Returns
    -------
    GeoDataFrame
        The catchment polygons merged with the matched site attributes, or a
        tuple of (catchments, reaches, matched sites).
    """
    ### Modifications {segment_id_col: {NZTNODE/NZFNODE: node # to change}}
    # mods = {13053151: {segment_id_col: 13055874}, 13048353: {'NZTNODE': 13048851}, 13048498: {'NZTNODE': 13048851}, 13048490: {'ORDER': 3}}

    ### Load the three layers
    catch_gdf = load_geo_data(rec_catch)
    streams_gdf = load_geo_data(rec_streams)
    sites_gdf = load_geo_data(sites)
    sites_gdf['geometry'] = sites_gdf.geometry.simplify(1)

    ### make mods
    # for i in mods:
    #     rec_streams.loc[rec_streams['segment_id_col'] == i, list(mods[i].keys())] = list(mods[i].values())

    ### Bounding box around the sites that limits the stream search
    search_dis = 100000 if max_distance == np.inf else max_distance
    search_box = box(*sites_gdf.unary_union.buffer(search_dis).bounds)

    ### Find closest REC segment to points, skipping the ignored stream orders
    low_orders = list(range(1, ignore_order + 1))
    keep_order = ~streams_gdf[stream_order_col].isin(low_orders)
    stream_pts = convert_lines_to_points(streams_gdf[keep_order], segment_id_col, search_box)

    nearest = kd_nearest(sites_gdf, stream_pts, segment_id_col, max_distance=max_distance)
    site_segs = nearest[nearest[segment_id_col].notnull()].copy()
    start_ids = site_segs[segment_id_col].unique()

    ### Find all upstream reaches
    reaches = find_upstream(start_ids, rec_streams=streams_gdf, segment_id_col=segment_id_col, from_node_col=from_node_col, to_node_col=to_node_col)

    ### Clip reaches to in-between sites if required
    if site_delineate == 'between':
        flat = reaches.reset_index().copy()

        # Rows where a site's reach set contains the segment of another site
        at_sites = flat.loc[flat[segment_id_col].isin(flat.start.unique()), ['start', segment_id_col]]
        at_sites = at_sites[at_sites.start != at_sites[segment_id_col]]

        # flat shrinks as the loop runs and later iterations read the
        # already-clipped table, so the order of operations matters
        for start_id, nested in at_sites.groupby('start'):
            nested_ids = flat[flat.start.isin(nested[segment_id_col])][segment_id_col].unique()
            drop_mask = (flat.start == start_id) & (flat[segment_id_col].isin(nested_ids))
            flat = flat[~drop_mask]

        reaches = flat.set_index('start').copy()

    ### Extract associated catchments
    seg_catch = extract_catch(reaches, rec_catch=catch_gdf, segment_id_col=segment_id_col)

    ### Aggregate individual catchments
    shed = agg_catch(seg_catch)
    shed.columns = [segment_id_col, 'geometry', 'area']
    result = shed.merge(site_segs.drop('geometry', axis=1), on=segment_id_col)

    ### Return
    if returns == 'catch':
        return result
    return result, reaches, site_segs
# File names of the test datasets
rec_catch_shp = 'rec_catch_pareora.shp'
catch_shp = 'catchment_pareora.shp'

# Full paths to the datasets inside data_dir.
# NOTE(review): data_dir, sites_shp and rec_streams_shp are not assigned in
# this part of the file - presumably defined above; confirm.
sites_shp_path = os.path.join(data_dir, sites_shp)
rec_streams_shp_path = os.path.join(data_dir, rec_streams_shp)
rec_catch_shp_path = os.path.join(data_dir, rec_catch_shp)
catch_shp_path = os.path.join(data_dir, catch_shp)

# Column names used by the tests
sites_col_name = 'SITENUMBER'
poly_col_name = 'Catchmen_1'
line_site_col = 'NZREACH'

#######################################
### Tests

# Shared inputs: the sites with geometries simplified (tolerance 1)
pts = util.load_geo_data(sites_shp_path)
pts['geometry'] = pts.geometry.simplify(1)

# Copy of the streams layer with each geometry replaced by its centroid
rec_streams1 = util.load_geo_data(rec_streams_shp_path)
rec_pts1 = rec_streams1.copy()
rec_pts1['geometry'] = rec_streams1.centroid


def test_sel_sites_poly():
    """Check that sel_sites_poly (buffer of 10) returns a GeoDataFrame with 2 rows."""
    pts1 = vector.sel_sites_poly(sites_shp_path, rec_catch_shp_path, buffer_dis=10)

    assert (len(pts1) == 2) & isinstance(pts1, gpd.GeoDataFrame)


def test_pts_poly_join():
def catch_delineate(sites_shp, rec_streams_shp, rec_catch_shp, max_distance=np.inf):
    """
    Delineate the catchments above a set of sites using the REC streams and
    catchments layers.

    A fixed set of to-node corrections is applied to the streams layer, each
    site is matched to the nearest stream centroid, every reach upstream of
    that reach is collected, and the catchment polygons of those reaches are
    aggregated per site.

    Parameters
    ----------
    sites_shp : str path or GeoDataFrame
        Points layer of the sites along the streams (passed through
        `load_geo_data`).
    rec_streams_shp : str path or GeoDataFrame
        The REC streams layer (passed through `load_geo_data`).
    rec_catch_shp : str path or GeoDataFrame
        The REC catchments layer (passed through `load_geo_data`).
    max_distance : non-negative float, optional
        Maximum distance handed to the nearest-neighbour search.

    Returns
    -------
    GeoDataFrame
        The catchment polygons merged with the matched site attributes.
    """
    ### Modifications {NZREACH: {NZTNODE/NZFNODE: node # to change}}
    node_fixes = {
        13053151: {'NZTNODE': 13055874},
        13048353: {'NZTNODE': 13048851},
        13048498: {'NZTNODE': 13048851},
    }

    ### Load the three layers
    catchments = load_geo_data(rec_catch_shp)
    streams = load_geo_data(rec_streams_shp)
    sites = load_geo_data(sites_shp)
    sites['geometry'] = sites.geometry.simplify(1)

    ### Apply the node corrections to the streams layer
    for reach_id, changes in node_fixes.items():
        streams.loc[streams['NZREACH'] == reach_id, list(changes.keys())] = list(changes.values())

    ### Find closest REC segment to points, using the stream centroids
    stream_centres = streams.copy()
    stream_centres['geometry'] = streams.centroid
    site_segs = kd_nearest(sites, stream_centres, 'NZREACH', max_distance=max_distance)
    start_ids = site_segs.NZREACH.unique()

    ### Find all upstream reaches
    reaches = find_upstream(start_ids, rec_streams_shp=streams)

    ### Extract associated catchments
    seg_catch = extract_catch(reaches, rec_catch_shp=catchments)

    ### Aggregate individual catchments
    shed = agg_catch(seg_catch)
    shed.columns = ['NZREACH', 'geometry', 'area']

    return shed.merge(site_segs.drop('geometry', axis=1), on='NZREACH')