# Imports needed by the functions below. `sf` (table read/write helper),
# `mssql` and `vector` (SQL and spatial helpers), and the rec utilities
# (load_geo_data, kd_nearest, find_upstream, extract_catch, agg_catch,
# convert_lines_to_points) are project-local modules assumed to be imported
# elsewhere in the package.
import types

import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, box


def process_waps(param):
    """
    Process the Waps from the source data and save the results to the ConsentsReporting db.
    """
    run_time_start = pd.Timestamp.today().strftime('%Y-%m-%d %H:%M:%S')
    print(run_time_start)

    ### Read in source data and update accela tables in ConsentsReporting db
    print('--Reading in source data...')

    ## Make object to contain the source data
    db = types.SimpleNamespace()

    for t in param['misc']['WapProcessing']['tables']:
        p = param['source data'][t]
        stmt = 'select * from "{table}"'.format(table=p['table'])
        setattr(db, t, sf.read_table(p['username'], p['password'], p['account'], p['database'], p['schema'], stmt))

    # Spatial data
    gw_dict = param['source data']['gw_zones']
    setattr(db, 'gw_zones', mssql.rd_sql(gw_dict['server'], gw_dict['database'], gw_dict['table'], gw_dict['col_names'], username=gw_dict['username'], password=gw_dict['password'], geo_col=True, rename_cols=gw_dict['rename_cols']))

    sw_dict = param['source data']['sw_reaches']
    setattr(db, 'sw_reaches', mssql.rd_sql(sw_dict['server'], sw_dict['database'], sw_dict['table'], sw_dict['col_names'], username=sw_dict['username'], password=sw_dict['password'], geo_col=True))

    ##################################################
    ### Waps
    print('--Process Waps')

    sites1 = vector.xy_to_gpd('Wap', 'NzTmX', 'NzTmY', db.sites.drop('EffectiveFromDate', axis=1))
    waps1 = sites1.merge(db.wap_sd.drop('EffectiveFromDate', axis=1), on='Wap')

    # Fill missing stream-depletion values with 0 and store them as integers
    waps1.loc[waps1['SD1_7Day'].isnull(), 'SD1_7Day'] = 0
    waps1.loc[waps1['SD1_30Day'].isnull(), 'SD1_30Day'] = 0
    waps1.loc[waps1['SD1_150Day'].isnull(), 'SD1_150Day'] = 0
    waps1[['SD1_7Day', 'SD1_30Day', 'SD1_150Day']] = waps1[['SD1_7Day', 'SD1_30Day', 'SD1_150Day']].round().astype(int)

    ## Aquifer tests
    # Storativity becomes a boolean flag: True where a Wap has at least one
    # storativity measurement, False otherwise
    aq1 = db.wap_aquifer_test.dropna(subset=['Storativity']).drop('EffectiveFromDate', axis=1).copy()
    aq2 = aq1.groupby('Wap')['Storativity'].mean().dropna().reset_index()
    aq2.Storativity = True

    waps2 = waps1.merge(aq2, on='Wap', how='left')
    waps2.loc[waps2.Storativity.isnull(), 'Storativity'] = False

    ## Add spatial info
    # GW
    gw_zones = db.gw_zones.copy()
    gw_zones.rename(columns={'SpatialUnitID': 'GwSpatialUnitId'}, inplace=True)

    waps3, poly1 = vector.pts_poly_join(waps2, gw_zones, 'GwSpatialUnitId')
    waps3.drop_duplicates('Wap', inplace=True)
    waps3['Combined'] = waps3.apply(lambda x: 'CWAZ' in x['GwSpatialUnitId'], axis=1)

    # SW
    sw1 = db.sw_reaches.copy()
    sw1.rename(columns={'SpatialUnitID': 'SwSpatialUnitId'}, inplace=True)

    # Explode each reach line into its vertices so the nearest-neighbour
    # search can match Waps to the closest point along a reach
    lst1 = []
    for index, row in sw1.iterrows():
        for j in list(row['geometry'].coords):
            lst1.append([row['SwSpatialUnitId'], Point(j)])
    df1 = pd.DataFrame(lst1, columns=['SwSpatialUnitId', 'geometry'])
    sw2 = gpd.GeoDataFrame(df1, geometry='geometry')

    waps3b = vector.kd_nearest(waps3, sw2, 'SwSpatialUnitId')

    ## Prepare output
    waps3b['NzTmX'] = waps3b.geometry.x
    waps3b['NzTmY'] = waps3b.geometry.y

    waps4 = pd.DataFrame(waps3b.drop(['geometry'], axis=1))
    waps4[['NzTmX', 'NzTmY']] = waps4[['NzTmX', 'NzTmY']].round().astype(int)
    waps4.rename(columns={'Name': 'SpatialUnitName', 'distance': 'DistanceToSw'}, inplace=True)

    ## Check for differences
    print('Save results')
    wap_dict = param['source data']['waps']

    # old_stmt = 'select * from "{table}"'.format(table=wap_dict['table'])
    # old1 = sf.read_table(wap_dict['username'], wap_dict['password'], wap_dict['account'], wap_dict['database'], wap_dict['schema'], old_stmt).drop('EffectiveFromDate', axis=1)
    #
    # change1 = compare_dfs(old1, waps4, ['Wap'])
    # new1 = change1['new']
    # diff1 = change1['diff']

    ## Save data
    waps4['EffectiveFromDate'] = run_time_start

    sf.to_table(waps4, wap_dict['table'], wap_dict['username'], wap_dict['password'], wap_dict['account'], wap_dict['database'], wap_dict['schema'], True)

    return waps4
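# Usage sketch for process_waps (hedged): the nested `param` layout below is
# inferred from the lookups in the function body; the config file name and
# loader are hypothetical, not the project's actual setup.
if __name__ == '__main__':
    import yaml

    with open('parameters.yml') as f:  # hypothetical config path
        param = yaml.safe_load(f)

    # Expected keys (inferred): param['misc']['WapProcessing']['tables'] and
    # param['source data'][<name>] dicts carrying username/password/account/
    # database/schema/table entries
    waps = process_waps(param)
    print(waps.head())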
def test_kd_nearest():
    # `pts`, `rec_pts1`, and `line_site_col` are module-level fixtures built
    # earlier in the test module
    line2 = vector.kd_nearest(pts, rec_pts1, line_site_col)

    assert (len(line2) == 2) & isinstance(line2, gpd.GeoDataFrame) & line2[line_site_col].notnull().all()
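# Fixture sketch (hypothetical data, illustration only; the real test builds
# its inputs from packaged shapefiles): kd_nearest joins each point in `pts`
# to its nearest neighbour in `rec_pts1` and carries the id column across,
# so two input points yield two matched rows.
# line_site_col = 'site'
# pts = gpd.GeoDataFrame({'id': [1, 2]}, geometry=[Point(0, 0), Point(10, 10)], crs=2193)
# rec_pts1 = gpd.GeoDataFrame({line_site_col: [100, 200]}, geometry=[Point(1, 1), Point(9, 9)], crs=2193)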
def catch_delineate(sites, rec_streams, rec_catch, segment_id_col='nzsegment', from_node_col='FROM_NODE', to_node_col='TO_NODE', ignore_order=1, stream_order_col='StreamOrde', max_distance=np.inf, site_delineate='all', returns='catch'):
    """
    Catchment delineation using the REC streams and catchments.

    Parameters
    ----------
    sites : str path or GeoDataFrame
        Points shapefile of the sites along the streams or the equivalent GeoDataFrame.
    rec_streams : str path, GeoDataFrame, or dict
        str path to the REC streams shapefile, the equivalent GeoDataFrame, or a dict of parameters to read in an mssql table using the rd_sql function.
    rec_catch : str path, GeoDataFrame, or dict
        str path to the REC catchment shapefile, the equivalent GeoDataFrame, or a dict of parameters to read in an mssql table using the rd_sql function.
    segment_id_col : str
        The column name of the line segment id.
    from_node_col : str
        The from node column.
    to_node_col : str
        The to node column.
    ignore_order : int
        Ignore the stream orders in the search up to this int.
    stream_order_col : str
        The stream order column.
    max_distance : non-negative float, optional
        Return only neighbors within this distance. This is used to prune tree searches, so if you are doing a series of nearest-neighbor queries, it may help to supply the distance to the nearest neighbor of the most recent point. It's best to define a reasonable distance for the search.
    site_delineate : 'all' or 'between'
        Whether the catchments should be delineated all the way to the top or only in between the sites.
    returns : 'catch' or 'all'
        Return only the catchment polygons or the catchments, reaches, and sites.

    Returns
    -------
    GeoDataFrame
        Polygons
    """
    ### Parameters

    ### Modifications {segment_id_col: {NZTNODE/NZFNODE: node # to change}}
    # mods = {13053151: {segment_id_col: 13055874}, 13048353: {'NZTNODE': 13048851}, 13048498: {'NZTNODE': 13048851}, 13048490: {'ORDER': 3}}

    ### Load data
    rec_catch = load_geo_data(rec_catch)
    rec_streams = load_geo_data(rec_streams)
    pts = load_geo_data(sites)
    pts['geometry'] = pts.geometry.simplify(1)

    ### make mods
    # for i in mods:
    #     rec_streams.loc[rec_streams['segment_id_col'] == i, list(mods[i].keys())] = list(mods[i].values())

    ### Find closest REC segment to points
    if max_distance == np.inf:
        buffer_dis = 100000
    else:
        buffer_dis = max_distance

    pts_extent = box(*pts.unary_union.buffer(buffer_dis).bounds)

    # Drop low stream orders from the search, then convert the remaining
    # reach lines to points for the nearest-neighbour match
    s_order = list(range(1, ignore_order + 1))
    rec_streams2 = rec_streams[~rec_streams[stream_order_col].isin(s_order)]

    rec_pts2 = convert_lines_to_points(rec_streams2, segment_id_col, pts_extent)

    # rec_pts1 = rec_streams2[rec_streams2.intersects(pts_extent)].set_index(segment_id_col).copy()
    # coords = rec_pts1.geometry.apply(lambda x: list(x.coords)).explode()
    # geo1 = coords.apply(lambda x: Point(x))
    #
    # rec_pts2 = gpd.GeoDataFrame(coords, geometry=geo1, crs=rec_pts1.crs).reset_index()

    pts_seg = kd_nearest(pts, rec_pts2, segment_id_col, max_distance=max_distance)
    pts_seg = pts_seg[pts_seg[segment_id_col].notnull()].copy()
    nzreach = pts_seg[segment_id_col].copy().unique()

    ### Find all upstream reaches
    reaches = find_upstream(nzreach, rec_streams=rec_streams, segment_id_col=segment_id_col, from_node_col=from_node_col, to_node_col=to_node_col)

    ### Clip reaches to in-between sites if required
    if site_delineate == 'between':
        reaches1 = reaches.reset_index().copy()
        reaches2 = reaches1.loc[reaches1[segment_id_col].isin(reaches1.start.unique()), ['start', segment_id_col]]
        reaches2 = reaches2[reaches2.start != reaches2[segment_id_col]]

        grp1 = reaches2.groupby('start')

        for index, r in grp1:
            # print(index, r)
            r2 = reaches1[reaches1.start.isin(r[segment_id_col])][segment_id_col].unique()
            reaches1 = reaches1[~((reaches1.start == index) & (reaches1[segment_id_col].isin(r2)))]

        reaches = reaches1.set_index('start').copy()

    ### Extract associated catchments
    rec_catch2 = extract_catch(reaches, rec_catch=rec_catch, segment_id_col=segment_id_col)

    ### Aggregate individual catchments
    rec_shed = agg_catch(rec_catch2)
    rec_shed.columns = [segment_id_col, 'geometry', 'area']
    rec_shed1 = rec_shed.merge(pts_seg.drop('geometry', axis=1), on=segment_id_col)

    ### Return
    if returns == 'catch':
        return rec_shed1
    else:
        return rec_shed1, reaches, pts_seg
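# Usage sketch for catch_delineate (paths are hypothetical): the column
# defaults target REC v2 attribute names (nzsegment/FROM_NODE/TO_NODE). A
# finite max_distance keeps the KD-tree search and the pts_extent buffer
# local to the sites; returns='all' also hands back the upstream reaches and
# the snapped site points.
# catch1, reaches1, sites1 = catch_delineate('sites.shp', 'rec_streams.shp', 'rec_catch.shp',
#                                            max_distance=1000, site_delineate='between', returns='all')
# catch1.to_file('catchments.shp')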
def catch_delineate(sites_shp, rec_streams_shp, rec_catch_shp, max_distance=np.inf):
    """
    Catchment delineation using the REC streams and catchments.

    Parameters
    ----------
    sites_shp : str path or GeoDataFrame
        Points shapefile of the sites along the streams or the equivalent GeoDataFrame.
    rec_streams_shp : str path, GeoDataFrame, or dict
        str path to the REC streams shapefile, the equivalent GeoDataFrame, or a dict of parameters to read in an mssql table using the rd_sql function.
    rec_catch_shp : str path, GeoDataFrame, or dict
        str path to the REC catchment shapefile, the equivalent GeoDataFrame, or a dict of parameters to read in an mssql table using the rd_sql function.
    max_distance : non-negative float, optional
        Return only neighbors within this distance when matching sites to the nearest REC segment.

    Returns
    -------
    GeoDataFrame
        Polygons
    """
    ### Parameters

    ### Modifications {NZREACH: {NZTNODE/NZFNODE: node # to change}}
    mods = {
        13053151: {'NZTNODE': 13055874},
        13048353: {'NZTNODE': 13048851},
        13048498: {'NZTNODE': 13048851}
        }

    ### Load data
    rec_catch = load_geo_data(rec_catch_shp)
    rec_streams = load_geo_data(rec_streams_shp)
    pts = load_geo_data(sites_shp)
    pts['geometry'] = pts.geometry.simplify(1)

    ### make mods
    for i in mods:
        rec_streams.loc[rec_streams['NZREACH'] == i, list(mods[i].keys())] = list(mods[i].values())

    ### Find closest REC segment to points
    rec_pts1 = rec_streams.copy()
    rec_pts1['geometry'] = rec_streams.centroid

    pts_seg = kd_nearest(pts, rec_pts1, 'NZREACH', max_distance=max_distance)
    nzreach = pts_seg.copy().NZREACH.unique()

    ### Find all upstream reaches
    reaches = find_upstream(nzreach, rec_streams_shp=rec_streams)

    ### Extract associated catchments
    rec_catch = extract_catch(reaches, rec_catch_shp=rec_catch)

    ### Aggregate individual catchments
    rec_shed = agg_catch(rec_catch)
    rec_shed.columns = ['NZREACH', 'geometry', 'area']
    rec_shed1 = rec_shed.merge(pts_seg.drop('geometry', axis=1), on='NZREACH')

    ### Return
    return rec_shed1
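# Usage sketch for this legacy variant (hypothetical paths): the REC v1
# column names (NZREACH/NZTNODE) are hard-coded, so only the three inputs
# and max_distance are configurable, and the network patches in `mods` are
# always applied before the upstream trace.
# catch0 = catch_delineate('sites.shp', 'rec1_streams.shp', 'rec1_catch.shp', max_distance=1000)
# catch0.to_file('catchments_v1.shp')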