# Example 1
def process_waps(param):
    """
    Process the WAPs (water abstraction points) from the source tables and
    save the combined result to the waps table in the ConsentsReporting db.

    Parameters
    ----------
    param : dict
        Run parameters. Must contain 'source data' (per-table connection and
        naming details, including 'gw_zones', 'sw_reaches', and 'waps') and
        'misc'['WapProcessing']['tables'] (the list of source tables to read).

    Returns
    -------
    pandas.DataFrame
        The processed WAP table that was saved.
    """
    run_time_start = pd.Timestamp.today().strftime('%Y-%m-%d %H:%M:%S')
    print(run_time_start)

    ### Read in source data and update accela tables in ConsentsReporting db
    print('--Reading in source data...')

    ## Make object to contain the source data
    db = types.SimpleNamespace()

    for t in param['misc']['WapProcessing']['tables']:
        p = param['source data'][t]
        stmt = 'select * from "{table}"'.format(table=p['table'])
        setattr(
            db, t,
            sf.read_table(p['username'], p['password'], p['account'],
                          p['database'], p['schema'], stmt))

    # Spatial data
    gw_dict = param['source data']['gw_zones']

    db.gw_zones = mssql.rd_sql(gw_dict['server'],
                               gw_dict['database'],
                               gw_dict['table'],
                               gw_dict['col_names'],
                               username=gw_dict['username'],
                               password=gw_dict['password'],
                               geo_col=True,
                               rename_cols=gw_dict['rename_cols'])

    sw_dict = param['source data']['sw_reaches']

    # Fixed: the sw_reaches query previously used the gw_zones credentials
    # (gw_dict) instead of its own (sw_dict).
    db.sw_reaches = mssql.rd_sql(sw_dict['server'],
                                 sw_dict['database'],
                                 sw_dict['table'],
                                 sw_dict['col_names'],
                                 username=sw_dict['username'],
                                 password=sw_dict['password'],
                                 geo_col=True)

    ##################################################
    ### Waps
    print('--Process Waps')

    sites1 = vector.xy_to_gpd('Wap', 'NzTmX', 'NzTmY',
                              db.sites.drop('EffectiveFromDate', axis=1))

    waps1 = sites1.merge(db.wap_sd.drop('EffectiveFromDate', axis=1), on='Wap')

    # Missing stream-depletion values are treated as zero, then rounded to int
    sd_cols = ['SD1_7Day', 'SD1_30Day', 'SD1_150Day']
    waps1[sd_cols] = waps1[sd_cols].fillna(0).round().astype(int)

    ## Aquifer tests: flag waps that have any storativity estimate
    aq1 = db.wap_aquifer_test.dropna(subset=['Storativity']).drop(
        'EffectiveFromDate', axis=1).copy()
    aq2 = aq1.groupby('Wap')['Storativity'].mean().dropna().reset_index()
    aq2.Storativity = True

    waps2 = waps1.merge(aq2, on='Wap', how='left')
    waps2.loc[waps2.Storativity.isnull(), 'Storativity'] = False

    ## Add spatial info
    # GW: assign each wap to a groundwater zone polygon
    gw_zones = db.gw_zones.copy()
    gw_zones.rename(columns={'SpatialUnitID': 'GwSpatialUnitId'}, inplace=True)

    waps3, poly1 = vector.pts_poly_join(waps2, gw_zones, 'GwSpatialUnitId')
    waps3.drop_duplicates('Wap', inplace=True)
    # 'Combined' marks zones that are part of a combined water allocation zone
    waps3['Combined'] = waps3.apply(lambda x: 'CWAZ' in x['GwSpatialUnitId'],
                                    axis=1)

    # SW: explode each reach line into its vertices so the nearest-point
    # search measures distance to the reach geometry, not its centroid
    sw1 = db.sw_reaches.copy()
    sw1.rename(columns={'SpatialUnitID': 'SwSpatialUnitId'}, inplace=True)

    lst1 = []
    for index, row in sw1.iterrows():
        for j in list(row['geometry'].coords):
            lst1.append([row['SwSpatialUnitId'], Point(j)])
    df1 = pd.DataFrame(lst1, columns=['SwSpatialUnitId', 'geometry'])
    sw2 = gpd.GeoDataFrame(df1, geometry='geometry')

    waps3b = vector.kd_nearest(waps3, sw2, 'SwSpatialUnitId')

    ## Prepare output
    waps3b['NzTmX'] = waps3b.geometry.x
    waps3b['NzTmY'] = waps3b.geometry.y

    waps4 = pd.DataFrame(waps3b.drop(['geometry'], axis=1))
    waps4[['NzTmX', 'NzTmY']] = waps4[['NzTmX', 'NzTmY']].round().astype(int)
    waps4.rename(columns={
        'Name': 'SpatialUnitName',
        'distance': 'DistanceToSw'
    },
                 inplace=True)

    ## Save data
    print('Save results')
    wap_dict = param['source data']['waps']

    waps4['EffectiveFromDate'] = run_time_start

    sf.to_table(waps4, wap_dict['table'], wap_dict['username'],
                wap_dict['password'], wap_dict['account'],
                wap_dict['database'], wap_dict['schema'], True)

    return waps4
# Example 2
def test_kd_nearest():
    """kd_nearest should return a GeoDataFrame with one row per input point
    and no missing site ids."""
    result = vector.kd_nearest(pts, rec_pts1, line_site_col)

    has_two_rows = len(result) == 2
    is_gdf = isinstance(result, gpd.GeoDataFrame)
    ids_filled = result[line_site_col].notnull().all()
    assert has_two_rows & is_gdf & ids_filled
# Example 3
def catch_delineate(sites,
                    rec_streams,
                    rec_catch,
                    segment_id_col='nzsegment',
                    from_node_col='FROM_NODE',
                    to_node_col='TO_NODE',
                    ignore_order=1,
                    stream_order_col='StreamOrde',
                    max_distance=np.inf,
                    site_delineate='all',
                    returns='catch'):
    """
    Catchment delineation using the REC streams and catchments.

    Parameters
    ----------
    sites : str path or GeoDataFrame
        Points shapefile of the sites along the streams or the equivalent GeoDataFrame.
    rec_streams : str path or GeoDataFrame
        str path to the REC streams shapefile, the equivalent GeoDataFrame, or a dict of parameters to read in an mssql table using the rd_sql function.
    rec_catch : str path or GeoDataFrame
        str path to the REC catchment shapefile, the equivalent GeoDataFrame, or a dict of parameters to read in an mssql table using the rd_sql function.
    segment_id_col : str
        The column name of the line segment id.
    from_node_col : str
        The from node column
    to_node_col : str
        The to node column
    ignore_order : int
        Ignore the stream orders in the search up to this int.
    stream_order_col : str
        The stream order column.
    max_distance : non-negative float, optional
        Return only neighbors within this distance. This is used to prune tree searches, so if you are doing a series of nearest-neighbor queries, it may help to supply the distance to the nearest neighbor of the most recent point. It's best to define a reasonable distance for the search.
    site_delineate : 'all' or 'between'
        Whether the catchments should be delineated all the way to the top or only in between the sites.
    returns : 'catch' or 'all'
        Return only the catchment polygons or the catchments, reaches, and sites

    Returns
    -------
    GeoDataFrame
        Polygons, or (polygons, reaches, sites) when returns != 'catch'.
    """

    ### Load data
    rec_catch = load_geo_data(rec_catch)
    rec_streams = load_geo_data(rec_streams)
    pts = load_geo_data(sites)
    # Simplify point geometries to 1 m tolerance before the nearest search
    pts['geometry'] = pts.geometry.simplify(1)

    ### Find closest REC segment to points
    # A finite max_distance doubles as the extent buffer; otherwise use 100 km
    if max_distance == np.inf:
        buffer_dis = 100000
    else:
        buffer_dis = max_distance

    pts_extent = box(*pts.unary_union.buffer(buffer_dis).bounds)

    # Drop low stream orders so sites don't snap to minor tributaries
    s_order = list(range(1, ignore_order + 1))
    rec_streams2 = rec_streams[~rec_streams[stream_order_col].isin(s_order)]

    rec_pts2 = convert_lines_to_points(rec_streams2, segment_id_col,
                                       pts_extent)

    pts_seg = kd_nearest(pts,
                         rec_pts2,
                         segment_id_col,
                         max_distance=max_distance)
    # Sites further than max_distance from any segment are dropped
    pts_seg = pts_seg[pts_seg[segment_id_col].notnull()].copy()
    nzreach = pts_seg[segment_id_col].unique()

    ### Find all upstream reaches
    reaches = find_upstream(nzreach,
                            rec_streams=rec_streams,
                            segment_id_col=segment_id_col,
                            from_node_col=from_node_col,
                            to_node_col=to_node_col)

    ### Clip reaches to in-between sites if required
    if site_delineate == 'between':
        reaches1 = reaches.reset_index().copy()
        # Segments that are themselves a delineation start point
        reaches2 = reaches1.loc[
            reaches1[segment_id_col].isin(reaches1.start.unique()),
            ['start', segment_id_col]]
        reaches2 = reaches2[reaches2.start != reaches2[segment_id_col]]

        grp1 = reaches2.groupby('start')

        # For each start, remove reaches already covered by an upstream site
        for index, r in grp1:
            r2 = reaches1[reaches1.start.isin(
                r[segment_id_col])][segment_id_col].unique()
            reaches1 = reaches1[~((reaches1.start == index) &
                                  (reaches1[segment_id_col].isin(r2)))]

        reaches = reaches1.set_index('start').copy()

    ### Extract associated catchments
    rec_catch2 = extract_catch(reaches,
                               rec_catch=rec_catch,
                               segment_id_col=segment_id_col)

    ### Aggregate individual catchments
    rec_shed = agg_catch(rec_catch2)
    rec_shed.columns = [segment_id_col, 'geometry', 'area']
    rec_shed1 = rec_shed.merge(pts_seg.drop('geometry', axis=1),
                               on=segment_id_col)

    ### Return
    if returns == 'catch':
        return rec_shed1
    else:
        return rec_shed1, reaches, pts_seg
# Example 4
def catch_delineate(sites_shp,
                    rec_streams_shp,
                    rec_catch_shp,
                    max_distance=np.inf):
    """
    Catchment delineation using the REC streams and catchments.

    Parameters
    ----------
    sites_shp : str path or GeoDataFrame
        Points shapefile of the sites along the streams or the equivalent GeoDataFrame.
    rec_streams_shp : str path, GeoDataFrame, or dict
        str path to the REC streams shapefile, the equivalent GeoDataFrame, or a dict of parameters to read in an mssql table using the rd_sql function.
    rec_catch_shp : str path, GeoDataFrame, or dict
        str path to the REC catchment shapefile, the equivalent GeoDataFrame, or a dict of parameters to read in an mssql table using the rd_sql function.
    max_distance : non-negative float, optional
        Only match sites to a REC segment within this distance; used to prune
        the nearest-neighbor search.

    Returns
    -------
    GeoDataFrame
        Polygons
    """

    ### Modifications {NZREACH: {NZTNODE/NZFNODE: node # to change}}
    # Known REC network errors patched before delineation
    mods = {
        13053151: {
            'NZTNODE': 13055874
        },
        13048353: {
            'NZTNODE': 13048851
        },
        13048498: {
            'NZTNODE': 13048851
        }
    }

    ### Load data
    rec_catch = load_geo_data(rec_catch_shp)
    rec_streams = load_geo_data(rec_streams_shp)
    pts = load_geo_data(sites_shp)
    # Simplify point geometries to 1 m tolerance before the nearest search
    pts['geometry'] = pts.geometry.simplify(1)

    ### Make mods
    for i in mods:
        rec_streams.loc[rec_streams['NZREACH'] == i,
                        list(mods[i].keys())] = list(mods[i].values())

    ### Find closest REC segment to points (matched against segment centroids)
    rec_pts1 = rec_streams.copy()
    rec_pts1['geometry'] = rec_streams.centroid

    pts_seg = kd_nearest(pts, rec_pts1, 'NZREACH', max_distance=max_distance)
    nzreach = pts_seg['NZREACH'].unique()

    ### Find all upstream reaches
    reaches = find_upstream(nzreach, rec_streams_shp=rec_streams)

    ### Extract associated catchments
    rec_catch = extract_catch(reaches, rec_catch_shp=rec_catch)

    ### Aggregate individual catchments
    rec_shed = agg_catch(rec_catch)
    rec_shed.columns = ['NZREACH', 'geometry', 'area']
    rec_shed1 = rec_shed.merge(pts_seg.drop('geometry', axis=1), on='NZREACH')

    ### Return
    return rec_shed1