Beispiel #1
0
def sel_sites_poly(pts, poly, buffer_dis=0):
    """
    Select the points that lie within a polygon layer, optionally buffered.

    All polygons of the layer are merged into a single geometry before the buffer and the selection are applied.

    Parameters
    ----------
    pts: GeoDataFrame or str
        A GeoDataFrame of points with the site names as the index. Or a shapefile with the first column as the site names.
    poly: GeoDataFrame or str
        A GeoDataFrame of polygons with the site names as the index. Or a shapefile with the first column as the site names.
    buffer_dis: int
        Distance in coordinate system units for a buffer around the merged polygon.

    Returns
    -------
    GeoDataFrame
        The rows of the points input whose geometry is within the buffered polygon.
    """
    sites = load_geo_data(pts)
    polygons = load_geo_data(poly)

    ## Merge every polygon into one geometry and grow it by the buffer distance
    search_area = polygons.unary_union.buffer(buffer_dis)

    ## Boolean mask of the sites located inside the search area
    inside = sites.within(search_area)

    return sites[inside]
Beispiel #2
0
def closest_line_to_pts(pts, lines, line_site_col, max_distance=1000):
    """
    Determine the line closest to each point.

    For every point a circular search area is grown in steps of 50 units until at least one line intersects it or the radius has reached max_distance. The closest of the intersecting lines is then assigned to the point.

    Parameters
    ----------
    pts: GeoDataFrame
        The points input.
    lines: GeoDataFrame
        The lines input.
    line_site_col: str
        The site column from the 'lines' that should be retained at the output.
    max_distance: int
        The max distance from each point to search for a line. The last search radius is exactly max_distance, so values of 50 or less result in a single search at that radius. Try to use the shortest max_distance that will cover all of your points as a larger max_distance will significantly slow down the operation.

    Returns
    -------
    GeoDataFrame
        The points that found a line, with the value of line_site_col of the closest line added as a column. Points without a line within max_distance are left out and printed. An empty GeoDataFrame is returned when no point found a line.
    """
    search_step = 50

    ## Load data
    gdf_pts = load_geo_data(pts)
    gdf_lines = load_geo_data(lines)

    ## Process data
    matched = []
    for i in gdf_pts.index:
        # Copy so that adding the site column does not write into a slice of gdf_pts
        pts_seg = gdf_pts.loc[[i]].copy()

        # Always search at least once and never beyond max_distance
        dis = min(search_step, max_distance)
        while True:
            bound = pts_seg.buffer(dis).unary_union
            lines1 = gdf_lines[gdf_lines.intersects(bound)]
            if not lines1.empty or dis >= max_distance:
                break
            dis = min(dis + search_step, max_distance)
        if lines1.empty:
            continue

        near1 = lines1.distance(gdf_pts.geometry.loc[i]).idxmin()
        pts_seg[line_site_col] = lines1.loc[near1, line_site_col]
        matched.append(pts_seg)

    # Concatenate once at the end rather than re-copying the result for every point
    if matched:
        pts_line_seg = pd.concat(matched)
    else:
        pts_line_seg = gpd.GeoDataFrame()

    ### Determine points that did not find a line
    mis_pts = gdf_pts.loc[~gdf_pts.index.isin(pts_line_seg.index)]
    if not mis_pts.empty:
        print(mis_pts)
        print('Did not find a line segment for these sites')

    return pts_line_seg
Beispiel #3
0
def extract_catch(reaches, rec_catch, segment_id_col='nzsegment'):
    """
    Pull the catchment polygons belonging to a set of reaches out of the REC catchments layer and attach the reach attributes to them.

    Parameters
    ----------
    reaches : DataFrame
        The output DataFrame from the find_upstream function.
    rec_catch : str path or GeoDataFrame
        str path to the REC catchment shapefile or a GeoDataFrame.
    segment_id_col : str
        The column name of the line segment id. It is looked up in both reaches and the catchment layer.

    Returns
    -------
    GeoDataFrame
        The dissolved catchment polygons merged with the rows of reaches (the index of reaches becomes a column). The CRS is taken from the catchment layer.
    """
    ## Segment ids requested by the reaches table
    wanted_ids = reaches[segment_id_col].unique().astype('int32')

    all_catch = load_geo_data(rec_catch)

    ## Keep the requested catchments and merge the polygons that share a segment id
    is_wanted = all_catch[segment_id_col].isin(wanted_ids)
    selected = all_catch[is_wanted].copy()
    dissolved = selected.dissolve(segment_id_col).reset_index()
    dissolved = dissolved[[segment_id_col, 'geometry']]

    ## Attach the reach attributes and carry over the CRS of the catchment layer
    reach_table = reaches.reset_index()
    combined = dissolved.merge(reach_table, on=segment_id_col)
    combined.crs = all_catch.crs

    return combined
Beispiel #4
0
def pts_poly_join(pts, poly, poly_id_col):
    """
    Spatially join ID attributes of polygons onto the points that lie within them.

    Parameters
    ----------
    pts: GeoDataFrame
        A GeoDataFrame of points with the site names as the index.
    poly: GeoDataFrame
        A GeoDataFrame of polygons with the site names as the index.
    poly_id_col: str or list of str
        The names of the polygon columns to join.

    Returns
    -------
    GeoDataFrame
        The points that are within a polygon, with the poly_id_col column(s) attached.
    GeoDataFrame
        The polygons dissolved by poly_id_col, as used for the join.
    """
    #### Read in data
    sites = load_geo_data(pts)
    polygons = load_geo_data(poly)

    ## Accept a single column name as well as a list of names
    id_cols = [poly_id_col] if isinstance(poly_id_col, str) else poly_id_col

    ## Reduce the polygons to the ID columns and merge the shapes sharing the same ID(s)
    wanted = id_cols.copy()
    wanted.append('geometry')
    dissolved = polygons[wanted].copy().dissolve(id_cols).reset_index()

    ## Inner join: only points within a polygon are kept
    # NOTE(review): newer geopandas releases name the 'op' keyword 'predicate' - confirm against the installed version
    joined = sjoin(sites.copy(), dissolved.copy(), how='inner', op='within')

    ## Remove columns created by the join itself (anything not in either input)
    known = set(sites.columns) | set(dissolved.columns)
    extra = [name for name in joined.columns if name not in known]
    joined = joined.drop(columns=extra)

    return joined, dissolved
Beispiel #5
0
def find_upstream(nzreach,
                  rec_streams,
                  segment_id_col='nzsegment',
                  from_node_col='FROM_NODE',
                  to_node_col='TO_NODE'):
    """
    Function to estimate all of the reaches (and nodes) upstream of specific reaches.

    The stream network is walked level by level: a reach is upstream of another when its to-node equals the from-node of that reach. Every reach is collected at most once per starting reach, so loops in the node table cannot cause an endless search and reaches reachable by more than one path are not duplicated.

    Parameters
    ----------
    nzreach : list, ndarray, Series of int
        The reach IDs to start from.
    rec_streams : str path or GeoDataFrame
        str path to the REC streams shapefile or the equivalent GeoDataFrame.
    segment_id_col : str
        The column name of the line segment id.
    from_node_col : str
        The from node column
    to_node_col : str
        The to node column

    Returns
    -------
    DataFrame
        The stream attributes (without geometry) of each starting reach and all reaches upstream of it, indexed by 'start' (the starting reach ID). Empty when nzreach is empty.

    Raises
    ------
    TypeError
        If nzreach is not a list, ndarray or Series.
    """
    if not isinstance(nzreach, (list, np.ndarray, pd.Series)):
        raise TypeError('nzreach must be a list, ndarray or Series.')

    ### Load data
    rec = load_geo_data(rec_streams).drop('geometry', axis=1).copy()

    ### Run through all nzreaches
    reaches_lst = []
    for i in nzreach:
        frontier = rec[rec[segment_id_col] == i]
        visited = set(frontier[segment_id_col])
        levels = [frontier]
        while True:
            up1 = rec[rec[to_node_col].isin(frontier[from_node_col])]
            # Skip reaches already collected so that cycles terminate
            up1 = up1[~up1[segment_id_col].isin(visited)]
            if up1.empty:
                break
            visited.update(up1[segment_id_col])
            levels.append(up1)
            frontier = up1
        # A single concat per start reach; the result is a new frame that is safe to modify
        reach1 = pd.concat(levels)
        reach1['start'] = i
        reaches_lst.append(reach1)

    if not reaches_lst:
        # No starting reaches: return an empty table with the usual layout
        reaches = rec.iloc[:0].copy()
        reaches['start'] = pd.Series(dtype='int64')
        return reaches.set_index('start')

    reaches = pd.concat(reaches_lst)
    reaches.set_index('start', inplace=True)

    return reaches
Beispiel #6
0
def find_upstream(nzreach, rec_streams_shp):
    """
    Function to estimate all of the reaches (and nodes) upstream of specific reaches.

    The stream network is walked level by level using the NZREACH, NZFNODE and NZTNODE columns: a reach is upstream of another when its NZTNODE equals the NZFNODE of that reach. Every reach is collected at most once per starting reach, so loops in the node table cannot cause an endless search and reaches reachable by more than one path are not duplicated.

    Parameters
    ----------
    nzreach : list, ndarray, Series of int
        The NZ reach IDs
    rec_streams_shp : str path or GeoDataFrame
        str path to the REC streams shapefile or the equivalent GeoDataFrame.

    Returns
    -------
    DataFrame
        The stream attributes (without geometry) of each starting reach and all reaches upstream of it, indexed by 'start' (the starting reach ID). Empty when nzreach is empty.

    Raises
    ------
    TypeError
        If nzreach is not a list, ndarray or Series.
    """
    if not isinstance(nzreach, (list, np.ndarray, pd.Series)):
        raise TypeError('nzreach must be a list, ndarray or Series.')

    ### Load data
    rec = load_geo_data(rec_streams_shp).drop('geometry', axis=1)

    ### Run through all nzreaches
    reaches_lst = []
    for i in nzreach:
        frontier = rec[rec['NZREACH'] == i]
        visited = set(frontier['NZREACH'])
        levels = [frontier]
        while True:
            up1 = rec[rec['NZTNODE'].isin(frontier['NZFNODE'])]
            # Skip reaches already collected so that cycles terminate
            up1 = up1[~up1['NZREACH'].isin(visited)]
            if up1.empty:
                break
            visited.update(up1['NZREACH'])
            levels.append(up1)
            frontier = up1
        # concat returns a new frame, so adding 'start' never writes into a slice of rec
        reach1 = pd.concat(levels)
        reach1['start'] = i
        reaches_lst.append(reach1)

    if not reaches_lst:
        # No starting reaches: return an empty table with the usual layout
        reaches = rec.iloc[:0].copy()
        reaches['start'] = pd.Series(dtype='int64')
        return reaches.set_index('start')

    reaches = pd.concat(reaches_lst)
    reaches.set_index('start', inplace=True)
    return reaches
Beispiel #7
0
def extract_catch(reaches, rec_catch_shp):
    """
    Pull the catchment polygons belonging to a set of reaches out of the REC catchments layer and attach the reach attributes to them.

    Both inputs are matched on their NZREACH column.

    Parameters
    ----------
    reaches : DataFrame
        The output DataFrame from the find_upstream function.
    rec_catch_shp : str path, dict, or GeoDataFrame
        str path to the REC catchment shapefile, dict of pdsql.mssql.rd_sql parameters, or the equivalent GeoDataFrame.

    Returns
    -------
    GeoDataFrame
        The dissolved catchment polygons merged with the rows of reaches (the index of reaches becomes a column). The CRS is taken from the catchment layer.
    """
    ## Reach ids requested by the reaches table
    wanted_ids = reaches.NZREACH.unique().astype('int32').tolist()

    all_catch = load_geo_data(rec_catch_shp)

    ## Keep the requested catchments and merge the polygons that share a reach id
    is_wanted = all_catch.NZREACH.isin(wanted_ids)
    dissolved = all_catch[is_wanted].dissolve('NZREACH').reset_index()
    dissolved = dissolved[['NZREACH', 'geometry']]

    ## Attach the reach attributes and carry over the CRS of the catchment layer
    reach_table = reaches.reset_index()
    combined = dissolved.merge(reach_table, on='NZREACH')
    combined.crs = all_catch.crs

    return combined
Beispiel #8
0
# Names of the example datasets; they are resolved with get_path below
rec_catch_shp = 'rec_catch_pareora'
catch_shp = 'catchment_pareora'

# NOTE(review): sites_shp and rec_streams_shp are not assigned in this part of the file - presumably defined earlier, confirm
sites_shp_path = get_path(sites_shp)
rec_streams_shp_path = get_path(rec_streams_shp)
rec_catch_shp_path = get_path(rec_catch_shp)
catch_shp_path = get_path(catch_shp)

# Column names referenced by the examples (line_site_col is not used in the visible part)
sites_col_name = 'SITENUMBER'
poly_col_name = 'Catchmen_1'
line_site_col = 'NZREACH'

#######################################
### Examples

# Load the site points and simplify their geometries with a tolerance of 1
pts = util.load_geo_data(sites_shp_path)
pts['geometry'] = pts.geometry.simplify(1)

## Selecting points from within a polygon (here with a buffer distance of 10)
pts1 = vector.sel_sites_poly(sites_shp_path, rec_catch_shp_path, buffer_dis=10)

## Spatial join attributes of polygons to points
pts2, poly2 = vector.pts_poly_join(sites_shp_path, catch_shp_path, poly_col_name)

## Create a GeoDataFrame from x and y data
# First build a table of site ids with x and y coordinate columns and no geometry column
pts_df = pts[[sites_col_name, 'geometry']].copy()
pts_df['x'] = pts_df.geometry.x
pts_df['y'] = pts_df.geometry.y
pts_df.drop('geometry', axis=1, inplace=True)

# Then pass the id, x and y column names together with the table to xy_to_gpd
pts3 = vector.xy_to_gpd(sites_col_name, 'x', 'y', pts_df)
Beispiel #9
0
#id_col = 'Management'

#id_col = 'SITENUMBER'
#gdf_from = pts.copy()
#max_distance=500
#waterway_name=True
#
#n0 = ways1[0]['nodes']
#n1 = 1213916012
#
#osm_nodes_from = res1.copy()

#######################################
### Tests

# Module-level input shared by the tests below: the site points with geometries simplified (tolerance 1)
gdf_from = util.load_geo_data(sites_shp_path)
gdf_from['geometry'] = gdf_from.geometry.simplify(1)


def test_get_nearest():
    """
    Check that the nearest-waterway lookup for the test sites returns a GeoDataFrame with two rows.
    """
    pts1, no_node_ids = osm.get_nearest_waterways(gdf_from, id_col)

    # Separate asserts instead of one bitwise '&' expression, so a failure shows which condition broke
    assert isinstance(pts1, gpd.GeoDataFrame)
    assert len(pts1) == 2


# Runs at import time: provides the module-level pts1 that test_get_waterways reads
pts1, no_node_ids = osm.get_nearest_waterways(gdf_from, id_col)


def test_get_waterways():
    """
    Smoke test: fetching the waterways for the module-level pts1 must return a pair without raising.
    """
    result = osm.get_waterways(pts1)
    ways, way_nodes = result
Beispiel #10
0
def catch_delineate(sites,
                    rec_streams,
                    rec_catch,
                    segment_id_col='nzsegment',
                    from_node_col='FROM_NODE',
                    to_node_col='TO_NODE',
                    ignore_order=1,
                    stream_order_col='StreamOrde',
                    max_distance=np.inf,
                    site_delineate='all',
                    returns='catch'):
    """
    Catchment delineation using the REC streams and catchments.

    Each site is assigned to its nearest stream segment, all segments upstream of that segment are collected, and the catchment polygons of those segments are aggregated into one catchment per site segment.

    Parameters
    ----------
    sites : str path or GeoDataFrame
        Points shapefile of the sites along the streams or the equivalent GeoDataFrame.
    rec_streams : str path or GeoDataFrame
        str path to the REC streams shapefile, the equivalent GeoDataFrame, or a dict of parameters to read in an mssql table using the rd_sql function.
    rec_catch : str path or GeoDataFrame
        str path to the REC catchment shapefile, the equivalent GeoDataFrame, or a dict of parameters to read in an mssql table using the rd_sql function.
    segment_id_col : str
        The column name of the line segment id.
    from_node_col : str
        The from node column
    to_node_col : str
        The to node column
    ignore_order : int
        Stream segments with a stream order from 1 up to and including this int are left out of the nearest segment search. They are still used for the upstream search.
    stream_order_col : str
        The stream order column.
    max_distance : non-negative float, optional
        Maximum distance between a site and a stream segment, passed on to the nearest neighbour search. Sites without a segment id after the search are dropped. With the default of np.inf, a buffer of 100000 around the sites is used to limit the stream segments that are considered. It's best to define a reasonable distance for the search.
    site_delineate : 'all' or 'between'
        Whether the catchments should be delineated all the way to the top or only in between the sites. Any value other than 'between' delineates all the way to the top.
    returns : 'catch' or 'all'
        Return only the catchment polygons or the catchments, reaches, and sites. Any value other than 'catch' returns all three.

    Returns
    -------
    GeoDataFrame
        Polygons of the aggregated catchments merged with the site attributes, when returns is 'catch'.
    tuple
        Otherwise (catchments, reaches, sites with their assigned segment id).
    """

    ### Parameters

    ### Modifications {segment_id_col: {NZTNODE/NZFNODE: node # to change}}
    # mods = {13053151: {segment_id_col: 13055874}, 13048353: {'NZTNODE': 13048851}, 13048498: {'NZTNODE': 13048851}, 13048490: {'ORDER': 3}}

    ### Load data
    rec_catch = load_geo_data(rec_catch)
    rec_streams = load_geo_data(rec_streams)
    pts = load_geo_data(sites)
    # NOTE(review): this overwrites the geometry column of whatever load_geo_data returned - confirm the caller's GeoDataFrame is not modified
    pts['geometry'] = pts.geometry.simplify(1)

    ### make mods
    # for i in mods:
    #     rec_streams.loc[rec_streams['segment_id_col'] == i, list(mods[i].keys())] = list(mods[i].values())

    ### Find closest REC segment to points
    # The extent needs a finite buffer, so an unlimited max_distance falls back to 100000
    if max_distance == np.inf:
        buffer_dis = 100000
    else:
        buffer_dis = max_distance

    # Bounding box around all buffered sites
    pts_extent = box(*pts.unary_union.buffer(buffer_dis).bounds)

    # Leave the stream orders 1..ignore_order out of the nearest segment search
    s_order = list(range(1, ignore_order + 1))
    rec_streams2 = rec_streams[~rec_streams[stream_order_col].isin(s_order)]

    rec_pts2 = convert_lines_to_points(rec_streams2, segment_id_col,
                                       pts_extent)

    # rec_pts1 = rec_streams2[rec_streams2.intersects(pts_extent)].set_index(segment_id_col).copy()
    # coords = rec_pts1.geometry.apply(lambda x: list(x.coords)).explode()
    # geo1 = coords.apply(lambda x: Point(x))
    #
    # rec_pts2 = gpd.GeoDataFrame(coords, geometry=geo1, crs=rec_pts1.crs).reset_index()

    pts_seg = kd_nearest(pts,
                         rec_pts2,
                         segment_id_col,
                         max_distance=max_distance)
    # Drop the sites that did not get a segment id
    pts_seg = pts_seg[pts_seg[segment_id_col].notnull()].copy()
    nzreach = pts_seg[segment_id_col].copy().unique()

    ### Find all upstream reaches
    # Uses the full stream table, not the one filtered by stream order
    reaches = find_upstream(nzreach,
                            rec_streams=rec_streams,
                            segment_id_col=segment_id_col,
                            from_node_col=from_node_col,
                            to_node_col=to_node_col)

    ### Clip reaches to in-between sites if required
    if site_delineate == 'between':
        reaches1 = reaches.reset_index().copy()
        # Rows where the reach is itself the start reach of another site, i.e. another site lies upstream of 'start'
        reaches2 = reaches1.loc[
            reaches1[segment_id_col].isin(reaches1.start.unique()),
            ['start', segment_id_col]]
        reaches2 = reaches2[reaches2.start != reaches2[segment_id_col]]

        grp1 = reaches2.groupby('start')

        # For every site with upstream sites, remove the reaches that belong to those upstream sites
        for index, r in grp1:
            #            print(index, r)
            r2 = reaches1[reaches1.start.isin(
                r[segment_id_col])][segment_id_col].unique()
            reaches1 = reaches1[~((reaches1.start == index) &
                                  (reaches1[segment_id_col].isin(r2)))]

        reaches = reaches1.set_index('start').copy()

    ### Extract associated catchments
    rec_catch2 = extract_catch(reaches,
                               rec_catch=rec_catch,
                               segment_id_col=segment_id_col)

    ### Aggregate individual catchments
    rec_shed = agg_catch(rec_catch2)
    # NOTE(review): assumes agg_catch returns exactly three columns in the order id, geometry, area - confirm
    rec_shed.columns = [segment_id_col, 'geometry', 'area']
    rec_shed1 = rec_shed.merge(pts_seg.drop('geometry', axis=1),
                               on=segment_id_col)

    ### Return
    if returns == 'catch':
        return rec_shed1
    else:
        return rec_shed1, reaches, pts_seg
Beispiel #11
0
# File names of the test datasets inside data_dir
rec_catch_shp = 'rec_catch_pareora.shp'
catch_shp = 'catchment_pareora.shp'

# NOTE(review): data_dir, sites_shp and rec_streams_shp are not assigned in this part of the file - presumably defined earlier, confirm
sites_shp_path = os.path.join(data_dir, sites_shp)
rec_streams_shp_path = os.path.join(data_dir, rec_streams_shp)
rec_catch_shp_path = os.path.join(data_dir, rec_catch_shp)
catch_shp_path = os.path.join(data_dir, catch_shp)

# Column names of the test datasets
sites_col_name = 'SITENUMBER'
poly_col_name = 'Catchmen_1'
line_site_col = 'NZREACH'

#######################################
### Tests

# Module-level inputs: the site points with simplified geometries (tolerance 1)
pts = util.load_geo_data(sites_shp_path)
pts['geometry'] = pts.geometry.simplify(1)
# ... and the stream lines plus a copy in which every line is replaced by its centroid
rec_streams1 = util.load_geo_data(rec_streams_shp_path)
rec_pts1 = rec_streams1.copy()
rec_pts1['geometry'] = rec_streams1.centroid


def test_sel_sites_poly():
    """
    Check that selecting the test sites within the REC catchments (buffer of 10) returns a GeoDataFrame with two rows.
    """
    pts1 = vector.sel_sites_poly(sites_shp_path,
                                 rec_catch_shp_path,
                                 buffer_dis=10)

    # Separate asserts instead of one bitwise '&' expression, so a failure shows which condition broke
    assert isinstance(pts1, gpd.GeoDataFrame)
    assert len(pts1) == 2


def test_pts_poly_join():
Beispiel #12
0
def catch_delineate(sites_shp,
                    rec_streams_shp,
                    rec_catch_shp,
                    max_distance=np.inf):
    """
    Catchment delineation using the REC streams and catchments.

    Each site is assigned to the stream reach with the nearest centroid, all reaches upstream of that reach are collected, and their catchment polygons are aggregated into one catchment per site reach. A fixed set of NZTNODE corrections is applied to the stream network first.

    Parameters
    ----------
    sites_shp : str path or GeoDataFrame
        Points shapefile of the sites along the streams or the equivalent GeoDataFrame.
    rec_streams_shp : str path, GeoDataFrame, or dict
        str path to the REC streams shapefile, the equivalent GeoDataFrame, or a dict of parameters to read in an mssql table using the rd_sql function.
    rec_catch_shp : str path, GeoDataFrame, or dict
        str path to the REC catchment shapefile, the equivalent GeoDataFrame, or a dict of parameters to read in an mssql table using the rd_sql function.
    max_distance : non-negative float, optional
        Maximum distance between a site and a reach centroid, passed on to the nearest neighbour search.

    Returns
    -------
    GeoDataFrame
        Polygons of the aggregated catchments merged with the site attributes on NZREACH.
    """

    ### Hard-coded corrections of the stream network: {NZREACH: {column: new value}}
    network_fixes = {
        13053151: {'NZTNODE': 13055874},
        13048353: {'NZTNODE': 13048851},
        13048498: {'NZTNODE': 13048851},
    }

    ### Load data
    catchments = load_geo_data(rec_catch_shp)
    streams = load_geo_data(rec_streams_shp)
    sites = load_geo_data(sites_shp)
    sites['geometry'] = sites.geometry.simplify(1)

    ### Apply the corrections to the matching reaches
    for reach_id, changes in network_fixes.items():
        is_reach = streams['NZREACH'] == reach_id
        streams.loc[is_reach, list(changes)] = list(changes.values())

    ### Find closest REC segment to points (each reach is represented by its centroid)
    stream_pts = streams.copy()
    stream_pts['geometry'] = streams.centroid

    sites_seg = kd_nearest(sites,
                           stream_pts,
                           'NZREACH',
                           max_distance=max_distance)
    reach_ids = sites_seg.NZREACH.unique()

    ### Find all upstream reaches and their catchments
    upstream = find_upstream(reach_ids, rec_streams_shp=streams)
    upstream_catch = extract_catch(upstream, rec_catch_shp=catchments)

    ### Aggregate individual catchments and attach the site attributes
    watersheds = agg_catch(upstream_catch)
    watersheds.columns = ['NZREACH', 'geometry', 'area']
    site_attrs = sites_seg.drop('geometry', axis=1)

    return watersheds.merge(site_attrs, on='NZREACH')