Exemple #1
0
    def _get_matches(self, pts, S1_df, S2_df):
        """Attach to every point the (S2 product, S1 product) pairs that cover it.

        For each row of ``pts`` the S1 footprints and S2 intersection
        geometries selected by a ``within`` bulk query of the point's
        ``bbox_wgs`` are collected.  Each selected S2 row is then paired with
        every selected S1 row whose ``beginposition`` lies strictly within
        ``self.CONFIG['day_offset']`` days of the S2 ``beginposition``.

        Side effects: ``beginposition`` (and S1 ``endposition``) are converted
        to datetimes in place on the passed frames, and a ``matches`` column is
        added to ``pts``.

        Parameters
        ----------
        pts : DataFrame with a ``bbox_wgs`` column of shapely geometries
        S1_df : DataFrame with ``footprint`` (WKT) and ``beginposition`` columns
        S2_df : DataFrame with ``intersection_geom`` (shapely),
            ``beginposition`` and ``level1cpdiidentifier`` columns

        Returns
        -------
        DataFrame
            The rows of ``pts`` that have at least one match; ``matches`` holds
            a list of ``[level1cpdiidentifier, S1 index label]`` pairs.
        """
        # Loop-invariant: the half-width of the acquisition time window.
        day_offset = timedelta(days=self.CONFIG['day_offset'])

        def matches_for_point(pt_pos):
            # query_bulk reports *positional* indices, so points are addressed
            # by position here; addressing them by index label (pt.name) only
            # works when pts happens to carry a default 0..n-1 index.
            S1_slice = S1_df.iloc[Q_S1.loc[Q_S1['pt_idx'] == pt_pos, 'S1_idx'], :]
            # Explicit copy: a column is added below and must not be written
            # through to (or warn about) the parent frame.
            S2_slice = S2_df.iloc[Q_S2.loc[Q_S2['pt_idx'] == pt_pos, 'S2_idx'], :].copy()

            if len(S2_slice) == 0 or len(S1_slice) == 0:
                return []

            def s1_in_window(s2_row):
                lower = s2_row['beginposition'] - day_offset
                upper = s2_row['beginposition'] + day_offset
                in_window = (S1_slice['beginposition'] > lower) & (S1_slice['beginposition'] < upper)
                return S1_slice.loc[in_window, :].index.values

            S2_slice['matches'] = S2_slice.apply(s1_in_window, axis=1)
            # One row per (S2, S1) pair; S2 rows without any S1 partner explode
            # to NaN and are dropped.
            S2_slice = S2_slice.explode('matches').reset_index()
            S2_slice = S2_slice.loc[~S2_slice['matches'].isna()]
            return S2_slice[['level1cpdiidentifier', 'matches']].values.tolist()

        S1_tree = pygeos.STRtree([pygeos.io.from_wkt(subshp) for subshp in S1_df['footprint'].values.tolist()])
        S2_tree = pygeos.STRtree([pygeos.io.from_shapely(subshp) for subshp in S2_df['intersection_geom'].values.tolist()])

        # Convert the point boxes once and reuse them for both queries.
        pt_boxes = [pygeos.io.from_shapely(pp) for pp in pts['bbox_wgs'].values.tolist()]
        Q_S1 = pd.DataFrame(S1_tree.query_bulk(pt_boxes, predicate='within').T, columns=['pt_idx', 'S1_idx'])
        Q_S2 = pd.DataFrame(S2_tree.query_bulk(pt_boxes, predicate='within').T, columns=['pt_idx', 'S2_idx'])

        S2_df['beginposition'] = pd.to_datetime(S2_df['beginposition'])
        S1_df['beginposition'] = pd.to_datetime(S1_df['beginposition'])
        S1_df['endposition'] = pd.to_datetime(S1_df['endposition'])

        matches = [matches_for_point(pt_pos) for pt_pos in range(len(pts))]
        pts['matches'] = pd.Series(matches, index=pts.index, dtype=object)

        return pts[pts['matches'].str.len() > 0]
Exemple #2
0
def prepare_possible_OD(gridDF, nodes, tolerance = 1):
    """Returns an array of tuples, with the first value the node ID to consider, and the
       second value the total population associated with this node.
       The tolerance is the size of the bounding box to search for nodes within

    Args:
        gridDF (pandas.DataFrame): A dataframe with the grid centroids and their population
            (read through its ``geometry`` and ``tot_pop`` columns)
        nodes (pandas.DataFrame): A dataframe of the road network nodes
        tolerance (float, optional): size of the bounding box . Defaults to 1.

    Returns:
        final_possible_pop (list): a list of tuples representing the nodes and their population
    """
    sindex = pyg.STRtree(nodes['geometry'])

    pos_OD_nodes = []
    pos_tot_pop = []
    for cell in gridDF.itertuples():
        ID = nearest(cell.geometry, nodes, sindex, tolerance)
        # nearest() signals "no node found" with a negative ID
        if ID > -1:
            pos_OD_nodes.append(ID)
            pos_tot_pop.append(cell.tot_pop)

    # Separate names so the ``nodes`` argument is not shadowed by the ID array.
    node_ids = np.array(pos_OD_nodes)
    populations = np.array(pos_tot_pop)

    # Several grid cells can map to the same node: sum their populations.
    final_possible_pop = [
        (node_id, populations[node_ids == node_id].sum())
        for node_id in np.unique(node_ids)
    ]
    return final_possible_pop
Exemple #3
0
def poly_tree():
    """Yield an STRtree of ten circular buffers centred on (0, 0) .. (9, 9).

    The buffer radius is HALF_UNIT_DIAG + EPS so that the midpoint between two
    neighbouring centres intersects both buffers; EPS helps mitigate rounding
    errors at that midpoint.
    """
    diagonal = np.arange(10)
    centres = pygeos.points(diagonal, diagonal)
    radius = HALF_UNIT_DIAG + EPS
    buffers = pygeos.buffer(centres, radius, quadsegs=32)
    yield pygeos.STRtree(buffers)
Exemple #4
0
    def __init__(self, iso_geographies=None, conf=None):
        """Load configuration, credentials and reference geometries.

        Parameters
        ----------
        iso_geographies : list of str, optional
            ISO_A2 country codes to restrict the country geometries to.
            When empty/None, all countries are used.
        conf : str, optional
            Path to the YAML configuration file.  Defaults to
            ``<cwd>/conf/DATA_CONFIG.yaml``.
        """
        if not conf:
            conf = os.path.join(os.getcwd(), 'conf', 'DATA_CONFIG.yaml')

        # Context managers so the file handles are closed deterministically.
        with open(conf, 'r') as conf_file:
            self.CONFIG = yaml.load(conf_file, Loader=yaml.SafeLoader)

        with open(self.CONFIG['scihub_auth'], 'r') as auth_file:
            self.sentinelsat_auth = json.load(auth_file)['scihub']

        countries = gpd.read_file(
            os.path.join(self.CONFIG['NE_ROOT'], 'ne_10m_countries.gpkg'))
        countries = countries[~countries['geometry'].isna()]

        if iso_geographies:
            countries = countries[countries['ISO_A2'].isin(iso_geographies)]

        # The dissolved geometry may be multi-part (iterate its ``geoms``) or a
        # single polygon (which has no parts and is indexed as-is).
        land = countries['geometry'].unary_union
        land_parts = getattr(land, 'geoms', [land])
        self.tree = pygeos.STRtree(
            [pygeos.io.from_shapely(subshp) for subshp in land_parts])

        self.S2_tiles = gpd.read_file(
            os.path.join(self.CONFIG['NE_ROOT'], 'S2_utm.gpkg'))

        # Get datastrip_ids after Nov-2019 from GCP buckets
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = self.CONFIG[
            'gcp_credentials_path']
        self.client = storage.Client()
        self.bucketname = 'gcp-public-data-sentinel-2'
Exemple #5
0
def test_flush_geometries(tree):
    """Querying a tree must not crash after its source array is cleared."""
    arr = pygeos.points(np.arange(10), np.arange(10))
    # NOTE: rebinding ``tree`` discards the object received as the argument;
    # only the tree built here is exercised.
    tree = pygeos.STRtree(arr)
    # Dereference geometries
    arr[:] = None
    # Still it does not lead to a segfault
    tree.query(point)
def mp_sjoin_dissolve(df_left, df_right, left_geom_column, right_geom_column, N_workers, logger, include_geometry=False):
    """Spatially join two WKT-geometry frames using a pool of worker processes.

    An STRtree over the right geometries is bulk-queried with the left
    geometries to get candidate index pairs.  The candidate pairs are split
    into ``N_workers`` chunks and handed to ``chunk_worker`` together with the
    shared-memory specs of both frames; the per-chunk result lists are
    concatenated.

    Parameters
    ----------
    df_left, df_right : DataFrame
        Must contain a ``unique_id`` column and the respective geometry column
        holding WKT strings.
    left_geom_column, right_geom_column : str
    N_workers : int
        Number of worker processes (and chunks).
    logger : logging.Logger
    include_geometry : bool, optional
        Forwarded unchanged to ``chunk_worker``.

    Returns
    -------
    list
        Flattened list of the items returned by all workers.
    """
    ### get and query tree using pygeos
    logger.info('Getting STRTree and querying')
    tree = pygeos.STRtree([pygeos.io.from_wkt(el) for el in df_right[right_geom_column].values])
    Q = tree.query_bulk([pygeos.io.from_wkt(el) for el in df_left[left_geom_column].values])
    logger.info(f'Got tree of with {Q.shape[1]} results, now verifying intersections.')

    logger.info('Assigning SharedMemory')
    left_shm = right_shm = None
    try:
        left_shm, left_spec = to_shm(df_left[['unique_id', left_geom_column]], 'left')
        right_shm, right_spec = to_shm(df_right[['unique_id', right_geom_column]], 'right')

        # do multiprocess
        logger.info('Async SJoin')
        # +1 so that N_workers chunks always cover every candidate pair
        chunk = Q.shape[1] // N_workers + 1
        pairs = Q.T

        args = [
            (left_spec, right_spec, include_geometry, pairs[ii * chunk:(ii + 1) * chunk, :])
            for ii in range(N_workers)
        ]

        with mp.Pool(N_workers) as pool:
            res = pool.starmap(chunk_worker, args)

        res = [item for sublist in res for item in sublist]

        logger.info(f'obtained results {len(res)}')
    finally:
        # Release shared memory even when the pool or a worker raised,
        # otherwise the segments outlive the process.
        for shm in (left_shm, right_shm):
            if shm is not None:
                shm.close()
                shm.unlink()

    return res
Exemple #7
0
def find_contiguous_groups(geometries):
    """Group geometries that are connected through intersections.

    Every geometry is tested against all others with an STRtree
    ``intersects`` query; the resulting pairs are fed to a DirectedGraph
    whose components become the groups.

    Parameters
    ----------
    geometries : ndarray of pygeos geometries


    Returns
    -------
    DataFrame indexed on the integer index of geometries, with a ``group``
    column holding the number of the component each geometry belongs to
    """
    src, dst = pg.STRtree(geometries).query_bulk(geometries,
                                                 predicate="intersects")

    # a geometry always intersects itself; those pairs carry no information
    distinct = src != dst
    src, dst = src[distinct], dst[distinct]

    components = DirectedGraph.from_arrays(src, dst).components()
    records = [(number, list(members))
               for number, members in enumerate(components)]

    table = pd.DataFrame(records, columns=["group", "index"])
    # one row per member, keyed by the member's position in ``geometries``
    return table.explode("index").set_index("index")
Exemple #8
0
def spatial_join(points: Sequence[pygeos.Geometry],
                 voronoi_polygons: Sequence[pygeos.Geometry]) -> Sequence[int]:
    """Return, for every point, the index of the polygon it intersects.

    Every point is expected to intersect exactly one polygon, so the point
    indices reported by the bulk query must be exactly ``0 .. len(points)-1``.

    Raises:
        AssertionError: if any point intersects zero or several polygons.
            Raised explicitly so the check also runs under ``python -O``.
    """
    point_idx, polygon_idx = pygeos.STRtree(voronoi_polygons).query_bulk(
        points, predicate="intersects")

    # Compare against the number of *points*, not the number of hits:
    # otherwise trailing points without any hit would go unnoticed.
    expected = range(len(points))
    if point_idx.size != len(expected) or not (point_idx == expected).all():
        raise AssertionError(
            "each point must intersect exactly one polygon")
    return polygon_idx
Exemple #9
0
def summarize_raster_by_geometry(geometries,
                                 extract_func,
                                 outfilename,
                                 progress_label="",
                                 bounds=None,
                                 **kwargs):
    """Summarize values of input dataset by geometry and writes results to
    a feather file, with one column for shape_mask and one for each raster value.

    Nothing is written (and None is returned) when no geometry is left after
    the optional bounds filter or when no geometry produced results.

    Parameters
    ----------
    geometries : Series of pygeos geometries, indexed by HUC12 / marine block
    extract_func : function that extracts results for each geometry
        Called as ``extract_func([geojson_dict], bounds=..., **kwargs)``; a
        return value of None means "no result for this geometry".
    outfilename : str
    progress_label : str
    bounds : sequence of 4 floats, optional
        When given, only geometries returned by an STRtree query of this box
        are processed.
    **kwargs
        Forwarded to ``extract_func``.
    """

    if bounds is not None:
        # select only those areas that overlap input area
        tree = pg.STRtree(geometries)
        ix = tree.query(pg.box(*bounds))
        geometries = geometries.iloc[ix].copy()

    if not len(geometries):
        return

    index = []
    results = []
    # Series.items() replaces iteritems(), which was removed in pandas 2.0
    for ix, geometry in Bar(progress_label,
                            max=len(geometries)).iter(geometries.items()):
        zone_results = extract_func([to_dict(geometry)],
                                    bounds=pg.total_bounds(geometry),
                                    **kwargs)
        if zone_results is None:
            continue

        index.append(ix)
        results.append(zone_results)

    if not len(results):
        return

    df = pd.DataFrame(results, index=index)

    results = df[["shape_mask"]].copy()
    results.index.name = "id"

    avg_cols = [c for c in df.columns if c.endswith("_avg")]

    # each remaining column holds an array of counts per geometry; expand it
    # into one "<col>_<position>" column per array element
    for col in df.columns.difference(["shape_mask"] + avg_cols):
        s = df[col].apply(pd.Series).fillna(0)
        s.columns = [f"{col}_{c}" for c in s.columns]
        results = results.join(s)

    if len(avg_cols) > 0:
        # note: the rounding applies to the whole joined frame
        results = results.join(df[avg_cols]).round()

    results.reset_index().to_feather(outfilename)
Exemple #10
0
def sjoin_points_to_poly(point_df, poly_df):
    """Join polygon attributes onto the points that intersect them.

    When a point intersects several polygons a warning is printed and only
    the first polygon found for that point is kept.  Points without any
    intersecting polygon are retained.

    Parameters
    ----------
    point_df : GeoDataFrame
    poly_df : GeoDataFrame

    Returns
    -------
    GeoDataFrame
        all columns of point_df plus all non-geometry columns from poly_df
    """
    point_geoms = point_df.geometry.values.data
    poly_geoms = poly_df.geometry.values.data

    # build the tree on whichever side has more features and query it with
    # the other side; the index arrays come back in (query, tree) order
    if len(point_df) > len(poly_df):
        poly_ix, pt_ix = pg.STRtree(point_geoms).query_bulk(
            poly_geoms, predicate="intersects")
    else:
        pt_ix, poly_ix = pg.STRtree(poly_geoms).query_bulk(
            point_geoms, predicate="intersects")

    # translate positions into index labels: one row per (point, polygon) hit
    pairs = pd.DataFrame(
        {"index_right": poly_df.index.values.take(poly_ix)},
        index=point_df.index.values.take(pt_ix),
    )

    per_point = pairs.groupby(level=0)
    if per_point.size().max() > 1:
        print(
            "WARNING: multiple target areas returned in spatial join for a single point"
        )

        pairs = per_point.first()

    with_poly_label = point_df.join(pairs.index_right, how="left")
    poly_attributes = poly_df.drop(columns=["geometry"])
    joined = with_poly_label.join(poly_attributes, on="index_right")
    return joined.drop(columns=["index_right"])
Exemple #11
0
def _overlay(a, b, return_indices=False):
    """
    Intersect every geometry of b with the geometries of a that an STRtree
    query pairs it with.

    Returns the array of intersections, or ``(aix, bix, intersections)``
    when return_indices is true, where aix/bix index into a and b.
    """
    # the tree is built on a and queried with b: query indices come first
    b_idx, a_idx = pygeos.STRtree(a).query_bulk(b)
    pieces = pygeos.intersection(a[a_idx], b[b_idx])
    return (a_idx, b_idx, pieces) if return_indices else pieces
Exemple #12
0
def nearest(geom, gdf):
    """Find the element of a GeoDataFrame nearest a geometry

    Only rows returned by the STRtree query for ``geom`` are considered as
    candidates; among those, the row with the smallest distance to ``geom``
    is returned (the first one on ties).

    Raises:
        ValueError: if the spatial index returns no candidate for ``geom``.
    """
    sindex = pygeos.STRtree(gdf['geometry'])
    matches_idx = sindex.query(geom)
    if len(matches_idx) == 0:
        # min() on an empty sequence would raise the same exception type, but
        # with a message that says nothing about the actual cause.
        raise ValueError(
            "nearest: the spatial index returned no candidates for the geometry")
    nearest_geom = min(
        (gdf.iloc[match_idx] for match_idx in matches_idx),
        key=lambda match: pygeos.measurement.distance(match.geometry, geom))
    return nearest_geom
Exemple #13
0
def sjoin_geometry(left, right, predicate="intersects", how="inner"):
    """Use pygeos to do a spatial join between 2 series or ndarrays of geometries.

    Parameters
    ----------
    left : Series or ndarray
        left geometries, will form basis of index that is returned
    right : Series or ndarray
        right geometries, their indices will be returned where they meet predicate
    predicate : str, optional (default: "intersects")
        name of pygeos predicate function (any of the pygeos predicates should work: intersects, contains, within, overlaps, crosses)
    how : str, optional (default: "inner")
        one of "inner" or "left"; "right" is not supported at this time.
        "inner" returns one entry per hit.  "left" returns exactly one entry
        per left geometry: NaN where nothing matched and, when a left
        geometry has several hits, only the last hit.

    Returns
    -------
    Series
        indexed on index of left, containing values of right index

    Raises
    ------
    NotImplementedError
        if ``how`` is neither "inner" nor "left"
    """

    if how not in ("inner", "left"):
        raise NotImplementedError("Other join types not implemented")

    if isinstance(left, pd.Series):
        left_values = left.values
        left_index = left.index

    else:
        left_values = left
        left_index = np.arange(0, len(left))

    if isinstance(right, pd.Series):
        right_values = right.values
        right_index = right.index

    else:
        right_values = right
        right_index = np.arange(0, len(right))

    tree = pg.STRtree(right_values)
    # hits are 0-based positions: row 0 into left, row 1 into right
    hits = tree.query_bulk(left_values, predicate=predicate)

    if how == "inner":
        index = left_index[hits[0]]
        values = right_index[hits[1]]

    else:
        index = left_index.copy()
        # A float buffer can only hold numeric labels; fall back to object so
        # that e.g. string identifiers in the right index do not raise.
        dtype = float if pd.api.types.is_numeric_dtype(right_index) else object
        values = np.full(index.shape, np.nan, dtype=dtype)
        values[hits[0]] = right_index[hits[1]]

    return pd.Series(values, index=index, name="index_right")
Exemple #14
0
 def __init__(self, geometry):
     """Build the spatial index over ``geometry``.

     Empty geometries are replaced by None in the array handed to the tree
     (to avoid a segfault on GEOS <= 3.6, see
     https://github.com/pygeos/pygeos/issues/146 and
     https://github.com/pygeos/pygeos/issues/147); replacing instead of
     dropping keeps the positions aligned with the input.
     """
     indexable = geometry.copy()
     empty_mask = pygeos.is_empty(indexable)
     indexable[empty_mask] = None
     self._tree = pygeos.STRtree(indexable)
     # the user-facing copy keeps the empty geometries
     self.geometries = geometry.copy()
Exemple #15
0
    def setup(self):
        """Create the polygons, points and trees shared by the benchmarks."""
        # irregular polygons: merge overlapping buffers of 2000 random points
        # in [0, 500) and split the union into its parts
        seed_points = pygeos.points(np.random.random((2000, 2)) * 500)
        merged = pygeos.union_all(pygeos.buffer(seed_points, 5))
        self.polygons = pygeos.get_parts(merged)

        origin = pygeos.points(0, 0)

        self.tree = pygeos.STRtree(self.polygons)
        # a tiny first query initializes the tree
        self.tree.query(origin)

        # query points cover [-125, 625), i.e. they extend beyond the polygon
        # domain so that some of them do not overlap any polygon
        self.points = pygeos.points((np.random.random((2000, 2)) * 750) - 125)

        tree_points = pygeos.points(np.random.random((2000, 2)) * 750)
        self.point_tree = pygeos.STRtree(tree_points)
        self.point_tree.query(pygeos.points(0, 0))

        # 45 x 45 = 2025 points on an integer grid, queried with points shifted
        # by half a cell, for testing equidistant nearest neighbors
        grid_coords = np.mgrid[:45, :45].T.reshape(-1, 2)
        self.grid_point_tree = pygeos.STRtree(pygeos.points(grid_coords))
        self.grid_points = pygeos.points(grid_coords + 0.5)
Exemple #16
0
def split_edges_at_nodes_pyg(network, tolerance=1e-9):
    """Split network edges where they intersect node geometries

    Args:
        network: object with ``nodes`` and ``edges`` dataframes; edges are
            read through ``osm_id``, ``geometry`` and the infrastructure
            type attribute.
        tolerance (float, optional): distance used both when looking up the
            nodes that intersect an edge and when snapping those nodes onto
            the edge. Defaults to 1e-9.

    Returns:
        Network: the original nodes with the (possibly split) edges, holding
        the columns ``osm_id``, ``geometry`` and ``infra_type``.
    """

    #already initiate the spatial index, so we dont have to do that every time
    sindex = pygeos.STRtree(network.nodes['geometry'])

    grab_all_edges = []
    for edge in tqdm(network.edges.itertuples(index=False),
                     desc="split",
                     total=len(network.edges)):
        # honour the caller's tolerance instead of a hard-coded 1e-9
        hits = nodes_intersecting_pyg(edge.geometry,
                                      network.nodes['geometry'],
                                      sindex,
                                      tolerance=tolerance)

        # fewer than three hits: no interior node, keep the edge as it is
        if len(hits) < 3:
            # NOTE(review): this branch reads ``edge.highway`` while the split
            # branch below reads ``edge.infra_type``; both end up in the
            # 'infra_type' column - confirm which attribute is intended.
            grab_all_edges.append([[edge.osm_id], [edge.geometry],
                                   [edge.highway]])

            continue

        # get points and geometry as list of coordinates
        split_points = pygeos.coordinates.get_coordinates(
            pygeos.snap(hits, edge.geometry, tolerance=tolerance))
        coor_geom = pygeos.coordinates.get_coordinates(edge.geometry)

        # potentially split to multiple edges: positions of the edge vertices
        # that coincide with a node, paired up as consecutive (start, end)
        split_locs = np.argwhere(np.isin(coor_geom,
                                         split_points).all(axis=1))[:, 0]
        split_locs = list(zip(split_locs.tolist(), split_locs.tolist()[1:]))

        new_edges = [
            coor_geom[split_loc[0]:split_loc[1] + 1]
            for split_loc in split_locs
        ]

        grab_all_edges.append(
            [[edge.osm_id] * len(new_edges),
             [pygeos.linestrings(coords) for coords in new_edges],
             [edge.infra_type] * len(new_edges)])

    # combine all new edges: one row per (osm_id, geometry, type) triple
    edges = pd.DataFrame([
        row
        for ids, geoms, kinds in grab_all_edges
        for row in zip(ids, geoms, kinds)
    ],
                         columns=['osm_id', 'geometry', 'infra_type'])
    # return new network with split edges
    return Network(nodes=network.nodes, edges=edges)
def remove_singular_segments(segments, image_stack, image_transform, t_shape=None, bandnames_mean=["rVH_mean", "rVV_mean", "fVH_mean", "fVV_mean"], bandnames_stdev=["rVV_std", "rVH_std", "fVV_std", "fVH_std"]):
    """Remove 1-pixel segments from GeoDataFrame

    Every segment with ``area == 1`` is merged into the touching larger
    segment whose band means are most similar (smallest summed absolute
    difference).  A 1-pixel segment that has no larger, not yet removed
    neighbour at all is left in place.

    Note: ``segments`` is modified in place (its index is reset and a
    temporary "removed" column is added).

    Inputs:
    segments: geopandas GeoDataFrame
        GeoDataFrame to remove 1-pixel segments from.
    image_stack: nd array
        Image to retrieve properties from.
    image_transform: Affine
        Transformation from pixel to geographic coordinates.
    t_shape: int, float or None (default=None)
        If not None, maximum Perimeter/sqrt(Area) to be considered for merger.
    bandnames_mean: list (default=["rVH_mean", "rVV_mean", "fVH_mean", "fVV_mean"])
        Column names to store image band means (same order as bands in image_stack).
    bandnames_stdev: list (default=["rVV_std", "rVH_std", "fVV_std", "fVH_std"])
        Column names to store image band st. devs. (same order as bands in image_stack).
        NOTE(review): the default order (rVV, rVH, ...) differs from the
        order of bandnames_mean (rVH, rVV, ...) - confirm this is intended.
    Outputs:
    segments: geopandas GeoDataFrame
        GeoDataFrame without the merged 1-pixel segments.
    """
    segments["removed"] = [False] * len(segments)
    segments.reset_index(drop=True, inplace=True)

    singular_segments = segments[segments["area"] == 1]
    print("Found {} 1-pixel segments".format(len(singular_segments)))

    segments_sindex = pygeos.STRtree(segments.geometry.values.data)
    # ``left`` holds positions within singular_segments, ``right`` positions
    # within segments
    left, right = segments_sindex.query_bulk(singular_segments.geometry.values.data, predicate="touches")

    # enumerate() yields the position that ``left`` refers to directly,
    # avoiding a linear list.index() search for every segment
    for position, (seg_index, seg) in enumerate(singular_segments.iterrows()):
        possible_neighbours = segments.iloc[right[left == position]]
        possible_neighbours = possible_neighbours[~possible_neighbours["removed"]] # remove removed segments from selection
        possible_neighbours = possible_neighbours[possible_neighbours["area"] > 1] # remove 1-pixel segments from selection
        # neighbours sharing more than a single point with the segment
        neighbours = possible_neighbours[[type(el) is not shapely.geometry.Point for el in possible_neighbours.geometry.buffer(0).intersection(seg.geometry.buffer(0))]]
        if len(neighbours) == 0: # only look at 2-connectivity if no neighbours in 1-connectivity (should not happen)
            print("No neighbours found for segment {}. Swithching to 2-connectivity".format(seg_index))
            neighbours = possible_neighbours
        if len(neighbours) == 0:
            # nothing to merge into; the reductions below would fail on an
            # empty selection, so the segment is kept unchanged
            continue
        diff = abs(neighbours[bandnames_mean]- seg[bandnames_mean]) # difference of means current segment and neighbours
        total_diff = np.sum(np.array(diff), axis=1)
        most_similar_segment = neighbours[total_diff == np.min(total_diff)].iloc[0]
        seg_updated, _ = merge_similar_segments(most_similar_segment.copy(), seg.copy(), image_stack, image_transform, bandnames_mean, bandnames_stdev, t_shape)
        segments.loc[most_similar_segment.name] = seg_updated
        segments.loc[seg_index, "removed"] = True
    return segments[segments["removed"] == False].drop("removed", axis=1)
Exemple #18
0
def summarize_by_huc12(geometries):
    """Summarize by HUC12

    Extracts results for every geometry that intersects the first geometry
    stored in ``slr_bounds_filename`` and writes them to
    ``results_filename`` as a feather file.  Nothing is written when no
    geometry intersects the bounds or when no geometry produced results.

    Parameters
    ----------
    geometries : Series of pygeos geometries, indexed by HUC12 id
    """

    # find the indexes of the geometries that overlap with SLR bounds; these are the only
    # ones that need to be analyzed for SLR impacts
    slr_bounds = gp.read_feather(slr_bounds_filename).geometry
    tree = pg.STRtree(geometries)
    ix = tree.query(slr_bounds.geometry.values.data[0], predicate="intersects")
    geometries = geometries.iloc[ix].copy()

    if not len(geometries):
        return

    results = []
    index = []
    # Series.items() replaces iteritems(), which was removed in pandas 2.0
    for huc12, geometry in Bar(
        "Calculating SLR counts for HUC12", max=len(geometries)
    ).iter(geometries.items()):
        zone_results = extract_by_geometry(
            [to_dict(geometry)], bounds=pg.total_bounds(geometry)
        )
        if zone_results is None:
            continue

        index.append(huc12)
        results.append(zone_results)

    if not len(results):
        # no geometry produced data: the column selection below would fail
        # on an empty frame
        return

    df = pd.DataFrame(results, index=index)

    # reorder columns
    df = df[["shape_mask"] + list(df.columns.difference(["shape_mask"]))]
    # extract only areas that actually had SLR pixels
    df = df[df[df.columns[1:]].sum(axis=1) > 0]
    df.columns = [str(c) for c in df.columns]
    df = df.reset_index().rename(columns={"index": "id"}).round()
    df.to_feather(results_filename)
def tile_segments(segments, tile_size=(5000,5000)):
    """Split up segments in subsets

    The total bounds of ``segments`` are covered with a grid of tiles that
    starts in the upper-left corner.  Every segment is assigned to the first
    tile (in row-major order) it intersects, so each segment ends up in
    exactly one subset.  ``segments`` itself is not modified.

    Inputs:
    segments: geopandas GeoDataFrame
        Segments to split up.
    tile_size: tuple
        Size of subset footprints (geographic coordinates), as
        (size along x, size along y).
    Outputs:
    tiled_segments: list
        List of segments subsets, one per tile, each with a fresh 0..n-1 index.
    """
    bounds = segments.total_bounds
    tile_width, tile_height = tile_size
    # number of tile columns / rows needed to cover the x / y extent
    nct, nrt = np.ceil((bounds[2:] - bounds[:2]) / tile_size).astype('int')
    # Calculate tile bboxes
    tile_numbers = np.arange(nrt*nct)
    tile_ir = tile_numbers // nct
    tile_ic = tile_numbers % nct
    # Columns advance by the x size and rows by the y size (consistent with
    # how nct and nrt are derived above).
    xmin = bounds[0] + tile_ic * tile_width
    ymax = bounds[3] - tile_ir * tile_height
    xmax = xmin + tile_width
    ymin = ymax - tile_height
    tile_boxes = pygeos.creation.box(xmin, ymin, xmax, ymax)
    # Spatial query on segments: ``left`` are tile numbers, ``right`` segment positions
    segments_sindex = pygeos.STRtree(segments.geometry.values.data)
    left, right = segments_sindex.query_bulk(tile_boxes, predicate="intersects")
    # Divide segments amongst tiles; bookkeeping is kept in a local mask so
    # that no helper column is left behind on the caller's GeoDataFrame
    taken = np.zeros(len(segments), dtype=bool)
    tiled_segments = []
    for tile_i in tile_numbers:
        candidates = right[left == tile_i]
        fresh = candidates[~taken[candidates]]
        taken[fresh] = True
        tiled_segments.append(segments.iloc[fresh].reset_index(drop=True))
    # Return
    return tiled_segments
Exemple #20
0
def occult(lines: LineCollection, tolerance: float) -> LineCollection:
    """
    Remove occulted lines.

    The order of the geometries in 'lines' matters, see example below.

    'tolerance' controls the distance tolerance between the first and last points
    of a geometry to consider it closed.

    Examples:
        $ vpype line 0 0 5 5 rect 2 2 1 1 occult show  # line is occulted by rect

        $ vpype rect 2 2 1 1 line 0 0 5 5 occult show  # line is NOT occulted by rect,
        as the line is drawn after the rectangle.
    """

    geoms = np.array(
        [pygeos.linestrings(list(zip(path.real, path.imag))) for path in lines]
    )

    for position, geom in enumerate(geoms):
        vertices = pygeos.get_coordinates(geom)
        start, end = vertices[0], vertices[-1]
        gap = math.hypot(end[0] - start[0], end[1] - start[1])

        # only closed geometries can hide anything
        if not gap < tolerance:
            continue

        # index the geometries that come earlier in the collection and cut
        # away the parts of them that fall inside this closed shape
        earlier = pygeos.STRtree(geoms[:position])
        mask_polygon = pygeos.polygons(vertices)
        hidden = earlier.query(mask_polygon, predicate="intersects")
        geoms[hidden] = pygeos.set_operations.difference(geoms[hidden], mask_polygon)

    new_lines = LineCollection()
    for geom in geoms:
        # a clipped line may consist of several parts; emit each separately
        for part_number in range(pygeos.get_num_geometries(geom)):
            part = pygeos.get_coordinates(pygeos.get_geometry(geom, part_number))
            new_lines.append(part[:, 0] + part[:, 1] * 1j)

    return new_lines
Exemple #21
0
 def time_tree_create(self):
     """Build an STRtree over ``self.polygons`` and run one small query."""
     fresh_tree = pygeos.STRtree(self.polygons)
     origin = pygeos.points(0, 0)
     fresh_tree.query(origin)
Exemple #22
0
    factor=MASK_FACTOR,
    ignore_zero=False,
)

### Process freshwater resilience by watershed

print("Processing freshwater resilience")

# Boundary geometry used below to select the watersheds to keep
bnd = get_input_area_boundary("nn")
# Only the RES_CLASS attribute is read; geometries are reprojected to DATA_CRS
df = read_dataframe(
    src_dir /
    "indicators/Freshwater_Resilience/FW_resilience_highesthigh_watersheds.shp",
    columns=["RES_CLASS"],
).to_crs(DATA_CRS)

# Keep only the watersheds that intersect the boundary
tree = pg.STRtree(df.geometry.values.data)
ix = tree.query(bnd, predicate="intersects")
df = df.iloc[ix].copy()

# remap values so that they are 1-4; 0 is fill value, 255 nodata
# NOTE(review): the mapping below only produces the values 4, 3 and 2 - both
# "Non-Complex" classes map to 2, so 1 is never assigned.  Confirm whether
# "Non-Complex: High Relative Score" was meant to be 1.
# RES_CLASS values that are not listed here become NaN in "value".
df["value"] = df.RES_CLASS.map({
    "Complex: Highest Relative Resilience": 4,
    "Complex: High Relative Resilience": 3,
    "Non-Complex: Highest Relative Score": 2,
    "Non-Complex: High Relative Score": 2,
})

# dissolve by value
df = dissolve(explode(df), by=["value"])

# rasterize
Exemple #23
0
def test_init_increases_refcount():
    """Creating a tree must increase the reference count of its geometries."""
    arr = np.array([point])
    with assert_increases_refcount(point):
        # bound to a name so the tree is still alive when the block exits
        _ = pygeos.STRtree(arr)
    ### Associate with waterbody drain points
    print("Joining to waterbody drain points...")
    join_start = time()

    drains = gp.read_feather(
        clean_dir / huc2 / "waterbody_drain_points.feather",
        columns=["wbID", "drainID", "lineID", "geometry"],
    ).set_index("drainID")

    # find any waterbody drain points within MAX_DRAIN_DISTANCE of dam polygons

    # Find the nearest dam polygon for each drain, within MAX_DRAIN_DISTANCE
    # We do it this way because the dam may intersect or affect multiple drain points
    # so we can't always take the first or nearest from the dam's perspective
    tmp_dams = dams.groupby("damID").geometry.first()
    tree = pg.STRtree(tmp_dams.values.data)
    drain_ix, dam_ix = tree.nearest_all(
        drains.geometry.values.data, max_distance=MAX_DRAIN_DISTANCE
    )
    near_drains = pd.DataFrame(
        {"drainID": drains.index.values.take(drain_ix),},
        index=pd.Series(tmp_dams.index.values.take(dam_ix), name="damID"),
    ).join(drains, on="drainID")

    # near_drains = pd.DataFrame(
    #         {
    #             "damID"
    #             "drainID": drains.index.values.take(drain_ix),
    #         },
    #         index=pd.Series(dams.index.values.take(dam_ix), name="damPtID"),
    #     ).join(drains, on="drainID")
Exemple #25
0
def test_del_decreases_refcount():
    """Deleting a tree must decrease the reference count of its geometries."""
    arr = np.array([point])
    # the tree is created outside the checked block: only the effect of
    # ``del`` is measured
    tree = pygeos.STRtree(arr)
    with assert_decreases_refcount(point):
        del tree
Exemple #26
0
def verify(g, shp, thorough=False):
    """Check a grid against a coastline and report whether it is valid.

    Grid nodes that are contained by the coastline polygons make the grid
    invalid.  With ``thorough=True`` the overlap between the grid elements
    and the coastline polygons is computed and counted as well, but that
    count only affects which message is logged, not the returned value.

    Parameters
    ----------
    g : object exposing ``Dataset`` with the variables
        ``SCHISM_hgrid_node_x``, ``SCHISM_hgrid_node_y``,
        ``SCHISM_hgrid_face_nodes``, ``node``, ``id`` and ``type``
    shp : GeoDataFrame with the coastline polygons (sliced with ``.cx``)
    thorough : bool, optional
        also intersect the grid elements with the coastline

    Returns
    -------
    bool
        True when no grid node is contained by the coastline, else False
    """
    #---------------------------------------------------------------------
    logger.info(' Verify grid against coastline\n')
    #---------------------------------------------------------------------

    # bounding box of the grid nodes
    lon_min = g.Dataset.SCHISM_hgrid_node_x.values.min()
    lon_max = g.Dataset.SCHISM_hgrid_node_x.values.max()
    lat_min = g.Dataset.SCHISM_hgrid_node_y.values.min()
    lat_max = g.Dataset.SCHISM_hgrid_node_y.values.max()

    # coastline features inside the grid's bounding box
    c = shp.cx[lon_min:lon_max, lat_min:lat_max]

    # ## Test polygons

    d = g.Dataset

    x = d.SCHISM_hgrid_node_x.values
    y = d.SCHISM_hgrid_node_y.values
    tri = d.SCHISM_hgrid_face_nodes.values

    nodes = pd.DataFrame({'lon': x, 'lat': y})

    # one row per element holding its three node numbers
    elems = pd.DataFrame(tri, columns=['a', 'b', 'c'])

    # NOTE: bnodes is not used further in this function
    bnodes = g.Dataset[['node', 'id', 'type']].to_dataframe()

    # ### Find the invalid nodes (that cross the coasts)
    cos = pygeos.from_shapely(c.geometry)
    cos_ = pygeos.set_operations.union_all(cos)

    gps = pygeos.points(list(nodes.values))

    gtree = pygeos.STRtree(gps)

    # indices of the grid nodes that the merged coastline contains
    invs = gtree.query(cos_, predicate='contains').tolist()

    #---------------------------------------------------------------------
    logger.info('Number of nodes within the coastlines {}\n'.format(len(invs)))
    #---------------------------------------------------------------------

    nps = len(invs)

    # nels stays 1 unless the thorough check runs, so the "fully verified"
    # branch at the end can only be reached with thorough=True
    nels = 1

    if thorough:

        # ### Find invalid elements (that cross land)

        # cells to polygons
        ap = nodes.loc[elems.a]
        bp = nodes.loc[elems.b]
        cp = nodes.loc[elems.c]

        elems['ap'] = ap.values.tolist()
        elems['bp'] = bp.values.tolist()
        elems['cp'] = cp.values.tolist()

        # concatenate the [lon, lat] pairs of a, b, c and a again (closing the
        # ring), then regroup the flat list into pairs of n=2 values
        n = 2
        al = elems.ap + elems.bp + elems.cp + elems.ap
        coords = [[l[i:i + n] for i in range(0, len(l), n)] for l in al]
        elems['coordinates'] = coords

        jig = pygeos.polygons(coords)

        # NOTE: jtree is not used further in this function
        jtree = pygeos.STRtree(jig)

        jig_ = pygeos.set_operations.union_all(jig)

        # part of the merged elements that overlaps the merged coastline
        cross = pygeos.set_operations.intersection(jig_, cos_)

        # #### convert to dataframe

        fd = pd.DataFrame({'overlap': pygeos.to_wkt(cross)}, index=[0])

        fd['overlap'] = fd['overlap'].apply(shapely.wkt.loads)

        gover = gp.GeoDataFrame(fd, geometry='overlap')

        # #### Reject small injuctions
        # split the overlap into its parts and drop the parts with zero area
        ipols = gover.explode().loc[0]

        ipols.columns = ['geometry']

        mask = ipols.area.values == 0.

        ipols = ipols[~mask].reset_index(drop=True)
        ipols = gp.GeoDataFrame(ipols)

        #---------------------------------------------------------------------
        logger.info(
            'Number of elements intersecting the coastlines {}\n'.format(
                ipols.shape[0]))
        #---------------------------------------------------------------------

        nels = ipols.shape[0]

    if nps == 0 and nels == 0:
        #---------------------------------------------------------------------
        logger.info('Grid is verified against the coastline')
        #---------------------------------------------------------------------
        return True
    elif nps == 0:
        # no node lies within the coastline; nels is not required to be zero
        #---------------------------------------------------------------------
        logger.info('Grid is node verified against the coastline')
        #---------------------------------------------------------------------
        return True
    else:
        #---------------------------------------------------------------------
        logger.warning('Grid is not verified against the coastline')
        #---------------------------------------------------------------------
        return False
Exemple #27
0
def test_len():
    """len() of a tree does not count None entries of the input array."""
    with_gap = np.array([point, None, point])
    assert len(pygeos.STRtree(with_gap)) == 2
Exemple #28
0
def test_geometries_property():
    """The ``geometries`` property returns the very array the tree was built from."""
    source = np.array([point])
    assert pygeos.STRtree(source).geometries is source
# note: drop any from NABD that have duplicate NIDID
# (keep=False drops every row of a duplicated NIDID, not just the repeats;
# rows without NIDID are dropped first)
nabd = (
    read_dataframe(src_dir / "NABD_V2_beta/NABD_V2_beta.shp", columns=["NIDID"])
    .dropna(subset=["NIDID"])
    .drop_duplicates(subset=["NIDID"], keep=False)
    .to_crs(CRS)
    .set_index("NIDID")
)


### Select within outer HUC4s
huc4 = gp.read_feather(boundaries_dir / "outer_huc4.feather")

# The three selections below follow the same pattern: the tree is built on the
# dataset, queried with every HUC4 geometry, and ``right`` holds the positions
# of the dataset rows that were hit.  np.unique removes the repeats of rows
# that intersect more than one HUC4.

# select out NID within outer HUC4s
tree = pg.STRtree(nid.geometry.values.data)
left, right = tree.query_bulk(huc4.geometry.values.data, predicate="intersects")
ix = np.unique(right)
nid = nid.iloc[ix].copy()

# select out prev within outer HUC4s
tree = pg.STRtree(prev.geometry.values.data)
left, right = tree.query_bulk(huc4.geometry.values.data, predicate="intersects")
ix = np.unique(right)
prev = prev.iloc[ix].copy()

# select out prev NID within outer HUC4s
tree = pg.STRtree(prev_nid.geometry.values.data)
left, right = tree.query_bulk(huc4.geometry.values.data, predicate="intersects")
ix = np.unique(right)
prev_nid = prev_nid.iloc[ix].copy()
def union_or_combine(geometries, grid_size=None, op="union"):
    """Merge intersecting polygons and collect everything in one MultiPolygon.

    An STRtree ``intersects`` query finds the geometries that intersect
    another one.  Those are merged per group of adjacent geometries; all
    others are passed through untouched.  A single input geometry, or input
    without any intersections, is wrapped in a MultiPolygon as it is.

    With op="coverage_union" the geometries must be polygons and
    topologically related, or this will produce bad output or fail outright.
    See docs for coverage_union in GEOS.

    Parameters
    ----------
    geometries : ndarray of single part polygons
        a message is printed (processing continues) if any geometry has a
        type id other than 3
    grid_size : [type], optional (default: None)
        provided to union_all; otherwise no effect
    op : str, one of {'union', 'coverage_union'}
        any value other than 'coverage_union' selects union_all

    Returns
    -------
    MultiPolygon
    """

    type_ids = pg.get_type_id(geometries)
    if not (type_ids == 3).all():
        print("Inputs to union or combine must be single-part geometries")

    if len(geometries) == 1:
        return pg.multipolygons(geometries)

    src, dst = pg.STRtree(geometries).query_bulk(geometries,
                                                 predicate="intersects")
    # every geometry intersects itself; ignore those pairs
    distinct = src != dst
    src, dst = src[distinct], dst[distinct]

    # nothing intersects anything else: just combine the parts
    if len(src) == 0:
        return pg.multipolygons(geometries)

    # positions that take part in at least one intersection, and the rest
    # (np.unique already returns its result sorted)
    touching = np.unique(np.concatenate([src, dst]))
    isolated = np.setdiff1d(np.arange(len(geometries), dtype="uint"),
                            touching)

    if op == "coverage_union":

        def merge(members):
            return pg.coverage_union_all(members)

    else:

        def merge(members):
            return pg.union_all(members, grid_size=grid_size)

    # merge each group of adjacent geometries on its own
    parts = []
    for group in find_adjacent_groups(src, dst):
        parts.extend(pg.get_parts(merge(geometries[list(group)])))

    parts.extend(pg.get_parts(geometries[isolated]))

    return pg.multipolygons(parts)