Example #1
def close_gaps(df, tolerance):
    """Close gaps in LineString geometry where it should be contiguous.

    Snaps the lines on both sides of a gap to the centroid of that gap.

    """
    geom = df.geometry.values.data
    coords = pygeos.get_coordinates(geom)
    indices = pygeos.get_num_coordinates(geom)

    # generate a list of start and end coordinates and create point geometries
    edges = [0]
    i = 0
    for ind in indices:
        ix = i + ind
        edges.append(ix - 1)
        edges.append(ix)
        i = ix
    edges = edges[:-1]
    points = pygeos.points(np.unique(coords[edges], axis=0))

    buffered = pygeos.buffer(points, tolerance)

    dissolved = pygeos.union_all(buffered)

    exploded = [
        pygeos.get_geometry(dissolved, i)
        for i in range(pygeos.get_num_geometries(dissolved))
    ]

    centroids = pygeos.centroid(exploded)

    snapped = pygeos.snap(geom, pygeos.union_all(centroids), tolerance)

    return snapped
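The edges bookkeeping above records, for each line, the rows of its first and last coordinate in the flat coords array. A small trace of that logic, as a sketch on hypothetical data:

import numpy as np
import pygeos

geom = pygeos.from_wkt(["LINESTRING (0 0, 1 0)", "LINESTRING (2 0, 3 0, 4 0)"])
coords = pygeos.get_coordinates(geom)       # 5 rows of x, y pairs
indices = pygeos.get_num_coordinates(geom)  # array([2, 3])
# the loop above then yields edges == [0, 1, 2, 4]:
# the first and last coordinate row of each line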
Example #2
def close_gaps(gdf, tolerance):
    """Close gaps in LineString geometry where it should be contiguous.

    Snaps the lines on both sides of a gap to the centroid of that gap.

    Parameters
    ----------
    gdf : GeoDataFrame, GeoSeries
        GeoDataFrame or GeoSeries containing a LineString representation of a network.
    tolerance : float
        nodes within the tolerance will be snapped together

    Returns
    -------
    GeoSeries
    
    See also
    --------
    momepy.extend_lines
    momepy.remove_false_nodes

    """
    geom = gdf.geometry.values.data
    coords = pygeos.get_coordinates(geom)
    indices = pygeos.get_num_coordinates(geom)

    # generate a list of start and end coordinates and create point geometries
    edges = [0]
    i = 0
    for ind in indices:
        ix = i + ind
        edges.append(ix - 1)
        edges.append(ix)
        i = ix
    edges = edges[:-1]
    points = pygeos.points(np.unique(coords[edges], axis=0))

    buffered = pygeos.buffer(points, tolerance / 2)

    dissolved = pygeos.union_all(buffered)

    exploded = [
        pygeos.get_geometry(dissolved, i)
        for i in range(pygeos.get_num_geometries(dissolved))
    ]

    centroids = pygeos.centroid(exploded)

    snapped = pygeos.snap(geom, pygeos.union_all(centroids), tolerance)

    return gpd.GeoSeries(snapped, crs=gdf.crs)
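A minimal usage sketch, assuming GeoPandas runs on the pygeos backend (so .values.data yields pygeos geometries) and that the two hypothetical lines below should be contiguous:

import geopandas as gpd

lines = gpd.GeoSeries.from_wkt([
    "LINESTRING (0 0, 10 0)",
    "LINESTRING (10.5 0, 20 0)",  # 0.5-unit gap at x = 10
])
closed = close_gaps(gpd.GeoDataFrame(geometry=lines), tolerance=1)
# both loose endpoints snap to the gap centroid at (10.25, 0)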
Example #3
def get_input_area_boundary(input_area):
    """Extract and union polygons associated with input area into a single
    boundary (Multi)Polygon.

    Parameters
    ----------
    input_area : str
        id of input area

    Returns
    -------
    (Multi)Polygon
    """
    # have to make valid or we get errors during union for FL
    values = [
        e["value"] for e in INPUT_AREA_VALUES
        if input_area in set(e["id"].split(","))
    ]

    inputs_df = gp.read_feather(bnd_dir / "input_areas.feather")

    bnd = pg.union_all(
        pg.make_valid(
            inputs_df.loc[inputs_df.value.isin(values)].geometry.values.data))

    return bnd
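A usage sketch, assuming INPUT_AREA_VALUES and bnd_dir are configured at module level as elsewhere in this codebase, and that "app" is a valid input area id:

bnd = get_input_area_boundary("app")
pg.get_type_id(bnd)  # 3 (Polygon) or 6 (MultiPolygon)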
Example #4
 def setup(self):
     # create irregular polygons by merging overlapping point buffers
     self.left = pygeos.union_all(
         pygeos.buffer(pygeos.points(np.random.random((500, 2)) * 500), 15)
     )
     # shift this up and right
     self.right = pygeos.apply(self.left, lambda x: x + 50)
Example #5
def unary_union(data):
    if compat.USE_PYGEOS:
        return _pygeos_to_shapely(pygeos.union_all(data))
    else:
        data = [g for g in data if g is not None]
        if data:
            return shapely.ops.unary_union(data)
        else:
            return None
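A sketch of the shapely fallback path, assuming compat.USE_PYGEOS is False so that data holds shapely geometries:

from shapely.geometry import Point

# None entries are filtered out before the union
merged = unary_union([Point(0, 0).buffer(1), None, Point(1, 0).buffer(1)])
# merged is a single Polygon; an all-None input returns None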
Example #6
 def setup(self):
     # create irregular polygons by merging overlapping point buffers
     self.polygon = pygeos.union_all(
         pygeos.buffer(pygeos.points(np.random.random((1000, 2)) * 500),
                       10))
     xmin = np.random.random(100) * 100
     xmax = xmin + 100
     ymin = np.random.random(100) * 100
     ymax = ymin + 100
     self.bounds = np.array([xmin, ymin, xmax, ymax]).T
     self.boxes = pygeos.box(xmin, ymin, xmax, ymax)
Example #7
    def _pandas(cls, column, **kwargs):

        shape = kwargs.get("shape")
        shape_format = kwargs.get("shape_format")
        column_shape_format = kwargs.get("column_shape_format")

        # Check that shape is given and given in the correct format
        if shape is not None:
            try:
                if shape_format == "wkt":
                    shape_ref = geos.from_wkt(shape)
                elif shape_format == "wkb":
                    shape_ref = geos.from_wkb(shape)
                elif shape_format == "geojson":
                    shape_ref = geos.from_geojson(shape)
                else:
                    raise NotImplementedError(
                        "Shape constructor method not implemented. Must be in WKT, WKB, or GeoJSON format."
                    )
            except Exception:
                raise Exception("A valid reference shape was not given.")
        else:
            raise Exception("A shape must be provided for this method.")

        # Load the column into a pygeos Geometry vector from numpy array (Series not supported).
        if column_shape_format == "wkt":
            shape_test = geos.from_wkt(column.to_numpy(), on_invalid="ignore")
        elif column_shape_format == "wkb":
            shape_test = geos.from_wkb(column.to_numpy(), on_invalid="ignore")
        else:
            raise NotImplementedError(
                "Column values shape format not implemented.")

        # Allow for an array of reference shapes to be provided. Return a union of all the shapes in the array (Polygon or Multipolygon)
        shape_ref = geos.union_all(shape_ref)

        # Prepare the geometries
        geos.prepare(shape_ref)
        geos.prepare(shape_test)
        column_centroids = geos.centroid(shape_test)

        return pd.Series(geos.within(column_centroids, shape_ref))
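The geometric core of this check, sketched directly with pygeos (aliased geos, as in the snippet): column centroids are tested against the prepared reference shape:

import pygeos as geos

ref = geos.from_wkt("POLYGON ((0 0, 4 0, 4 4, 0 4, 0 0))")
geos.prepare(ref)
tests = geos.from_wkt(["POINT (1 1)", "POINT (9 9)"])
geos.within(geos.centroid(tests), ref)  # array([ True, False])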
Example #8
    def _pandas(cls, column, **kwargs):

        column_shape_format = kwargs.get("column_shape_format")

        # Load the column into a pygeos Geometry vector from numpy array (Series not supported).
        if column_shape_format == "wkt":
            shape_test = geos.from_wkt(column.to_numpy(), on_invalid="ignore")
        elif column_shape_format == "wkb":
            shape_test = geos.from_wkb(column.to_numpy(), on_invalid="ignore")
        elif column_shape_format == "xy":
            shape_df = pd.DataFrame(column.to_list(), columns=("x", "y"))
            shape_test = geos.points(shape_df.x, y=shape_df.y)
        else:
            raise NotImplementedError(
                "Column values shape format not implemented.")

        shape_test = geos.union_all(shape_test)

        radius = geos.minimum_bounding_radius(shape_test)
        return radius
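The radius computation on its own, as a minimal sketch: union_all collapses the points into a single MultiPoint, whose minimum enclosing circle is then measured:

import numpy as np
import pygeos as geos

pts = geos.points(np.array([[0.0, 0.0], [2.0, 0.0], [0.0, 2.0]]))
merged = geos.union_all(pts)          # MULTIPOINT (0 0, 2 0, 0 2)
geos.minimum_bounding_radius(merged)  # ~1.414, circle centered at (1, 1)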
Example #9
    def setup(self):
        # create irregular polygons by merging overlapping point buffers
        self.polygons = pygeos.get_parts(
            pygeos.union_all(
                pygeos.buffer(pygeos.points(np.random.random((2000, 2)) * 500),
                              5)))
        self.tree = pygeos.STRtree(self.polygons)
        # initialize the tree by making a tiny query first
        self.tree.query(pygeos.points(0, 0))

        # create points that extend beyond the domain of the above polygons to ensure
        # some don't overlap
        self.points = pygeos.points((np.random.random((2000, 2)) * 750) - 125)
        self.point_tree = pygeos.STRtree(
            pygeos.points(np.random.random((2000, 2)) * 750))
        self.point_tree.query(pygeos.points(0, 0))

        # create points on a grid for testing equidistant nearest neighbors
        # creates 2025 points
        grid_coords = np.mgrid[:45, :45].T.reshape(-1, 2)
        self.grid_point_tree = pygeos.STRtree(pygeos.points(grid_coords))
        self.grid_points = pygeos.points(grid_coords + 0.5)
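A sketch of the kinds of queries this setup feeds, using the pygeos STRtree API on hypothetical standalone data:

import numpy as np
import pygeos

tree = pygeos.STRtree(pygeos.points(np.random.random((100, 2))))
# indices of tree points contained by the query box
hits = tree.query(pygeos.box(0.25, 0.25, 0.75, 0.75), predicate="contains")
# (query index, tree index) pairs of nearest neighbors, ties included
left, right = tree.nearest_all(pygeos.points(np.random.random((10, 2))))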
Example #10
async def create_custom_report(ctx, zip_filename, dataset, layer, name=""):
    """Create a Blueprint report for a user-uploaded GIS file contained in a zip.
    Zip must contain either a shapefile or a file geodatabase.

    Parameters
    ----------
    ctx : job context
    zip_filename : str
        full path to zip filename
    dataset : str
        full path to dataset within zip file
    layer : str
        name of layer within dataset
    name : str, optional (default: "")
        Name of area of interest (included in output report)

    Returns
    -------
    str
        path to output file

    Raises
    ------
    DataError
        Raised if bounds are too large or if the area of interest doesn't overlap the SA region
    """

    errors = []

    await set_progress(ctx["job_id"], 0, "Loading data")

    path = f"/vsizip/{zip_filename}/{dataset}"

    df = read_dataframe(path, layer=layer)

    geometry = pg.make_valid(df.geometry.values.data)

    await set_progress(ctx["job_id"], 5, "Preparing area of interest")

    # dissolve
    geometry = np.asarray([pg.union_all(geometry)])

    geo_geometry = to_crs(geometry, df.crs, GEO_CRS)
    bounds = pg.total_bounds(geo_geometry)

    # estimate area
    extent_area = (
        pg.area(pg.box(*pg.total_bounds(to_crs(geometry, df.crs, DATA_CRS)))) *
        M2_ACRES)
    if extent_area >= CUSTOM_REPORT_MAX_ACRES:
        raise DataError(
            f"The bounding box of your area of interest is too large ({extent_area:,.0f} acres), it must be < {CUSTOM_REPORT_MAX_ACRES:,.0f} acres."
        )

    await set_progress(ctx["job_id"], 10,
                       "Calculating results (this might take a while)")

    # calculate results, data must be in DATA_CRS
    print("Calculating results...")
    results = CustomArea(geometry, df.crs, name).get_results()

    if results is None:
        raise DataError(
            "area of interest does not overlap Southeast Blueprint")

    if name:
        results["name"] = name

    has_urban = "proj_urban" in results and results["proj_urban"][4] > 0
    has_slr = "slr" in results
    has_ownership = "ownership" in results
    has_protection = "protection" in results

    # compile indicator IDs across all inputs
    indicators = []
    for input_area in results["inputs"]:
        for ecosystem in input_area.get("ecosystems", []):
            indicators.extend([i["id"] for i in ecosystem["indicators"]])

    await set_progress(ctx["job_id"], 25,
                       "Creating maps (this might take a while)")

    print("Rendering maps...")
    maps, scale, map_errors = await render_maps(
        bounds,
        geometry=geo_geometry[0],
        input_ids=results["input_ids"],
        indicators=indicators,
        urban=has_urban,
        slr=has_slr,
        ownership=has_ownership,
        protection=has_protection,
    )

    if map_errors:
        log.error(f"Map rendering errors: {map_errors}")
        if "basemap" in map_errors:
            errors.append("Error creating basemap for all maps")

        if "aoi" in map_errors:
            errors.append("Error rendering area of interest on maps")

        if set(map_errors.keys()).difference(["basemap", "aoi"]):
            errors.append("Error creating one or more maps")

    await set_progress(ctx["job_id"],
                       75,
                       "Creating PDF (this might take a while)",
                       errors=errors)

    results["scale"] = scale

    pdf = create_report(maps=maps, results=results)

    await set_progress(ctx["job_id"], 95, "Nearly done", errors=errors)

    fp, name = tempfile.mkstemp(suffix=".pdf", dir=TEMP_DIR)
    with open(fp, "wb") as out:
        out.write(pdf)

    await set_progress(ctx["job_id"], 100, "All done!", errors=errors)

    log.debug(f"Created PDF at: {name}")

    return name, errors
Example #11
from pathlib import Path

import rasterio
import geopandas as gp
import pygeos as pg

from analysis.constants import DATA_CRS
from analysis.lib.raster import add_overviews

data_dir = Path("data/inputs")
src_dir = data_dir / "threats/slr"

boxes = []
for filename in src_dir.glob("*.tif"):
    with rasterio.open(filename) as src:
        boxes.append(pg.box(*src.bounds))

# union them together into a single polygon
bnd = pg.union_all(boxes)

df = gp.GeoDataFrame({"geometry": [bnd], "index": [0]}, crs=DATA_CRS)
df.to_feather(src_dir / "slr_bounds.feather")

# For debugging
# write_dataframe(df, "/tmp/slr_bounds.gpkg", driver="GPKG")

# Create overviews for each individual file in the VRT
# Note: these have varying resolutions, but this creates lower-resolution overviews for each
print("Adding overviews to SLR files...")
for filename in src_dir.glob("*.tif"):
    print(f"Processing {filename}...")
    add_overviews(filename)
                        message=".*initial implementation of Parquet.*")

from analysis.constants import DATA_CRS, GEO_CRS, M2_ACRES

src_dir = Path("source_data")
data_dir = Path("data")
analysis_dir = data_dir / "inputs/summary_units"
bnd_dir = data_dir / "boundaries"  # GPKGs output for reference
tile_dir = data_dir / "for_tiles"

### Extract the boundary

sa_df = read_dataframe(src_dir /
                       "boundaries/SABlueprint2020_Extent.shp")[["geometry"]]
# boundary has self-intersections and 4 geometries, need to clean up
bnd = pg.union_all(pg.make_valid(sa_df.geometry.values.data))

### Extract HUC12 within boundary
print("Reading source HUC12s...")
merged = None
for huc2 in [2, 3, 5, 6]:
    df = read_dataframe(
        src_dir / f"summary_units/WBD_0{huc2}_HU2_GDB/WBD_0{huc2}_HU2_GDB.gdb",
        layer="WBDHU12",
    )[["huc12", "name", "geometry"]].rename(columns={"huc12": "id"})

    if merged is None:
        merged = df

    else:
        merged = merged.append(df, ignore_index=True)
Example #13
connectors_outfilename = out_dir / "ns_connectors.tif"
tnc_outfilename = out_dir / "tnc_resilient_connected.tif"
ns_outfilename = out_dir / "ns_priority.tif"

if not out_dir.exists():
    os.makedirs(out_dir)

### Get input area mask
print("Extracting NatureScape input area mask...")
mask, transform, window = get_input_area_mask("app")

inputs_df = gp.read_feather(data_dir / "boundaries/input_areas.feather")
values = [
    e["value"] for e in INPUT_AREA_VALUES if "app" in set(e["id"].split(","))
]
bnd = pg.union_all(
    inputs_df.loc[inputs_df.value.isin(values)].geometry.values.data)

### Warp TNC resilient and connected landscapes to match Blueprint input area
print("Reading and warping TNC resilient and connected landscapes...")
with rasterio.open(src_dir / "Resilient_and_Connected20180308.tif") as rc:
    vrt = WarpedVRT(
        rc,
        width=window.width,
        height=window.height,
        nodata=int(rc.nodata),
        transform=transform,
        crs=DATA_CRS,
        resampling=Resampling.nearest,
    )

    data = vrt.read()[0]
Example #14
def union_or_combine(geometries, grid_size=None, op="union"):
    """First does a check for overlap of geometries according to STRtree
    intersects.  If any overlap, then will use union_all on all of them;
    otherwise will return as a multipolygon.

    If only one polygon is present, it will be returned in a MultiPolygon.

    If coverage_union op is provided, geometries must be polygons and
    topologically related or this will produce bad output or fail outright.
    See docs for coverage_union in GEOS.

    Parameters
    ----------
    geometries : ndarray of single part polygons
    grid_size : float, optional (default: None)
        passed through to union_all; otherwise no effect
    op : str, one of {'union', 'coverage_union'}

    Returns
    -------
    MultiPolygon
    """

    if not (pg.get_type_id(geometries) == 3).all():
        raise ValueError("Inputs to union_or_combine must be single-part polygons")

    if len(geometries) == 1:
        return pg.multipolygons(geometries)

    tree = pg.STRtree(geometries)
    left, right = tree.query_bulk(geometries, predicate="intersects")
    # drop self intersections
    ix = left != right
    left = left[ix]
    right = right[ix]

    # no intersections, just combine parts
    if len(left) == 0:
        return pg.multipolygons(geometries)

    # find groups of contiguous geometries and union them together individually
    contiguous = np.sort(np.unique(np.concatenate([left, right])))
    discontiguous = np.setdiff1d(np.arange(len(geometries), dtype="uint"),
                                 contiguous)
    groups = find_adjacent_groups(left, right)

    parts = []

    if op == "coverage_union":
        for group in groups:
            parts.extend(
                pg.get_parts(pg.coverage_union_all(geometries[list(group)])))

    else:
        for group in groups:
            parts.extend(
                pg.get_parts(
                    pg.union_all(geometries[list(group)],
                                 grid_size=grid_size)))

    parts.extend(pg.get_parts(geometries[discontiguous]))

    return pg.multipolygons(parts)
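A usage sketch (find_adjacent_groups comes from the same analysis library; the squares are hypothetical test data):

import numpy as np
import pygeos as pg

# two squares sharing an edge plus one disjoint square
squares = pg.box(np.array([0, 1, 5]), 0, np.array([1, 2, 6]), 1)
merged = union_or_combine(squares)
# MultiPolygon with 2 parts: the unioned contiguous pair and the lone square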
                         columns=["ls_cond"]).rename(columns=field_map).rename(
                             columns={"chat_rank": "chatrank"}))

for col in chat_fields:
    df[col] = df[col].astype("uint8")

df = df.drop(columns=["hexagon_id"])

### Find the CHAT units that intersect with OK / TX input areas
# Use centerpoints, since input area roughly follows edges of hexes
points = pg.centroid(df.geometry.values.data)
tree = pg.STRtree(points)

for state in ["ok", "tx"]:
    print(f"Processing {state} CHAT...")
    input_area = pg.union_all(
        inputs_df.loc[inputs_df.inputs == f"{state}chat"].geometry.values.data)
    ix = tree.query(input_area, predicate="intersects")
    state_df = df.iloc[ix].reset_index(drop=True)

    # lcon not present for TX
    if state == "tx":
        state_df = state_df.drop(columns=["lcon"])
        # Reclassify chatrank to match blueprint integration rules.
        # First shift other values up one
        ix = state_df.chatrank >= 2
        state_df.loc[ix, "chatrank"] = state_df.chatrank + 1

        # for any that were previously a chatrank of 2 but with higher values of aquatic or
        # terrestrial, map them back to 2
        ix = ((state_df.chatrank == 3)
              & state_df.arank.isin([2, 3])
    # {"name": "Cave Spring, VA area", "path": "CaveSpring"},
    # {"name": "South Atlantic Offshore", "path": "SAOffshore"},
    # {"name": "Florida Offshore", "path": "FLOffshore"}
]

for aoi in aois:
    name = aoi["name"]
    path = aoi["path"]
    print(f"Creating report for {name}...")

    start = time()
    df = read_dataframe(f"examples/{path}.shp", columns=[])
    geometry = pg.make_valid(df.geometry.values.data)

    # dissolve
    geometry = np.asarray([pg.union_all(geometry)])

    extent_area = (
        pg.area(pg.box(*pg.total_bounds(to_crs(geometry, df.crs, DATA_CRS)))) *
        M2_ACRES)
    print("Area of extent", extent_area.round())

    ### calculate results, data must be in DATA_CRS
    print("Calculating results...")
    results = CustomArea(geometry, df.crs, name=name).get_results()

    if results is None:
        print(f"AOI: {path} does not overlap Blueprint")
        continue

    out_dir = Path("/tmp/aoi") / path
Example #17
def find_nhd_waterbody_breaks(geometries, nhd_lines):
    """Some large waterbody complexes are divided by dams; these breaks
    need to be preserved.  This is done by finding the shared edges between
    adjacent waterbodies that fall near NHD lines (which include dams) and
    buffering them by 10 meters (arbitrary, from trial and error).

    This should be skipped if nhd_lines is empty.

    Parameters
    ----------
    geometries : ndarray of pygeos polygon geometries (waterbodies)
    nhd_lines : ndarray of pygeos line geometries (including dams)

    Returns
    -------
    MultiPolygon containing all buffered lines between waterbodies that are near
        NHD lines.  Returns None if no adjacent waterbodies meet these criteria
    """

    # find all nhd lines that intersect waterbodies
    # first, buffer them slightly
    nhd_lines = pg.get_parts(pg.union_all(pg.buffer(nhd_lines, 0.1)))
    tree = pg.STRtree(geometries)
    left, right = tree.query_bulk(nhd_lines, predicate="intersects")

    # add these to the return
    keep_nhd_lines = nhd_lines[np.unique(left)]

    # find connected boundaries
    boundaries = pg.polygons(pg.get_exterior_ring(geometries))
    tree = pg.STRtree(boundaries)
    left, right = tree.query_bulk(boundaries, predicate="intersects")
    # drop self intersections
    ix = left != right
    left = left[ix]
    right = right[ix]

    # extract unique pairs (dedup symmetric pairs)
    pairs = np.array([left, right]).T
    pairs = (
        pd.DataFrame({"left": pairs.min(axis=1), "right": pairs.max(axis=1)})
        .groupby(["left", "right"])
        .first()
        .reset_index()
    )

    # calculate geometric intersection
    i = pg.intersection(
        geometries.take(pairs.left.values), geometries.take(pairs.right.values)
    )

    # extract individual parts (may be geom collections)
    parts = pg.get_parts(pg.get_parts(pg.get_parts(i)))

    # extract only the lines or polygons
    t = pg.get_type_id(parts)
    parts = parts[((t == 1) | (t == 3)) & (~pg.is_empty(parts))].copy()

    # buffer and merge
    split_lines = pg.get_parts(pg.union_all(pg.buffer(parts, 10)))

    # now find the ones that are within 100m of nhd lines
    nhd_lines = pg.get_parts(nhd_lines)
    tree = pg.STRtree(nhd_lines)
    left, right = tree.nearest_all(split_lines, max_distance=100)

    split_lines = split_lines[np.unique(left)]

    if len(split_lines) or len(keep_nhd_lines):
        return pg.union_all(np.append(split_lines, keep_nhd_lines))

    return None
Example #18
from analysis.lib.pygeos_util import explode

src_dir = Path("source_data")
data_dir = Path("data")
out_dir = data_dir / "inputs/boundaries"  # used as inputs for other steps
tile_dir = data_dir / "for_tiles"

sa_df = read_dataframe(src_dir / "boundaries/SABlueprint2020_Extent.shp")

### Create mask by cutting SA bounds out of world bounds
print("Creating mask...")
world = pg.box(-180, -85, 180, 85)

# boundary has self-intersections and 4 geometries, need to clean up

bnd = pg.union_all(pg.make_valid(sa_df.geometry.values.data))
bnd_geo = pg.union_all(
    pg.make_valid(sa_df.to_crs(GEO_CRS).geometry.values.data))
mask = pg.normalize(pg.difference(world, bnd_geo))

gp.GeoDataFrame(geometry=[bnd],
                crs=DATA_CRS).to_feather(out_dir / "sa_boundary.feather")

write_dataframe(
    gp.GeoDataFrame({"geometry": bnd_geo}, index=[0], crs=GEO_CRS),
    tile_dir / "sa_boundary.geojson",
    driver="GeoJSONSeq",
)

write_dataframe(
    gp.GeoDataFrame({"geometry": mask}, index=[0], crs=GEO_CRS),
Example #19
def test_union_all_prec(geom, grid_size, expected):
    actual = pygeos.union_all(geom, grid_size=grid_size)
    assert pygeos.equals(actual, expected)
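One case such a parametrized test might cover, as a sketch (grid_size requires GEOS >= 3.9): vertices are snapped to the precision grid as part of the union:

import pygeos

geom = pygeos.from_wkt(["POINT (0.1 0.1)", "POINT (0.9 0.9)"])
actual = pygeos.union_all(geom, grid_size=1)
# vertices snap to the unit grid: MULTIPOINT (0 0, 1 1)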
Example #20
 def time_union_all_prec2(self):
     pygeos.union_all([self.left, self.right], grid_size=2)
Example #21
def unary_union(data):
    if compat.USE_PYGEOS:
        return _pygeos_to_shapely(pygeos.union_all(data))
    else:
        return shapely.ops.unary_union(data)
Example #22
def convex_hull_all(arr):
    if isinstance(arr, LazyObj):
        arr = arr.values()
    points = pg.union_all(pg.extract_unique_points(pg.from_wkb(arr)))
    return pg.to_wkb(pg.convex_hull(points))
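A usage sketch, assuming arr is a numpy array of WKB values (as the LazyObj branch implies):

import numpy as np
import pygeos as pg

# WKB for two disjoint unit boxes; the hull spans both
wkb = pg.to_wkb(pg.box(np.array([0, 2]), 0, np.array([1, 3]), 1))
hull = pg.from_wkb(convex_hull_all(wkb))  # single POLYGON covering both boxes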
Example #23
def union_all(arr):
    if isinstance(arr, LazyObj):
        arr = arr.values()
    return pg.union_all(from_wkb(arr))
Example #24
 def time_union_all(self):
     pygeos.union_all([self.left, self.right])
Example #25
tile_dir = data_dir / "for_tiles"

if not out_dir.exists():
    os.makedirs(out_dir)

if not tile_dir.exists():
    os.makedirs(tile_dir)

### Extract the boundary
bnd_df = read_dataframe(
    src_dir / "blueprint/SE_Blueprint_2021_Vectors.gdb",
    layer="SECAS_Boundary_2021_20211117",
)[["geometry"]]
# boundary has multiple geometries; union them together and clean up
bnd_df = gp.GeoDataFrame(
    geometry=[pg.union_all(pg.make_valid(bnd_df.geometry.values.data))],
    index=[0],
    crs=bnd_df.crs,
)
bnd_df.to_feather(out_dir / "se_boundary.feather")
write_dataframe(bnd_df, data_dir / "boundaries/se_boundary.fgb")

# create GeoJSON for tiling
bnd_geo = bnd_df.to_crs(GEO_CRS)
write_dataframe(bnd_geo, tile_dir / "se_boundary.geojson", driver="GeoJSONSeq")

### Create mask by cutting SA bounds out of world bounds
print("Creating mask...")
world = pg.box(-180, -85, 180, 85)
mask = pg.normalize(pg.difference(world, bnd_geo.geometry.values.data))
Example #26
# save all states for spatial joins
state_df.to_feather(out_dir / "states.feather")

state_df = state_df.loc[state_df.id.isin(STATES.keys())].copy()
state_df.to_feather(out_dir / "region_states.feather")
write_dataframe(
    state_df[["State", "geometry"]].rename(columns={"State": "id"}),
    out_dir / "region_states.gpkg",
)

# dissolve to create outer state boundary for total analysis area and regions
bnd_df = gp.GeoDataFrame(
    [
        {
            "geometry": pg.union_all(state_df.geometry.values.data),
            "id": "total"
        },
    ] + [{
        "geometry":
        pg.union_all(state_df.loc[state_df.id.isin(
            REGION_STATES[region])].geometry.values.data),
        "id":
        region,
    } for region in REGION_STATES],
    crs=CRS,
)
write_dataframe(bnd_df, out_dir / "region_boundary.gpkg")
bnd_df.to_feather(out_dir / "region_boundary.feather")

bnd = bnd_df.geometry.values.data[0]
Example #27
def remove_false_nodes(gdf):
    """
    Clean topology of existing LineString geometry by removal of nodes of degree 2.

    Parameters
    ----------
    gdf : GeoDataFrame, GeoSeries, array of pygeos geometries
        (Multi)LineString data of street network

    Returns
    -------
    gdf : GeoDataFrame, GeoSeries
    
    See also
    --------
    momepy.extend_lines
    momepy.close_gaps
    """
    if isinstance(gdf, (gpd.GeoDataFrame, gpd.GeoSeries)):
        # explode to avoid MultiLineStrings
        # double reset index due to the bug in GeoPandas explode
        df = gdf.reset_index(drop=True).explode().reset_index(drop=True)

        # get underlying pygeos geometry
        geom = df.geometry.values.data
    else:
        geom = gdf

    # extract array of coordinates and number per geometry
    coords = pygeos.get_coordinates(geom)
    indices = pygeos.get_num_coordinates(geom)

    # generate a list of start and end coordinates and create point geometries
    edges = [0]
    i = 0
    for ind in indices:
        ix = i + ind
        edges.append(ix - 1)
        edges.append(ix)
        i = ix
    edges = edges[:-1]
    points = pygeos.points(np.unique(coords[edges], axis=0))

    # query LineString geometry to identify points intersecting 2 geometries
    tree = pygeos.STRtree(geom)
    inp, res = tree.query_bulk(points, predicate="intersects")
    unique, counts = np.unique(inp, return_counts=True)
    merge = res[np.isin(inp, unique[counts == 2])]

    if len(merge) > 0:
        # filter out duplicates and create a dictionary indicating which
        # components should be merged together
        dups = [
            item for item, count in collections.Counter(merge).items()
            if count > 1
        ]
        split = np.split(merge, len(merge) // 2)
        components = {}
        for i, a in enumerate(split):
            if a[0] in dups or a[1] in dups:
                if a[0] in components.keys():
                    i = components[a[0]]
                elif a[1] in components.keys():
                    i = components[a[1]]
            components[a[0]] = i
            components[a[1]] = i

        # iterate through components and create new geometries
        new = []
        for c in set(components.values()):
            keys = []
            for item in components.items():
                if item[1] == c:
                    keys.append(item[0])
            new.append(pygeos.line_merge(pygeos.union_all(geom[keys])))

        # remove incorrect geometries and append fixed versions
        df = df.drop(merge)
        final = gpd.GeoSeries(new).explode().reset_index(drop=True)
        if isinstance(gdf, gpd.GeoDataFrame):
            return df.append(
                gpd.GeoDataFrame({df.geometry.name: final},
                                 geometry=df.geometry.name),
                ignore_index=True,
            )
        return df.append(final, ignore_index=True)
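A minimal usage sketch, assuming GeoPandas on the pygeos backend; the two interior endpoints are degree-2 nodes, so the three segments merge into one:

import geopandas as gpd

lines = gpd.GeoSeries.from_wkt([
    "LINESTRING (0 0, 1 0)",
    "LINESTRING (1 0, 2 0)",
    "LINESTRING (2 0, 3 0)",
])
cleaned = remove_false_nodes(gpd.GeoDataFrame(geometry=lines))
# cleaned contains a single LINESTRING (0 0, 1 0, 2 0, 3 0)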
Example #28
def enclosures(primary_barriers,
               limit=None,
               additional_barriers=None,
               enclosure_id="eID"):
    """
    Generate enclosures based on passed barriers.

    Enclosures are areas enclosed from all sides by at least one type of
    a barrier. Barriers are typically roads, railways, natural features
    like rivers and other water bodies or coastline. Enclosures are a
    result of polygonization of the ``primary_barriers`` and ``limit`` and
    their subdivision based on ``additional_barriers``.

    Parameters
    ----------
    primary_barriers : GeoDataFrame, GeoSeries
        GeoDataFrame or GeoSeries containing primary barriers.
        (Multi)LineString geometry is expected.
    limit : GeoDataFrame, GeoSeries (default None)
        GeoDataFrame or GeoSeries containing external limit of enclosures,
        i.e. the area which gets partitioned. If None is passed,
        the internal area of ``primary_barriers`` will be used.
    additional_barriers : list of GeoDataFrame, GeoSeries (default None)
        list of GeoDataFrames or GeoSeries containing additional barriers.
        (Multi)LineString geometry is expected.
    enclosure_id : str (default 'eID')
        name of the enclosure_id (to be created).

    Returns
    -------
    enclosures : GeoDataFrame
       GeoDataFrame containing enclosure geometries and enclosure_id

    Examples
    --------
    >>> enclosures = mm.enclosures(streets, admin_boundary, [railway, rivers])

    """
    if limit is not None:
        if limit.geom_type.isin(["Polygon", "MultiPolygon"]).any():
            limit = limit.boundary
        barriers = pd.concat([primary_barriers.geometry, limit.geometry])
    else:
        barriers = primary_barriers
    unioned = barriers.unary_union
    polygons = polygonize(unioned)
    enclosures = gpd.GeoSeries(list(polygons), crs=primary_barriers.crs)

    if additional_barriers is not None:
        if not isinstance(additional_barriers, list):
            raise TypeError(
                "`additional_barriers` expects a list of GeoDataFrames or GeoSeries."
                f"Got {type(additional_barriers)}.")
        additional = pd.concat([gdf.geometry for gdf in additional_barriers])

        inp, res = enclosures.sindex.query_bulk(additional.geometry,
                                                predicate="intersects")
        unique = np.unique(res)

        new = []

        for i in unique:
            poly = enclosures.values.data[i]  # get enclosure polygon
            crossing = inp[res == i]  # get relevant additional barriers
            buf = pygeos.buffer(poly, 0.01)  # to avoid floating point errors
            crossing_ins = pygeos.intersection(
                buf, additional.values.data[crossing]
            )  # keeping only parts of additional barriers within polygon
            union = pygeos.union_all(
                np.append(crossing_ins, pygeos.boundary(poly)))  # union
            polygons = np.array(list(polygonize(
                _pygeos_to_shapely(union))))  # polygonize
            within = pygeos.covered_by(
                pygeos.from_shapely(polygons),
                buf)  # keep only those within original polygon
            new += list(polygons[within])

        final_enclosures = (gpd.GeoSeries(enclosures).drop(unique).append(
            gpd.GeoSeries(new)).reset_index(drop=True)).set_crs(
                primary_barriers.crs)

        return gpd.GeoDataFrame({enclosure_id: range(len(final_enclosures))},
                                geometry=final_enclosures)

    return gpd.GeoDataFrame({enclosure_id: range(len(enclosures))},
                            geometry=enclosures)
Example #29
marine = atl.append(gulf, ignore_index=True)
marine["id"] = marine.PROT_NUMBE.str.strip() + "-" + marine.BLOCK_NUMB.str.strip()
marine["name"] = (
    marine.PROT_NUMBE.str.strip() + ": Block " + marine.BLOCK_NUMB.str.strip()
)

# there are a couple blocks without proper names and 0 area; drop them
marine = marine[["id", "name", "geometry"]].dropna().to_crs(DATA_CRS)

# some blocks have multiple parts, merge them
grouped = marine.groupby("id")

# save as DataFrame instead of GeoDataFrame for easier processing
marine = pd.DataFrame(
    grouped.geometry.apply(lambda g: g.values.data).apply(
        lambda g: pg.union_all(g) if len(g) > 1 else g[0]
    )
).join(grouped.name.first())

# coerce all to MultiPolygons
ix = pg.get_type_id(marine.geometry.values) == 3
marine.loc[ix, "geometry"] = marine.loc[ix].geometry.apply(
    lambda g: pg.multipolygons([g])
)

marine = (
    gp.GeoDataFrame(marine, geometry="geometry", crs=DATA_CRS)
    .reset_index()
    .rename(columns={"index": "id"})
)