Example #1
def get_input_area_boundary(input_area):
    """Extract and union polygons associated with input area into a single
    boundary (Multi)Polygon.

    Parameters
    ----------
    input_area : str
        id of input area

    Returns
    -------
    (Multi)Polygon
    """
    values = [
        e["value"] for e in INPUT_AREA_VALUES
        if input_area in set(e["id"].split(","))
    ]

    inputs_df = gp.read_feather(bnd_dir / "input_areas.feather")

    # have to make valid or we get errors during union for FL
    bnd = pg.union_all(
        pg.make_valid(
            inputs_df.loc[inputs_df.value.isin(values)].geometry.values.data))

    return bnd
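A minimal usage sketch; the input area id is hypothetical, and INPUT_AREA_VALUES, bnd_dir, gp (geopandas), and pg (pygeos) come from the source module:

import pygeos as pg

bnd = get_input_area_boundary("app")  # "app" is a hypothetical input area id
print(f"Boundary area: {pg.area(bnd) / 1e6:,.0f} km2")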
Example #2
from typing import Optional

import pygeos
import shapely.geometry as sh_geom
import shapely.wkb as sh_wkb


def make_valid(geometry: Optional[sh_geom.base.BaseGeometry]) -> Optional[sh_geom.base.BaseGeometry]:
    """
    Make a geometry valid.

    Args:
        geometry (Optional[sh_geom.base.BaseGeometry]): A (possibly) invalid geometry.

    Returns:
        Optional[sh_geom.base.BaseGeometry]: The fixed geometry.
    """
    if geometry is None:
        return None
    else:
        # repair with pygeos, then round-trip back to shapely through WKB
        return sh_wkb.loads(pygeos.io.to_wkb(pygeos.make_valid(pygeos.io.from_shapely(geometry))))
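A short sketch of the round trip on a classic self-intersecting "bowtie" polygon, using the imports above:

from shapely.geometry import Polygon

bowtie = Polygon([(0, 0), (2, 2), (2, 0), (0, 2)])  # ring crosses itself
assert not bowtie.is_valid

fixed = make_valid(bowtie)
assert fixed is not None and fixed.is_valid  # typically a MultiPolygon
assert make_valid(None) is None  # None passes through unchanged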
Example #3
import pygeos as pg


def make_valid(geometries):
    """Make geometries valid.

    Parameters
    ----------
    geometries : ndarray of pygeos geometries

    Returns
    -------
    ndarray of pygeos geometries
    """

    ix = ~pg.is_valid(geometries)
    if ix.sum():
        geometries = geometries.copy()
        print(f"Repairing {ix.sum()} geometries")
        geometries[ix] = pg.make_valid(geometries[ix])

    return geometries
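For instance, a sketch mixing one invalid and one valid polygon:

import numpy as np
import pygeos as pg

geometries = np.array([
    pg.Geometry("POLYGON ((0 0, 2 2, 2 0, 0 2, 0 0))"),  # bowtie: invalid
    pg.Geometry("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))"),  # already valid
])
repaired = make_valid(geometries)  # prints "Repairing 1 geometries"
assert pg.is_valid(repaired).all()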
Example #4
import warnings

import pygeos as pg


def constructive(arr, operation, *args, **kwargs):
    if operation == 'boundary':
        geometries = pg.boundary(pg.from_wkb(arr), **kwargs)
    elif operation == 'buffer':
        geometries = pg.buffer(pg.from_wkb(arr), *args, **kwargs)
    elif operation == 'build_area':
        geometries = pg.build_area(pg.from_wkb(arr), **kwargs)
    elif operation == 'centroid':
        geometries = pg.centroid(pg.from_wkb(arr), **kwargs)
    elif operation == 'clip_by_rect':
        geometries = pg.clip_by_rect(pg.from_wkb(arr), *args, **kwargs)
    elif operation == 'convex_hull':
        geometries = pg.convex_hull(pg.from_wkb(arr), **kwargs)
    elif operation == 'delaunay_triangles':
        geometries = pg.delaunay_triangles(pg.from_wkb(arr), **kwargs)
    elif operation == 'envelope':
        geometries = pg.envelope(pg.from_wkb(arr), **kwargs)
    elif operation == 'extract_unique_points':
        geometries = pg.extract_unique_points(pg.from_wkb(arr), **kwargs)
    elif operation == 'make_valid':
        geometries = pg.make_valid(pg.from_wkb(arr), **kwargs)
    elif operation == 'normalize':
        geometries = pg.normalize(pg.from_wkb(arr), **kwargs)
    elif operation == 'offset_curve':
        geometries = pg.offset_curve(pg.from_wkb(arr), *args, **kwargs)
    elif operation == 'point_on_surface':
        geometries = pg.point_on_surface(pg.from_wkb(arr), **kwargs)
    elif operation == 'reverse':
        geometries = pg.reverse(pg.from_wkb(arr), **kwargs)
    elif operation == 'simplify':
        geometries = pg.simplify(pg.from_wkb(arr), *args, **kwargs)
    elif operation == 'snap':
        geometries = pg.snap(pg.from_wkb(arr), *args, **kwargs)
    elif operation == 'voronoi_polygons':
        geometries = pg.voronoi_polygons(pg.from_wkb(arr), **kwargs)
    else:
        warnings.warn(f'Operation {operation} not supported.')
        return None
    return pg.to_wkb(geometries)
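A sketch of calling the dispatcher on an array of WKB bytes; extra positional arguments (here the buffer radius) are forwarded to the matching pygeos function:

import pygeos as pg

arr = pg.to_wkb(pg.points([[0, 0], [1, 1]]))  # ndarray of WKB bytes
buffered = constructive(arr, 'buffer', 10.0)  # WKB in, WKB out
assert pg.area(pg.from_wkb(buffered)).min() > 0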
    where=f"State_Nm in ({states})",
)

# set the CRS; it is the same as EPSG:5070 but is not recognized properly
df = df.set_crs(DATA_CRS)

# drop BOEM lease block groups
df = df.loc[df.Agg_Src != "USGS_PADUS2_0Marine_BOEM_Block_Dissolve"].drop(
    columns=["Agg_Src"])

tree = pg.STRtree(df.geometry.values.data)
ix = tree.query(bnd_df.geometry.values.data[0], predicate="intersects")
df = df.iloc[ix].copy()

print("making valid...")
df["geometry"] = pg.make_valid(df.geometry.values.data)

df = explode(df).reset_index()
# there are some geometry errors after the cleanup above; keep only polygons
df = df.loc[pg.get_type_id(df.geometry.values.data) == 3].copy()

print("Writing files")
df.to_feather(out_dir / "ownership.feather")
write_dataframe(df, data_dir / "boundaries/ownership.gpkg", driver="GPKG")

# Write for tiles
print("Writing GeoJSON for tiles")
write_dataframe(
    df[["geometry", "Own_Type", "GAP_Sts"]].to_crs(GEO_CRS),
    tile_dir / "ownership.geojson",
    driver="GeoJSONSeq",
Example #6
                {"break_geometry": breaks.take(left)}, index=df.index.take(right)
            )
            grouped = pairs.groupby(level=0).break_geometry.apply(
                lambda g: pg.multipolygons(g.values.data)
            )
            df.loc[grouped.index, "geometry"] = pg.difference(
                df.loc[grouped.index].geometry.values.data, grouped.values
            )

            df = explode(df).reset_index(drop=True)

    # make sure all polygons are valid
    ix = ~pg.is_valid(df.geometry.values.data)
    if ix.sum():
        print(f"Repairing {ix.sum()} invalid waterbodies")
        df.loc[ix, "geometry"] = pg.make_valid(df.loc[ix].geometry.values.data)
        df = explode(explode(df))
        df = df.loc[pg.get_type_id(df.geometry.values.data) == 3].reset_index()

    # assign a new unique wbID
    df["wbID"] = df.index.values.astype("uint32") + 1 + int(huc2) * 1000000
    df["km2"] = pg.area(df.geometry.values.data) / 1e6

    df.to_feather(huc2_dir / "waterbodies.feather")
    write_dataframe(df, huc2_dir / "waterbodies.gpkg")

    print("--------------------")
    print(f"HUC2: {huc2} done in {time() - huc2_start:.0f}s\n\n")

print(f"Done in {time() - start:.2f}s\n============================")
Example #7
county_df = (read_dataframe(
    county_filename,
    columns=["NAME", "GEOID", "STATEFP"],
).to_crs(CRS).rename(columns={
    "NAME": "County",
    "GEOID": "COUNTYFIPS",
    "STATEFP": "STATEFIPS"
}))

# keep only those within the region HUC4 outer boundary
tree = pg.STRtree(county_df.geometry.values.data)
ix = np.unique(
    tree.query_bulk(huc4_df.geometry.values.data, predicate="intersects")[1])
ix.sort()
county_df = county_df.iloc[ix].reset_index(drop=True)
county_df.geometry = pg.make_valid(county_df.geometry.values.data)

# keep larger set for spatial joins
county_df.to_feather(out_dir / "counties.feather")

# Subset these in the region and SARP for tiles
write_dataframe(
    county_df.loc[county_df.STATEFIPS.isin(states)].rename(columns={
        "COUNTYFIPS": "id",
        "County": "name"
    }),
    out_dir / "region_counties.gpkg",
)
write_dataframe(
    county_df.loc[county_df.STATEFIPS.isin(sarp_states)].rename(
        columns={
Example #8
async def create_custom_report(ctx, zip_filename, dataset, layer, name=""):
    """Create a Blueprint report for a user-uploaded GIS file contained in a zip.
    Zip must contain either a shapefile or a file geodatabase.

    Parameters
    ----------
    ctx : job context
    zip_filename : str
        full path to zip filename
    dataset : str
        full path to dataset within zip file
    layer : str
        name of layer within dataset
    name : str, optional (default: "")
        Name of area of interest (included in output report)

    Returns
    -------
    str
        path to output file

    Raises
    ------
    DataError
        Raised if bounds are too large or if the area of interest doesn't overlap the SA region
    """

    errors = []

    await set_progress(ctx["job_id"], 0, "Loading data")

    path = f"/vsizip/{zip_filename}/{dataset}"

    df = read_dataframe(path, layer=layer)

    geometry = pg.make_valid(df.geometry.values.data)

    await set_progress(ctx["job_id"], 5, "Preparing area of interest")

    # dissolve
    geometry = np.asarray([pg.union_all(geometry)])

    geo_geometry = to_crs(geometry, df.crs, GEO_CRS)
    bounds = pg.total_bounds(geo_geometry)

    # estimate area
    extent_area = (
        pg.area(pg.box(*pg.total_bounds(to_crs(geometry, df.crs, DATA_CRS)))) *
        M2_ACRES)
    if extent_area >= CUSTOM_REPORT_MAX_ACRES:
        raise DataError(
            f"The bounding box of your area of interest is too large ({extent_area:,.0f} acres), it must be < {CUSTOM_REPORT_MAX_ACRES:,.0f} acres."
        )

    await set_progress(ctx["job_id"], 10,
                       "Calculating results (this might take a while)")

    # calculate results, data must be in DATA_CRS
    print("Calculating results...")
    results = CustomArea(geometry, df.crs, name).get_results()

    if results is None:
        raise DataError(
            "area of interest does not overlap Southeast Blueprint")

    if name:
        results["name"] = name

    has_urban = "proj_urban" in results and results["proj_urban"][4] > 0
    has_slr = "slr" in results
    has_ownership = "ownership" in results
    has_protection = "protection" in results

    # compile indicator IDs across all inputs
    indicators = []
    for input_area in results["inputs"]:
        for ecosystem in input_area.get("ecosystems", []):
            indicators.extend([i["id"] for i in ecosystem["indicators"]])

    await set_progress(ctx["job_id"], 25,
                       "Creating maps (this might take a while)")

    print("Rendering maps...")
    maps, scale, map_errors = await render_maps(
        bounds,
        geometry=geo_geometry[0],
        input_ids=results["input_ids"],
        indicators=indicators,
        urban=has_urban,
        slr=has_slr,
        ownership=has_ownership,
        protection=has_protection,
    )

    if map_errors:
        log.error(f"Map rendering errors: {map_errors}")
        if "basemap" in map_errors:
            errors.append("Error creating basemap for all maps")

        if "aoi" in map_errors:
            errors.append("Error rendering area of interest on maps")

        if set(map_errors.keys()).difference(["basemap", "aoi"]):
            errors.append("Error creating one or more maps")

    await set_progress(ctx["job_id"],
                       75,
                       "Creating PDF (this might take a while)",
                       errors=errors)

    results["scale"] = scale

    pdf = create_report(maps=maps, results=results)

    await set_progress(ctx["job_id"], 95, "Nearly done", errors=errors)

    fp, name = tempfile.mkstemp(suffix=".pdf", dir=TEMP_DIR)
    with open(fp, "wb") as out:
        out.write(pdf)

    await set_progress(ctx["job_id"], 100, "All done!", errors=errors)

    log.debug(f"Created PDF at: {name}")

    return name, errors
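A hypothetical invocation; the zip path, dataset, and layer names are illustrative, and set_progress only needs ctx["job_id"]:

import asyncio

ctx = {"job_id": "demo"}  # hypothetical job context
path, errors = asyncio.run(
    create_custom_report(ctx, "/tmp/aoi.zip", "aoi/aoi.shp", layer="aoi",
                         name="Demo AOI"))
print(path, errors)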
    # {"name": "TriState area at junction of MO, OK, KS", "path": "TriState"},
    # {"name": "Quincy, FL area", "path": "Quincy"},
    # {"name": "Doyle Springs, TN area", "path": "DoyleSprings"},
    # {"name": "Cave Spring, VA area", "path": "CaveSpring"},
    # {"name": "South Atlantic Offshore", "path": "SAOffshore"},
    # {"name": "Florida Offshore", "path": "FLOffshore"}
]

for aoi in aois:
    name = aoi["name"]
    path = aoi["path"]
    print(f"Creating report for {name}...")

    start = time()
    df = read_dataframe(f"examples/{path}.shp", columns=[])
    geometry = pg.make_valid(df.geometry.values.data)

    # dissolve
    geometry = np.asarray([pg.union_all(geometry)])

    extent_area = (
        pg.area(pg.box(*pg.total_bounds(to_crs(geometry, df.crs, DATA_CRS)))) *
        M2_ACRES)
    print("Area of extent", extent_area.round())

    ### calculate results, data must be in DATA_CRS
    print("Calculating results...")
    results = CustomArea(geometry, df.crs, name=name).get_results()

    if results is None:
        print(f"AOI: {path} does not overlap Blueprint")
Example #10
def test_make_valid_1d(geom, expected):
    actual = pygeos.make_valid(geom)
    # normalize needed to handle variation in output across GEOS versions
    assert np.all(pygeos.normalize(actual) == pygeos.normalize(expected))
Example #11
tile_dir = data_dir / "for_tiles"

if not out_dir.exists():
    os.makedirs(out_dir)

if not tile_dir.exists():
    os.makedirs(tile_dir)

### Extract the boundary
bnd_df = read_dataframe(
    src_dir / "blueprint/SE_Blueprint_2021_Vectors.gdb",
    layer="SECAS_Boundary_2021_20211117",
)[["geometry"]]
# boundary has multiple geometries; union them together and clean up
bnd_df = gp.GeoDataFrame(
    geometry=[pg.union_all(pg.make_valid(bnd_df.geometry.values.data))],
    index=[0],
    crs=bnd_df.crs,
)
bnd_df.to_feather(out_dir / "se_boundary.feather")
write_dataframe(bnd_df, data_dir / "boundaries/se_boundary.fgb")

# create GeoJSON for tiling
bnd_geo = bnd_df.to_crs(GEO_CRS)
write_dataframe(bnd_geo, tile_dir / "se_boundary.geojson", driver="GeoJSONSeq")

### Create mask by cutting SA bounds out of world bounds
print("Creating mask...")
world = pg.box(-180, -85, 180, 85)
mask = pg.normalize(pg.difference(world, bnd_geo.geometry.values.data))
Example #12
from analysis.lib.pygeos_util import explode

src_dir = Path("source_data")
data_dir = Path("data")
out_dir = data_dir / "inputs/boundaries"  # used as inputs for other steps
tile_dir = data_dir / "for_tiles"

sa_df = read_dataframe(src_dir / "boundaries/SABlueprint2020_Extent.shp")

### Create mask by cutting SA bounds out of world bounds
print("Creating mask...")
world = pg.box(-180, -85, 180, 85)

# boundary has self-intersections and 4 geometries, need to clean up

bnd = pg.union_all(pg.make_valid(sa_df.geometry.values.data))
bnd_geo = pg.union_all(
    pg.make_valid(sa_df.to_crs(GEO_CRS).geometry.values.data))
mask = pg.normalize(pg.difference(world, bnd_geo))

gp.GeoDataFrame(geometry=[bnd],
                crs=DATA_CRS).to_feather(out_dir / "sa_boundary.feather")

write_dataframe(
    gp.GeoDataFrame({"geometry": bnd_geo}, index=[0], crs=GEO_CRS),
    tile_dir / "sa_boundary.geojson",
    driver="GeoJSONSeq",
)

write_dataframe(
    gp.GeoDataFrame({"geometry": mask}, index=[0], crs=GEO_CRS),
Example #13
def summarize_by_areas(df, state, rank_only=False):
    """Calculate acres by value and area-weighted value for each CHAT field in fields.

    Parameters
    ----------
    df : GeoDataFrame
        area(s) of interest
    state : str, one of ['ok', 'tx']
    rank_only : bool (default False)
        if True, will only calculate areas for CHAT Rank

    Returns
    -------
    DataFrame
        columns for total_acres, analysis_acres, chat_acres, and avg (bare) and
        _x suffixed fields for each field
    """
    if not df.index.name:
        df.index.name = "index"

    index_name = df.index.name
    df = df.reset_index()

    chat_df = gp.read_feather(chat_dir / f"{state}chat.feather")
    fields = ["chatrank"]

    if not rank_only:
        fields += [e["id"] for e in INPUTS[f"{state}chat"]["indicators"]]

    print("Intersecting with CHAT...")
    chat_df = intersection(df, chat_df)
    chat_df["acres"] = pg.area(chat_df.geometry_right.values.data) * M2_ACRES
    chat_df = chat_df.loc[chat_df.acres > 0].copy()

    if not len(chat_df):
        return None

    # total_acres = chat_df.groupby(index_name).geometry.first()
    total_acres = df.loc[df[index_name].isin(chat_df[index_name])].set_index(index_name)
    total_acres["total_acres"] = pg.area(total_acres.geometry.values.data) * M2_ACRES

    results = pd.DataFrame(
        chat_df.groupby(index_name).acres.sum().rename("chat_acres")
    ).join(total_acres[["total_acres"]], how="left")

    # intersect edge units with SE input areas to determine areas outside
    edge_df = explode(
        df.loc[
            df[index_name].isin(
                results.loc[(results.chat_acres < results.total_acres - 1)].index
            )
        ].copy()[[index_name, "geometry"]]
    )

    print("Intersecting with input areas, this may take a while...")
    input_df = gp.read_feather(input_filename).reset_index(drop=True)
    # this is inverted because input_df performs better if prepared (left side)
    # note: we don't do intersection() here because of topology errors

    left = pd.Series(input_df.geometry.values.data, index=input_df.index)
    right = pd.Series(edge_df.geometry.values.data, index=edge_df.index)
    intersects = sjoin_geometry(left, right, predicate="intersects")

    tmp = input_df.loc[intersects.index.unique()]

    # have to make valid first or fails with topology errors
    tmp.geometry = pg.make_valid(tmp.geometry.values.data)

    # clip to the general area; otherwise the intersection takes far too long
    clip_box = pg.box(*pg.total_bounds(edge_df.geometry.values.data))
    tmp.geometry = pg.intersection(tmp.geometry.values.data, clip_box)

    tmp = tmp.join(intersects, how="inner").join(
        edge_df, on="index_right", rsuffix="_right"
    )

    tmp.geometry_right = pg.intersection(
        tmp.geometry.values.data, tmp.geometry_right.values.data
    )

    tmp["acres"] = pg.area(tmp.geometry_right.values.data) * M2_ACRES
    analysis_acres = (
        tmp.groupby(index_name)
        .acres.sum()
        .round(ACRES_PRECISION)
        .rename("analysis_acres")
    )

    # join analysis acres back to results
    results = results.join(analysis_acres)
    results.loc[results.analysis_acres.isnull(), "analysis_acres"] = results.total_acres

    area_results = dict()
    avg_results = dict()
    for field in fields:
        # Note: values are categorical, so this will add 0 area values for each category
        grouped = (
            chat_df.groupby([index_name, field])
            .acres.sum()
            .fillna(0)
            .round(ACRES_PRECISION)
            .reset_index()
        )
        # create an array of [<acres for value 0>, <acres for value 1>,... ]
        area_results[field] = grouped.groupby(index_name).acres.apply(np.array)

        # exclude nodata to calculate area-weighted average
        values = grouped.loc[grouped[field] > 0].set_index(index_name)
        total_acres = values.groupby(level=0).acres.sum().rename("total")
        values = values.join(total_acres)
        values["wtd_value"] = (values.acres / values.total) * values[field].astype(
            "uint8"
        )
        avg_results[field] = values.groupby(level=0).wtd_value.sum().round(1)

    area_results = pd.DataFrame(area_results)
    avg_results = pd.DataFrame(avg_results)

    results = results.join(avg_results).fillna(0)

    for field in fields:
        # convert areas array to columns
        s = area_results[field].apply(pd.Series)
        s.columns = [f"{field}_{c}" for c in s.columns]

        # drop any that are all 0; these are not present
        s = s.drop(columns=s.columns[s.max() == 0].tolist())
        results = results.join(s)

    return results
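A hypothetical call; the CHAT feather files must already exist under chat_dir, and aoi_df is a GeoDataFrame of areas of interest in the matching CRS:

results = summarize_by_areas(aoi_df, state="ok", rank_only=True)
print(results[["total_acres", "chat_acres", "analysis_acres"]].head())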
                        message=".*initial implementation of Parquet.*")

from analysis.constants import DATA_CRS, GEO_CRS, M2_ACRES

src_dir = Path("source_data")
data_dir = Path("data")
analysis_dir = data_dir / "inputs/summary_units"
bnd_dir = data_dir / "boundaries"  # GPKGs output for reference
tile_dir = data_dir / "for_tiles"

### Extract the boundary

sa_df = read_dataframe(src_dir /
                       "boundaries/SABlueprint2020_Extent.shp")[["geometry"]]
# boundary has self-intersections and 4 geometries, need to clean up
bnd = pg.union_all(pg.make_valid(sa_df.geometry.values.data))

### Extract HUC12 within boundary
print("Reading source HUC12s...")
merged = None
for huc2 in [2, 3, 5, 6]:
    df = read_dataframe(
        src_dir / f"summary_units/WBD_0{huc2}_HU2_GDB/WBD_0{huc2}_HU2_GDB.gdb",
        layer="WBDHU12",
    )[["huc12", "name", "geometry"]].rename(columns={"huc12": "id"})

    if merged is None:
        merged = df

    else:
        merged = merged.append(df, ignore_index=True)
Example #15
def test_make_valid_none():
    actual = pygeos.make_valid(None)
    assert actual is None
Example #16
def test_make_valid(geom, expected):
    actual = pygeos.make_valid(geom)
    assert actual is not expected
    # normalize needed to handle variation in output across GEOS versions
    assert pygeos.normalize(actual) == expected
Example #17
    ### Process waterbodies
    # only keep waterbodies that intersect flowlines
    print(f"Extracted {len(waterbodies):,} NWI lakes and ponds")
    left, right = tree.query_bulk(waterbodies.geometry.values.data,
                                  predicate="intersects")
    waterbodies = waterbodies.iloc[np.unique(left)].reset_index(drop=True)
    print(f"Kept {len(waterbodies):,} that intersect flowlines")

    # TODO: explode, repair, dissolve, explode, reset index
    waterbodies = explode(waterbodies)
    # make valid
    ix = ~pg.is_valid(waterbodies.geometry.values.data)
    if ix.sum():
        print(f"Repairing {ix.sum():,} invalid waterbodies")
        waterbodies.loc[ix, "geometry"] = pg.make_valid(
            waterbodies.loc[ix].geometry.values.data)

    # note: nwi_code, nwi_type are discarded here since they aren't used later
    print("Dissolving adjacent waterbodies")
    waterbodies = dissolve(waterbodies, by=["altered"])
    waterbodies = explode(waterbodies).reset_index(drop=True)

    waterbodies["km2"] = pg.area(waterbodies.geometry.values.data) / 1e6

    waterbodies.to_feather(huc2_dir / "waterbodies.feather")
    write_dataframe(waterbodies, huc2_dir / "waterbodies.gpkg")

    ### Process riverine
    print(f"Extracted {len(rivers):,} NWI altered river polygons")
    left, right = tree.query_bulk(rivers.geometry.values.data,
                                  predicate="intersects")
Example #18
def convert_to_polygon(input_array,
                       trim_invalid_geometry=False,
                       autocorrect_invalid_geometry=False):
    r"""Convert an input array to a Polygon array.

    Args:
        input_array (ndarray, list): A ndarray of Polygons optionally followed by a confidence value and/or a label
            where each row is: ``[[[outer_ring], [inner_rings]], (confidence), (label)]``
        trim_invalid_geometry (bool): Optional, default to ``False``. If set to ``True`` conversion will ignore invalid
            geometries and leave them out of ``output_array``. This means that the function will return an array where
            ``output_array.shape[0] <= input_array.shape[0]``.  If set to ``False``, an invalid geometry will raise an
            :exc:`~playground_metrics.utils.geometry_utils.InvalidGeometryError`.
        autocorrect_invalid_geometry (bool): Optional, default to ``False``. Whether to attempt correcting a faulty
            geometry to form a valid one. If set to ``True`` and the autocorrect attempt is unsuccessful, it falls back
            to the behaviour defined in ``trim_invalid_geometry``.

    Note:
        * Polygon auto-correction only corrects self-crossing exterior rings, in which case it creates one Polygon
          out of every simple ring which might be extracted from the original Polygon exterior.
        * Polygon auto-correction will systematically fail on Polygons with at least one inner ring.

    Returns:
        ndarray: A Polygon ndarray where each row contains a geometry followed by optionally confidence and a label
        e.g.: ``[Polygon, (confidence), (label)]``

    Raises:
        ValueError: If ``input_array`` has invalid dimensions.

    """
    input_array = np.array(input_array, dtype=np.dtype('O'))
    if input_array.size == 0:
        return 'undefined', input_array

    if (len(input_array.shape) == 1 or len(input_array.shape) > 2) and \
            (not len(input_array.shape) == 5 and not len(input_array.shape) == 3):
        raise ValueError('Invalid array number of dimensions: '
                         'Expected a 2D array, found {}D.'.format(
                             len(input_array.shape)))

    if len(input_array.shape) == 5 and not input_array.shape[4] == 2:
        raise ValueError('Invalid array fifth dimension: '
                         'Expected 2, found {}.'.format(
                             input_array.shape[4]))

    elif len(input_array.shape) == 3 and not input_array.shape[2] == 1:
        raise ValueError('Invalid array third dimension: '
                         'Expected 1, found {}.'.format(
                             input_array.shape[2]))

    object_array = np.ndarray((input_array.shape[0], input_array.shape[1]),
                              dtype=np.dtype('O'))
    for i, coordinate in enumerate(input_array[:, 0]):
        line = [polygons(np.array(coordinate[0], dtype=np.float64), np.array(coordinate[1:], dtype=np.float64))] \
            if len(coordinate) > 1 else [polygons(np.array(coordinate[0], dtype=np.float64))]
        line.extend(input_array[i, 1:])
        object_array[i] = np.array(line, dtype=np.dtype('O'))

    if autocorrect_invalid_geometry:
        object_array[:, 0] = _clean_multi_geometries(
            make_valid(object_array[:, 0]))

    if trim_invalid_geometry:
        object_array = object_array[is_valid(object_array[:, 0]), :]

    if not np.all(is_type(object_array[:, 0], GeometryType.POLYGON)):
        raise ValueError(
            'Conversion is impossible: Some geometries could not be converted to valid polygons.'
        )

    return object_array
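A sketch with a single square footprint plus confidence and label; the ragged nesting produces the 2D object array the checks above expect:

rows = [
    [[[(0, 0), (10, 0), (10, 10), (0, 10), (0, 0)]], 0.9, 'building'],
]
out = convert_to_polygon(rows)
print(out.shape)  # (1, 3): [Polygon, confidence, label]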
Example #19
def test_make_valid(geom, expected):
    actual = pygeos.make_valid(geom)
    assert actual is not expected
    assert actual == expected
Example #20
def test_make_valid_1d(geom, expected):
    actual = pygeos.make_valid(geom)
    assert np.all(actual == expected)
Example #21
ui_dir = Path("ui/data")

state_filename = data_dir / "boundaries/source/tl_2019_us_state/tl_2019_us_state.shp"
wbd_gdb = data_dir / "nhd/source/wbd/WBD_National_GDB/WBD_National_GDB.gdb"

### Construct region and SARP boundaries from states
print("Processing states...")
state_df = (read_dataframe(
    state_filename,
    columns=["STUSPS", "STATEFP", "NAME"],
).to_crs(CRS).rename(columns={
    "STUSPS": "id",
    "NAME": "State",
    "STATEFP": "STATEFIPS"
}))
state_df.geometry = pg.make_valid(state_df.geometry.values.data)

# save all states for spatial joins
state_df.to_feather(out_dir / "states.feather")

state_df = state_df.loc[state_df.id.isin(STATES.keys())].copy()
state_df.to_feather(out_dir / "region_states.feather")
write_dataframe(
    state_df[["State", "geometry"]].rename(columns={"State": "id"}),
    out_dir / "region_states.gpkg",
)

# dissolve to create outer state boundary for total analysis area and regions
bnd_df = gp.GeoDataFrame(
    [
        {
Example #22
nhd_dams = dissolve(
    explode(nhd_dams),
    by=["HUC2", "source", "group"],
    agg={
        "GNIS_Name": lambda n: ", ".join({s for s in n if s}),
        # set missing NHD fields as 0
        "FType": lambda n: ", ".join({str(s) for s in n}),
        "FCode": lambda n: ", ".join({str(s) for s in n}),
        "NHDPlusID": lambda n: ", ".join({str(s) for s in n}),
    },
).reset_index(drop=True)

# fill in missing values
nhd_dams.GNIS_Name = nhd_dams.GNIS_Name.fillna("")

nhd_dams.geometry = pg.make_valid(nhd_dams.geometry.values.data)

nhd_dams["damID"] = nhd_dams.index.copy()
nhd_dams.damID = nhd_dams.damID.astype("uint32")

nhd_dams = nhd_dams.set_index("damID")

merged = None
for huc2 in huc2s:
    region_start = time()

    print(f"----- {huc2} ------")

    dams = nhd_dams.loc[nhd_dams.HUC2 == huc2, ["geometry"]].copy()

    print("Reading flowlines...")
Example #23
    else:
        merged = merged.append(df, ignore_index=True)

print("Projecting to match SE region data...")
huc12 = merged.to_crs(DATA_CRS)


# select out those within the SE states
print("Selecting HUC12s in region...")
tree = pg.STRtree(huc12.geometry.values.data)
ix = tree.query(bnd, predicate="intersects")
huc12 = huc12.iloc[ix].copy().reset_index(drop=True)

# make sure data are valid
huc12["geometry"] = pg.make_valid(huc12.geometry.values.data)

# calculate area
huc12["acres"] = (pg.area(huc12.geometry.values.data) * M2_ACRES).round().astype("uint")

# for those that touch the edge of the region, drop any that are not >= 50% in
# raster input area.  We are not able to use polygon intersection because it
# takes too long.
tree = pg.STRtree(huc12.geometry.values.data)
ix = tree.query(bnd, predicate="contains")

edge_df = huc12.loc[~huc12.id.isin(huc12.iloc[ix].id)].copy()
geometries = pd.Series(edge_df.geometry.values.data, index=edge_df.id)
drop_ids = []
for id, geometry in Bar(
    "Calculating HUC12 overlap with input area", max=len(geometries)