def test_read_where(naturalearth_lowres):
    """Verify attribute filtering via the OGR SQL ``where`` clause."""
    # An empty filter is a no-op: the full set of records comes back
    all_records = read_dataframe(naturalearth_lowres, where="")
    assert len(all_records) == 177

    # Exact match on a single country returns one record
    canada = read_dataframe(naturalearth_lowres, where="iso_a3 = 'CAN'")
    assert len(canada) == 1
    assert canada.iloc[0].iso_a3 == "CAN"

    # Membership filter returns exactly the listed countries
    north_america = read_dataframe(
        naturalearth_lowres, where="iso_a3 IN ('CAN', 'USA', 'MEX')"
    )
    assert len(north_america) == 3
    assert (
        len(set(north_america.iso_a3.unique()).difference(["CAN", "USA", "MEX"])) == 0
    )

    # Range filter returns only items within the population bounds
    mid_population = read_dataframe(
        naturalearth_lowres, where="POP_EST >= 10000000 AND POP_EST < 100000000"
    )
    assert len(mid_population) == 75
    assert mid_population.pop_est.min() >= 10000000
    assert mid_population.pop_est.max() < 100000000

    # A filter that matches nothing yields an empty frame
    no_match = read_dataframe(naturalearth_lowres, where="ISO_A3 = 'INVALID'")
    assert len(no_match) == 0
def test_read_bbox(naturalearth_lowres):
    """Verify spatial filtering via the ``bbox`` parameter."""
    # A vanishingly small box matches no features and triggers a warning
    with pytest.warns(UserWarning, match="does not have any features to read"):
        result = read_dataframe(naturalearth_lowres, bbox=(0, 0, 0.00001, 0.00001))
        assert len(result) == 0

    # A box over western North America intersects exactly USA and MEX
    result = read_dataframe(naturalearth_lowres, bbox=(-140, 20, -100, 40))
    assert len(result) == 2
    assert np.array_equal(result.iso_a3, ["USA", "MEX"])
def test_read_force_2d(test_fgdb_vsi):
    """``force_2d=True`` should strip Z values from returned geometries."""
    # Reading this layer emits a warning about measured (M) geometry types
    with pytest.warns(
        UserWarning, match=r"Measured \(M\) geometry types are not supported"
    ):
        with_z = read_dataframe(test_fgdb_vsi, layer="test_lines", max_features=1)
        assert with_z.iloc[0].geometry.has_z

        flattened = read_dataframe(
            test_fgdb_vsi, layer="test_lines", force_2d=True, max_features=1
        )
        assert not flattened.iloc[0].geometry.has_z
def test_read_layer(test_fgdb_vsi):
    """Verify layer selection: implicit default vs. explicitly named layer."""
    layers = list_layers(test_fgdb_vsi)

    # With no layer given, the first layer is read by default
    # (NOTE: the first layer has no features)
    default_layer = read_dataframe(test_fgdb_vsi, read_geometry=False, max_features=1)
    first_by_name = read_dataframe(
        test_fgdb_vsi, layer=layers[0][0], read_geometry=False, max_features=1
    )
    assert_frame_equal(default_layer, first_by_name)

    # Requesting a specific layer should return that layer,
    # detected here by a known column.
    lines = read_dataframe(
        test_fgdb_vsi, layer="test_lines", read_geometry=False, max_features=1
    )
    assert "RIVER_MILE" in lines.columns
def test_write_dataframe_benchmark_nhd_shp(tmpdir, nhd_hr, benchmark):
    """Benchmark writing the NHD flowlines layer to a shapefile."""
    layer = "NHDFlowline"
    flowlines = read_dataframe(nhd_hr, layer=layer)

    # Datetime columns are not currently supported by the writer
    flowlines = flowlines.drop(columns="FDate")

    out_path = os.path.join(str(tmpdir), "test.shp")
    benchmark(write_dataframe, flowlines, out_path, layer=layer, driver="ESRI Shapefile")
def test_write_dataframe(tmpdir, naturalearth_lowres, driver, ext):
    """Round-trip the dataset through each driver and compare the result."""
    expected = read_dataframe(naturalearth_lowres)

    out_path = os.path.join(str(tmpdir), f"test.{ext}")
    write_dataframe(expected, out_path, driver=driver)
    assert os.path.exists(out_path)

    actual = read_dataframe(out_path)

    # GeoJSONSeq driver I/O reorders features and / or vertices, and does
    # not support roundtrip comparison
    if driver != "GeoJSONSeq":
        # Coordinates are not precisely equal when written to JSON, and
        # dtypes do not necessarily round-trip precisely through JSON
        is_json = driver == "GeoJSON"
        assert_geodataframe_equal(
            actual,
            expected,
            check_less_precise=is_json,
            check_dtype=not is_json,
        )
def test_read_dataframe(naturalearth_lowres):
    """Basic read: verify type, CRS, row count, columns, and geometry type."""
    countries = read_dataframe(naturalearth_lowres)

    assert isinstance(countries, gp.GeoDataFrame)
    assert countries.crs == "EPSG:4326"
    assert len(countries) == 177
    assert countries.columns.tolist() == [
        "pop_est",
        "continent",
        "name",
        "iso_a3",
        "gdp_md_est",
        "geometry",
    ]
    assert countries.geometry.iloc[0].type == "MultiPolygon"
def test_read_no_geometry(naturalearth_lowres):
    """``read_geometry=False`` yields a plain DataFrame, not a GeoDataFrame."""
    attributes = read_dataframe(naturalearth_lowres, read_geometry=False)

    assert isinstance(attributes, pd.DataFrame)
    assert not isinstance(attributes, gp.GeoDataFrame)
def test_read_dataframe_vsi(naturalearth_lowres_vsi):
    """Reading through a VSI path yields the full dataset."""
    result = read_dataframe(naturalearth_lowres_vsi)
    assert len(result) == 177
def test_write_dataframe_benchmark_lowres_gpkg(tmpdir, naturalearth_lowres, benchmark):
    """Benchmark writing the low-res countries dataset as a GeoPackage."""
    countries = read_dataframe(naturalearth_lowres)
    out_path = os.path.join(str(tmpdir), "test.gpkg")
    benchmark(write_dataframe, countries, out_path, driver="GPKG")
def test_write_dataframe_benchmark_lowres_shp(tmpdir, naturalearth_lowres, benchmark):
    """Benchmark writing the low-res countries dataset as a shapefile."""
    countries = read_dataframe(naturalearth_lowres)
    out_path = os.path.join(str(tmpdir), "test.shp")
    benchmark(write_dataframe, countries, out_path, driver="ESRI Shapefile")
def test_read_null_values(test_fgdb_vsi):
    """Null attribute values must be preserved as missing values on read."""
    df = read_dataframe(test_fgdb_vsi, read_geometry=False)

    # At least one SEGMENT_NAME value is null
    # (was `.max() == True`; flake8 E712 — compare truthiness, not `== True`)
    assert df.SEGMENT_NAME.isnull().any()

    # Null values are materialized as None
    # (was `== None`; flake8 E711 — identity check is the correct idiom)
    assert df.loc[df.SEGMENT_NAME.isnull()].SEGMENT_NAME.iloc[0] is None
import rasterio from rasterio.features import rasterize from rasterio.enums import Resampling from rasterio.vrt import WarpedVRT import numpy as np from pyogrio.geopandas import read_dataframe from analysis.lib.pygeos_util import to_dict_all src_dir = Path("source_data") out_dir = Path("data/inputs") ### Rasterize and merge hubs and corridors print("Processing hubs & corridors") corridors_dir = src_dir / "corridors" inland_hubs = read_dataframe(corridors_dir / "TerrestrialHubs.shp") marine_hubs = read_dataframe(corridors_dir / "MarineHubs.shp") # The rasters have the same footprint, but inland is at 30m and marine is at 200m with rasterio.open(corridors_dir / "TerrestrialCorridors.tif") as inland, rasterio.open( corridors_dir / "MarineCorridors.tif") as marine: print("Rasterizing hubs...") # rasterize hubs to match inland inland_hubs_data = rasterize( to_dict_all(inland_hubs.geometry.values.data), inland.shape, transform=inland.transform, dtype="uint8", ) marine_hubs_data = rasterize(
message=".*initial implementation of Parquet.*") src_dir = Path("source_data") data_dir = Path("data") out_dir = data_dir / "inputs/boundaries" # used as inputs for other steps tile_dir = data_dir / "for_tiles" if not out_dir.exists(): os.makedirs(out_dir) if not tile_dir.exists(): os.makedirs(tile_dir) ### Extract the boundary bnd_df = read_dataframe( src_dir / "blueprint/SE_Blueprint_2021_Vectors.gdb", layer="SECAS_Boundary_2021_20211117", )[["geometry"]] # boundary has multiple geometries, union together and cleanup bnd_df = gp.GeoDataFrame( geometry=[pg.union_all(pg.make_valid(bnd_df.geometry.values.data))], index=[0], crs=bnd_df.crs, ) bnd_df.to_feather(out_dir / "se_boundary.feather") write_dataframe(bnd_df, data_dir / "boundaries/se_boundary.fgb") # create GeoJSON for tiling bnd_geo = bnd_df.to_crs(GEO_CRS) write_dataframe(bnd_geo, tile_dir / "se_boundary.geojson", driver="GeoJSONSeq") ### Create mask by cutting SA bounds out of world bounds
# suppress warnings abuot writing to feather import warnings warnings.filterwarnings("ignore", message=".*initial implementation of Parquet.*") from analysis.constants import GEO_CRS, DATA_CRS from analysis.lib.pygeos_util import explode src_dir = Path("source_data") data_dir = Path("data") out_dir = data_dir / "inputs/boundaries" # used as inputs for other steps tile_dir = data_dir / "for_tiles" sa_df = read_dataframe(src_dir / "boundaries/SABlueprint2020_Extent.shp") ### Create mask by cutting SA bounds out of world bounds print("Creating mask...") world = pg.box(-180, -85, 180, 85) # boundary has self-intersections and 4 geometries, need to clean up bnd = pg.union_all(pg.make_valid(sa_df.geometry.values.data)) bnd_geo = pg.union_all( pg.make_valid(sa_df.to_crs(GEO_CRS).geometry.values.data)) mask = pg.normalize(pg.difference(world, bnd_geo)) gp.GeoDataFrame(geometry=[bnd], crs=DATA_CRS).to_feather(out_dir / "sa_boundary.feather")
# aoi_names = ["Fort_Mill_townlimits"] # aoi_names = ["Enviva_Hamlet_80_mile_sourcing_radius"] # aoi_names = ["Razor", "Groton_all"] # aoi_names = ["ACF_area"] # aoi_names = ["NC"] # aoi_names = ["SA_boundary"] for aoi_name in aoi_names: print(f"Making maps for {aoi_name}...") ### Write maps for an aoi out_dir = Path("/tmp/aoi") / aoi_name / "maps" if not out_dir.exists(): os.makedirs(out_dir) df = read_dataframe(f"examples/{aoi_name}.shp") geometry = pg.make_valid(df.geometry.values.data) # dissolve geometry = np.asarray([pg.union_all(geometry)]) print("Calculating results...") results = CustomArea(geometry, df.crs, name="Test").get_results() # FIXME: # results = {"indicators": []} ### Convert to WGS84 for mapping geometry = to_crs(geometry, df.crs, GEO_CRS) bounds = pg.total_bounds(geometry) # only include urban up to 2060
import warnings warnings.filterwarnings("ignore", message=".*initial implementation of Parquet.*") from analysis.constants import DATA_CRS, GEO_CRS, M2_ACRES src_dir = Path("source_data") data_dir = Path("data") analysis_dir = data_dir / "inputs/summary_units" bnd_dir = data_dir / "boundaries" # GPKGs output for reference tile_dir = data_dir / "for_tiles" ### Extract the boundary sa_df = read_dataframe(src_dir / "boundaries/SABlueprint2020_Extent.shp")[["geometry"]] # boundary has self-intersections and 4 geometries, need to clean up bnd = pg.union_all(pg.make_valid(sa_df.geometry.values.data)) ### Extract HUC12 within boundary print("Reading source HUC12s...") merged = None for huc2 in [2, 3, 5, 6]: df = read_dataframe( src_dir / f"summary_units/WBD_0{huc2}_HU2_GDB/WBD_0{huc2}_HU2_GDB.gdb", layer="WBDHU12", )[["huc12", "name", "geometry"]].rename(columns={"huc12": "id"}) if merged is None: merged = df
def test_write_dataframe_benchmark_lowres_geojsonseq(
    tmpdir, naturalearth_lowres, benchmark
):
    """Benchmark writing the low-res countries dataset as GeoJSONSeq."""
    countries = read_dataframe(naturalearth_lowres)
    out_path = os.path.join(str(tmpdir), "test.json")
    benchmark(write_dataframe, countries, out_path, driver="GeoJSONSeq")
def test_read_datetime(test_fgdb_vsi):
    """Datetime fields are read as pandas ``datetime64[ns]`` columns."""
    lines = read_dataframe(test_fgdb_vsi, layer="test_lines", max_features=1)
    assert lines.SURVEY_DAT.dtype.name == "datetime64[ns]"
def test_read_where_invalid(naturalearth_lowres):
    """An unparseable ``where`` clause raises ValueError."""
    with pytest.raises(ValueError, match="Invalid SQL"):
        read_dataframe(naturalearth_lowres, where="invalid")
# {"name": "Area near Magnet, TX", "path": "magnet"}, # {"name": "TriState area at junction of MO, OK, KS", "path": "TriState"}, # {"name": "Quincy, FL area", "path": "Quincy"}, # {"name": "Doyle Springs, TN area", "path": "DoyleSprings"}, # {"name": "Cave Spring, VA area", "path": "CaveSpring"}, # {"name": "South Atlantic Offshore", "path": "SAOffshore"}, # {"name": "Florida Offshore", "path": "FLOffshore"} ] for aoi in aois: name = aoi["name"] path = aoi["path"] print(f"Creating report for {name}...") start = time() df = read_dataframe(f"examples/{path}.shp", columns=[]) geometry = pg.make_valid(df.geometry.values.data) # dissolve geometry = np.asarray([pg.union_all(geometry)]) extent_area = ( pg.area(pg.box(*pg.total_bounds(to_crs(geometry, df.crs, DATA_CRS)))) * M2_ACRES) print("Area of extent", extent_area.round()) ### calculate results, data must be in DATA_CRS print("Calculating results...") results = CustomArea(geometry, df.crs, name=name).get_results() if results is None:
def test_read_bbox_invalid(naturalearth_lowres, bbox):
    """A malformed ``bbox`` value raises ValueError."""
    with pytest.raises(ValueError, match="Invalid bbox"):
        read_dataframe(naturalearth_lowres, bbox=bbox)