def test_read_where(naturalearth_lowres):
    """Verify attribute filtering via the OGR SQL ``where`` clause."""
    # An empty filter is a no-op: the full set of records comes back
    all_records = read_dataframe(naturalearth_lowres, where="")
    assert len(all_records) == 177

    # Exact match on a single country returns one record
    canada = read_dataframe(naturalearth_lowres, where="iso_a3 = 'CAN'")
    assert len(canada) == 1
    assert canada.iloc[0].iso_a3 == "CAN"

    # Membership filter returns exactly the listed countries
    north_america = read_dataframe(
        naturalearth_lowres, where="iso_a3 IN ('CAN', 'USA', 'MEX')"
    )
    assert len(north_america) == 3
    assert (
        len(set(north_america.iso_a3.unique()).difference(["CAN", "USA", "MEX"])) == 0
    )

    # Range filter returns only items within the population bounds
    mid_population = read_dataframe(
        naturalearth_lowres, where="POP_EST >= 10000000 AND POP_EST < 100000000"
    )
    assert len(mid_population) == 75
    assert mid_population.pop_est.min() >= 10000000
    assert mid_population.pop_est.max() < 100000000

    # A filter that matches nothing yields an empty frame
    no_match = read_dataframe(naturalearth_lowres, where="ISO_A3 = 'INVALID'")
    assert len(no_match) == 0
def test_read_bbox(naturalearth_lowres):
    """Verify spatial filtering via the ``bbox`` parameter."""
    # A vanishingly small box matches no features and triggers a warning
    with pytest.warns(UserWarning, match="does not have any features to read"):
        result = read_dataframe(naturalearth_lowres, bbox=(0, 0, 0.00001, 0.00001))
        assert len(result) == 0

    # A box over western North America intersects exactly USA and MEX
    result = read_dataframe(naturalearth_lowres, bbox=(-140, 20, -100, 40))
    assert len(result) == 2
    assert np.array_equal(result.iso_a3, ["USA", "MEX"])
def test_read_force_2d(test_fgdb_vsi):
    """``force_2d=True`` should strip Z values from returned geometries."""
    # Reading this layer emits a warning about measured (M) geometry types
    with pytest.warns(
        UserWarning, match=r"Measured \(M\) geometry types are not supported"
    ):
        with_z = read_dataframe(test_fgdb_vsi, layer="test_lines", max_features=1)
        assert with_z.iloc[0].geometry.has_z

        flattened = read_dataframe(
            test_fgdb_vsi, layer="test_lines", force_2d=True, max_features=1
        )
        assert not flattened.iloc[0].geometry.has_z
def test_read_layer(test_fgdb_vsi):
    """Verify layer selection: implicit default vs. explicitly named layer."""
    layers = list_layers(test_fgdb_vsi)

    # With no layer given, the first layer is read by default
    # (NOTE: the first layer has no features)
    default_layer = read_dataframe(test_fgdb_vsi, read_geometry=False, max_features=1)
    first_by_name = read_dataframe(
        test_fgdb_vsi, layer=layers[0][0], read_geometry=False, max_features=1
    )
    assert_frame_equal(default_layer, first_by_name)

    # Requesting a specific layer should return that layer,
    # detected here by a known column.
    lines = read_dataframe(
        test_fgdb_vsi, layer="test_lines", read_geometry=False, max_features=1
    )
    assert "RIVER_MILE" in lines.columns
def test_write_dataframe_benchmark_nhd_shp(tmpdir, nhd_hr, benchmark):
    """Benchmark writing the NHD flowlines layer to a shapefile."""
    layer = "NHDFlowline"
    flowlines = read_dataframe(nhd_hr, layer=layer)

    # Datetime columns are not currently supported by the writer
    flowlines = flowlines.drop(columns="FDate")

    out_path = os.path.join(str(tmpdir), "test.shp")
    benchmark(write_dataframe, flowlines, out_path, layer=layer, driver="ESRI Shapefile")
def test_write_dataframe(tmpdir, naturalearth_lowres, driver, ext):
    """Round-trip the dataset through each driver and compare the result."""
    expected = read_dataframe(naturalearth_lowres)

    out_path = os.path.join(str(tmpdir), f"test.{ext}")
    write_dataframe(expected, out_path, driver=driver)
    assert os.path.exists(out_path)

    actual = read_dataframe(out_path)

    # GeoJSONSeq driver I/O reorders features and / or vertices, and does
    # not support roundtrip comparison
    if driver != "GeoJSONSeq":
        # Coordinates are not precisely equal when written to JSON, and
        # dtypes do not necessarily round-trip precisely through JSON
        is_json = driver == "GeoJSON"
        assert_geodataframe_equal(
            actual,
            expected,
            check_less_precise=is_json,
            check_dtype=not is_json,
        )
def test_read_dataframe(naturalearth_lowres):
    """Basic read: verify type, CRS, row count, columns, and geometry type."""
    countries = read_dataframe(naturalearth_lowres)

    assert isinstance(countries, gp.GeoDataFrame)
    assert countries.crs == "EPSG:4326"
    assert len(countries) == 177
    assert countries.columns.tolist() == [
        "pop_est",
        "continent",
        "name",
        "iso_a3",
        "gdp_md_est",
        "geometry",
    ]
    assert countries.geometry.iloc[0].type == "MultiPolygon"
def test_read_no_geometry(naturalearth_lowres):
    """``read_geometry=False`` yields a plain DataFrame, not a GeoDataFrame."""
    attributes = read_dataframe(naturalearth_lowres, read_geometry=False)

    assert isinstance(attributes, pd.DataFrame)
    assert not isinstance(attributes, gp.GeoDataFrame)
def test_read_dataframe_vsi(naturalearth_lowres_vsi):
    """Reading through a VSI path yields the full dataset."""
    result = read_dataframe(naturalearth_lowres_vsi)
    assert len(result) == 177
def test_write_dataframe_benchmark_lowres_gpkg(tmpdir, naturalearth_lowres, benchmark):
    """Benchmark writing the low-res countries dataset as a GeoPackage."""
    countries = read_dataframe(naturalearth_lowres)
    out_path = os.path.join(str(tmpdir), "test.gpkg")
    benchmark(write_dataframe, countries, out_path, driver="GPKG")
def test_write_dataframe_benchmark_lowres_shp(tmpdir, naturalearth_lowres, benchmark):
    """Benchmark writing the low-res countries dataset as a shapefile."""
    countries = read_dataframe(naturalearth_lowres)
    out_path = os.path.join(str(tmpdir), "test.shp")
    benchmark(write_dataframe, countries, out_path, driver="ESRI Shapefile")
def test_read_null_values(test_fgdb_vsi):
    """Null attribute values must be preserved as missing values on read."""
    df = read_dataframe(test_fgdb_vsi, read_geometry=False)

    # At least one SEGMENT_NAME value is null
    # (was `.max() == True`; flake8 E712 — compare truthiness, not `== True`)
    assert df.SEGMENT_NAME.isnull().any()

    # Null values are materialized as None
    # (was `== None`; flake8 E711 — identity check is the correct idiom)
    assert df.loc[df.SEGMENT_NAME.isnull()].SEGMENT_NAME.iloc[0] is None
import rasterio from rasterio.features import rasterize from rasterio.enums import Resampling from rasterio.vrt import WarpedVRT import numpy as np from pyogrio.geopandas import read_dataframe from analysis.lib.pygeos_util import to_dict_all src_dir = Path("source_data") out_dir = Path("data/inputs") ### Rasterize and merge hubs and corridors print("Processing hubs & corridors") corridors_dir = src_dir / "corridors" inland_hubs = read_dataframe(corridors_dir / "TerrestrialHubs.shp") marine_hubs = read_dataframe(corridors_dir / "MarineHubs.shp") # The rasters have the same footprint, but inland is at 30m and marine is at 200m with rasterio.open(corridors_dir / "TerrestrialCorridors.tif") as inland, rasterio.open( corridors_dir / "MarineCorridors.tif") as marine: print("Rasterizing hubs...") # rasterize hubs to match inland inland_hubs_data = rasterize( to_dict_all(inland_hubs.geometry.values.data), inland.shape, transform=inland.transform, dtype="uint8", ) marine_hubs_data = rasterize(
message=".*initial implementation of Parquet.*") src_dir = Path("source_data") data_dir = Path("data") out_dir = data_dir / "inputs/boundaries" # used as inputs for other steps tile_dir = data_dir / "for_tiles" if not out_dir.exists(): os.makedirs(out_dir) if not tile_dir.exists(): os.makedirs(tile_dir) ### Extract the boundary bnd_df = read_dataframe( src_dir / "blueprint/SE_Blueprint_2021_Vectors.gdb", layer="SECAS_Boundary_2021_20211117", )[["geometry"]] # boundary has multiple geometries, union together and cleanup bnd_df = gp.GeoDataFrame( geometry=[pg.union_all(pg.make_valid(bnd_df.geometry.values.data))], index=[0], crs=bnd_df.crs, ) bnd_df.to_feather(out_dir / "se_boundary.feather") write_dataframe(bnd_df, data_dir / "boundaries/se_boundary.fgb") # create GeoJSON for tiling bnd_geo = bnd_df.to_crs(GEO_CRS) write_dataframe(bnd_geo, tile_dir / "se_boundary.geojson", driver="GeoJSONSeq") ### Create mask by cutting SA bounds out of world bounds
# suppress warnings abuot writing to feather import warnings warnings.filterwarnings("ignore", message=".*initial implementation of Parquet.*") from analysis.constants import GEO_CRS, DATA_CRS from analysis.lib.pygeos_util import explode src_dir = Path("source_data") data_dir = Path("data") out_dir = data_dir / "inputs/boundaries" # used as inputs for other steps tile_dir = data_dir / "for_tiles" sa_df = read_dataframe(src_dir / "boundaries/SABlueprint2020_Extent.shp") ### Create mask by cutting SA bounds out of world bounds print("Creating mask...") world = pg.box(-180, -85, 180, 85) # boundary has self-intersections and 4 geometries, need to clean up bnd = pg.union_all(pg.make_valid(sa_df.geometry.values.data)) bnd_geo = pg.union_all( pg.make_valid(sa_df.to_crs(GEO_CRS).geometry.values.data)) mask = pg.normalize(pg.difference(world, bnd_geo)) gp.GeoDataFrame(geometry=[bnd], crs=DATA_CRS).to_feather(out_dir / "sa_boundary.feather")
# aoi_names = ["Fort_Mill_townlimits"] # aoi_names = ["Enviva_Hamlet_80_mile_sourcing_radius"] # aoi_names = ["Razor", "Groton_all"] # aoi_names = ["ACF_area"] # aoi_names = ["NC"] # aoi_names = ["SA_boundary"] for aoi_name in aoi_names: print(f"Making maps for {aoi_name}...") ### Write maps for an aoi out_dir = Path("/tmp/aoi") / aoi_name / "maps" if not out_dir.exists(): os.makedirs(out_dir) df = read_dataframe(f"examples/{aoi_name}.shp") geometry = pg.make_valid(df.geometry.values.data) # dissolve geometry = np.asarray([pg.union_all(geometry)]) print("Calculating results...") results = CustomArea(geometry, df.crs, name="Test").get_results() # FIXME: # results = {"indicators": []} ### Convert to WGS84 for mapping geometry = to_crs(geometry, df.crs, GEO_CRS) bounds = pg.total_bounds(geometry) # only include urban up to 2060
import warnings warnings.filterwarnings("ignore", message=".*initial implementation of Parquet.*") from analysis.constants import DATA_CRS, GEO_CRS, M2_ACRES src_dir = Path("source_data") data_dir = Path("data") analysis_dir = data_dir / "inputs/summary_units" bnd_dir = data_dir / "boundaries" # GPKGs output for reference tile_dir = data_dir / "for_tiles" ### Extract the boundary sa_df = read_dataframe(src_dir / "boundaries/SABlueprint2020_Extent.shp")[["geometry"]] # boundary has self-intersections and 4 geometries, need to clean up bnd = pg.union_all(pg.make_valid(sa_df.geometry.values.data)) ### Extract HUC12 within boundary print("Reading source HUC12s...") merged = None for huc2 in [2, 3, 5, 6]: df = read_dataframe( src_dir / f"summary_units/WBD_0{huc2}_HU2_GDB/WBD_0{huc2}_HU2_GDB.gdb", layer="WBDHU12", )[["huc12", "name", "geometry"]].rename(columns={"huc12": "id"}) if merged is None: merged = df
def test_write_dataframe_benchmark_lowres_geojsonseq(
    tmpdir, naturalearth_lowres, benchmark
):
    """Benchmark writing the low-res countries dataset as GeoJSONSeq."""
    countries = read_dataframe(naturalearth_lowres)
    out_path = os.path.join(str(tmpdir), "test.json")
    benchmark(write_dataframe, countries, out_path, driver="GeoJSONSeq")
def test_read_datetime(test_fgdb_vsi):
    """Datetime fields are read as pandas ``datetime64[ns]`` columns."""
    lines = read_dataframe(test_fgdb_vsi, layer="test_lines", max_features=1)
    assert lines.SURVEY_DAT.dtype.name == "datetime64[ns]"
def test_read_where_invalid(naturalearth_lowres):
    """An unparseable ``where`` clause raises ValueError."""
    with pytest.raises(ValueError, match="Invalid SQL"):
        read_dataframe(naturalearth_lowres, where="invalid")
# {"name": "Area near Magnet, TX", "path": "magnet"}, # {"name": "TriState area at junction of MO, OK, KS", "path": "TriState"}, # {"name": "Quincy, FL area", "path": "Quincy"}, # {"name": "Doyle Springs, TN area", "path": "DoyleSprings"}, # {"name": "Cave Spring, VA area", "path": "CaveSpring"}, # {"name": "South Atlantic Offshore", "path": "SAOffshore"}, # {"name": "Florida Offshore", "path": "FLOffshore"} ] for aoi in aois: name = aoi["name"] path = aoi["path"] print(f"Creating report for {name}...") start = time() df = read_dataframe(f"examples/{path}.shp", columns=[]) geometry = pg.make_valid(df.geometry.values.data) # dissolve geometry = np.asarray([pg.union_all(geometry)]) extent_area = ( pg.area(pg.box(*pg.total_bounds(to_crs(geometry, df.crs, DATA_CRS)))) * M2_ACRES) print("Area of extent", extent_area.round()) ### calculate results, data must be in DATA_CRS print("Calculating results...") results = CustomArea(geometry, df.crs, name=name).get_results() if results is None:
def test_read_bbox_invalid(naturalearth_lowres, bbox):
    """A malformed ``bbox`` value raises ValueError."""
    with pytest.raises(ValueError, match="Invalid bbox"):
        read_dataframe(naturalearth_lowres, bbox=bbox)