def test_to_wkb():
    """to_wkb returns an object ndarray of WKB bytes, mapping None through."""
    arr = from_shapely(points_no_missing)
    result = to_wkb(arr)
    expected = np.array([geom.wkb for geom in points_no_missing], dtype=object)
    assert isinstance(result, np.ndarray)
    np.testing.assert_array_equal(result, expected)

    # missing values
    with_missing = from_shapely([None, points_no_missing[0]])
    assert to_wkb(with_missing)[0] is None
def test_parquet_invalid_metadata(tmpdir, geo_meta, error):
    """Geo metadata missing required fields must raise a ValueError.

    The parquet file is written directly via pyarrow below so that this
    test controls exactly which metadata ends up in the file.
    """
    from pyarrow import parquet, Table

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    # convert to DataFrame and encode geometry to WKB
    df = DataFrame(df)
    df["geometry"] = to_wkb(df["geometry"].values)

    # attach the (invalid) geo metadata supplied by the fixture
    pq_table = Table.from_pandas(df)
    schema_meta = pq_table.schema.metadata
    schema_meta.update(geo_meta)
    pq_table = pq_table.replace_schema_metadata(schema_meta)

    out_path = os.path.join(str(tmpdir), "test.pq")
    parquet.write_table(pq_table, out_path)

    with pytest.raises(ValueError, match=error):
        read_parquet(out_path)
def _convert_to_wkb(gdf, geom_name):
    """Convert geometries to wkb."""
    from geopandas.array import from_shapely, to_wkb

    # replace the geometry column in place with its hex-WKB encoding
    gdf[geom_name] = to_wkb(from_shapely(gdf[geom_name]), hex=True)
    return gdf
def write_dataframe(df, path, layer=None, driver=None, encoding=None, **kwargs):
    """Write a GeoDataFrame to an OGR-supported vector data source.

    Parameters
    ----------
    df : GeoDataFrame
        Must contain exactly one geometry column.
    path : str or path-like
        Output path for the data source.
    layer : str, optional
        Layer name within the data source.
    driver : str, optional
        OGR driver name.
    encoding : str, optional
        Text encoding used when writing field data.
    **kwargs
        Additional keyword arguments (currently unused here).

    Raises
    ------
    ImportError
        If geopandas (and therefore pyproj) is not installed.
    ValueError
        If ``df`` is not a GeoDataFrame or does not have exactly one
        geometry column.
    """
    try:
        with GDALEnv():
            import geopandas as gp
            from geopandas.array import to_wkb

            # if geopandas is available so is pyproj
            from pyproj.enums import WktVersion
    except ImportError as err:
        # Fixed: message previously referenced read_dataframe() in error.
        # Chain the original exception so the root cause stays visible.
        raise ImportError(
            "geopandas is required to use pyogrio.write_dataframe()"
        ) from err

    path = str(path)

    if not isinstance(df, gp.GeoDataFrame):
        raise ValueError("'df' must be a GeoDataFrame")

    geometry_columns = df.columns[df.dtypes == "geometry"]
    if len(geometry_columns) == 0:
        raise ValueError("'df' does not have a geometry column")

    if len(geometry_columns) > 1:
        raise ValueError(
            "'df' must have only one geometry column. "
            "Multiple geometry columns are not supported for output using OGR."
        )

    geometry_column = geometry_columns[0]
    geometry = df[geometry_column]
    fields = [c for c in df.columns if not c == geometry_column]

    # TODO: may need to fill in pd.NA, etc
    field_data = [df[f].values for f in fields]

    # TODO: validate geometry types, not all combinations are valid
    geometry_type = geometry.type.unique()[0]

    crs = None
    if geometry.crs:
        # TODO: this may need to be WKT1, due to issues
        # if possible use EPSG codes instead
        epsg = geometry.crs.to_epsg()
        if epsg:
            crs = f"EPSG:{epsg}"
        else:
            crs = geometry.crs.to_wkt(WktVersion.WKT1_GDAL)

    write(
        path,
        layer=layer,
        driver=driver,
        geometry=to_wkb(geometry.values),
        field_data=field_data,
        fields=fields,
        crs=crs,
        geometry_type=geometry_type,
        encoding=encoding,
    )
def _encode_wkb(df):
    """Encode all geometry columns in the GeoDataFrame to WKB.

    Parameters
    ----------
    df : GeoDataFrame

    Returns
    -------
    DataFrame
        geometry columns are encoded to WKB
    """
    # Work on a plain DataFrame copy so the caller's frame is untouched
    result = DataFrame(df.copy())

    # Encode all geometry columns to WKB
    geom_cols = result.columns[result.dtypes == "geometry"]
    for name in geom_cols:
        result[name] = to_wkb(result[name].values)

    return result
def test_parquet_missing_metadata(tmpdir):
    """A parquet file without geo metadata (e.g. written from a plain
    pandas DataFrame) must raise a ValueError on read.
    """
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    # convert to DataFrame
    df = DataFrame(df)

    # convert the geometry column so we can extract later
    df["geometry"] = to_wkb(df["geometry"].values)

    out_path = os.path.join(str(tmpdir), "test.pq")

    # use pandas to_parquet (no geo metadata)
    df.to_parquet(out_path)

    # missing metadata will raise ValueError
    with pytest.raises(
        ValueError, match="Missing geo metadata in Parquet/Feather file."
    ):
        read_parquet(out_path)