def test_from_wkb():
    """from_wkb round-trips lists/arrays of WKB and handles missing values."""
    wkb_values = [p.wkb for p in points_no_missing]

    # plain list input
    result = from_wkb(wkb_values)
    assert isinstance(result, GeometryArray)
    for actual, expected in zip(result, points_no_missing):
        assert actual.equals(expected)

    # object-dtype numpy array input
    result = from_wkb(np.array(wkb_values, dtype=object))
    assert isinstance(result, GeometryArray)
    for actual, expected in zip(result, points_no_missing):
        assert actual.equals(expected)

    # missing values
    # TODO(pygeos) does not support empty strings
    if compat.USE_PYGEOS:
        wkb_values.append(None)
    else:
        wkb_values.extend([b"", None])
    result = from_wkb(wkb_values)
    assert result[-1] is None
    if not compat.USE_PYGEOS:
        assert result[-2] is None

    # a single MultiPolygon survives the round trip
    multi_poly = shapely.geometry.MultiPolygon(
        [shapely.geometry.box(0, 0, 1, 1), shapely.geometry.box(3, 3, 4, 4)]
    )
    result = from_wkb([multi_poly.wkb])
    assert result[0] == multi_poly
def test_from_wkb():
    """from_wkb round-trips WKB and maps every missing-value flavour to None."""
    wkb_values = [p.wkb for p in points_no_missing]

    # plain list input
    result = from_wkb(wkb_values)
    assert isinstance(result, GeometryArray)
    for actual, expected in zip(result, points_no_missing):
        assert actual.equals(expected)

    # object-dtype numpy array input
    result = from_wkb(np.array(wkb_values, dtype=object))
    assert isinstance(result, GeometryArray)
    for actual, expected in zip(result, points_no_missing):
        assert actual.equals(expected)

    # missing values
    # TODO(pygeos) does not support empty strings, np.nan, or pd.NA
    missing_values = [None]
    if not compat.USE_PYGEOS:
        missing_values.extend([b"", np.nan])

        if compat.PANDAS_GE_10:
            missing_values.append(pd.NA)

    result = from_wkb(missing_values)
    np.testing.assert_array_equal(result, np.full(len(missing_values), None))

    # a single MultiPolygon survives the round trip
    multi_poly = shapely.geometry.MultiPolygon(
        [shapely.geometry.box(0, 0, 1, 1), shapely.geometry.box(3, 3, 4, 4)]
    )
    result = from_wkb([multi_poly.wkb])
    assert result[0] == multi_poly
def test_from_wkb_hex():
    """from_wkb also accepts hex-encoded WKB strings."""
    geometry_hex = ["0101000000CDCCCCCCCCCC1440CDCCCCCCCC0C4A40"]

    # list input
    assert isinstance(from_wkb(geometry_hex), GeometryArray)

    # object-dtype numpy array input
    assert isinstance(from_wkb(np.array(geometry_hex, dtype=object)), GeometryArray)
def test_from_wkb():
    """from_wkb round-trips WKB and maps empty/None entries to None."""
    wkb_values = [p.wkb for p in points_no_missing]

    # plain list input
    result = from_wkb(wkb_values)
    assert isinstance(result, GeometryArray)
    for actual, expected in zip(result, points_no_missing):
        assert actual.equals(expected)

    # object-dtype numpy array input
    result = from_wkb(np.array(wkb_values, dtype=object))
    assert isinstance(result, GeometryArray)
    for actual, expected in zip(result, points_no_missing):
        assert actual.equals(expected)

    # missing values: empty bytes and None both decode to None
    wkb_values.extend([b"", None])
    result = from_wkb(wkb_values)
    assert result[-1] is None
    assert result[-2] is None
def _arrow_to_geopandas(table):
    """
    Helper function with main, shared logic for read_parquet/read_feather.

    Decodes the ``geo`` metadata stored on the Arrow schema and converts
    every WKB geometry column present in the frame back to geometry.
    """
    df = table.to_pandas()

    raw_metadata = table.schema.metadata
    if b"geo" not in raw_metadata:
        raise ValueError(
            """Missing geo metadata in Parquet/Feather file.
            Use pandas.read_parquet/read_feather() instead."""
        )

    try:
        geo_metadata = _decode_metadata(raw_metadata.get(b"geo", b""))

    except (TypeError, json.decoder.JSONDecodeError):
        raise ValueError("Missing or malformed geo metadata in Parquet/Feather file")

    _validate_metadata(geo_metadata)

    # Geometry columns actually present in the frame; may be a subset of
    # those recorded in the metadata when the 'columns' parameter was used.
    geometry_columns = df.columns.intersection(geo_metadata["columns"])

    if not len(geometry_columns):
        raise ValueError(
            """No geometry columns are included in the columns read from
            the Parquet/Feather file.  To read this file without geometry columns,
            use pandas.read_parquet/read_feather() instead."""
        )

    geometry = geo_metadata["primary_column"]

    # Missing geometry likely indicates a subset of columns was read;
    # promote the first available geometry to the primary geometry.
    if len(geometry_columns) and geometry not in geometry_columns:
        geometry = geometry_columns[0]

        # if there are multiple non-primary geometry columns, raise a warning
        if len(geometry_columns) > 1:
            warnings.warn(
                "Multiple non-primary geometry columns read from Parquet/Feather "
                "file. The first column read was promoted to the primary geometry."
            )

    # Convert the WKB columns that are present back to geometry.
    for col in geometry_columns:
        df[col] = from_wkb(df[col].values, crs=geo_metadata["columns"][col]["crs"])

    return GeoDataFrame(df, geometry=geometry)
def test_from_wkt(string_type):
    """from_wkt round-trips lists/arrays of WKT (str or bytes) and handles
    missing values.

    ``string_type`` selects whether the WKT values are fed in as text or
    as utf-8 encoded bytes.
    """
    if string_type == "str":
        f = six.text_type
    else:
        if six.PY3:

            def f(x):
                return bytes(x, "utf8")

        else:

            def f(x):
                return x

    # list
    L_wkt = [f(p.wkt) for p in points_no_missing]
    res = from_wkt(L_wkt)
    assert isinstance(res, GeometryArray)
    # WKT text has limited precision, so compare with a tolerance
    tol = 0.5 * 10 ** (-6)
    assert all(
        v.equals_exact(t, tolerance=tol) for v, t in zip(res, points_no_missing)
    )

    # array
    res = from_wkt(np.array(L_wkt, dtype=object))
    assert isinstance(res, GeometryArray)
    assert all(
        v.equals_exact(t, tolerance=tol) for v, t in zip(res, points_no_missing)
    )

    # missing values
    # TODO(pygeos) does not support empty strings, np.nan, or pd.NA
    missing_values = [None]
    if not compat.USE_PYGEOS:
        missing_values.extend([f(""), np.nan])

        if compat.PANDAS_GE_10:
            missing_values.append(pd.NA)

    # BUG FIX: this previously called from_wkb on WKT input (copy-paste
    # from the WKB test); these values are WKT, so use from_wkt.
    res = from_wkt(missing_values)
    np.testing.assert_array_equal(res, np.full(len(missing_values), None))

    # single MultiPolygon
    multi_poly = shapely.geometry.MultiPolygon(
        [shapely.geometry.box(0, 0, 1, 1), shapely.geometry.box(3, 3, 4, 4)]
    )
    res = from_wkt([f(multi_poly.wkt)])
    assert res[0] == multi_poly
def test_from_wkb():
    """from_wkb round-trips WKB, maps missing entries to None, and handles
    a single MultiPolygon."""
    wkb_values = [p.wkb for p in points_no_missing]

    # plain list input
    result = from_wkb(wkb_values)
    assert isinstance(result, GeometryArray)
    for actual, expected in zip(result, points_no_missing):
        assert actual.equals(expected)

    # object-dtype numpy array input
    result = from_wkb(np.array(wkb_values, dtype=object))
    assert isinstance(result, GeometryArray)
    for actual, expected in zip(result, points_no_missing):
        assert actual.equals(expected)

    # missing values: empty bytes and None both decode to None
    wkb_values.extend([b"", None])
    result = from_wkb(wkb_values)
    assert result[-1] is None
    assert result[-2] is None

    # a single MultiPolygon survives the round trip
    multi_poly = shapely.geometry.MultiPolygon(
        [shapely.geometry.box(0, 0, 1, 1), shapely.geometry.box(3, 3, 4, 4)]
    )
    result = from_wkb([multi_poly.wkb])
    assert result[0] == multi_poly
def from_geofeather(path, columns=None):
    """Deserialize a geopandas.GeoDataFrame stored in a feather file.

    This converts the internal WKB representation back into geometry.
    If the corresponding .crs file is found, it is used to set the CRS of
    the GeoDataFrame.

    Note: no index is set on this after deserialization, that is the
    responsibility of the caller.

    Parameters
    ----------
    path : str
        path to feather file to read
    columns : list-like (optional, default: None)
        Subset of columns to read from the file, must include 'geometry'.
        If not provided, all columns are read.

    Returns
    -------
    geopandas.GeoDataFrame

    Raises
    ------
    ValueError
        If ``columns`` is provided but does not include 'geometry'.
    """
    if columns is not None and "geometry" not in columns:
        raise ValueError(
            "'geometry' must be included in list of columns to read from feather file"
        )

    # shim to support files created with geofeather 0.1.0, which stored the
    # geometry under a 'wkb' column.  BUG FIX: copy before appending so the
    # caller's list is not mutated as a side effect.
    if columns is not None and "wkb" not in columns:
        columns = list(columns) + ["wkb"]

    df, crs = _from_geofeather(path, columns=columns)
    # shim to support files created with geofeather 0.1.0
    df = df.rename(columns={"wkb": "geometry"})

    # decode the WKB geometry back to shapely objects
    df.geometry = from_wkb(df.geometry, crs=crs)
    return GeoDataFrame(df, geometry="geometry")
def test_from_wkb(self):
    """A CRS passed to from_wkb is attached to the resulting array."""
    wkb_values = [geom.wkb for geom in self.geoms]
    result = from_wkb(wkb_values, crs=27700)
    assert result.crs == self.osgb
def read_dataframe(
    path,
    layer=None,
    encoding=None,
    columns=None,
    read_geometry=True,
    force_2d=False,
    skip_features=0,
    max_features=None,
    where=None,
    bbox=None,
):
    """Read from an OGR data source to a GeoPandas GeoDataFrame or Pandas DataFrame.
    If the data source does not have a geometry column or `read_geometry` is False,
    a DataFrame will be returned.

    Requires geopandas >= 0.8.

    Parameters
    ----------
    path : str
        path to file
    layer : int or str, optional (default: first layer)
        If an integer is provided, it corresponds to the index of the layer
        with the data source.  If a string is provided, it must match the name
        of the layer in the data source.  Defaults to first layer in data source.
    encoding : str, optional (default: None)
        If present, will be used as the encoding for reading string values from
        the data source, unless encoding can be inferred directly from the data
        source.
    columns : list-like, optional (default: all columns)
        List of column names to import from the data source.  Column names must
        exactly match the names in the data source, and will be returned in
        the order they occur in the data source.  To avoid reading any columns,
        pass an empty list-like.
    read_geometry : bool, optional (default: True)
        If True, will read geometry into a GeoSeries.  If False, a Pandas
        DataFrame will be returned instead.
    force_2d : bool, optional (default: False)
        If the geometry has Z values, setting this to True will cause those to
        be ignored and 2D geometries to be returned
    skip_features : int, optional (default: 0)
        Number of features to skip from the beginning of the file before
        returning features.  Must be less than the total number of features in
        the file.
    max_features : int, optional (default: None)
        Number of features to read from the file.  Must be less than the total
        number of features in the file minus skip_features (if used).
    where : str, optional (default: None)
        Where clause to filter features in layer by attribute values.  Uses a
        restricted form of SQL WHERE clause, defined here:
        http://ogdi.sourceforge.net/prop/6.2.CapabilitiesMetadata.html
        Examples: "ISO_A3 = 'CAN'", "POP_EST > 10000000 AND POP_EST < 100000000"
    bbox : tuple of (xmin, ymin, xmax, ymax) (default: None)
        If present, will be used to filter records whose geometry intersects
        this box.  This must be in the same CRS as the dataset.

    Returns
    -------
    GeoDataFrame or DataFrame (if no geometry is present)

    Raises
    ------
    ImportError
        If geopandas is not installed.
    ValueError
        If a local, non-VSI path does not exist.
    """
    try:
        with GDALEnv():
            import pandas as pd
            import geopandas as gp
            from geopandas.array import from_wkb

    except ImportError:
        raise ImportError("geopandas is required to use pyogrio.read_dataframe()")

    path = str(path)

    # "://" marks a remote/URL-style path; "/vsizip" a GDAL virtual path.
    # Only plain local paths are checked for existence.
    if "://" not in path:
        if "/vsizip" not in path.lower() and not os.path.exists(path):
            raise ValueError(f"'{path}' does not exist")

    meta, geometry, field_data = read(
        path,
        layer=layer,
        encoding=encoding,
        columns=columns,
        read_geometry=read_geometry,
        force_2d=force_2d,
        skip_features=skip_features,
        max_features=max_features,
        where=where,
        bbox=bbox,
    )

    columns = meta["fields"].tolist()
    # pair each field array with its column name, preserving file order
    df = pd.DataFrame(dict(zip(columns, field_data)), columns=columns)

    if geometry is None or not read_geometry:
        return df

    geometry = from_wkb(geometry, crs=meta["crs"])

    return gp.GeoDataFrame(df, geometry=geometry)
def _read_parquet(path, columns=None, **kwargs):
    """
    Load a Parquet object from the file path, returning a GeoDataFrame.

    You can read a subset of columns in the file using the ``columns``
    parameter.  However, the structure of the returned GeoDataFrame will
    depend on which columns you read:

    * if no geometry columns are read, this will raise a ``ValueError`` -
      you should use the pandas `read_parquet` method instead.
    * if the primary geometry column saved to this file is not included in
      columns, the first available geometry column will be set as the
      geometry column of the returned GeoDataFrame.

    Requires 'pyarrow'.

    .. versionadded:: 0.8

    Parameters
    ----------
    path : str, path object
    columns : list-like of strings, default=None
        If not None, only these columns will be read from the file.  If
        the primary geometry column is not included, the first secondary
        geometry read from the file will be set as the geometry column
        of the returned GeoDataFrame.  If no geometry columns are present,
        a ``ValueError`` will be raised.
    **kwargs
        Any additional kwargs passed to pyarrow.parquet.read_table().

    Returns
    -------
    GeoDataFrame
    """
    import_optional_dependency(
        "pyarrow", extra="pyarrow is required for Parquet support."
    )
    from pyarrow import parquet

    kwargs["use_pandas_metadata"] = True
    table = parquet.read_table(path, columns=columns, **kwargs)
    df = table.to_pandas()

    raw_metadata = table.schema.metadata
    if b"geo" not in raw_metadata:
        raise ValueError(
            """Missing geo metadata in Parquet file.
            Use pandas.read_parquet() instead."""
        )

    try:
        geo_metadata = _decode_metadata(raw_metadata.get(b"geo", b""))

    except (TypeError, json.decoder.JSONDecodeError):
        raise ValueError("Missing or malformed geo metadata in Parquet file")

    _validate_metadata(geo_metadata)

    # Geometry columns actually present in the frame; may be a subset of
    # those recorded in the metadata when the 'columns' parameter was used.
    geometry_columns = df.columns.intersection(geo_metadata["columns"])

    if not len(geometry_columns):
        raise ValueError(
            """No geometry columns are included in the columns read from
            the Parquet file.  To read this file without geometry columns,
            use pandas.read_parquet() instead."""
        )

    geometry = geo_metadata["primary_column"]

    # Missing geometry likely indicates a subset of columns was read;
    # promote the first available geometry to the primary geometry.
    if len(geometry_columns) and geometry not in geometry_columns:
        geometry = geometry_columns[0]

        # if there are multiple non-primary geometry columns, raise a warning
        if len(geometry_columns) > 1:
            warnings.warn(
                "Multiple non-primary geometry columns read from Parquet file. "
                "The first column read was promoted to the primary geometry."
            )

    # Convert the WKB columns that are present back to geometry.
    for col in geometry_columns:
        df[col] = from_wkb(df[col].values, crs=geo_metadata["columns"][col]["crs"])

    return GeoDataFrame(df, geometry=geometry)