Code example #1
File: test_array.py Project: visr/geopandas
def test_from_wkb():
    # list
    L_wkb = [p.wkb for p in points_no_missing]
    res = from_wkb(L_wkb)
    assert isinstance(res, GeometryArray)
    assert all(v.equals(t) for v, t in zip(res, points_no_missing))

    # array
    res = from_wkb(np.array(L_wkb, dtype=object))
    assert isinstance(res, GeometryArray)
    assert all(v.equals(t) for v, t in zip(res, points_no_missing))

    # missing values
    # TODO(pygeos) does not support empty strings
    if compat.USE_PYGEOS:
        L_wkb.extend([None])
    else:
        L_wkb.extend([b"", None])
    res = from_wkb(L_wkb)
    assert res[-1] is None
    if not compat.USE_PYGEOS:
        assert res[-2] is None

    # single MultiPolygon
    multi_poly = shapely.geometry.MultiPolygon(
        [shapely.geometry.box(0, 0, 1, 1), shapely.geometry.box(3, 3, 4, 4)]
    )
    res = from_wkb([multi_poly.wkb])
    assert res[0] == multi_poly
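
The tests above exercise from_wkb with both a plain list and an object-dtype NumPy array of WKB payloads. A minimal standalone sketch of the same round trip, assuming geopandas >= 0.8 and shapely are installed (the point coordinates are made up for illustration):

import numpy as np
import shapely.geometry
from geopandas.array import GeometryArray, from_wkb

# build WKB bytes from a few shapely points (coordinates are illustrative)
points = [shapely.geometry.Point(x, x + 1) for x in range(3)]
wkb_list = [p.wkb for p in points]

# a list and an object-dtype array of WKB payloads are both accepted
arr = from_wkb(wkb_list)
assert isinstance(arr, GeometryArray)
assert all(v.equals(t) for v, t in zip(arr, points))

arr = from_wkb(np.array(wkb_list, dtype=object))
assert all(v.equals(t) for v, t in zip(arr, points))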
Code example #2
File: test_array.py Project: snowman2/geopandas
def test_from_wkb():
    # list
    L_wkb = [p.wkb for p in points_no_missing]
    res = from_wkb(L_wkb)
    assert isinstance(res, GeometryArray)
    assert all(v.equals(t) for v, t in zip(res, points_no_missing))

    # array
    res = from_wkb(np.array(L_wkb, dtype=object))
    assert isinstance(res, GeometryArray)
    assert all(v.equals(t) for v, t in zip(res, points_no_missing))

    # missing values
    # TODO(pygeos) does not support empty strings, np.nan, or pd.NA
    missing_values = [None]
    if not compat.USE_PYGEOS:
        missing_values.extend([b"", np.nan])

        if compat.PANDAS_GE_10:
            missing_values.append(pd.NA)

    res = from_wkb(missing_values)
    np.testing.assert_array_equal(res, np.full(len(missing_values), None))

    # single MultiPolygon
    multi_poly = shapely.geometry.MultiPolygon(
        [shapely.geometry.box(0, 0, 1, 1), shapely.geometry.box(3, 3, 4, 4)]
    )
    res = from_wkb([multi_poly.wkb])
    assert res[0] == multi_poly
Code example #3
File: test_array.py Project: snowman2/geopandas
def test_from_wkb_hex():
    geometry_hex = ["0101000000CDCCCCCCCCCC1440CDCCCCCCCC0C4A40"]
    res = from_wkb(geometry_hex)
    assert isinstance(res, GeometryArray)

    # array
    res = from_wkb(np.array(geometry_hex, dtype=object))
    assert isinstance(res, GeometryArray)
Code example #4
def test_from_wkb():
    # list
    L_wkb = [p.wkb for p in points_no_missing]
    res = from_wkb(L_wkb)
    assert isinstance(res, GeometryArray)
    assert all(v.equals(t) for v, t in zip(res, points_no_missing))

    # array
    res = from_wkb(np.array(L_wkb, dtype=object))
    assert isinstance(res, GeometryArray)
    assert all(v.equals(t) for v, t in zip(res, points_no_missing))

    # missing values
    L_wkb.extend([b"", None])
    res = from_wkb(L_wkb)
    assert res[-1] is None
    assert res[-2] is None
Code example #5
File: arrow.py Project: willschlitzer/geopandas
def _arrow_to_geopandas(table):
    """
    Helper function containing the main, shared logic for read_parquet/read_feather.
    """
    df = table.to_pandas()

    metadata = table.schema.metadata
    if b"geo" not in metadata:
        raise ValueError(
            """Missing geo metadata in Parquet/Feather file.
            Use pandas.read_parquet/read_feather() instead."""
        )

    try:
        metadata = _decode_metadata(metadata.get(b"geo", b""))

    except (TypeError, json.decoder.JSONDecodeError):
        raise ValueError("Missing or malformed geo metadata in Parquet/Feather file")

    _validate_metadata(metadata)

    # Find all geometry columns that were read from the file.  May
    # be a subset if 'columns' parameter is used.
    geometry_columns = df.columns.intersection(metadata["columns"])

    if not len(geometry_columns):
        raise ValueError(
            """No geometry columns are included in the columns read from
            the Parquet/Feather file.  To read this file without geometry columns,
            use pandas.read_parquet/read_feather() instead."""
        )

    geometry = metadata["primary_column"]

    # Missing geometry likely indicates a subset of columns was read;
    # promote the first available geometry to the primary geometry.
    if len(geometry_columns) and geometry not in geometry_columns:
        geometry = geometry_columns[0]

        # if there are multiple non-primary geometry columns, raise a warning
        if len(geometry_columns) > 1:
            warnings.warn(
                "Multiple non-primary geometry columns read from Parquet/Feather "
                "file. The first column read was promoted to the primary geometry."
            )

    # Convert the WKB columns that are present back to geometry.
    for col in geometry_columns:
        df[col] = from_wkb(df[col].values, crs=metadata["columns"][col]["crs"])

    return GeoDataFrame(df, geometry=geometry)
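
_arrow_to_geopandas is the shared decoding step behind geopandas.read_parquet and geopandas.read_feather. A hedged round-trip sketch through those public readers, assuming geopandas >= 0.8 with a recent pyarrow and a hypothetical data.feather path:

import geopandas
from shapely.geometry import Point

# hypothetical example data; any GeoDataFrame with a CRS works
gdf = geopandas.GeoDataFrame(
    {"name": ["a", "b"]},
    geometry=[Point(0, 0), Point(1, 1)],
    crs="EPSG:4326",
)

# the writer stores geometries as WKB plus "geo" metadata in the Arrow schema;
# the reader decodes them back through the helper shown above
gdf.to_feather("data.feather")
roundtrip = geopandas.read_feather("data.feather")
assert roundtrip.crs == gdf.crs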
Code example #6
def test_from_wkt(string_type):
    if string_type == "str":
        f = six.text_type
    else:
        if six.PY3:

            def f(x):
                return bytes(x, "utf8")

        else:

            def f(x):
                return x

    # list
    L_wkt = [f(p.wkt) for p in points_no_missing]
    res = from_wkt(L_wkt)
    assert isinstance(res, GeometryArray)
    tol = 0.5 * 10**(-6)
    assert all(
        v.equals_exact(t, tolerance=tol)
        for v, t in zip(res, points_no_missing))

    # array
    res = from_wkt(np.array(L_wkt, dtype=object))
    assert isinstance(res, GeometryArray)
    assert all(
        v.equals_exact(t, tolerance=tol)
        for v, t in zip(res, points_no_missing))

    # missing values
    # TODO(pygeos) does not support empty strings, np.nan, or pd.NA
    missing_values = [None]
    if not compat.USE_PYGEOS:
        missing_values.extend([f(""), np.nan])

        if compat.PANDAS_GE_10:
            missing_values.append(pd.NA)

    res = from_wkt(missing_values)
    np.testing.assert_array_equal(res, np.full(len(missing_values), None))

    # single MultiPolygon
    multi_poly = shapely.geometry.MultiPolygon(
        [shapely.geometry.box(0, 0, 1, 1),
         shapely.geometry.box(3, 3, 4, 4)])
    res = from_wkt([f(multi_poly.wkt)])
    assert res[0] == multi_poly
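
from_wkt mirrors from_wkb but takes WKT text, either str or bytes. A minimal sketch, assuming geopandas >= 0.8:

import shapely.geometry
from geopandas.array import GeometryArray, from_wkt

# WKT strings for a couple of illustrative points
points = [shapely.geometry.Point(0, 0), shapely.geometry.Point(1, 1)]
arr = from_wkt([p.wkt for p in points])
assert isinstance(arr, GeometryArray)
assert all(v.equals(t) for v, t in zip(arr, points))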
Code example #7
def test_from_wkb():
    # list
    L_wkb = [p.wkb for p in points_no_missing]
    res = from_wkb(L_wkb)
    assert isinstance(res, GeometryArray)
    assert all(v.equals(t) for v, t in zip(res, points_no_missing))

    # array
    res = from_wkb(np.array(L_wkb, dtype=object))
    assert isinstance(res, GeometryArray)
    assert all(v.equals(t) for v, t in zip(res, points_no_missing))

    # missing values
    L_wkb.extend([b"", None])
    res = from_wkb(L_wkb)
    assert res[-1] is None
    assert res[-2] is None

    # single MultiPolygon
    multi_poly = shapely.geometry.MultiPolygon(
        [shapely.geometry.box(0, 0, 1, 1),
         shapely.geometry.box(3, 3, 4, 4)])
    res = from_wkb([multi_poly.wkb])
    assert res[0] == multi_poly
Code example #8
File: core.py Project: brendan-ward/geofeather
def from_geofeather(path, columns=None):
    """Deserialize a geopandas.GeoDataFrame stored in a feather file.

    This converts the internal WKB representation back into geometry.

    If the corresponding .crs file is found, it is used to set the CRS of
    the GeoDataFrame.

    Note: no index is set on the GeoDataFrame after deserialization; that is the responsibility of the caller.

    Parameters
    ----------
    path : str
        path to feather file to read
    columns : list-like (optional, default: None)
        Subset of columns to read from the file; must include 'geometry'.  If not provided,
        all columns are read.

    Returns
    -------
    geopandas.GeoDataFrame
    """

    if columns is not None and "geometry" not in columns:
        raise ValueError(
            "'geometry' must be included in list of columns to read from feather file"
        )

    # shim to support files created with geofeather 0.1.0
    if columns is not None and "wkb" not in columns:
        columns.append("wkb")

    df, crs = _from_geofeather(path, columns=columns)

    # shim to support files created with geofeather 0.1.0
    df = df.rename(columns={"wkb": "geometry"})

    # decode the WKB geometry back to shapely objects
    # df.geometry = df.geometry.apply(lambda wkb: loads(wkb))
    df.geometry = from_wkb(df.geometry, crs=crs)

    return GeoDataFrame(df, geometry="geometry")
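
A short usage sketch for the geofeather reader above, assuming the companion to_geofeather writer from the same package and a hypothetical test.feather path:

import geopandas
from geofeather import from_geofeather, to_geofeather
from shapely.geometry import Point

# hypothetical example data
gdf = geopandas.GeoDataFrame(
    {"value": [1, 2]}, geometry=[Point(0, 0), Point(1, 1)], crs="EPSG:4326"
)

# geometries are written as WKB and decoded back on read via from_wkb
to_geofeather(gdf, "test.feather")
roundtrip = from_geofeather("test.feather")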
Code example #9
 def test_from_wkb(self):
     L_wkb = [p.wkb for p in self.geoms]
     arr = from_wkb(L_wkb, crs=27700)
     assert arr.crs == self.osgb
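
The crs argument shown in that test accepts anything pyproj can interpret, such as an integer EPSG code or an authority string. A minimal sketch, assuming geopandas >= 0.8 with pyproj:

import shapely.geometry
from geopandas.array import from_wkb

wkb = [shapely.geometry.Point(0, 0).wkb]

# the CRS can be given as an authority string; to_epsg() recovers the code
arr = from_wkb(wkb, crs="EPSG:27700")
assert arr.crs.to_epsg() == 27700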
Code example #10
File: geopandas.py Project: geopandas/pyogrio
def read_dataframe(
    path,
    layer=None,
    encoding=None,
    columns=None,
    read_geometry=True,
    force_2d=False,
    skip_features=0,
    max_features=None,
    where=None,
    bbox=None,
):
    """Read from an OGR data source to a GeoPandas GeoDataFrame or Pandas DataFrame.
    If the data source does not have a geometry column or `read_geometry` is False,
    a DataFrame will be returned.

    Requires geopandas >= 0.8.

    Parameters
    ----------
    path : str
        path to file
    layer : int or str, optional (default: first layer)
        If an integer is provided, it corresponds to the index of the layer
        within the data source.  If a string is provided, it must match the name
        of the layer in the data source.  Defaults to first layer in data source.
    encoding : str, optional (default: None)
        If present, will be used as the encoding for reading string values from
        the data source, unless encoding can be inferred directly from the data
        source.
    columns : list-like, optional (default: all columns)
        List of column names to import from the data source.  Column names must
        exactly match the names in the data source, and will be returned in
        the order they occur in the data source.  To avoid reading any columns,
        pass an empty list-like.
    read_geometry : bool, optional (default: True)
        If True, will read geometry into a GeoSeries.  If False, a Pandas DataFrame
        will be returned instead.
    force_2d : bool, optional (default: False)
        If the geometry has Z values, setting this to True will cause those to
        be ignored and 2D geometries to be returned.
    skip_features : int, optional (default: 0)
        Number of features to skip from the beginning of the file before returning
        features.  Must be less than the total number of features in the file.
    max_features : int, optional (default: None)
        Number of features to read from the file.  Must be less than the total
        number of features in the file minus skip_features (if used).
    where : str, optional (default: None)
        Where clause to filter features in layer by attribute values.  Uses a
        restricted form of SQL WHERE clause, defined here:
        http://ogdi.sourceforge.net/prop/6.2.CapabilitiesMetadata.html
        Examples: "ISO_A3 = 'CAN'", "POP_EST > 10000000 AND POP_EST < 100000000"
    bbox : tuple of (xmin, ymin, xmax, ymax) (default: None)
        If present, will be used to filter records whose geometry intersects this
        box.  This must be in the same CRS as the dataset.

    Returns
    -------
    GeoDataFrame or DataFrame (if no geometry is present)
    """
    try:
        with GDALEnv():
            import pandas as pd
            import geopandas as gp
            from geopandas.array import from_wkb

    except ImportError:
        raise ImportError("geopandas is required to use pyogrio.read_dataframe()")

    path = str(path)

    if not "://" in path:
        if not "/vsizip" in path.lower() and not os.path.exists(path):
            raise ValueError(f"'{path}' does not exist")

    meta, geometry, field_data = read(
        path,
        layer=layer,
        encoding=encoding,
        columns=columns,
        read_geometry=read_geometry,
        force_2d=force_2d,
        skip_features=skip_features,
        max_features=max_features,
        where=where,
        bbox=bbox,
    )

    columns = meta["fields"].tolist()
    data = {columns[i]: field_data[i] for i in range(len(columns))}
    df = pd.DataFrame(data, columns=columns)

    if geometry is None or not read_geometry:
        return df

    geometry = from_wkb(geometry, crs=meta["crs"])

    return gp.GeoDataFrame(df, geometry=geometry)
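
A hedged usage sketch for read_dataframe, assuming pyogrio and geopandas >= 0.8 are installed and a hypothetical countries.shp file that has the fields used in the docstring examples:

import pyogrio

# read two attribute columns plus the geometry, filtering features with an
# OGR SQL WHERE clause on an attribute value
df = pyogrio.read_dataframe(
    "countries.shp",
    columns=["ISO_A3", "POP_EST"],
    where="POP_EST > 10000000",
)
print(df.head())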
Code example #11
File: parquet.py Project: wouellette/geopandas
def _read_parquet(path, columns=None, **kwargs):
    """
    Load a Parquet object from the file path, returning a GeoDataFrame.

    You can read a subset of columns in the file using the ``columns`` parameter.
    However, the structure of the returned GeoDataFrame will depend on which
    columns you read:

    * if no geometry columns are read, this will raise a ``ValueError`` - you
      should use the pandas `read_parquet` method instead.
    * if the primary geometry column saved to this file is not included in
      columns, the first available geometry column will be set as the geometry
      column of the returned GeoDataFrame.

    Requires 'pyarrow'.

    .. versionadded:: 0.8

    Parameters
    ----------
    path : str, path object
    columns : list-like of strings, default=None
        If not None, only these columns will be read from the file.  If
        the primary geometry column is not included, the first secondary
        geometry read from the file will be set as the geometry column
        of the returned GeoDataFrame.  If no geometry columns are present,
        a ``ValueError`` will be raised.
    **kwargs
        Any additional kwargs passed to pyarrow.parquet.read_table().

    Returns
    -------
    GeoDataFrame
    """

    import_optional_dependency(
        "pyarrow", extra="pyarrow is required for Parquet support.")
    from pyarrow import parquet

    kwargs["use_pandas_metadata"] = True
    table = parquet.read_table(path, columns=columns, **kwargs)

    df = table.to_pandas()

    metadata = table.schema.metadata
    if b"geo" not in metadata:
        raise ValueError("""Missing geo metadata in Parquet file.
            Use pandas.read_parquet() instead.""")

    try:
        metadata = _decode_metadata(metadata.get(b"geo", b""))

    except (TypeError, json.decoder.JSONDecodeError):
        raise ValueError("Missing or malformed geo metadata in Parquet file")

    _validate_metadata(metadata)

    # Find all geometry columns that were read from the file.  May
    # be a subset if 'columns' parameter is used.
    geometry_columns = df.columns.intersection(metadata["columns"])

    if not len(geometry_columns):
        raise ValueError(
            """No geometry columns are included in the columns read from
            the Parquet file.  To read this file without geometry columns,
            use pandas.read_parquet() instead.""")

    geometry = metadata["primary_column"]

    # Missing geometry likely indicates a subset of columns was read;
    # promote the first available geometry to the primary geometry.
    if len(geometry_columns) and geometry not in geometry_columns:
        geometry = geometry_columns[0]

        # if there are multiple non-primary geometry columns, raise a warning
        if len(geometry_columns) > 1:
            warnings.warn(
                "Multiple non-primary geometry columns read from Parquet file.  "
                "The first column read was promoted to the primary geometry.")

    # Convert the WKB columns that are present back to geometry.
    for col in geometry_columns:
        df[col] = from_wkb(df[col].values, crs=metadata["columns"][col]["crs"])

    return GeoDataFrame(df, geometry=geometry)
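
A usage sketch for the column-subset behaviour described in the docstring, assuming geopandas >= 0.8 with pyarrow and a hypothetical data.parquet file containing a 'name' column and a primary 'geometry' column:

import geopandas

# reading a subset of columns still returns a GeoDataFrame as long as at
# least one geometry column is included
subset = geopandas.read_parquet("data.parquet", columns=["name", "geometry"])
assert subset.geometry.name == "geometry"

# omitting every geometry column raises the ValueError shown above;
# pandas.read_parquet is the documented fallback for attribute-only reads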