예제 #1
0
    def test_integration_natural_earth(self, predicate, expected_shape):
        """Check the result shape of a bulk spatial-index query.

        The geometries of the ``naturalearth_cities`` dataset are queried
        against the spatial index of the ``naturalearth_lowres`` dataset
        using ``predicate``; the shape of the returned array must equal
        ``expected_shape``.

        NOTE(review): ``predicate`` and ``expected_shape`` are presumably
        supplied by a parametrize decorator outside this view -- confirm.
        """
        countries_path = datasets.get_path("naturalearth_lowres")
        countries = read_file(countries_path)
        cities_path = datasets.get_path("naturalearth_cities")
        cities = read_file(cities_path)

        hits = countries.sindex.query_bulk(cities.geometry, predicate)
        assert hits.shape == expected_shape
예제 #2
0
    def test_clip(self):
        """The ``clip`` method must match the standalone ``clip`` function.

        City geometries are clipped by the rows of the low-resolution world
        frame whose ``continent`` is "South America", once through the
        ``clip`` function and once through the ``.clip`` method of the
        geometry column; both results must be equal.
        """
        cities = read_file(datasets.get_path("naturalearth_cities"))
        countries = read_file(datasets.get_path("naturalearth_lowres"))
        in_south_america = countries["continent"] == "South America"
        mask = countries[in_south_america]

        from_function = clip(cities.geometry, mask)
        from_method = cities.geometry.clip(mask)
        assert_geoseries_equal(from_method, from_function)
예제 #3
0
    def test_read_paths(self):
        """Each of the three named datasets must load as a ``GeoDataFrame``."""
        dataset_names = ('naturalearth_lowres', 'naturalearth_cities', 'nybb')

        # Same read-and-check step for every dataset, in the original order.
        for dataset_name in dataset_names:
            loaded = read_file(get_path(dataset_name))
            assert isinstance(loaded, GeoDataFrame)
예제 #4
0
    def test_read_paths(self):
        """Each of the three named datasets must load as a ``GeoDataFrame``."""
        dataset_names = ('naturalearth_lowres', 'naturalearth_cities', 'nybb')

        # Same read-and-check step for every dataset, in the original order.
        for dataset_name in dataset_names:
            loaded = read_file(get_path(dataset_name))
            assert isinstance(loaded, GeoDataFrame)
예제 #5
0
    def setup(self, *args):
        """Prepare the country and buffered-capital frames stored on ``self``.

        Both layers are reprojected to EPSG:3395.  The countries keep only
        the ``geometry`` and ``name`` columns and the row named "Antarctica"
        is dropped.  Every capital geometry is replaced by a buffer of
        500000 CRS units around it (presumably metres -- confirm).

        ``*args`` is accepted and ignored.

        NOTE(review): this looks like a benchmark ``setup`` hook; the
        enclosing class is not visible here.
        """
        world = read_file(datasets.get_path('naturalearth_lowres'))
        capitals = read_file(datasets.get_path('naturalearth_cities'))

        countries = world[['geometry', 'name']]
        # The mask is built from the unprojected frame; it shares its index
        # with the reprojected frame, so it can filter the latter directly.
        not_antarctica = countries.name != "Antarctica"
        # Use the plain "authority:code" string: the PROJ "+init=" form is
        # deprecated and triggers a FutureWarning with pyproj >= 2.
        countries = countries.to_crs('epsg:3395')[not_antarctica]

        capitals = capitals.to_crs('epsg:3395')
        capitals['geometry'] = capitals.buffer(500000)

        self.countries = countries
        self.capitals = capitals
예제 #6
0
    def setup(self, *args):
        """Prepare the country and buffered-capital frames stored on ``self``.

        Both layers are reprojected to EPSG:3395.  The countries keep only
        the ``geometry`` and ``name`` columns and the row named "Antarctica"
        is dropped.  Every capital geometry is replaced by a buffer of
        500000 CRS units around it (presumably metres -- confirm).

        ``*args`` is accepted and ignored.

        NOTE(review): this looks like a benchmark ``setup`` hook; the
        enclosing class is not visible here.
        """
        world = read_file(datasets.get_path('naturalearth_lowres'))
        capitals = read_file(datasets.get_path('naturalearth_cities'))

        countries = world[['geometry', 'name']]
        # The mask is built from the unprojected frame; it shares its index
        # with the reprojected frame, so it can filter the latter directly.
        not_antarctica = countries.name != "Antarctica"
        # Use the plain "authority:code" string: the PROJ "+init=" form is
        # deprecated and triggers a FutureWarning with pyproj >= 2.
        countries = countries.to_crs('epsg:3395')[not_antarctica]

        capitals = capitals.to_crs('epsg:3395')
        capitals['geometry'] = capitals.buffer(500000)

        self.countries = countries
        self.capitals = capitals
예제 #7
0
def generate_test_df():
    """Build point, polygon and mixed-geometry test frames.

    The ``naturalearth_cities`` and ``naturalearth_lowres`` datasets are
    reprojected to EPSG:3395 and reduced to their ``geometry`` column.

    Returns
    -------
    dict
        Keys ``"mixed"``, ``"points"`` and ``"polygons"``.  Each value is the
        corresponding geometry-only frame restricted to the rows whose
        geometry is valid; ``"mixed"`` is the capitals followed by the
        countries.
    """
    # Local import so the function does not rely on a module-level alias.
    import pandas as pd

    world = read_file(datasets.get_path("naturalearth_lowres"))
    capitals = read_file(datasets.get_path("naturalearth_cities"))
    polygons = world.to_crs("epsg:3395")[["geometry"]]
    points = capitals.to_crs("epsg:3395")[["geometry"]]
    # DataFrame.append was removed in pandas 2.0; concat is its replacement
    # and yields the same row order (points first, then polygons).
    mixed = pd.concat([points, polygons])

    # filter out invalid geometries
    return {
        "mixed": mixed[mixed.is_valid],
        "points": points[points.is_valid],
        "polygons": polygons[polygons.is_valid],
    }
예제 #8
0
def test_parquet_invalid_metadata(tmpdir, geo_meta, error):
    """Reading a Parquet file with invalid geo metadata raises ValueError.

    The file is written with pyarrow directly so that the schema metadata
    can be overridden with ``geo_meta`` before writing.  ``error`` is the
    pattern the ``ValueError`` message has to match.
    """
    from pyarrow import Table, parquet

    # Plain DataFrame with the geometry column encoded as WKB.
    frame = DataFrame(read_file(get_path("naturalearth_lowres")))
    frame["geometry"] = to_wkb(frame["geometry"].values)

    # Overlay the supplied geo metadata on the schema metadata.
    arrow_table = Table.from_pandas(frame)
    schema_meta = arrow_table.schema.metadata
    schema_meta.update(geo_meta)
    arrow_table = arrow_table.replace_schema_metadata(schema_meta)

    out_path = os.path.join(str(tmpdir), "test.pq")
    parquet.write_table(arrow_table, out_path)

    with pytest.raises(ValueError, match=error):
        read_parquet(out_path)
예제 #9
0
def test_promote_secondary_geometry(tmpdir, file_format):
    """Reading without the primary geometry promotes another geometry column.

    ``file_format`` is unpacked into a ``(reader, writer)`` pair.  With one
    secondary geometry column selected, the result must equal the original
    frame with ``geom2`` set as geometry.  With two selected, a
    ``UserWarning`` is expected and ``geom2`` must still be the geometry.
    """
    read_func, write_func = file_format

    df = read_file(get_path("naturalearth_lowres"))
    df["geom2"] = df.geometry.copy()
    out_path = os.path.join(str(tmpdir), "test.pq")

    # Case 1: a single non-primary geometry column is requested.
    write_func(df, out_path)
    loaded = read_func(out_path, columns=["name", "geom2"])
    expected = df.set_geometry("geom2")[["name", "geom2"]]
    assert_geodataframe_equal(expected, loaded)

    # Case 2: two non-primary geometry columns are requested.
    df["geom3"] = df.geometry.copy()
    write_func(df, out_path)

    warning_text = (
        "Multiple non-primary geometry columns read from Parquet/Feather file."
    )
    with pytest.warns(UserWarning, match=warning_text):
        loaded = read_func(out_path, columns=["name", "geom2", "geom3"])

    expected = df.set_geometry("geom2")[["name", "geom2", "geom3"]]
    assert_geodataframe_equal(expected, loaded)
예제 #10
0
def test_fsspec_url():
    """Parquet can be read from an fsspec URL and from a filesystem object.

    A memory filesystem subclass with a required ``is_set`` keyword is
    registered under the "memory" protocol.  The frame is written into it
    and read back twice: via a ``memory://`` URL with ``storage_options``,
    and via the ``filesystem`` argument.  Both results must equal the
    original frame.

    The test is skipped when fsspec is not installed.
    """
    fsspec = pytest.importorskip("fsspec")
    import fsspec.implementations.memory

    class MyMemoryFileSystem(fsspec.implementations.memory.MemoryFileSystem):
        # Simple fsspec filesystem that adds a required keyword.
        # Attempting to use this filesystem without the keyword will raise an exception.
        def __init__(self, is_set, *args, **kwargs):
            self.is_set = is_set
            super().__init__(*args, **kwargs)

    fsspec.register_implementation("memory", MyMemoryFileSystem, clobber=True)
    try:
        memfs = MyMemoryFileSystem(is_set=True)

        test_dataset = "naturalearth_lowres"
        df = read_file(get_path(test_dataset))

        with memfs.open("data.parquet", "wb") as f:
            df.to_parquet(f)

        result = read_parquet(
            "memory://data.parquet", storage_options=dict(is_set=True)
        )
        assert_geodataframe_equal(result, df)

        result = read_parquet("memory://data.parquet", filesystem=memfs)
        assert_geodataframe_equal(result, df)
    finally:
        # Put the stock implementation back so the override (which demands
        # ``is_set``) does not leak into later users of "memory://",
        # whether the assertions above passed or failed.
        fsspec.register_implementation(
            "memory", fsspec.implementations.memory.MemoryFileSystem, clobber=True
        )
예제 #11
0
def test_roundtrip(tmpdir, file_format, test_dataset):
    """Write then read a dataset and verify nothing changed.

    ``file_format`` is unpacked into a ``(reader, writer)`` pair.  Writing
    must emit a ``UserWarning`` matching "initial implementation", must
    create the file, and must leave the input frame unmodified; reading the
    file back must give an equal ``GeoDataFrame``.
    """
    reader, writer = file_format

    original = read_file(get_path(test_dataset))
    snapshot = original.copy()
    target = os.path.join(str(tmpdir), "test.pq")

    # TEMP: Initial implementation should raise a UserWarning
    with pytest.warns(UserWarning, match="initial implementation"):
        writer(original, target)

    assert os.path.exists(target)

    # The writer must not have altered the frame it was given.
    assert_geodataframe_equal(original, snapshot)

    # The stored data must come back as an equal GeoDataFrame.
    restored = reader(target)
    assert isinstance(restored, GeoDataFrame)
    assert_geodataframe_equal(original, restored)
예제 #12
0
def test_feather_arrow_version(tmpdir):
    """``to_feather`` must raise ImportError naming the pyarrow requirement.

    NOTE(review): presumably this only runs when the installed pyarrow is
    older than 0.17 (a skip marker may exist outside this view) -- confirm.
    """
    expected_message = "pyarrow >= 0.17 required for Feather support"
    frame = read_file(get_path("naturalearth_lowres"))
    target = os.path.join(str(tmpdir), "test.feather")

    with pytest.raises(ImportError, match=expected_message):
        frame.to_feather(target)
예제 #13
0
def generate_test_df():
    """Build point, polygon and mixed-geometry test frames with warm indexes.

    The ``naturalearth_cities`` and ``naturalearth_lowres`` datasets are
    reprojected to EPSG:3395 and reduced to their ``geometry`` column.
    Before returning, each frame's spatial index is queried once so that
    it has already been built.

    Returns
    -------
    dict
        Keys ``"mixed"``, ``"points"`` and ``"polygons"``.  Each value is the
        corresponding geometry-only frame restricted to the rows whose
        geometry is valid; ``"mixed"`` is the capitals followed by the
        countries.
    """
    # Local import so the function does not rely on a module-level alias.
    import pandas as pd

    world = read_file(datasets.get_path("naturalearth_lowres"))
    capitals = read_file(datasets.get_path("naturalearth_cities"))
    polygons = world.to_crs("epsg:3395")[["geometry"]]
    points = capitals.to_crs("epsg:3395")[["geometry"]]
    # DataFrame.append was removed in pandas 2.0; concat is its replacement
    # and yields the same row order (points first, then polygons).
    mixed = pd.concat([points, polygons])

    # filter out invalid geometries
    data = {
        "mixed": mixed[mixed.is_valid],
        "points": points[points.is_valid],
        "polygons": polygons[polygons.is_valid],
    }
    # ensure index is pre-generated: query each frame's index with that
    # frame's own first geometry
    for frame in data.values():
        frame.sindex.query(frame.geometry.values.data[0])
    return data
예제 #14
0
    def setup(self):
        """Create the polygon frame and random point series stored on ``self``.

        ``self.long_nybb`` is the ``nybb`` dataset repeated ten times, with
        the CRS of the original frame.  ``self.point_df`` is a ``GeoSeries``
        of 20000 random points with x in [-120, -119) and y in [38, 39),
        tagged as EPSG:4326.

        NOTE(review): the random generator is not seeded here, so the points
        differ between runs.
        """
        nybb = read_file(datasets.get_path('nybb'))
        self.long_nybb = GeoDataFrame(pd.concat(10 * [nybb]), crs=nybb.crs)

        num_points = 20000
        longitudes = np.random.rand(num_points) - 120
        latitudes = np.random.rand(num_points) + 38
        self.point_df = GeoSeries(
            [Point(x, y) for x, y in zip(longitudes, latitudes)])
        # Plain "authority:code" string: the {"init": ...} dict form is the
        # deprecated PROJ init syntax and warns with pyproj >= 2.
        self.point_df.crs = "epsg:4326"
예제 #15
0
def test_pandas_parquet_roundtrip2(test_dataset, tmpdir):
    """A geometry-free frame must survive a pandas Parquet round trip.

    The dataset named by ``test_dataset`` is loaded, its ``geometry`` column
    is dropped and the rest is wrapped in a plain ``DataFrame``.  After
    writing with ``DataFrame.to_parquet`` and reading with
    ``pd_read_parquet`` the frames must be equal.
    """
    # Honour the dataset supplied by the caller.  A hard-coded
    # "naturalearth_lowres" used to overwrite the argument here, so every
    # invocation exercised the same dataset.
    df = DataFrame(read_file(get_path(test_dataset)).drop(columns=["geometry"]))

    filename = os.path.join(str(tmpdir), "test.pq")
    df.to_parquet(filename)

    pq_df = pd_read_parquet(filename)

    assert_frame_equal(df, pq_df)
예제 #16
0
 def setup_class(cls):
     """Load the world frame onto ``cls.df`` and add a ``NEGATIVES`` column.

     The class is skipped through ``pytest.importorskip`` unless either
     ``mapclassify`` or ``pysal`` can be imported.  ``NEGATIVES`` holds
     evenly spaced values from -10 to 10, one per row.
     """
     # Try the preferred backend first, then the fallback.
     for backend in ('mapclassify', 'pysal'):
         try:
             __import__(backend)
         except ImportError:
             continue
         break
     else:
         # Neither module could be imported.
         pytest.importorskip('mapclassify')

     frame = read_file(get_path('naturalearth_lowres'))
     cls.df = frame
     frame['NEGATIVES'] = np.linspace(-10, 10, len(frame.index))
예제 #17
0
 def setup_class(cls):
     """Load the world frame onto ``cls.df`` and add a ``NEGATIVES`` column.

     The class is skipped through ``pytest.importorskip`` unless either
     ``mapclassify`` or ``pysal`` can be imported.  ``NEGATIVES`` holds
     evenly spaced values from -10 to 10, one per row.
     """
     # Try the preferred backend first, then the fallback.
     for backend in ("mapclassify", "pysal"):
         try:
             __import__(backend)
         except ImportError:
             continue
         break
     else:
         # Neither module could be imported.
         pytest.importorskip("mapclassify")

     frame = read_file(get_path("naturalearth_lowres"))
     cls.df = frame
     frame["NEGATIVES"] = np.linspace(-10, 10, len(frame.index))
예제 #18
0
def test_encode_wkb():
    """``_encode_wkb`` must leave its input intact and emit the expected bytes.

    After encoding, the input must still be a ``GeoDataFrame`` and the first
    16 bytes of the first encoded geometry must equal a fixed prefix.
    """
    expected_prefix = (
        b"\x01\x06\x00\x00\x00\x03\x00\x00\x00\x01\x03\x00\x00\x00\x01\x00"
    )
    df = read_file(get_path("naturalearth_lowres"))

    encoded = _encode_wkb(df)

    # The frame handed to the encoder must not have been converted.
    assert isinstance(df, GeoDataFrame)

    first_geometry = encoded.geometry.iloc[0]
    assert first_geometry[:16] == expected_prefix
예제 #19
0
    def setup(self):
        """Create the polygon frame and random point series stored on ``self``.

        ``self.long_nybb`` is the ``nybb`` dataset repeated ten times, with
        the CRS of the original frame.  ``self.point_df`` is a ``GeoSeries``
        of 20000 random points with x in [-120, -119) and y in [38, 39),
        tagged as EPSG:4326.

        NOTE(review): the random generator is not seeded here, so the points
        differ between runs.
        """
        nybb = read_file(datasets.get_path('nybb'))
        self.long_nybb = GeoDataFrame(pd.concat(10 * [nybb]),
                                      crs=nybb.crs)

        num_points = 20000
        longitudes = np.random.rand(num_points) - 120
        latitudes = np.random.rand(num_points) + 38
        self.point_df = GeoSeries([Point(x, y) for x, y
                                   in zip(longitudes, latitudes)])
        # Plain "authority:code" string: the {"init": ...} dict form is the
        # deprecated PROJ init syntax and warns with pyproj >= 2.
        self.point_df.crs = "epsg:4326"
예제 #20
0
def test_parquet_columns_no_geometry(tmpdir):
    """Selecting only non-geometry columns when reading raises ValueError.

    The world dataset is written to Parquet and then read back requesting
    the ``name`` column alone.
    """
    out_path = os.path.join(str(tmpdir), "test.pq")
    frame = read_file(get_path("naturalearth_lowres"))
    frame.to_parquet(out_path)

    requested = ["name"]
    with pytest.raises(ValueError):
        read_parquet(out_path, columns=requested)
예제 #21
0
def test_feather_compression(compression, tmpdir):
    """Feather round trip with a compression option returns an equal frame.

    ``compression`` is forwarded unchanged to ``to_feather``; the frame
    read back must be a ``GeoDataFrame`` equal to the one written.
    """
    original = read_file(get_path("naturalearth_lowres"))
    target = os.path.join(str(tmpdir), "test.feather")

    original.to_feather(target, compression=compression)
    restored = read_feather(target)

    assert isinstance(restored, GeoDataFrame)
    assert_geodataframe_equal(original, restored)
예제 #22
0
def test_parquet_repeat_columns(tmpdir):
    """Repeated names in ``columns`` must yield each column only once.

    The expected result keeps the columns in order of first appearance.
    """
    out_path = os.path.join(str(tmpdir), "test.pq")
    read_file(get_path("naturalearth_lowres")).to_parquet(out_path)

    requested = ["name", "name", "iso_a3", "name", "geometry"]
    loaded = read_parquet(out_path, columns=requested)

    assert list(loaded.columns) == ["name", "iso_a3", "geometry"]
예제 #23
0
def test_create_metadata():
    """Check every field of the metadata built by ``_create_metadata``.

    Verified: the schema version, the creator library and version, the
    primary column name, and the CRS (as WKT), encoding and bounding box
    recorded for the ``geometry`` column.
    """
    df = read_file(get_path("naturalearth_lowres"))
    metadata = _create_metadata(df)

    # Top-level fields.
    assert isinstance(metadata, dict)
    assert metadata["schema_version"] == METADATA_VERSION

    creator = metadata["creator"]
    assert creator["library"] == "geopandas"
    assert creator["version"] == geopandas.__version__

    assert metadata["primary_column"] == "geometry"
    assert "geometry" in metadata["columns"]

    # Per-column fields of the geometry entry.
    geometry_meta = metadata["columns"]["geometry"]
    assert geometry_meta["crs"] == df.geometry.crs.to_wkt()
    assert geometry_meta["encoding"] == "WKB"
    assert np.array_equal(geometry_meta["bbox"], df.geometry.total_bounds)
예제 #24
0
def test_parquet_index(tmpdir):
    """``index=True`` keeps the index on write, ``index=False`` drops it.

    The frame is indexed by ``iso_a3``.  Written with the index, the file
    must read back equal to the frame; written without it, the file must
    read back equal to the frame with its index reset and discarded.
    """
    out_dir = str(tmpdir)
    df = read_file(get_path("naturalearth_lowres")).set_index("iso_a3")

    # Index preserved.
    kept_path = os.path.join(out_dir, "test_with_index.pq")
    df.to_parquet(kept_path, index=True)
    assert_geodataframe_equal(df, read_parquet(kept_path))

    # Index dropped.
    dropped_path = os.path.join(out_dir, "drop_index.pq")
    df.to_parquet(dropped_path, index=False)
    expected = df.reset_index(drop=True)
    assert_geodataframe_equal(expected, read_parquet(dropped_path))
예제 #25
0
def test_parquet_missing_crs(tmpdir):
    """A frame whose CRS is ``None`` must come back with CRS ``None``.

    The CRS is cleared before writing; the frame read from Parquet must
    have no CRS and must equal the written frame with the CRS check on.
    """
    original = read_file(get_path("naturalearth_lowres"))
    original.crs = None

    target = os.path.join(str(tmpdir), "test.pq")
    original.to_parquet(target)
    restored = read_parquet(target)

    assert restored.crs is None
    assert_geodataframe_equal(original, restored, check_crs=True)
예제 #26
0
def test_parquet_subset_columns(tmpdir):
    """Reading a column subset returns those columns with decoded geometry.

    Selecting ``name`` and ``geometry`` must equal the same selection on
    the original frame.  Selecting no columns at all must raise a
    ``ValueError`` about missing geometry columns.
    """
    original = read_file(get_path("naturalearth_lowres"))
    target = os.path.join(str(tmpdir), "test.pq")
    original.to_parquet(target)

    subset = read_parquet(target, columns=["name", "geometry"])
    assert_geodataframe_equal(original[["name", "geometry"]], subset)

    expected_message = "No geometry columns are included in the columns read"
    with pytest.raises(ValueError, match=expected_message):
        read_parquet(target, columns=[])
예제 #27
0
def test_parquet_multiple_geom_cols(tmpdir):
    """All geometry columns written to Parquet must be restored as geometry.

    A second geometry column ``geom2`` (a copy of the primary one) is added
    before writing.  The frame read back must be an equal ``GeoDataFrame``
    and its ``geom2`` column must equal the original, geometry types
    included.
    """
    original = read_file(get_path("naturalearth_lowres"))
    original["geom2"] = original.geometry.copy()

    target = os.path.join(str(tmpdir), "test.pq")
    original.to_parquet(target)
    assert os.path.exists(target)

    restored = read_parquet(target)
    assert isinstance(restored, GeoDataFrame)
    assert_geodataframe_equal(original, restored)

    # The secondary column is compared on its own, with geometry types.
    assert_geoseries_equal(original.geom2, restored.geom2, check_geom_type=True)
예제 #28
0
def test_validate_dataframe():
    """``_validate_dataframe`` accepts valid frames and rejects the rest.

    Accepted: the loaded frame, and the same frame indexed by ``iso_a3``.
    Rejected with ``ValueError``: the frame once it has a column named with
    the integer 0, that frame indexed by column 0, and a plain string.
    """
    frame = read_file(get_path("naturalearth_lowres"))

    # Valid inputs: neither call may raise.
    _validate_dataframe(frame)
    indexed_by_iso = frame.set_index("iso_a3")
    _validate_dataframe(indexed_by_iso)

    # A column whose name is not a string makes the frame invalid.
    frame[0] = 1

    with pytest.raises(ValueError):
        _validate_dataframe(frame)

    with pytest.raises(ValueError):
        _validate_dataframe(frame.set_index(0))

    # Anything that is not a DataFrame is rejected as well.
    not_a_frame = "not a dataframe"
    with pytest.raises(ValueError):
        _validate_dataframe(not_a_frame)
예제 #29
0
def test_parquet_missing_metadata(tmpdir):
    """Reading a Parquet file without geo metadata raises ValueError.

    The file is produced by pandas' own ``to_parquet`` from a plain
    ``DataFrame`` whose geometry column has been encoded as WKB.
    """
    # Plain DataFrame with WKB-encoded geometry.
    plain = DataFrame(read_file(get_path("naturalearth_lowres")))
    plain["geometry"] = to_wkb(plain["geometry"].values)

    # Written through pandas (no geo metadata).
    target = os.path.join(str(tmpdir), "test.pq")
    plain.to_parquet(target)

    expected_message = "Missing geo metadata in Parquet/Feather file."
    with pytest.raises(ValueError, match=expected_message):
        read_parquet(target)
예제 #30
0
def test_non_fsspec_url_with_storage_options_raises():
    """``storage_options`` with a ``get_path`` result must raise ValueError.

    The error message has to match "storage_options".
    """
    dataset_name = "naturalearth_lowres"
    options = {"foo": "bar"}

    with pytest.raises(ValueError, match="storage_options"):
        read_parquet(get_path(dataset_name), storage_options=options)
예제 #31
0
파일: clip.py 프로젝트: jdmcbr/geopandas
 def setup(self, *args):
     """Store the clipping boxes and the point frame on ``self``.

     ``self.bounds`` gets one ``box`` per geometry of the world frame, built
     from that geometry's ``bounds``; ``self.points`` gets the cities
     frame.  ``*args`` is accepted and ignored.
     """
     countries = read_file(datasets.get_path("naturalearth_lowres"))
     cities = read_file(datasets.get_path("naturalearth_cities"))

     self.points = cities
     self.bounds = [box(*shape.bounds) for shape in countries.geometry]
예제 #32
0
def test_read_paths(test_dataset):
    """The dataset named by ``test_dataset`` must load as a ``GeoDataFrame``."""
    dataset_path = get_path(test_dataset)
    loaded = read_file(dataset_path)
    assert isinstance(loaded, GeoDataFrame)
예제 #33
0
def test_read_paths(test_dataset):
    """The dataset named by ``test_dataset`` must load as a ``GeoDataFrame``."""
    dataset_path = get_path(test_dataset)
    loaded = read_file(dataset_path)
    assert isinstance(loaded, GeoDataFrame)
예제 #34
0
 def test_read_file(self):
     """The ``nybb`` dataset must report CRS 2263 at every level.

     The CRS is checked on the frame, on its geometry column and on the
     array underlying that column.
     """
     expected_crs = pyproj.CRS(2263)
     df = read_file(datasets.get_path("nybb"))

     observed_levels = (df.crs, df.geometry.crs, df.geometry.values.crs)
     for observed in observed_levels:
         assert observed == expected_crs
예제 #35
0
 def setup_class(cls):
     """Load the world frame onto ``cls.df`` and add a ``NEGATIVES`` column.

     The class is skipped through ``pytest.importorskip`` when
     ``mapclassify`` is not importable.  ``NEGATIVES`` holds evenly spaced
     values from -10 to 10, one per row.
     """
     pytest.importorskip('mapclassify')

     frame = read_file(get_path('naturalearth_lowres'))
     cls.df = frame
     row_count = len(frame.index)
     frame['NEGATIVES'] = np.linspace(-10, 10, row_count)