def test_integration_natural_earth(self, predicate, expected_shape):
    """Tests output sizes for the naturalearth datasets."""
    world = read_file(datasets.get_path("naturalearth_lowres"))
    capitals = read_file(datasets.get_path("naturalearth_cities"))

    res = world.sindex.query_bulk(capitals.geometry, predicate)
    assert res.shape == expected_shape
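# A minimal sketch (hypothetical helper, not part of the suite) of how the
# ``sindex.query_bulk`` result above is laid out: it returns a (2, n) integer
# array where row 0 indexes the input geometries (capitals) and row 1 indexes
# the tree geometries (world), which is what ``expected_shape`` checks.
def _query_bulk_shape_example():
    world = read_file(datasets.get_path("naturalearth_lowres"))
    capitals = read_file(datasets.get_path("naturalearth_cities"))
    input_idx, tree_idx = world.sindex.query_bulk(
        capitals.geometry, predicate="within"
    )
    # each (input_idx[i], tree_idx[i]) pair is one capital/country match
    return list(zip(input_idx, tree_idx))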
def test_clip(self):
    left = read_file(datasets.get_path("naturalearth_cities"))
    world = read_file(datasets.get_path("naturalearth_lowres"))
    south_america = world[world["continent"] == "South America"]

    expected = clip(left.geometry, south_america)
    result = left.geometry.clip(south_america)
    assert_geoseries_equal(result, expected)
def test_read_paths(self):
    gdf = read_file(get_path('naturalearth_lowres'))
    assert isinstance(gdf, GeoDataFrame)
    gdf = read_file(get_path('naturalearth_cities'))
    assert isinstance(gdf, GeoDataFrame)
    gdf = read_file(get_path('nybb'))
    assert isinstance(gdf, GeoDataFrame)
def setup(self, *args):
    world = read_file(datasets.get_path('naturalearth_lowres'))
    capitals = read_file(datasets.get_path('naturalearth_cities'))
    countries = world[['geometry', 'name']]
    # note: the '+init=epsg:XXXX' syntax is deprecated in pyproj;
    # 'epsg:3395' is the modern equivalent
    countries = countries.to_crs('+init=epsg:3395')[
        countries.name != "Antarctica"]
    capitals = capitals.to_crs('+init=epsg:3395')
    capitals['geometry'] = capitals.buffer(500000)

    self.countries = countries
    self.capitals = capitals
def generate_test_df():
    world = read_file(datasets.get_path("naturalearth_lowres"))
    capitals = read_file(datasets.get_path("naturalearth_cities"))
    countries = world.to_crs("epsg:3395")[["geometry"]]
    capitals = capitals.to_crs("epsg:3395")[["geometry"]]
    # note: DataFrame.append is deprecated in pandas >= 1.4 and removed in
    # 2.0; pd.concat([capitals, countries]) is the modern equivalent
    mixed = capitals.append(countries)  # get a mix of geometries
    points = capitals
    polygons = countries
    # filter out invalid geometries
    data = {
        "mixed": mixed[mixed.is_valid],
        "points": points[points.is_valid],
        "polygons": polygons[polygons.is_valid],
    }
    return data
def test_parquet_invalid_metadata(tmpdir, geo_meta, error):
    """Geo metadata with missing required fields will raise a ValueError.

    This requires writing the parquet file directly below, so that we can
    control the metadata that is written for this test.
    """
    from pyarrow import parquet, Table

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    # convert to DataFrame and encode geometry to WKB
    df = DataFrame(df)
    df["geometry"] = to_wkb(df["geometry"].values)

    table = Table.from_pandas(df)
    metadata = table.schema.metadata
    metadata.update(geo_meta)
    table = table.replace_schema_metadata(metadata)

    filename = os.path.join(str(tmpdir), "test.pq")
    parquet.write_table(table, filename)

    with pytest.raises(ValueError, match=error):
        read_parquet(filename)
def test_promote_secondary_geometry(tmpdir, file_format):
    """Reading a subset of columns that does not include the primary geometry
    column should promote the first geometry column present.
    """
    reader, writer = file_format

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))
    df["geom2"] = df.geometry.copy()

    filename = os.path.join(str(tmpdir), "test.pq")
    writer(df, filename)
    pq_df = reader(filename, columns=["name", "geom2"])

    assert_geodataframe_equal(df.set_geometry("geom2")[["name", "geom2"]], pq_df)

    df["geom3"] = df.geometry.copy()
    writer(df, filename)

    with pytest.warns(
        UserWarning,
        match="Multiple non-primary geometry columns read from Parquet/Feather file.",
    ):
        pq_df = reader(filename, columns=["name", "geom2", "geom3"])

    assert_geodataframe_equal(
        df.set_geometry("geom2")[["name", "geom2", "geom3"]], pq_df
    )
def test_fsspec_url():
    fsspec = pytest.importorskip("fsspec")
    import fsspec.implementations.memory

    class MyMemoryFileSystem(fsspec.implementations.memory.MemoryFileSystem):
        # Simple fsspec filesystem that adds a required keyword.
        # Attempting to use this filesystem without the keyword will raise
        # an exception.
        def __init__(self, is_set, *args, **kwargs):
            self.is_set = is_set
            super().__init__(*args, **kwargs)

    fsspec.register_implementation("memory", MyMemoryFileSystem, clobber=True)
    memfs = MyMemoryFileSystem(is_set=True)

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    with memfs.open("data.parquet", "wb") as f:
        df.to_parquet(f)

    result = read_parquet("memory://data.parquet", storage_options=dict(is_set=True))
    assert_geodataframe_equal(result, df)

    result = read_parquet("memory://data.parquet", filesystem=memfs)
    assert_geodataframe_equal(result, df)
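# A usage note on the test above (standard fsspec behavior, not an API that
# this test defines): ``storage_options`` given to ``read_parquet`` are
# forwarded to the constructor of the filesystem registered for the URL's
# protocol, which is why ``is_set=True`` reaches MyMemoryFileSystem. The
# explicit equivalent of the first read would be:
#
#     fs = fsspec.filesystem("memory", is_set=True)
#     result = read_parquet("memory://data.parquet", filesystem=fs)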
def test_roundtrip(tmpdir, file_format, test_dataset):
    """Writing to parquet should not raise errors, and should not alter the
    original GeoDataFrame.
    """
    reader, writer = file_format

    df = read_file(get_path(test_dataset))
    orig = df.copy()

    filename = os.path.join(str(tmpdir), "test.pq")

    # TEMP: Initial implementation should raise a UserWarning
    with pytest.warns(UserWarning, match="initial implementation"):
        writer(df, filename)

    assert os.path.exists(filename)

    # make sure that the original data frame is unaltered
    assert_geodataframe_equal(df, orig)

    # make sure that we can roundtrip the data frame
    pq_df = reader(filename)

    assert isinstance(pq_df, GeoDataFrame)
    assert_geodataframe_equal(df, pq_df)
def test_feather_arrow_version(tmpdir):
    df = read_file(get_path("naturalearth_lowres"))
    filename = os.path.join(str(tmpdir), "test.feather")

    with pytest.raises(
        ImportError, match="pyarrow >= 0.17 required for Feather support"
    ):
        df.to_feather(filename)
def generate_test_df():
    world = read_file(datasets.get_path("naturalearth_lowres"))
    capitals = read_file(datasets.get_path("naturalearth_cities"))
    countries = world.to_crs("epsg:3395")[["geometry"]]
    capitals = capitals.to_crs("epsg:3395")[["geometry"]]
    mixed = capitals.append(countries)  # get a mix of geometries
    points = capitals
    polygons = countries
    # filter out invalid geometries
    data = {
        "mixed": mixed[mixed.is_valid],
        "points": points[points.is_valid],
        "polygons": polygons[polygons.is_valid],
    }
    # ensure index is pre-generated
    for data_type, value in data.items():
        data[data_type].sindex.query(value.geometry.values.data[0])
    return data
def setup(self):
    nybb = read_file(datasets.get_path('nybb'))
    self.long_nybb = GeoDataFrame(pd.concat(10 * [nybb]), crs=nybb.crs)

    num_points = 20000
    longitudes = np.random.rand(num_points) - 120
    latitudes = np.random.rand(num_points) + 38
    self.point_df = GeoSeries(
        [Point(x, y) for (x, y) in zip(longitudes, latitudes)])
    # note: assigning a {"init": ...} dict to .crs is deprecated;
    # "epsg:4326" is the modern equivalent
    self.point_df.crs = {"init": "epsg:4326"}
def test_pandas_parquet_roundtrip2(test_dataset, tmpdir):
    df = DataFrame(read_file(get_path(test_dataset)).drop(columns=["geometry"]))

    filename = os.path.join(str(tmpdir), "test.pq")
    df.to_parquet(filename)

    pq_df = pd_read_parquet(filename)

    assert_frame_equal(df, pq_df)
def setup_class(cls):
    try:
        import mapclassify
    except ImportError:
        try:
            import pysal
        except ImportError:
            pytest.importorskip('mapclassify')

    pth = get_path('naturalearth_lowres')
    cls.df = read_file(pth)
    cls.df['NEGATIVES'] = np.linspace(-10, 10, len(cls.df.index))
def setup_class(cls):
    try:
        import mapclassify  # noqa
    except ImportError:
        try:
            import pysal  # noqa
        except ImportError:
            pytest.importorskip("mapclassify")

    pth = get_path("naturalearth_lowres")
    cls.df = read_file(pth)
    cls.df["NEGATIVES"] = np.linspace(-10, 10, len(cls.df.index))
def test_encode_wkb():
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    encoded = _encode_wkb(df)

    # make sure original is not modified
    assert isinstance(df, GeoDataFrame)

    # WKB header: 0x01 = little-endian, type 6 = MultiPolygon with 3 parts,
    # then the first part's Polygon header and the start of its ring count
    assert (
        encoded.geometry.iloc[0][:16]
        == b"\x01\x06\x00\x00\x00\x03\x00\x00\x00\x01\x03\x00\x00\x00\x01\x00"
    )
def test_parquet_columns_no_geometry(tmpdir):
    """Reading a parquet file that is missing all of the geometry columns
    should raise a ValueError"""
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    filename = os.path.join(str(tmpdir), "test.pq")
    df.to_parquet(filename)

    with pytest.raises(ValueError):
        read_parquet(filename, columns=["name"])
def test_feather_compression(compression, tmpdir):
    """Using compression options should not raise errors, and should return
    an identical GeoDataFrame.
    """
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    filename = os.path.join(str(tmpdir), "test.feather")
    df.to_feather(filename, compression=compression)
    pq_df = read_feather(filename)

    assert isinstance(pq_df, GeoDataFrame)
    assert_geodataframe_equal(df, pq_df)
def test_parquet_repeat_columns(tmpdir):
    """Reading repeated columns should return the first value of each
    repeated column.
    """
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    filename = os.path.join(str(tmpdir), "test.pq")
    df.to_parquet(filename)

    columns = ["name", "name", "iso_a3", "name", "geometry"]
    pq_df = read_parquet(filename, columns=columns)

    assert pq_df.columns.tolist() == ["name", "iso_a3", "geometry"]
def test_create_metadata():
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))
    metadata = _create_metadata(df)

    assert isinstance(metadata, dict)
    assert metadata["schema_version"] == METADATA_VERSION
    assert metadata["creator"]["library"] == "geopandas"
    assert metadata["creator"]["version"] == geopandas.__version__
    assert metadata["primary_column"] == "geometry"
    assert "geometry" in metadata["columns"]
    assert metadata["columns"]["geometry"]["crs"] == df.geometry.crs.to_wkt()
    assert metadata["columns"]["geometry"]["encoding"] == "WKB"
    assert np.array_equal(
        metadata["columns"]["geometry"]["bbox"], df.geometry.total_bounds
    )
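# For orientation, a sketch of the metadata layout the assertions above imply
# (field values are illustrative placeholders, inferred from this test rather
# than taken from real output; total_bounds is [minx, miny, maxx, maxy]):
#
#     {
#         "schema_version": METADATA_VERSION,
#         "creator": {"library": "geopandas", "version": geopandas.__version__},
#         "primary_column": "geometry",
#         "columns": {
#             "geometry": {
#                 "crs": "<WKT string>",
#                 "encoding": "WKB",
#                 "bbox": [minx, miny, maxx, maxy],
#             },
#         },
#     }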
def test_parquet_index(tmpdir):
    """Setting index=`True` should preserve index in output, and setting
    index=`False` should drop index from output.
    """
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset)).set_index("iso_a3")

    filename = os.path.join(str(tmpdir), "test_with_index.pq")
    df.to_parquet(filename, index=True)
    pq_df = read_parquet(filename)
    assert_geodataframe_equal(df, pq_df)

    filename = os.path.join(str(tmpdir), "drop_index.pq")
    df.to_parquet(filename, index=False)
    pq_df = read_parquet(filename)
    assert_geodataframe_equal(df.reset_index(drop=True), pq_df)
def test_parquet_missing_crs(tmpdir):
    """If CRS is `None`, it should be properly handled and remain `None` when
    read from parquet.
    """
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))
    df.crs = None

    filename = os.path.join(str(tmpdir), "test.pq")
    df.to_parquet(filename)
    pq_df = read_parquet(filename)

    assert pq_df.crs is None
    assert_geodataframe_equal(df, pq_df, check_crs=True)
def test_parquet_subset_columns(tmpdir):
    """Reading a subset of columns should correctly decode selected geometry
    columns.
    """
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    filename = os.path.join(str(tmpdir), "test.pq")
    df.to_parquet(filename)
    pq_df = read_parquet(filename, columns=["name", "geometry"])

    assert_geodataframe_equal(df[["name", "geometry"]], pq_df)

    with pytest.raises(
        ValueError, match="No geometry columns are included in the columns read"
    ):
        read_parquet(filename, columns=[])
def test_parquet_multiple_geom_cols(tmpdir):
    """If multiple geometry columns are present when written to parquet,
    they should all be returned as such when read from parquet.
    """
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))
    df["geom2"] = df.geometry.copy()

    filename = os.path.join(str(tmpdir), "test.pq")
    df.to_parquet(filename)

    assert os.path.exists(filename)

    pq_df = read_parquet(filename)

    assert isinstance(pq_df, GeoDataFrame)
    assert_geodataframe_equal(df, pq_df)
    assert_geoseries_equal(df.geom2, pq_df.geom2, check_geom_type=True)
def test_validate_dataframe():
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    # valid: should not raise ValueError
    _validate_dataframe(df)
    _validate_dataframe(df.set_index("iso_a3"))

    # add column with non-string type
    df[0] = 1

    # invalid: should raise ValueError
    with pytest.raises(ValueError):
        _validate_dataframe(df)

    with pytest.raises(ValueError):
        _validate_dataframe(df.set_index(0))

    # not a DataFrame: should raise ValueError
    with pytest.raises(ValueError):
        _validate_dataframe("not a dataframe")
def test_parquet_missing_metadata(tmpdir):
    """Missing geo metadata, such as from a parquet file created from a pandas
    DataFrame, will raise a ValueError.
    """
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    # convert to DataFrame
    df = DataFrame(df)

    # convert the geometry column so we can extract later
    df["geometry"] = to_wkb(df["geometry"].values)

    filename = os.path.join(str(tmpdir), "test.pq")

    # use pandas to_parquet (no geo metadata)
    df.to_parquet(filename)

    # missing metadata will raise ValueError
    with pytest.raises(
        ValueError, match="Missing geo metadata in Parquet/Feather file."
    ):
        read_parquet(filename)
def test_non_fsspec_url_with_storage_options_raises():
    test_dataset = "naturalearth_lowres"
    with pytest.raises(ValueError, match="storage_options"):
        read_parquet(get_path(test_dataset), storage_options={"foo": "bar"})
def setup(self, *args):
    world = read_file(datasets.get_path("naturalearth_lowres"))
    capitals = read_file(datasets.get_path("naturalearth_cities"))

    self.bounds = [box(*geom.bounds) for geom in world.geometry]
    self.points = capitals
def test_read_paths(test_dataset):
    assert isinstance(read_file(get_path(test_dataset)), GeoDataFrame)
def test_read_file(self):
    nybb_filename = datasets.get_path("nybb")
    df = read_file(nybb_filename)
    assert df.crs == pyproj.CRS(2263)
    assert df.geometry.crs == pyproj.CRS(2263)
    assert df.geometry.values.crs == pyproj.CRS(2263)
def setup_class(cls):
    pytest.importorskip('mapclassify')
    pth = get_path('naturalearth_lowres')
    cls.df = read_file(pth)
    cls.df['NEGATIVES'] = np.linspace(-10, 10, len(cls.df.index))