Example #1
0
def test_clip_dask_mask(geodf_points):  # noqa: F811
    """Clipping with a dask-backed mask must raise ``NotImplementedError``."""
    lazy_frame = dask_geopandas.from_geopandas(geodf_points, npartitions=2)
    first_row = geodf_points.iloc[:1]
    lazy_mask = dask_geopandas.from_geopandas(first_row, npartitions=1)

    expected_message = r"Mask cannot be a Dask GeoDataFrame or GeoSeries."
    with pytest.raises(NotImplementedError, match=expected_message):
        dask_geopandas.clip(lazy_frame, lazy_mask)
Example #2
0
def test_operator_methods(geoseries_polygons, geoseries_points, meth):
    """Binary method ``meth`` agrees between geopandas and dask-geopandas.

    The method named by ``meth`` is called on the polygon series with the
    point series as argument, once eagerly and once on dask-backed copies.
    """
    left, right = geoseries_polygons, geoseries_points
    expected = getattr(left, meth)(right)

    lazy_left = dask_geopandas.from_geopandas(left, npartitions=2)
    lazy_right = dask_geopandas.from_geopandas(right, npartitions=2)
    lazy_result = getattr(lazy_left, meth)(lazy_right)

    assert isinstance(lazy_result, dd.Series)
    computed = lazy_result.compute()
    assert all(expected == computed)
Example #3
0
def test_geom_equals_exact(geoseries_polygons, geoseries_points):
    """``geom_equals_exact`` with a tolerance matches the geopandas result."""
    tolerance = 2
    expected = geoseries_polygons.geom_equals_exact(
        geoseries_points, tolerance=tolerance
    )

    lazy_polygons = dask_geopandas.from_geopandas(geoseries_polygons, npartitions=2)
    lazy_points = dask_geopandas.from_geopandas(geoseries_points, npartitions=2)
    lazy_result = lazy_polygons.geom_equals_exact(lazy_points, tolerance=tolerance)

    assert isinstance(lazy_result, dd.Series)
    assert expected.equals(lazy_result.compute())
Example #4
0
def test_sjoin_dask_geopandas():
    """``dask_geopandas.sjoin`` matches ``geopandas.sjoin`` for all input mixes.

    Covers dask/geopandas, geopandas/dask and dask/dask inputs, the dask/dask
    case again after spatial partitions were calculated, and the
    ``FutureWarning`` emitted for the deprecated ``op`` keyword.
    """
    join_options = {"predicate": "within", "how": "inner"}

    points = geopandas.read_file(geopandas.datasets.get_path("naturalearth_cities"))
    lazy_points = dask_geopandas.from_geopandas(points, npartitions=4)

    polygons = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres"))
    lazy_polygons = dask_geopandas.from_geopandas(polygons, npartitions=4)

    expected = geopandas.sjoin(points, polygons, **join_options).sort_index()

    # dask / geopandas, geopandas / dask, dask / dask
    input_pairs = (
        (lazy_points, polygons),
        (points, lazy_polygons),
        (lazy_points, lazy_polygons),
    )
    for left, right in input_pairs:
        joined = dask_geopandas.sjoin(left, right, **join_options)
        assert_geodataframe_equal(expected, joined.compute().sort_index())

    # with spatial_partitions
    lazy_points.calculate_spatial_partitions()
    lazy_polygons.calculate_spatial_partitions()
    joined = dask_geopandas.sjoin(lazy_points, lazy_polygons, **join_options)
    assert joined.spatial_partitions is not None
    assert_geodataframe_equal(expected, joined.compute().sort_index())

    # check warning
    with pytest.warns(FutureWarning, match="The `op` parameter is deprecated"):
        dask_geopandas.sjoin(points, lazy_polygons, op="within", how="inner")
Example #5
0
def test_geoseries_apply(geoseries_polygons):
    """``apply`` on a dask GeoSeries yields the per-geometry areas."""
    # https://github.com/jsignell/dask-geopandas/issues/18
    lazy_series = dask_geopandas.from_geopandas(geoseries_polygons, npartitions=2)
    float_meta = pd.Series(dtype=float)
    lazy_areas = lazy_series.apply(lambda geom: geom.area, meta=float_meta)
    computed_areas = lazy_areas.compute()

    pd.testing.assert_series_equal(computed_areas, geoseries_polygons.area)
Example #6
0
def test_set_crs_sets_spatial_partition_crs(geodf_points):
    """After ``set_crs`` the spatial partitions carry the same CRS as the frame."""
    lazy_frame = dask_geopandas.from_geopandas(geodf_points, npartitions=2)
    lazy_frame.calculate_spatial_partitions()

    with_crs = lazy_frame.set_crs("epsg:4326")

    assert with_crs.crs == with_crs.spatial_partitions.crs
Example #7
0
def test_parquet_roundtrip(tmp_path):
    """Write a 4-partition GeoDataFrame to Parquet and read it back.

    Checks that one file is written per partition, that reading the dataset
    with dask-geopandas and with plain geopandas reproduces the original
    frame, and that the first part file holds the first rows of the frame.
    """
    # basic roundtrip
    df = geopandas.read_file(
        geopandas.datasets.get_path("naturalearth_lowres"))
    ddf = dask_geopandas.from_geopandas(df, npartitions=4)

    basedir = tmp_path / "dataset"
    ddf.to_parquet(basedir)

    # each partition (4) is written as parquet file
    paths = list(basedir.glob("*.parquet"))
    assert len(paths) == 4

    # reading back gives identical GeoDataFrame
    result = dask_geopandas.read_parquet(basedir)
    # Check the frame that was read back, not the one that was written:
    # asserting on ``ddf`` here would only re-state the ``npartitions=4``
    # passed to ``from_geopandas`` above.
    assert result.npartitions == 4
    assert_geodataframe_equal(result.compute(), df)

    # the written dataset is also readable by plain geopandas
    result_gpd = geopandas.read_parquet(basedir)
    # the dataset written by dask has "__null_dask_index__" index column name
    result_gpd.index.name = None
    assert_geodataframe_equal(result_gpd, df)

    result_part0 = geopandas.read_parquet(basedir / "part.0.parquet")
    result_part0.index.name = None
    assert_geodataframe_equal(result_part0, df.iloc[:45])
Example #8
0
def test_geohash_range(geoseries_points):
    """``geohash`` raises ``ValueError`` for each out-of-range ``precision``.

    Every precision gets its own ``pytest.raises`` block. With both calls
    inside a single block, the second call is never executed once the first
    one raises, so only one of the two values would actually be tested.
    """
    ddf = from_geopandas(geoseries_points, npartitions=1)

    # NOTE(review): both 0 and 12 are taken to be invalid for
    # ``as_string=False`` because the original test listed them together
    # under ``pytest.raises`` -- confirm against the geohash implementation.
    for precision in (0, 12):
        with pytest.raises(ValueError):
            ddf.geohash(precision=precision, as_string=False)
Example #9
0
def test_roundtrip_geometry_column_name(tmp_path):
    """Parquet roundtrips keep a geometry column that is named ``"geom"``."""
    # basic roundtrip with different geometry column name
    source = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres"))
    source = source.rename_geometry("geom")

    def check_loaded(loaded):
        # Shared assertions for a frame read back by dask-geopandas.
        assert isinstance(loaded, dask_geopandas.GeoDataFrame)
        assert loaded.geometry.name == "geom"
        assert loaded.crs == source.crs
        assert loaded.spatial_partitions is not None
        assert_geodataframe_equal(loaded.compute(), source)

    # geopandas -> dask-geopandas roundtrip
    single_file = tmp_path / "data.parquet"
    source.to_parquet(single_file)
    check_loaded(dask_geopandas.read_parquet(single_file))

    # dask-geopandas -> dask-geopandas roundtrip
    lazy_source = dask_geopandas.from_geopandas(source, npartitions=4)
    assert lazy_source.geometry.name == "geom"
    dataset_dir = tmp_path / "dataset"
    lazy_source.to_parquet(dataset_dir)
    check_loaded(dask_geopandas.read_parquet(dataset_dir))
Example #10
0
def test_geoseries_crs(geoseries_points_crs):
    """CRS is exposed, guarded by ``set_crs`` and assignable on a dask GeoSeries.

    Verifies the CRS on the collection, on a single partition and on the
    computed result; that ``set_crs`` refuses a different CRS unless
    ``allow_override=True`` and then leaves the source object untouched; and
    that assigning to ``.crs`` changes the CRS in place.
    """
    source = geoseries_points_crs
    source_crs = source.crs
    source_name = source.name
    other_crs = "epsg:4316"

    lazy_series = dask_geopandas.from_geopandas(source, npartitions=2)
    assert lazy_series.crs == source_crs
    assert lazy_series.partitions[1].crs == source_crs
    assert lazy_series.compute().crs == source_crs

    # A conflicting CRS is rejected without an explicit override.
    conflict_pattern = r".*already has a CRS which is not equal to the passed CRS.*"
    with pytest.raises(ValueError, match=conflict_pattern):
        lazy_series.set_crs(other_crs)

    # With the override a new object is returned; the source keeps its CRS.
    overridden = lazy_series.set_crs(other_crs, allow_override=True)
    assert overridden.crs == other_crs
    assert overridden.name == source_name
    assert overridden.partitions[1].crs == other_crs
    assert lazy_series.crs == source_crs

    # Assigning the attribute changes the CRS of the object itself.
    lazy_series.crs = other_crs
    assert lazy_series.crs == other_crs
    assert lazy_series.partitions[1].crs == other_crs
    assert lazy_series.name == source_name
    assert lazy_series.compute().crs == other_crs
Example #11
0
def test_parquet_partition_on(tmp_path, write_metadata_file):
    """Write a Parquet dataset partitioned on ``continent`` and read it back."""
    world = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres"))
    lazy_world = dask_geopandas.from_geopandas(world, npartitions=4)

    # Writing a partitioned dataset based on one of the attribute columns
    dataset_dir = tmp_path / "naturalearth_lowres_by_continent.parquet"
    lazy_world.to_parquet(
        dataset_dir,
        partition_on="continent",
        write_metadata_file=write_metadata_file,
    )

    # 8 continents, plus 2 metadata files when those are written
    if write_metadata_file:
        expected_entries = 10
    else:
        expected_entries = 8
    assert len(list(dataset_dir.iterdir())) == expected_entries

    # Check for one of the partitions that the file is present and is correct
    africa_dir = dataset_dir / "continent=Africa"
    assert africa_dir.exists()
    africa_loaded = geopandas.read_parquet(africa_dir)
    africa_rows = world[world["continent"] == "Africa"]
    africa_expected = africa_rows.drop(columns=["continent"])
    africa_loaded.index.name = None
    assert_geodataframe_equal(africa_loaded, africa_expected)

    # Check roundtrip
    loaded = dask_geopandas.read_parquet(dataset_dir)
    assert loaded.npartitions >= 8
    assert loaded.spatial_partitions is not None
    world_expected = world.copy()
    world_expected["continent"] = world_expected["continent"].astype("category")
    assert_geodataframe_equal(loaded.compute(), world_expected, check_like=True)
Example #12
0
def test_clip_no_spatial_partitions(geodf_points):  # noqa: F811
    """Clip a dask GeoDataFrame that has no spatial partitions calculated.

    The mask is the first point buffered by 2; the expected result is the
    first two rows of the input frame.
    """
    dask_obj = dask_geopandas.from_geopandas(geodf_points, npartitions=2)
    # Take an explicit copy: assigning a column on an ``iloc`` slice is
    # chained assignment and makes pandas emit a SettingWithCopyWarning.
    mask = geodf_points.iloc[:1].copy()
    mask["geometry"] = mask["geometry"].buffer(2)
    expected = geodf_points.iloc[:2]
    result = dask_geopandas.clip(dask_obj, mask).compute()
    assert_geodataframe_equal(expected, result)
Example #13
0
def test_to_wkb_series(geoseries_points, hex):
    """``to_wkb`` on a dask GeoSeries matches geopandas for the given ``hex``."""
    lazy_series = dask_geopandas.from_geopandas(geoseries_points, npartitions=4)

    expected_wkb = geoseries_points.to_wkb(hex=hex)
    lazy_wkb = lazy_series.to_wkb(hex=hex)

    assert_series_equal(expected_wkb, lazy_wkb.compute())
Example #14
0
def test_total_bounds_from_partitions(geoseries_polygons):
    """``morton_distance`` is unchanged by calculating spatial partitions."""
    lazy_series = from_geopandas(geoseries_polygons, npartitions=2)
    before_partitions = lazy_series.morton_distance().compute()

    lazy_series.calculate_spatial_partitions()
    after_partitions = lazy_series.morton_distance().compute()

    assert_series_equal(after_partitions, before_partitions)
Example #15
0
def test_empty(geoseries_polygons, empty):
    """``morton_distance`` raises ``ValueError`` when the last geometry is ``empty``."""
    # Overwrite the last entry of the fixture series in place.
    geoseries_polygons.iloc[-1] = empty
    lazy_series = from_geopandas(geoseries_polygons, npartitions=2)

    expected_message = "cannot be computed on a GeoSeries with empty"
    with pytest.raises(ValueError, match=expected_message):
        lazy_series.morton_distance().compute()
Example #16
0
def test_geoseries_unary_union(geoseries_points):
    """``unary_union`` is a lazy ``Scalar`` that computes to the geopandas union."""
    expected_union = geoseries_points.unary_union

    lazy_series = dask_geopandas.from_geopandas(geoseries_points, npartitions=2)
    lazy_union = lazy_series.unary_union

    assert isinstance(lazy_union, Scalar)
    assert expected_union.equals(lazy_union.compute())
Example #17
0
def test_set_geometry_property_on_geodf(geodf_points):
    """``set_geometry`` on a renamed column makes it the active geometry."""
    lazy_frame = dask_geopandas.from_geopandas(geodf_points, npartitions=2)

    renamed = lazy_frame.rename(columns={"geometry": "foo"})
    computed = renamed.set_geometry("foo").compute()

    assert set(computed.columns) == {"value1", "value2", "foo"}
    assert all(computed.geometry == computed.foo)
Example #18
0
def test_set_index_preserves_class(geodf_points, shuffle_method):
    """``set_index`` keeps GeoDataFrame partitions and a GeoDataFrame result."""
    lazy_frame = dask_geopandas.from_geopandas(geodf_points, npartitions=2)
    reindexed = lazy_frame.set_index("value1", shuffle=shuffle_method)

    # Every individual partition must still compute to a GeoDataFrame ...
    for part in reindexed.partitions:
        computed_part = part.compute()
        assert isinstance(computed_part, geopandas.GeoDataFrame)

    # ... and so must the collection as a whole.
    computed_all = reindexed.compute()
    assert isinstance(computed_all, geopandas.GeoDataFrame)
Example #19
0
def test_clip_geoseries(geodf_points):  # noqa: F811
    """Clipping a dask GeoSeries matches ``geopandas.clip`` on the geometry.

    Spatial partitions are calculated first; the mask is the first point
    buffered by 2.
    """
    dask_obj = dask_geopandas.from_geopandas(geodf_points, npartitions=2)
    dask_obj.calculate_spatial_partitions()
    # Take an explicit copy: assigning a column on an ``iloc`` slice is
    # chained assignment and makes pandas emit a SettingWithCopyWarning.
    mask = geodf_points.iloc[:1].copy()
    mask["geometry"] = mask["geometry"].buffer(2)
    expected = geopandas.clip(geodf_points.geometry, mask)
    result = dask_geopandas.clip(dask_obj.geometry, mask).compute()
    assert_geoseries_equal(expected, result)
Example #20
0
def test_to_crs_geodf(geodf_points_crs):
    """``to_crs`` on a dask GeoDataFrame matches ``GeoDataFrame.to_crs``."""
    df = geodf_points_crs
    dask_obj = dask_geopandas.from_geopandas(df, npartitions=2)

    new_crs = "epsg:4316"
    new = dask_obj.to_crs(new_crs)
    assert new.crs == new_crs
    # Comparing two frames yields a boolean DataFrame. The builtin ``all()``
    # would iterate its column labels (always truthy), so reduce the values
    # over both axes instead.
    matches = new.compute() == df.to_crs(new_crs)
    assert matches.all().all()
Example #21
0
def test_to_wkb(geodf_points_crs, hex):
    """``to_wkb`` on a dask GeoDataFrame with two geometry columns matches geopandas."""
    frame = geodf_points_crs
    # Add a second geometry column (buffered points) to the fixture frame.
    frame["polygons"] = frame.buffer(1)
    lazy_frame = dask_geopandas.from_geopandas(frame, npartitions=4)

    expected_wkb = frame.to_wkb(hex=hex)
    lazy_wkb = lazy_frame.to_wkb(hex=hex)

    assert_frame_equal(expected_wkb, lazy_wkb.compute())
Example #22
0
def test_to_crs_geoseries(geoseries_points_crs):
    """``to_crs`` on a dask GeoSeries matches ``GeoSeries.to_crs``."""
    target_crs = "epsg:4316"
    lazy_series = dask_geopandas.from_geopandas(geoseries_points_crs, npartitions=2)

    reprojected = lazy_series.to_crs(target_crs)

    assert reprojected.crs == target_crs
    computed = reprojected.compute()
    assert all(computed == geoseries_points_crs.to_crs(target_crs))
Example #23
0
def test_propagate_on_set_crs(geodf_points):
    """``set_crs`` on the frame yields spatial partitions with that CRS set."""
    lazy_frame = dask_geopandas.from_geopandas(geodf_points, npartitions=2)
    lazy_frame.calculate_spatial_partitions()

    propagated = lazy_frame.set_crs("epsg:4326").spatial_partitions
    reference = lazy_frame.spatial_partitions.set_crs("epsg:4326")

    assert_geoseries_equal(propagated, reference)
Example #24
0
def test_geoseries_properties(geoseries_polygons, attr):
    """Property ``attr`` of a dask GeoSeries computes to the geopandas value."""
    expected = getattr(geoseries_polygons, attr)

    lazy_series = dask_geopandas.from_geopandas(geoseries_polygons, npartitions=2)
    # The first partition must hold only part of the data.
    first_partition = lazy_series.partitions[0]
    assert len(first_partition) < len(geoseries_polygons)
    assert isinstance(lazy_series, dask_geopandas.GeoSeries)

    lazy_value = getattr(lazy_series, attr)
    computed = lazy_value.compute()
    assert all(expected == computed)
Example #25
0
def test_explode_geoseries():
    """``explode`` on a dask GeoSeries of multipoints matches geopandas."""
    multipoints = [
        MultiPoint([(0, 0), (1, 1)]),
        MultiPoint([(2, 2), (3, 3), (4, 4)]),
    ]
    series = geopandas.GeoSeries(multipoints)
    expected = series.explode()

    lazy_series = dask_geopandas.from_geopandas(series, npartitions=2)
    lazy_exploded = lazy_series.explode()

    assert isinstance(lazy_exploded, dask_geopandas.GeoSeries)
    computed = lazy_exploded.compute()
    assert all(expected == computed)
Example #26
0
def test_meth_with_args_and_kwargs(geoseries_lines, meth, options):
    """Method ``meth`` called with ``options`` as keywords matches geopandas."""
    expected = getattr(geoseries_lines, meth)(**options)

    lazy_series = dask_geopandas.from_geopandas(geoseries_lines, npartitions=2)
    lazy_method = getattr(lazy_series, meth)
    lazy_result = lazy_method(**options)

    assert isinstance(lazy_result, dask_geopandas.GeoSeries)
    computed = lazy_result.compute()
    assert all(expected == computed)
Example #27
0
 def test_split_out_name(self):
     """Dissolve with ``split_out=4`` works when the geometry column is "geom"."""
     renamed_world = self.world.rename_geometry("geom")
     expected = renamed_world.dissolve("continent")

     lazy_world = dask_geopandas.from_geopandas(
         self.world.rename_geometry("geom"), npartitions=4
     )
     lazy_dissolved = lazy_world.dissolve("continent", split_out=4)

     assert lazy_dissolved.npartitions == 4
     assert_geodataframe_equal(expected, lazy_dissolved.compute(), check_like=True)
Example #28
0
def test_explode_geodf():
    """``explode`` on a dask GeoDataFrame of multipoints matches geopandas."""
    s = geopandas.GeoSeries(
        [MultiPoint([(0, 0), (1, 1)]),
         MultiPoint([(2, 2), (3, 3), (4, 4)])])
    df = geopandas.GeoDataFrame({"col": [1, 2], "geometry": s})
    original = df.explode()
    dask_s = dask_geopandas.from_geopandas(df, npartitions=2)
    daskified = dask_s.explode()
    assert isinstance(daskified, dask_geopandas.GeoDataFrame)
    # Comparing two frames yields a boolean DataFrame. The builtin ``all()``
    # would iterate its column labels (always truthy), so reduce the values
    # over both axes instead.
    matches = original == daskified.compute()
    assert matches.all().all()
Example #29
0
def test_spatial_partitions_setter(geodf_points):
    """Assigning invalid ``spatial_partitions`` raises the matching error."""
    lazy_frame = dask_geopandas.from_geopandas(geodf_points, npartitions=2)

    # needs to be a GeoSeries
    not_a_geoseries = geodf_points
    with pytest.raises(TypeError):
        lazy_frame.spatial_partitions = not_a_geoseries

    # wrong length
    wrong_length = geodf_points.geometry
    with pytest.raises(ValueError):
        lazy_frame.spatial_partitions = wrong_length
Example #30
0
def test_get_coord(coord):
    """Coordinate accessor ``coord`` on a dask GeoSeries matches geopandas."""
    points = [
        Point(1, 2, 3),
        Point(2, 3, 4),
        Point(3, 4, 5),
        Point(4, 1, 7),
    ]
    series = geopandas.GeoSeries(points)
    lazy_series = dask_geopandas.from_geopandas(series, npartitions=2)

    expected_values = getattr(series, coord)
    lazy_values = getattr(lazy_series, coord)

    assert_series_equal(expected_values, lazy_values.compute())