def test_from_dask_dataframe_with_column_name():
    """from_dask_dataframe(geometry=<column name>) should match plain geopandas."""
    pdf = pd.DataFrame({"x": [0, 1, 2, 3], "y": [1, 2, 3, 4]})
    pdf["geoms"] = geopandas.points_from_xy(pdf["x"], pdf["y"])

    # Promote the dask frame to a GeoDataFrame using an existing column name.
    ddf = dd.from_pandas(pdf, npartitions=2)
    result = dask_geopandas.from_dask_dataframe(ddf, geometry="geoms")

    expected = geopandas.GeoDataFrame(pdf, geometry="geoms")
    assert_geodataframe_equal(result.compute(), expected)
def test_from_dask_dataframe_with_dask_geoseries():
    """from_dask_dataframe(geometry=<dask GeoSeries>) should match plain geopandas.

    Fix: the original built the expected frame with ``df.set_geometry(...)``,
    but ``df`` is a plain ``pandas.DataFrame`` which has no ``set_geometry``
    method (that is GeoDataFrame API) — it raised AttributeError. Construct a
    GeoDataFrame explicitly instead.
    """
    df = pd.DataFrame({"x": [0, 1, 2, 3], "y": [1, 2, 3, 4]})
    dask_obj = dd.from_pandas(df, npartitions=2)
    # Pass a lazily-computed dask GeoSeries as the geometry argument.
    dask_obj = dask_geopandas.from_dask_dataframe(
        dask_obj, geometry=dask_geopandas.points_from_xy(dask_obj, "x", "y")
    )
    expected = geopandas.GeoDataFrame(
        df, geometry=geopandas.points_from_xy(df["x"], df["y"])
    )
    assert_geoseries_equal(dask_obj.geometry.compute(), expected.geometry)
def test_points_from_xy():
    """dask_geopandas.points_from_xy should mirror geopandas.points_from_xy."""
    xs = [1, 2, 3, 4, 5]
    ys = [4, 5, 6, 7, 8]
    expected = geopandas.points_from_xy(xs, ys)

    pdf = pd.DataFrame({"x": xs, "y": ys})
    ddf = dd.from_pandas(pdf, npartitions=2)

    actual = dask_geopandas.points_from_xy(ddf)
    assert isinstance(actual, dask_geopandas.GeoSeries)
    assert list(actual) == list(expected)

    # Assign the points as a geometry column and promote each frame to a
    # GeoDataFrame; the dask result must round-trip to the geopandas one.
    pdf["geometry"] = expected
    expected_gdf = geopandas.GeoDataFrame(pdf)
    ddf["geometry"] = actual
    result = dask_geopandas.from_dask_dataframe(ddf).compute()
    assert_geodataframe_equal(result, expected_gdf)
def spatial_join_map_partition(points_filepath, nuts_filepath, blocksize=1_000_000):
    """Count OSM amenities per NUTS region via a partition-wise spatial join.

    Reads amenity points from a CSV (``lon``/``lat``/``amenity``/``osm_id``
    columns assumed — confirm against the input file), joins each dask
    partition against the NUTS polygons, and returns a DataFrame with
    ``nuts_id``, ``amenity`` and a ``counts`` column.

    Fix: removed a leftover debug ``print`` of the partition count.

    Parameters
    ----------
    points_filepath : str
        Path to the CSV of amenity points.
    nuts_filepath : str
        Path to the NUTS regions file (read with the GPKG driver).
    blocksize : int, default 1_000_000
        CSV blocksize passed to ``dd.read_csv`` (controls partition size).
    """

    def spatial_join(gdf_regions):
        # Closure so each worker joins against its own copy of the regions.
        def compute_spatial_join(df):
            df = sjoin(df, gdf_regions, how='left')
            return df[['nuts_id', 'amenity', 'osm_id']]

        return compute_spatial_join

    # Skip columns we never use to keep the regions frame small.
    gdf_nuts = gpd.read_file(
        nuts_filepath,
        ignore_fields=[
            'levl_code', 'cntr_code', 'name_latn', 'nuts_name', 'population'
        ],
        driver='GPKG',
    )

    ddf_amenity = dd.read_csv(points_filepath, blocksize=blocksize)
    ddf_amenity = dask_geopandas.from_dask_dataframe(ddf_amenity)
    ddf_amenity = ddf_amenity.set_geometry(
        dask_geopandas.points_from_xy(ddf_amenity, 'lon', 'lat')
    )
    # Input coordinates are lon/lat, so tag the frame as WGS84 (EPSG:4326).
    ddf_amenity = ddf_amenity.set_crs(pyproj.CRS(4326))

    ddf_amenity = ddf_amenity.map_partitions(
        spatial_join(gdf_nuts.copy()),
        meta={'nuts_id': object, 'amenity': object, 'osm_id': object},
    )

    s = (
        ddf_amenity.groupby(['nuts_id', 'amenity'])['osm_id']
        .count()
        .compute()
    )
    s.name = 'counts'
    return s.reset_index()