Ejemplo n.º 1
0
    def test_explode_pandas_fallback_ignore_index(self):
        """Explode a list-valued, non-geometry column with ``ignore_index=True``.

        Each element of the lists in ``col1`` must end up on its own row, the
        row's geometry must be repeated unchanged, and the result must compare
        equal to a frame built with a default index. The column name is passed
        once positionally and once as the ``column`` keyword.
        """
        first_geom = MultiPoint([(1, 2), (3, 4)])
        second_geom = MultiPoint([(2, 1), (0, 0)])

        gdf = GeoDataFrame(
            {
                "col1": [["name1", "name2"], ["name3", "name4"]],
                "geometry": [first_geom, second_geom],
            },
            crs=4326,
        )
        expected_df = GeoDataFrame(
            {
                "col1": ["name1", "name2", "name3", "name4"],
                "geometry": [first_geom, first_geom, second_geom, second_geom],
            },
            crs=4326,
        )

        # First pass: column as positional arg; second pass: column as kwarg
        call_variants = (
            (("col1",), {}),
            ((), {"column": "col1"}),
        )
        for positional, keyword in call_variants:
            exploded_df = gdf.explode(*positional, ignore_index=True, **keyword)
            assert_geodataframe_equal(exploded_df, expected_df)
Ejemplo n.º 2
0
    def test_explode_pandas_multi_index_ignore_index(self, outer_index):
        """Explode a MultiIndex-ed frame with ``ignore_index=True``.

        The input carries a two-level index (``first``/``second``). The
        exploded frame is compared against one indexed by
        ``Index(range(n))``, both with and without ``index_parts=True``.
        """
        source_index = MultiIndex.from_arrays(
            [[outer_index] * 3, [1, 2, 3]],
            names=("first", "second"),
        )
        multipoints = [MultiPoint([(x, x), (x, 0)]) for x in range(3)]
        df = GeoDataFrame(
            {"vals": [1, 2, 3]},
            geometry=multipoints,
            index=source_index,
        )

        # One Point per MultiPoint member, in input order
        expected_points = GeoSeries(
            [
                Point(0, 0),
                Point(0, 0),
                Point(1, 1),
                Point(1, 0),
                Point(2, 2),
                Point(2, 0),
            ]
        )
        expected_df = GeoDataFrame(
            {"vals": [1, 1, 2, 2, 3, 3], "geometry": expected_points}
        )
        expected_df = expected_df.set_index(Index(range(len(expected_df))))

        assert_frame_equal(df.explode(ignore_index=True), expected_df)

        # index_parts is ignored if ignore_index=True
        assert_frame_equal(
            df.explode(ignore_index=True, index_parts=True), expected_df
        )
Ejemplo n.º 3
0
 def test_explode_duplicated_index(self):
     """Explode a frame whose index contains a repeated label (``1, 1, 2``).

     With ``index_parts=True`` the expected result has a two-level index:
     the original label paired with a 0/1 part counter per source row.
     """
     multipoints = [MultiPoint([(x, x), (x, 0)]) for x in range(3)]
     df = GeoDataFrame(
         {"vals": [1, 2, 3]},
         geometry=multipoints,
         index=[1, 1, 2],
     )

     original_labels = [1, 1, 1, 1, 2, 2]
     part_numbers = [0, 1, 0, 1, 0, 1]
     expected_index = MultiIndex.from_arrays([original_labels, part_numbers])

     expected_points = [
         Point(0, 0),
         Point(0, 0),
         Point(1, 1),
         Point(1, 0),
         Point(2, 2),
         Point(2, 0),
     ]
     expected_df = GeoDataFrame(
         {"vals": [1, 1, 2, 2, 3, 3]},
         geometry=GeoSeries(expected_points, index=expected_index),
         index=expected_index,
     )

     test_df = df.explode(index_parts=True)
     assert_geodataframe_equal(test_df, expected_df)
Ejemplo n.º 4
0
    def test_explode_order_mixed(self):
        """Explode a mix of MultiPoint and Point rows with an unsorted index.

        The input index is ``[2, 9, 7]``; the expected output keeps that row
        order and adds a part counter as a second index level.
        """
        multipoints = [MultiPoint([(x, x), (x, 0)]) for x in range(2)]
        df = GeoDataFrame(
            {"vals": [1, 2, 3]},
            geometry=[*multipoints, Point(0, 10)],
            index=[2, 9, 7],
        )

        original_labels = [2, 2, 9, 9, 7]
        part_numbers = [0, 1, 0, 1, 0]
        expected_index = MultiIndex.from_arrays([original_labels, part_numbers])

        expected_points = [
            Point(0, 0),
            Point(0, 0),
            Point(1, 1),
            Point(1, 0),
            Point(0, 10),
        ]
        expected_df = GeoDataFrame(
            {"vals": [1, 1, 2, 2, 3]},
            geometry=GeoSeries(expected_points, index=expected_index),
            index=expected_index,
        )

        test_df = df.explode(index_parts=True)
        assert_geodataframe_equal(test_df, expected_df)
Ejemplo n.º 5
0
    def test_explode_pandas_multi_index(self, outer_index):
        """Explode a MultiIndex-ed frame with ``index_parts=True``.

        The expected index keeps the two named input levels
        (``first``/``second``) and gains a third, unnamed level holding the
        0/1 part counter.
        """
        source_index = MultiIndex.from_arrays(
            [[outer_index] * 3, [1, 2, 3]],
            names=("first", "second"),
        )
        multipoints = [MultiPoint([(x, x), (x, 0)]) for x in range(3)]
        df = GeoDataFrame(
            {"vals": [1, 2, 3]},
            geometry=multipoints,
            index=source_index,
        )

        expected_points = GeoSeries(
            [
                Point(0, 0),
                Point(0, 0),
                Point(1, 1),
                Point(1, 0),
                Point(2, 2),
                Point(2, 0),
            ]
        )
        expected_df = GeoDataFrame(
            {"vals": [1, 1, 2, 2, 3, 3], "geometry": expected_points}
        )

        # (outer, second, part) for every part of every source row
        index_tuples = [
            (outer_index, second, part)
            for second in (1, 2, 3)
            for part in (0, 1)
        ]
        expected_index = MultiIndex.from_tuples(
            index_tuples,
            names=["first", "second", None],
        )
        expected_df = expected_df.set_index(expected_index)

        test_df = df.explode(index_parts=True)
        assert_frame_equal(test_df, expected_df)
Ejemplo n.º 6
0
def explode(gdf: gpd.GeoDataFrame,
            ratios: Iterable[str] = None) -> gpd.GeoDataFrame:
    """
    Explode MultiPolygon to multiple Polygon geometries.

    Args:
        gdf: GeoDataFrame with non-zero-area (Multi)Polygon geometries.
        ratios: Names of columns to rescale by the area fraction of the Polygon
            relative to the MultiPolygon. May be any iterable of column names
            (or a single column name).
            If provided, MultiPolygon cannot self-intersect.
            By default, the original value is used unchanged.

    Raises:
        ValueError: Geometry contains self-intersecting MultiPolygon.

    Returns:
        GeoDataFrame with each Polygon as a separate row in the GeoDataFrame.
        The index is the number of the source row in the input GeoDataFrame.

    """
    check_gdf(gdf)

    # Materialise `ratios` as a list of labels: a tuple would be read as a
    # single (tuple) key by `result[ratios]` and a generator cannot be used
    # as an indexer at all.
    if ratios is None:
        ratios = []
    elif isinstance(ratios, str):
        ratios = [ratios]
    else:
        ratios = list(ratios)

    # Positional index so that the result index is the source row number.
    gdf = gdf.reset_index(drop=True)
    is_mpoly = gdf.geometry.geom_type == "MultiPolygon"
    if ratios and is_mpoly.any():
        mpolys = gdf.geometry[is_mpoly]
        # Overlapping parts make the union smaller than the summed part areas.
        # NOTE(review): this is an exact float comparison; rounding in
        # unary_union could in principle trigger a false positive - confirm
        # whether a tolerance is wanted.
        union_area = mpolys.apply(shapely.ops.unary_union).area
        if (union_area != mpolys.area).any():
            raise ValueError(
                "Geometry contains self-intersecting MultiPolygon")

    result = gdf.explode()
    # Depending on the geopandas version (default of `index_parts`), explode
    # may or may not append a part-number level. Drop it only when present so
    # the index is always the plain source row number.
    if result.index.nlevels > 1:
        result = result.droplevel(1)

    if ratios:
        # Area of each part divided by the area of the row it came from.
        fraction = result.geometry.area.values / gdf.geometry.area[
            result.index].values
        result[ratios] = result[ratios].multiply(fraction, axis="index")
    return result[gdf.columns]
Ejemplo n.º 7
0
    def test_explode_geodataframe(self, index_name):
        """Explode a frame holding one MultiPoint and one Point.

        The expected frame has one row per single-part geometry, the ``col``
        value repeated per part, and a two-level index: the (optionally
        named) source index plus an unnamed part counter.
        """
        s = GeoSeries([MultiPoint([Point(1, 2), Point(2, 3)]), Point(5, 5)])
        df = GeoDataFrame({'col': [1, 2], 'geometry': s})
        df.index.name = index_name

        test_df = df.explode()

        expected_s = GeoSeries([Point(1, 2), Point(2, 3), Point(5, 5)])
        expected_df = GeoDataFrame({'geometry': expected_s, 'col': [1, 1, 2]})
        # Levels and codes are passed positionally: the ``labels`` keyword was
        # renamed to ``codes`` in pandas 0.24 and later removed, so the
        # positional form is the one that works on both sides of the rename.
        expected_index = MultiIndex([[0, 1], [0, 1]],  # levels
                                    [[0, 0, 1], [0, 1, 0]],  # labels/codes
                                    names=[index_name, None])
        expected_df = expected_df.set_index(expected_index)
        assert_frame_equal(test_df, expected_df)
Ejemplo n.º 8
0
    def test_explode_geodataframe_no_multiindex(self, index_name):
        """Explode with ``index_parts=False`` on a frame with a ``level_1`` column.

        The expected result keeps a flat index (source labels repeated per
        part, name preserved) and the ``level_1`` data column. See GH1393.
        """
        # GH1393
        source_geoms = GeoSeries(
            [MultiPoint([Point(1, 2), Point(2, 3)]), Point(5, 5)]
        )
        df = GeoDataFrame({"level_1": [1, 2], "geometry": source_geoms})
        df.index.name = index_name

        single_parts = GeoSeries([Point(1, 2), Point(2, 3), Point(5, 5)])
        expected_df = GeoDataFrame(
            {"level_1": [1, 1, 2], "geometry": single_parts}
        ).set_index(Index([0, 0, 1], name=index_name))

        test_df = df.explode(index_parts=False)
        assert_frame_equal(test_df, expected_df)
Ejemplo n.º 9
0
def process_terraces(bmp_gdf: geopandas.GeoDataFrame, name: str = "terrace"):
    """
    Get representative points (locations) for each BMP feature
    in a geopandas geodataframe

    The input is exploded to single-part geometries, reduced to ``BMPCOLS``,
    passed through ``_fix_ints`` for the columns starting with "Present",
    has its columns lower-cased (with "present" replaced by "isin"), has
    ``_smooth_line`` applied to every geometry, and is exploded again.

    Args:
        bmp_gdf: GeoDataFrame containing at least the ``BMPCOLS`` columns.
        name: Currently unused by this function; kept for interface
            compatibility.

    Returns:
        The processed frame with a default integer index and two extra
        columns, ``obj_id`` and ``geo_id``, taken from the two index levels
        produced by the final explode.
        NOTE(review): presumably ``obj_id`` is the feature number and
        ``geo_id`` the part number within it - confirm against
        ``_smooth_line``'s output.
    """

    points = (
        bmp_gdf.explode()
        # The first explode's index is discarded, so its shape is irrelevant.
        .reset_index(drop=True)
        .loc[:, BMPCOLS]
        .pipe(_fix_ints, filter(lambda c: c.startswith("Present"), BMPCOLS))
        .rename(columns=lambda c: c.lower().replace("present", "isin"))
        .assign(geometry=lambda gdf: gdf.geometry.apply(_smooth_line))
        # rename_axis below names exactly two index levels, so the part
        # counter level must be requested explicitly instead of relying on
        # the version-dependent default of `index_parts`.
        .explode(index_parts=True)
        .rename_axis(["obj_id", "geo_id"], axis="index")
        .reset_index()
    )

    return points
Ejemplo n.º 10
0
    def test_explode_geodataframe(self, index_name):
        """Explode a frame holding one MultiPoint and one Point.

        The result is compared against a frame with one row per single-part
        geometry and a two-level index made of the (optionally named) source
        index and an unnamed part counter.
        """
        source_geoms = GeoSeries(
            [MultiPoint([Point(1, 2), Point(2, 3)]), Point(5, 5)]
        )
        df = GeoDataFrame({'col': [1, 2], 'geometry': source_geoms})
        df.index.name = index_name

        single_parts = GeoSeries([Point(1, 2), Point(2, 3), Point(5, 5)])
        levels = [[0, 1], [0, 1]]
        codes = [[0, 0, 1], [0, 1, 0]]  # labels/codes
        expected_index = MultiIndex(levels, codes, names=[index_name, None])
        expected_df = GeoDataFrame(
            {'col': [1, 1, 2], 'geometry': single_parts}
        ).set_index(expected_index)

        test_df = df.explode()
        assert_frame_equal(test_df, expected_df)
Ejemplo n.º 11
0
    def test_explode_order_no_multi(self):
        """Explode a frame that contains only single-part Points.

        With ``index_parts=True`` every row is expected to keep its position
        and original label (``2, 9, 7``) and receive part number ``0``.
        """
        df = GeoDataFrame(
            {"vals": [1, 2, 3]},
            geometry=[Point(0, y) for y in range(3)],
            index=[2, 9, 7],
        )

        original_labels = [2, 9, 7]
        part_numbers = [0, 0, 0]
        expected_df = GeoDataFrame(
            {"vals": [1, 2, 3]},
            geometry=[Point(0, y) for y in range(3)],
            index=MultiIndex.from_arrays([original_labels, part_numbers]),
        )

        test_df = df.explode(index_parts=True)
        assert_geodataframe_equal(test_df, expected_df)
Ejemplo n.º 12
0
    def test_explode_geodataframe(self, index_name):
        """Explode a MultiPoint/Point frame and check rows and index.

        Expected: three single Points, ``col`` repeated for the two parts of
        the MultiPoint, and a two-level index whose first level carries
        ``index_name`` and whose second level is the unnamed part counter.
        """
        multi_and_single = GeoSeries(
            [MultiPoint([Point(1, 2), Point(2, 3)]), Point(5, 5)]
        )
        df = GeoDataFrame({"col": [1, 2], "geometry": multi_and_single})
        df.index.name = index_name

        exploded_points = GeoSeries([Point(1, 2), Point(2, 3), Point(5, 5)])
        expected_df = GeoDataFrame(
            {"col": [1, 1, 2], "geometry": exploded_points}
        )
        levels = [[0, 1], [0, 1]]
        codes = [[0, 0, 1], [0, 1, 0]]  # labels/codes
        expected_df = expected_df.set_index(
            MultiIndex(levels, codes, names=[index_name, None])
        )

        test_df = df.explode()
        assert_frame_equal(test_df, expected_df)
Ejemplo n.º 13
0
    def test_explode_geodataframe_level_1(self, index_name):
        """Explode a frame that already has a data column named ``level_1``.

        The expected frame keeps the ``level_1`` column and has the usual
        two-level (source label, part counter) index. See GH1393.
        """
        # GH1393
        source_geoms = GeoSeries(
            [MultiPoint([Point(1, 2), Point(2, 3)]), Point(5, 5)]
        )
        df = GeoDataFrame({"level_1": [1, 2], "geometry": source_geoms})
        df.index.name = index_name

        single_parts = GeoSeries([Point(1, 2), Point(2, 3), Point(5, 5)])
        levels = [[0, 1], [0, 1]]
        codes = [[0, 0, 1], [0, 1, 0]]  # labels/codes
        expected_df = GeoDataFrame(
            {"level_1": [1, 1, 2], "geometry": single_parts}
        ).set_index(MultiIndex(levels, codes, names=[index_name, None]))

        # On pandas older than 0.24 the column order is pinned explicitly
        if not compat.PANDAS_GE_024:
            expected_df = expected_df[["level_1", "geometry"]]

        test_df = df.explode()
        assert_frame_equal(test_df, expected_df)
Ejemplo n.º 14
0
    def aggregate_raw_PV_polygons_to_raw_PV_installations(
            self, raw_PV_polygons_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        """
        Aggregate raw PV polygons belonging to the same PV installation. Raw refers to the fact that the PV area is
        not corrected by the tilt angle. For each PV installation, we compute its raw area and a unique identifier.

        The input GeoDataFrame is not modified; buffering is applied to a copy.

        Parameters
        ----------
        raw_PV_polygons_gdf : GeoPandas.GeoDataFrame
            GeoDataFrame which contains all the raw PV polygons which have been detected during the previous pipeline step.
            Must contain a ``class`` column, which is used as the dissolve key.
        Returns
        -------
        GeoPandas.GeoDataFrame
           GeoDataFrame with dissolved PV polygon geometries, one row per single-part polygon, plus the columns
           ``raw_area`` (area after reprojection to EPSG:5243) and ``identifier`` (``"polygon_<row number>"``).
        """

        # Work on a copy so the caller's GeoDataFrame keeps its original
        # (unbuffered) geometries.
        buffered_polygons_gdf = raw_PV_polygons_gdf.copy()

        # Buffer polygons, i.e. replace the polygons with their buffered versions
        # Based on our experience, the buffer value should be within [1e-6, 1e-8] degrees
        buffered_polygons_gdf["geometry"] = buffered_polygons_gdf[
            "geometry"].buffer(1e-6)

        # Dissolve, i.e. aggregate, all PV polygons into one Multipolygon
        dissolved_gdf = buffered_polygons_gdf.dissolve(by="class")

        # Explode multi-part geometries into multiple single geometries.
        # Whether explode() adds a part-number index level (which
        # reset_index() turns into a "level_1" column) depends on the
        # geopandas version, so the column is dropped only if it exists.
        raw_PV_installations_gdf = (
            dissolved_gdf.explode().reset_index().drop(
                columns=["level_1"], errors="ignore"))

        # Compute the raw area for each pv installation
        raw_PV_installations_gdf["raw_area"] = (
            raw_PV_installations_gdf["geometry"].to_crs(epsg=5243).area)

        # Create a unique identifier for each pv installation
        raw_PV_installations_gdf[
            "identifier"] = raw_PV_installations_gdf.index.map(
                lambda row_number: "polygon_" + str(row_number))

        return raw_PV_installations_gdf
Ejemplo n.º 15
0
def import_geodataframe(
    self,
    gdf: gpd.GeoDataFrame,
    tablename: str,
    gpd_kwargs: dict = None,
    uid_col: str = "uid",
    explode: bool = False,
) -> None:
    """
    Write a GeoDataFrame to a SQL table with a geoalchemy2 ``geom`` column.

    The frame is copied, sanitized with ``helpers.sanitize_df_for_sql``,
    optionally exploded to single-part geometries, and written with
    ``DataFrame.to_sql``. Afterwards ``self.table_add_uid_column`` and
    ``self.gis_table_add_spatial_index`` are called on the new table.

    Args:
        gdf: GeoDataFrame to import. Its CRS string must have the form
            ``"<authority>:<integer code>"``; the integer is used as SRID.
        tablename: Target table name, split into schema and table by
            ``helpers.convert_full_tablename_to_parts``.
        gpd_kwargs: Extra keyword arguments forwarded to ``to_sql``.
            Defaults to no extra arguments.
        uid_col: Name of an existing column that is renamed to
            ``old_<uid_col>`` before writing.
        explode: If True, explode multipart geometries to singlepart first.
            If False and the frame mixes geometry types, a warning is
            printed and nothing is written.

    TODO: option to use multipart features instead of exploding to singlepart
    """
    if gpd_kwargs is None:
        gpd_kwargs = {}

    gdf = gdf.copy()

    gdf = helpers.sanitize_df_for_sql(gdf)

    epsg_code = int(str(gdf.crs).split(":")[1])

    # Get a list of all geometry types in the dataframe
    geom_types = list(gdf.geometry.geom_type.unique())

    if explode:
        # Explode multipart to singlepart and reset the index
        gdf = gdf.explode(index_parts=True)
        # NOTE(review): for the MultiIndex produced above, to_numpy() yields
        # one tuple per row - confirm the target database accepts that.
        gdf["explode"] = gdf.index.to_numpy()
        gdf = gdf.reset_index()

        # Re-detect the geometry types: exploding turns Multi* rows into
        # single-part rows, so the types found before exploding may no
        # longer describe the data that is actually written.
        geom_types = list(gdf.geometry.geom_type.unique())

    elif len(geom_types) > 1:
        # Mixed geometry types cannot share one typed geometry column
        print(f"Warning! This dataset has {geom_types=}")
        print("Run with explode=True")
        return None

    # Use the non-multi version of the geometry (the shortest type name)
    geom_type_to_use = min(geom_types, key=len).upper()

    # Replace the 'geom' column with 'geometry'
    if "geom" in gdf.columns:
        gdf["geometry"] = gdf["geom"]
        gdf.drop(labels="geom", axis=1, inplace=True)

    # Drop the 'gid' column
    if "gid" in gdf.columns:
        gdf.drop(labels="gid", axis=1, inplace=True)

    # Rename 'uid' to 'old_uid'
    if uid_col in gdf.columns:
        gdf[f"old_{uid_col}"] = gdf[uid_col]
        gdf.drop(labels=uid_col, axis=1, inplace=True)

    # Build a 'geom' column using geoalchemy2
    # and drop the source 'geometry' column
    gdf["geom"] = gdf["geometry"].apply(
        lambda x: WKTElement(x.wkt, srid=epsg_code))
    gdf.drop(labels="geometry", axis=1, inplace=True)

    # Ensure that the target schema exists
    schema, tbl = helpers.convert_full_tablename_to_parts(tablename)
    self.schema_add(schema)

    # Write geodataframe to SQL database; always release the engine's
    # connections, even if the write fails.
    engine = sqlalchemy.create_engine(self.uri)
    try:
        gdf.to_sql(
            tbl,
            engine,
            schema=schema,
            dtype={"geom": Geometry(geom_type_to_use, srid=epsg_code)},
            **gpd_kwargs,
        )
    finally:
        engine.dispose()

    self.table_add_uid_column(tablename)
    self.gis_table_add_spatial_index(tablename)
0
def explode_multipolygons(polygons: geopandas.GeoDataFrame):
    """Return the result of calling ``explode()`` on ``polygons``.

    No arguments are passed to ``explode``, so the library defaults apply.
    """
    exploded = polygons.explode()
    return exploded