コード例 #1
0
ファイル: sjoin.py プロジェクト: adriangb/geopandas
def _basic_checks(
    left_df, right_df, how, lsuffix, rsuffix, allowed_hows=("left", "right", "inner")
):
    """Validate the arguments of a spatial join before any work is done.

    The checks run in this order: both inputs must be GeoDataFrames, `how`
    must be one of `allowed_hows`, a CRS mismatch between the two frames
    triggers a warning, and neither frame may already contain a column named
    ``'index_' + lsuffix`` or ``'index_' + rsuffix``.

    Parameters
    ----------
    left_df : GeoDataFrame
    right_df : GeoDataFrame
    how : str, one of allowed_hows
        join type
    lsuffix : str
        left index suffix
    rsuffix : str
        right index suffix
    allowed_hows : sequence of str, default ("left", "right", "inner")
        accepted values for `how`

    Raises
    ------
    ValueError
        If either input is not a GeoDataFrame, if `how` is not in
        `allowed_hows`, or if one of the reserved index column names is
        already present in either frame.
    """
    if not isinstance(left_df, GeoDataFrame):
        left_type = type(left_df)
        raise ValueError(
            "'left_df' should be GeoDataFrame, got {}".format(left_type)
        )

    if not isinstance(right_df, GeoDataFrame):
        right_type = type(right_df)
        raise ValueError(
            "'right_df' should be GeoDataFrame, got {}".format(right_type)
        )

    if how not in allowed_hows:
        raise ValueError(
            '`how` was "{}" but is expected to be in {}'.format(how, allowed_hows)
        )

    crs_matches = _check_crs(left_df, right_df)
    if not crs_matches:
        # Only warn: a CRS mismatch does not abort the join.
        _crs_mismatch_warn(left_df, right_df, stacklevel=4)

    index_left = "index_{}".format(lsuffix)
    index_right = "index_{}".format(rsuffix)
    reserved_names = [index_left, index_right]

    # due to GH 352: these names are reserved for the reset index columns,
    # so they must not collide with existing columns in either frame.
    for frame in (left_df, right_df):
        if frame.columns.isin(reserved_names).any():
            raise ValueError(
                "'{0}' and '{1}' cannot be names in the frames being"
                " joined".format(index_left, index_right)
            )
コード例 #2
0
def sjoin(left_df,
          right_df,
          how="inner",
          op="intersects",
          lsuffix="left",
          rsuffix="right"):
    """Spatial join of two GeoDataFrames.

    Candidate pairs are found by querying a spatial index with the bounds of
    each geometry of the other frame; the binary predicate `op` is then
    evaluated on those candidates and the matching rows are merged.

    Parameters
    ----------
    left_df, right_df : GeoDataFrames
    how : string, default 'inner'
        The type of join:

        * 'left': use keys from left_df; retain only left_df geometry column
        * 'right': use keys from right_df; retain only right_df geometry column
        * 'inner': use intersection of keys from both dfs; retain only
          left_df geometry column
    op : string, default 'intersects'
        Binary predicate, one of {'intersects', 'contains', 'within'}.
        See http://shapely.readthedocs.io/en/latest/manual.html#binary-predicates.
    lsuffix : string, default 'left'
        Suffix to apply to overlapping column names (left GeoDataFrame).
        Also used to build the temporary column name ``'index_<lsuffix>'``.
    rsuffix : string, default 'right'
        Suffix to apply to overlapping column names (right GeoDataFrame).
        Also used to build the temporary column name ``'index_<rsuffix>'``.

    Returns
    -------
    joined
        The frame produced by the merges. For ``how='inner'`` and
        ``how='left'`` it is indexed by the original index of `left_df`;
        for ``how='right'`` by the original index of `right_df`.

    Raises
    ------
    ValueError
        If either input is not a GeoDataFrame, if `how` or `op` is not one
        of the allowed values, or if ``'index_<lsuffix>'`` or
        ``'index_<rsuffix>'`` is already a column name in either frame.

    """
    if not isinstance(left_df, GeoDataFrame):
        raise ValueError("'left_df' should be GeoDataFrame, got {}".format(
            type(left_df)))

    if not isinstance(right_df, GeoDataFrame):
        raise ValueError("'right_df' should be GeoDataFrame, got {}".format(
            type(right_df)))

    allowed_hows = ["left", "right", "inner"]
    if how not in allowed_hows:
        raise ValueError('`how` was "%s" but is expected to be in %s' %
                         (how, allowed_hows))

    allowed_ops = ["contains", "within", "intersects"]
    if op not in allowed_ops:
        raise ValueError('`op` was "%s" but is expected to be in %s' %
                         (op, allowed_ops))

    if not _check_crs(left_df, right_df):
        _crs_mismatch_warn(left_df, right_df, stacklevel=3)

    index_left = "index_%s" % lsuffix
    index_right = "index_%s" % rsuffix

    # due to GH 352
    if any(left_df.columns.isin([index_left, index_right])) or any(
            right_df.columns.isin([index_left, index_right])):
        raise ValueError("'{0}' and '{1}' cannot be names in the frames being"
                         " joined".format(index_left, index_right))

    # Attempt to re-use spatial indexes, otherwise generate the spatial index
    # for the longer dataframe. If we are joining to an empty dataframe,
    # don't bother generating the index.
    if right_df._sindex_generated or (not left_df._sindex_generated and
                                      right_df.shape[0] > left_df.shape[0]):
        tree_idx = right_df.sindex if len(left_df) > 0 else None
        tree_idx_right = True
    else:
        tree_idx = left_df.sindex if len(right_df) > 0 else None
        tree_idx_right = False

    # the rtree spatial index only allows limited (numeric) index types, but an
    # index in geopandas may be any arbitrary dtype. so reset both indices now
    # and store references to the original indices, to be reaffixed later.
    # GH 352
    left_df = left_df.copy(deep=True)
    try:
        left_index_name = left_df.index.name
        left_df.index = left_df.index.rename(index_left)
    except TypeError:
        # Renaming with a single string failed (presumably a MultiIndex):
        # give every level its own numbered temporary name instead.
        index_left = [
            "index_%s" % lsuffix + str(pos)
            for pos, _ in enumerate(left_df.index.names)
        ]
        left_index_name = left_df.index.names
        left_df.index = left_df.index.rename(index_left)
    left_df = left_df.reset_index()

    right_df = right_df.copy(deep=True)
    try:
        right_index_name = right_df.index.name
        right_df.index = right_df.index.rename(index_right)
    except TypeError:
        # Same fallback as for the left frame.
        index_right = [
            "index_%s" % rsuffix + str(pos)
            for pos, _ in enumerate(right_df.index.names)
        ]
        right_index_name = right_df.index.names
        right_df.index = right_df.index.rename(index_right)
    right_df = right_df.reset_index()

    if op == "within":
        # within implemented as the inverse of contains; swap names
        left_df, right_df = right_df, left_df
        tree_idx_right = not tree_idx_right

    r_idx = np.empty((0, 0))
    l_idx = np.empty((0, 0))
    # get rtree spatial index. If tree_idx does not exist, it is due to either a
    # failure to generate the index (e.g., if the column is empty), or the
    # other dataframe is empty so it wasn't necessary to generate it.
    if tree_idx_right and tree_idx:
        idxmatch = left_df.geometry.apply(lambda x: x.bounds).apply(
            lambda x: list(tree_idx.intersection(x)) if not x == () else [])
        idxmatch = idxmatch[idxmatch.apply(len) > 0]
        # indexes of overlapping boundaries
        if idxmatch.shape[0] > 0:
            r_idx = np.concatenate(idxmatch.values)
            # `Series.items()` replaces `iteritems()`, which pandas 2.0
            # removed; repeat each left position once per candidate match.
            l_idx = np.concatenate([[i] * len(v)
                                    for i, v in idxmatch.items()])
    elif not tree_idx_right and tree_idx:
        # tree_idx_df == 'left'
        idxmatch = right_df.geometry.apply(lambda x: x.bounds).apply(
            lambda x: list(tree_idx.intersection(x)) if not x == () else [])
        idxmatch = idxmatch[idxmatch.apply(len) > 0]
        if idxmatch.shape[0] > 0:
            # indexes of overlapping boundaries
            l_idx = np.concatenate(idxmatch.values)
            r_idx = np.concatenate([[i] * len(v)
                                    for i, v in idxmatch.items()])

    if len(r_idx) > 0 and len(l_idx) > 0:
        if compat.USE_PYGEOS:
            import pygeos

            # "within" maps to contains because the frames were swapped above.
            predicate_d = {
                "intersects": pygeos.intersects,
                "contains": pygeos.contains,
                "within": pygeos.contains,
            }
            check_predicates = predicate_d[op]
        else:
            # Vectorize predicate operations
            def find_intersects(a1, a2):
                return a1.intersects(a2)

            def find_contains(a1, a2):
                return a1.contains(a2)

            predicate_d = {
                "intersects": find_intersects,
                "contains": find_contains,
                "within": find_contains,
            }

            check_predicates = np.vectorize(predicate_d[op])

        if compat.USE_PYGEOS:
            res = check_predicates(
                left_df.geometry[l_idx].values.data,
                right_df[right_df.geometry.name][r_idx].values.data,
            )
        else:
            res = check_predicates(
                left_df.geometry.apply(prepared.prep)[l_idx],
                right_df[right_df.geometry.name][r_idx],
            )

        result = pd.DataFrame(np.column_stack([l_idx, r_idx, res]))

        result.columns = ["_key_left", "_key_right", "match_bool"]
        # Keep only the candidate pairs for which the predicate held.
        result = pd.DataFrame(result[result["match_bool"] == 1]).drop(
            "match_bool", axis=1)

    else:
        # when output from the join has no overlapping geometries
        result = pd.DataFrame(columns=["_key_left", "_key_right"], dtype=float)

    if op == "within":
        # within implemented as the inverse of contains; swap names
        left_df, right_df = right_df, left_df
        result = result.rename(columns={
            "_key_left": "_key_right",
            "_key_right": "_key_left"
        })

    if how == "inner":
        result = result.set_index("_key_left")
        joined = (left_df.merge(result, left_index=True,
                                right_index=True).merge(
                                    right_df.drop(right_df.geometry.name,
                                                  axis=1),
                                    left_on="_key_right",
                                    right_index=True,
                                    suffixes=("_%s" % lsuffix,
                                              "_%s" % rsuffix),
                                ).set_index(index_left).drop(["_key_right"],
                                                             axis=1))
        # Restore the original name(s) of the left index.
        if isinstance(index_left, list):
            joined.index.names = left_index_name
        else:
            joined.index.name = left_index_name

    elif how == "left":
        result = result.set_index("_key_left")
        joined = (left_df.merge(result,
                                left_index=True,
                                right_index=True,
                                how="left").merge(
                                    right_df.drop(right_df.geometry.name,
                                                  axis=1),
                                    how="left",
                                    left_on="_key_right",
                                    right_index=True,
                                    suffixes=("_%s" % lsuffix,
                                              "_%s" % rsuffix),
                                ).set_index(index_left).drop(["_key_right"],
                                                             axis=1))
        # Restore the original name(s) of the left index.
        if isinstance(index_left, list):
            joined.index.names = left_index_name
        else:
            joined.index.name = left_index_name

    else:  # how == 'right':
        joined = (left_df.drop(left_df.geometry.name, axis=1).merge(
            result.merge(right_df,
                         left_on="_key_right",
                         right_index=True,
                         how="right"),
            left_index=True,
            right_on="_key_left",
            how="right",
        ).set_index(index_right).drop(["_key_left", "_key_right"], axis=1))
        # Restore the original name(s) of the right index.
        if isinstance(index_right, list):
            joined.index.names = right_index_name
        else:
            joined.index.name = right_index_name

    return joined
コード例 #3
0
ファイル: overlay.py プロジェクト: xiongbillow/geopandas
def overlay(df1,
            df2,
            how="intersection",
            make_valid=True,
            keep_geom_type=True):
    """Compute a spatial overlay of two GeoDataFrames.

    Each input must hold a single geometry family: only (Multi)Polygons,
    only (Multi)Points, or a combination of (Multi)LineString and LinearRing
    shapes. All supported methods are effectively subsets of the union.

    Parameters
    ----------
    df1 : GeoDataFrame
    df2 : GeoDataFrame
    how : string
        Method of spatial overlay: 'intersection', 'union',
        'identity', 'symmetric_difference' or 'difference'.
    make_valid : bool
        Accepted but not consulted by this implementation: inputs made up
        entirely of (Multi)Polygons are always passed through ``buffer(0)``.
    keep_geom_type : bool
        If True, return only geometries of the same geometry type as df1 has,
        if False, return all resulting geometries. Not applied when
        ``how='difference'``, whose result is returned directly.

    Returns
    -------
    df : GeoDataFrame
        GeoDataFrame with new set of polygons and attributes
        resulting from the overlay

    Raises
    ------
    ValueError
        If `how` is not one of the supported methods.
    NotImplementedError
        If either input is a GeoSeries or mixes geometry families.
    TypeError
        If `keep_geom_type` is set and the first geometry of df1 is not a
        polygon, line or point type.

    """
    supported_hows = [
        "intersection",
        "union",
        "identity",
        "symmetric_difference",
        "difference",  # aka erase
    ]
    if how not in supported_hows:
        raise ValueError("`how` was '{0}' but is expected to be in {1}".format(
            how, supported_hows))

    if any(isinstance(frame, GeoSeries) for frame in (df1, df2)):
        raise NotImplementedError("overlay currently only implemented for "
                                  "GeoDataFrames")

    if not _check_crs(df1, df2):
        _crs_mismatch_warn(df1, df2, stacklevel=3)

    polys = ["Polygon", "MultiPolygon"]
    lines = ["LineString", "MultiLineString", "LinearRing"]
    points = ["Point", "MultiPoint"]
    families = (polys, lines, points)

    # Reject inputs whose geometries span more than one family.
    for position, frame in enumerate((df1, df2), start=1):
        kinds = frame.geom_type
        families_present = sum(kinds.isin(family).any() for family in families)
        if families_present > 1:
            raise NotImplementedError(
                "df{} contains mixed geometry types.".format(position))

    def _copy_and_clean(frame):
        # Work on a copy; all-polygon inputs get a zero-width buffer.
        frame = frame.copy()
        if frame.geom_type.isin(polys).all():
            frame[frame._geometry_column_name] = frame.geometry.buffer(0)
        return frame

    df1, df2 = _copy_and_clean(df1), _copy_and_clean(df2)

    # CRS was checked above, so suppress the array-level warning.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore",
                                message="CRS mismatch between the CRS")
        if how == "difference":
            return _overlay_difference(df1, df2)
        if how == "identity":
            # identity == the rows of the union that originate from df1
            combined = _overlay_union(df1, df2)
            result = combined[combined["__idx1"].notnull()].copy()
        else:
            operation = {
                "intersection": _overlay_intersection,
                "symmetric_difference": _overlay_symmetric_diff,
                "union": _overlay_union,
            }[how]
            result = operation(df1, df2)

    if keep_geom_type:
        reference_type = df1.geom_type.iloc[0]
        for family in families:
            if reference_type in family:
                result = result.loc[result.geom_type.isin(family)]
                break
        else:
            raise TypeError(
                "`keep_geom_type` does not support {}.".format(reference_type))

    result.reset_index(drop=True, inplace=True)
    result.drop(["__idx1", "__idx2"], axis=1, inplace=True)
    return result
コード例 #4
0
ファイル: test_array.py プロジェクト: visr/geopandas
def test_check_crs():
    """Exercise `_check_crs` with a copy of `T` whose CRS is set to 4326."""
    with_crs = T.copy()
    with_crs.crs = 4326
    # Compared against the untouched `T`, the CRS check must fail ...
    assert _check_crs(with_crs, T) is False
    # ... a frame always matches itself ...
    assert _check_crs(with_crs, with_crs) is True
    # ... and `allow_none=True` makes the comparison with `T` pass.
    assert _check_crs(with_crs, T, allow_none=True) is True
コード例 #5
0
ファイル: overlay.py プロジェクト: sourcery-ai-bot/geopandas
def overlay(df1,
            df2,
            how="intersection",
            keep_geom_type=None,
            make_valid=True):
    """Compute a spatial overlay of two GeoDataFrames.

    Each input must hold a single geometry family: only (Multi)Polygons,
    only (Multi)Points, or a combination of (Multi)LineString and LinearRing
    shapes. All supported methods are effectively subsets of the union.

    See the User Guide page :doc:`../../user_guide/set_operations` for details.

    Parameters
    ----------
    df1 : GeoDataFrame
    df2 : GeoDataFrame
    how : string
        Method of spatial overlay: 'intersection', 'union',
        'identity', 'symmetric_difference' or 'difference'.
    keep_geom_type : bool
        If True, return only geometries of the same geometry type as df1 has,
        if False, return all resulting geometries. Default is None,
        which will set keep_geom_type to True but warn upon dropping
        geometries. Not applied when ``how='difference'``, whose result is
        returned directly.
    make_valid : bool, default True
        If True, any invalid input geometries are corrected with a call to `buffer(0)`,
        if False, a `ValueError` is raised if any input geometries are invalid.
        Only inputs made up entirely of (Multi)Polygons are checked.

    Returns
    -------
    df : GeoDataFrame
        GeoDataFrame with new set of polygons and attributes
        resulting from the overlay

    Raises
    ------
    ValueError
        If `how` is not one of the supported methods, or if
        ``make_valid=False`` and an all-polygon input has invalid geometries.
    NotImplementedError
        If either input is a GeoSeries or mixes geometry families.
    TypeError
        If `keep_geom_type` is in effect and the first geometry of df1 is
        not a polygon, line or point type.

    Examples
    --------
    >>> from shapely.geometry import Polygon
    >>> polys1 = geopandas.GeoSeries([Polygon([(0,0), (2,0), (2,2), (0,2)]),
    ...                               Polygon([(2,2), (4,2), (4,4), (2,4)])])
    >>> polys2 = geopandas.GeoSeries([Polygon([(1,1), (3,1), (3,3), (1,3)]),
    ...                               Polygon([(3,3), (5,3), (5,5), (3,5)])])
    >>> df1 = geopandas.GeoDataFrame({'geometry': polys1, 'df1_data':[1,2]})
    >>> df2 = geopandas.GeoDataFrame({'geometry': polys2, 'df2_data':[1,2]})

    >>> geopandas.overlay(df1, df2, how='union')
       df1_data  df2_data                                           geometry
    0       1.0       1.0  POLYGON ((1.00000 2.00000, 2.00000 2.00000, 2....
    1       2.0       1.0  POLYGON ((3.00000 2.00000, 2.00000 2.00000, 2....
    2       2.0       2.0  POLYGON ((3.00000 4.00000, 4.00000 4.00000, 4....
    3       1.0       NaN  POLYGON ((2.00000 1.00000, 2.00000 0.00000, 0....
    4       2.0       NaN  MULTIPOLYGON (((3.00000 3.00000, 4.00000 3.000...
    5       NaN       1.0  MULTIPOLYGON (((2.00000 2.00000, 3.00000 2.000...
    6       NaN       2.0  POLYGON ((3.00000 4.00000, 3.00000 5.00000, 5....

    >>> geopandas.overlay(df1, df2, how='intersection')
       df1_data  df2_data                                           geometry
    0         1         1  POLYGON ((1.00000 2.00000, 2.00000 2.00000, 2....
    1         2         1  POLYGON ((3.00000 2.00000, 2.00000 2.00000, 2....
    2         2         2  POLYGON ((3.00000 4.00000, 4.00000 4.00000, 4....

    >>> geopandas.overlay(df1, df2, how='symmetric_difference')
       df1_data  df2_data                                           geometry
    0       1.0       NaN  POLYGON ((2.00000 1.00000, 2.00000 0.00000, 0....
    1       2.0       NaN  MULTIPOLYGON (((3.00000 3.00000, 4.00000 3.000...
    2       NaN       1.0  MULTIPOLYGON (((2.00000 2.00000, 3.00000 2.000...
    3       NaN       2.0  POLYGON ((3.00000 4.00000, 3.00000 5.00000, 5....

    >>> geopandas.overlay(df1, df2, how='difference')
                                                geometry  df1_data
    0  POLYGON ((2.00000 1.00000, 2.00000 0.00000, 0....         1
    1  MULTIPOLYGON (((2.00000 3.00000, 2.00000 4.000...         2

    >>> geopandas.overlay(df1, df2, how='identity')
       df1_data  df2_data                                           geometry
    0       1.0       1.0  POLYGON ((1.00000 2.00000, 2.00000 2.00000, 2....
    1       2.0       1.0  POLYGON ((3.00000 2.00000, 2.00000 2.00000, 2....
    2       2.0       2.0  POLYGON ((3.00000 4.00000, 4.00000 4.00000, 4....
    3       1.0       NaN  POLYGON ((2.00000 1.00000, 2.00000 0.00000, 0....
    4       2.0       NaN  MULTIPOLYGON (((3.00000 3.00000, 4.00000 3.000...

    See also
    --------
    sjoin : spatial join

    Notes
    -----
    Every operation in GeoPandas is planar, i.e. the potential third
    dimension is not taken into account.
    """
    supported_hows = [
        "intersection",
        "union",
        "identity",
        "symmetric_difference",
        "difference",  # aka erase
    ]
    if how not in supported_hows:
        raise ValueError("`how` was '{0}' but is expected to be in {1}".format(
            how, supported_hows))

    if any(isinstance(frame, GeoSeries) for frame in (df1, df2)):
        raise NotImplementedError("overlay currently only implemented for "
                                  "GeoDataFrames")

    if not _check_crs(df1, df2):
        _crs_mismatch_warn(df1, df2, stacklevel=3)

    # An unspecified keep_geom_type behaves like True, but additionally
    # warns when geometries end up being dropped.
    warn_on_drop = keep_geom_type is None
    if warn_on_drop:
        keep_geom_type = True

    polys = ["Polygon", "MultiPolygon"]
    lines = ["LineString", "MultiLineString", "LinearRing"]
    points = ["Point", "MultiPoint"]
    families = (polys, lines, points)

    # Reject inputs whose geometries span more than one family.
    for position, frame in enumerate((df1, df2), start=1):
        kinds = frame.geom_type
        families_present = sum(kinds.isin(family).any() for family in families)
        if families_present > 1:
            raise NotImplementedError(
                "df{} contains mixed geometry types.".format(position))

    def _validated(frame):
        # Return a copy; all-polygon inputs are repaired or rejected
        # depending on `make_valid`.
        frame = frame.copy()
        if not frame.geom_type.isin(polys).all():
            return frame
        invalid = ~frame.geometry.is_valid
        geom_col = frame._geometry_column_name
        if make_valid:
            frame.loc[invalid, geom_col] = frame.loc[invalid, geom_col].buffer(0)
            return frame
        if invalid.any():
            raise ValueError(
                "You have passed make_valid=False along with "
                f"{invalid.sum()} invalid input geometries. "
                "Use make_valid=True or make sure that all geometries "
                "are valid before using overlay.")
        return frame

    df1 = _validated(df1)
    df2 = _validated(df2)

    # CRS was checked above, so suppress the array-level warning.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore",
                                message="CRS mismatch between the CRS")
        if how == "difference":
            return _overlay_difference(df1, df2)
        if how == "identity":
            # identity == the rows of the union that originate from df1
            combined = _overlay_union(df1, df2)
            result = combined[combined["__idx1"].notnull()].copy()
        else:
            operation = {
                "intersection": _overlay_intersection,
                "symmetric_difference": _overlay_symmetric_diff,
                "union": _overlay_union,
            }[how]
            result = operation(df1, df2)

    if keep_geom_type:
        column_order = result.keys()
        rows_before = result.shape[0]

        # Split multi-part results into single parts; `level_0` then holds
        # the position of the row each part came from.
        parts = result.reset_index(drop=True).explode()
        parts = parts.reset_index(level=0)

        reference_type = df1.geom_type.iloc[0]
        for family in families:
            if reference_type in family:
                parts = parts.loc[parts.geom_type.isin(family)]
                break
        else:
            raise TypeError(
                "`keep_geom_type` does not support {}.".format(reference_type))

        # level_0 created with above reset_index operation
        # and represents the original geometry collections
        result = parts.dissolve(by="level_0")[column_order]

        if warn_on_drop and result.shape[0] != rows_before:
            dropped = rows_before - result.shape[0]
            warnings.warn(
                "`keep_geom_type=True` in overlay resulted in {} dropped "
                "geometries of different geometry types than df1 has. "
                "Set `keep_geom_type=False` to retain all "
                "geometries".format(dropped),
                UserWarning,
                stacklevel=2,
            )

    result.reset_index(drop=True, inplace=True)
    result.drop(["__idx1", "__idx2"], axis=1, inplace=True)
    return result
コード例 #6
0
def clip(gdf, mask, keep_geom_type=False):
    """Clip points, lines, or polygon geometries to the mask extent.

    Both layers must be in the same Coordinate Reference System (CRS).
    The `gdf` will be clipped to the full extent of the clip object.

    If there are multiple polygons in mask, data from `gdf` will be
    clipped to the total boundary of all polygons in mask.

    Parameters
    ----------
    gdf : GeoDataFrame or GeoSeries
        Vector layer (point, line, polygon) to be clipped to mask.
    mask : GeoDataFrame, GeoSeries, (Multi)Polygon
        Polygon vector layer used to clip `gdf`.
        The mask's geometry is dissolved into one geometric feature
        and intersected with `gdf`.
    keep_geom_type : boolean, default False
        If True, return only geometries of original type in case of intersection
        resulting in multiple geometry types or GeometryCollections.
        If False, return all resulting geometries (potentially mixed-types).
        A warning is issued, and nothing is filtered, when `gdf` contains
        GeometryCollections or mixes geometry types.

    Returns
    -------
    GeoDataFrame or GeoSeries
         Vector data (points, lines, polygons) from `gdf` clipped to
         polygon boundary from mask. An empty slice of `gdf` is returned
         when the bounding boxes do not overlap or nothing remains.

    Raises
    ------
    TypeError
        If `gdf` is not a GeoDataFrame or GeoSeries, or if `mask` is not a
        GeoDataFrame, GeoSeries, Polygon or MultiPolygon.

    Examples
    --------
    Clip points (global cities) with a polygon (the South American continent):

    >>> world = geopandas.read_file(
    ...     geopandas.datasets.get_path('naturalearth_lowres'))
    >>> south_america = world[world['continent'] == "South America"]
    >>> capitals = geopandas.read_file(
    ...     geopandas.datasets.get_path('naturalearth_cities'))
    >>> capitals.shape
    (202, 2)

    >>> sa_capitals = geopandas.clip(capitals, south_america)
    >>> sa_capitals.shape
    (12, 2)
    """
    if not isinstance(gdf, (GeoDataFrame, GeoSeries)):
        raise TypeError(
            "'gdf' should be GeoDataFrame or GeoSeries, got {}".format(
                type(gdf)))

    if not isinstance(mask, (GeoDataFrame, GeoSeries, Polygon, MultiPolygon)):
        # The trailing space after "or" keeps the two literals from fusing
        # into "or(Multi)Polygon" in the rendered message.
        raise TypeError("'mask' should be GeoDataFrame, GeoSeries or "
                        "(Multi)Polygon, got {}".format(type(mask)))

    # A bare shapely (Multi)Polygon carries no CRS and exposes `bounds`
    # instead of `total_bounds`, so the two kinds of mask are handled apart.
    mask_is_geo_container = isinstance(mask, (GeoDataFrame, GeoSeries))

    if mask_is_geo_container and not _check_crs(gdf, mask):
        _crs_mismatch_warn(gdf, mask, stacklevel=3)

    box_mask = mask.total_bounds if mask_is_geo_container else mask.bounds
    box_gdf = gdf.total_bounds
    # Short-circuit when the two bounding boxes do not overlap at all.
    if not (((box_mask[0] <= box_gdf[2]) and (box_gdf[0] <= box_mask[2])) and
            ((box_mask[1] <= box_gdf[3]) and (box_gdf[1] <= box_mask[3]))):
        return gdf.iloc[:0]

    poly = mask.geometry.unary_union if mask_is_geo_container else mask

    geom_types = gdf.geometry.type
    poly_idx = np.asarray((geom_types == "Polygon")
                          | (geom_types == "MultiPolygon"))
    line_idx = np.asarray((geom_types == "LineString")
                          | (geom_types == "LinearRing")
                          | (geom_types == "MultiLineString"))
    point_idx = np.asarray((geom_types == "Point")
                           | (geom_types == "MultiPoint"))
    geomcoll_idx = np.asarray((geom_types == "GeometryCollection"))

    # Clip each geometry family separately; a family that is absent from
    # `gdf` contributes None, which `pd.concat` skips.
    point_gdf = _clip_points(gdf[point_idx], poly) if point_idx.any() else None
    poly_gdf = _clip_line_poly(gdf[poly_idx], poly) if poly_idx.any() else None
    line_gdf = _clip_line_poly(gdf[line_idx], poly) if line_idx.any() else None
    geomcoll_gdf = (_clip_line_poly(gdf[geomcoll_idx], poly)
                    if geomcoll_idx.any() else None)

    # Original row positions, used at the end to restore the input order.
    order = pd.Series(range(len(gdf)), index=gdf.index)
    concat = pd.concat([point_gdf, line_gdf, poly_gdf, geomcoll_gdf])

    if keep_geom_type:
        geomcoll_concat = (concat.geom_type == "GeometryCollection").any()
        geomcoll_orig = geomcoll_idx.any()

        # True when clipping itself produced GeometryCollections.
        new_collection = geomcoll_concat and not geomcoll_orig

        if geomcoll_orig:
            warnings.warn("keep_geom_type can not be called on a "
                          "GeoDataFrame with GeometryCollection.")
        else:
            polys = ["Polygon", "MultiPolygon"]
            lines = ["LineString", "MultiLineString", "LinearRing"]
            points = ["Point", "MultiPoint"]

            # Check that the gdf for multiple geom types (points, lines and/or polys)
            orig_types_total = sum([
                gdf.geom_type.isin(polys).any(),
                gdf.geom_type.isin(lines).any(),
                gdf.geom_type.isin(points).any(),
            ])

            # Check how many geometry types are in the clipped GeoDataFrame
            clip_types_total = sum([
                concat.geom_type.isin(polys).any(),
                concat.geom_type.isin(lines).any(),
                concat.geom_type.isin(points).any(),
            ])

            # Check there aren't any new geom types in the clipped GeoDataFrame
            more_types = orig_types_total < clip_types_total

            if orig_types_total > 1:
                warnings.warn(
                    "keep_geom_type can not be called on a mixed type GeoDataFrame."
                )
            elif new_collection or more_types:
                orig_type = gdf.geom_type.iloc[0]
                if new_collection:
                    concat = concat.explode()
                # Only polygon and line inputs are filtered here; there is
                # no branch for point inputs, which are left as they are.
                if orig_type in polys:
                    concat = concat.loc[concat.geom_type.isin(polys)]
                elif orig_type in lines:
                    concat = concat.loc[concat.geom_type.isin(lines)]

    # Return empty GeoDataFrame or GeoSeries if no shapes remain
    if len(concat) == 0:
        return gdf.iloc[:0]

    # Preserve the original order of the input
    if isinstance(concat, GeoDataFrame):
        concat["_order"] = order
        return concat.sort_values(by="_order").drop(columns="_order")
    else:
        concat = GeoDataFrame(geometry=concat)
        concat["_order"] = order
        return concat.sort_values(by="_order").geometry
コード例 #7
0
ファイル: sjoin.py プロジェクト: sourabhmadur/Dalla_Neta
def sjoin(
    left_df, right_df, how="inner", op="intersects", lsuffix="left", rsuffix="right"
):
    """Join two GeoDataFrames on a spatial relationship between geometries.

    Candidate row pairs are found by bulk-querying a spatial index with the
    requested predicate; the attribute tables are then merged on those pairs.

    Parameters
    ----------
    left_df, right_df : GeoDataFrames
    how : string, default 'inner'
        The type of join:

        * 'left': use keys from left_df; retain only left_df geometry column
        * 'right': use keys from right_df; retain only right_df geometry column
        * 'inner': use intersection of keys from both dfs; retain only
          left_df geometry column
    op : string, default 'intersects'
        Binary predicate, one of {'intersects', 'contains', 'within'}.
        See http://shapely.readthedocs.io/en/latest/manual.html#binary-predicates.
    lsuffix : string, default 'left'
        Suffix to apply to overlapping column names (left GeoDataFrame).
    rsuffix : string, default 'right'
        Suffix to apply to overlapping column names (right GeoDataFrame).

    Returns
    -------
    joined
        The merged frame, indexed by the original left index ('inner' and
        'left') or the original right index ('right'). Its exact class is
        whatever the underlying ``merge`` calls return.

    Raises
    ------
    ValueError
        If either input is not a GeoDataFrame, if `how` or `op` is not one
        of the supported values, or if a column named ``index_<lsuffix>`` /
        ``index_<rsuffix>`` already exists in either frame.
    """
    if not isinstance(left_df, GeoDataFrame):
        message = "'left_df' should be GeoDataFrame, got {}"
        raise ValueError(message.format(type(left_df)))

    if not isinstance(right_df, GeoDataFrame):
        message = "'right_df' should be GeoDataFrame, got {}"
        raise ValueError(message.format(type(right_df)))

    allowed_hows = ["left", "right", "inner"]
    if how not in allowed_hows:
        raise ValueError('`how` was "%s" but is expected to be in %s' %
                         (how, allowed_hows))

    allowed_ops = ["contains", "within", "intersects"]
    if op not in allowed_ops:
        raise ValueError('`op` was "%s" but is expected to be in %s' %
                         (op, allowed_ops))

    # Called directly from this frame on purpose: stacklevel=3 is relative
    # to the call depth of this function.
    if not _check_crs(left_df, right_df):
        _crs_mismatch_warn(left_df, right_df, stacklevel=3)

    index_left = "index_%s" % lsuffix
    index_right = "index_%s" % rsuffix

    # due to GH 352: these names are used below as temporary columns, so
    # neither frame may already contain them.
    reserved = [index_left, index_right]
    for frame in (left_df, right_df):
        if any(frame.columns.isin(reserved)):
            raise ValueError("'{0}' and '{1}' cannot be names in the frames being"
                             " joined".format(index_left, index_right))

    # "within" is evaluated as the inverse of "contains" (a faster predicate,
    # see https://github.com/geopandas/geopandas/pull/1421): the roles of the
    # two frames are swapped for the query and swapped back afterwards.
    # All other predicates are symmetric and are passed through unchanged.
    swapped = op == "within"
    with warnings.catch_warnings():
        # We don't need to show our own warning here
        # TODO remove this once the deprecation has been enforced
        warnings.filterwarnings("ignore", "Generated spatial index is empty",
                                FutureWarning)
        if swapped:
            tree_df, probe_df, predicate = left_df, right_df, "contains"
        else:
            tree_df, probe_df, predicate = right_df, left_df, op
        sindex = tree_df.sindex
        input_geoms = probe_df.geometry

    if not sindex:
        # when sindex is empty / has no valid geometries
        result = pd.DataFrame(columns=["_key_left", "_key_right"], dtype=float)
    else:
        probe_pos, tree_pos = sindex.query_bulk(input_geoms,
                                                predicate=predicate,
                                                sort=False)
        result = pd.DataFrame({"_key_left": probe_pos, "_key_right": tree_pos})

    if swapped:
        # flip the key columns back to their left/right meaning
        result = result.rename(columns={
            "_key_left": "_key_right",
            "_key_right": "_key_left"
        })

    def _stash_index(frame, flat_label, suffix):
        """Copy `frame` and move its index into ordinary column(s).

        Returns the re-indexed copy, the temporary column label (a string,
        or a list of strings when renaming with a single label fails), and
        the original index name(s) so they can be restored later.
        """
        frame = frame.copy(deep=True)
        label = flat_label
        try:
            original = frame.index.name
            frame.index = frame.index.rename(flat_label)
        except TypeError:
            # a single label was rejected; use one numbered label per level
            label = [
                "index_%s" % suffix + str(pos)
                for pos in range(len(frame.index.names))
            ]
            original = frame.index.names
            frame.index = frame.index.rename(label)
        return frame.reset_index(), label, original

    # the spatial index only allows limited (numeric) index types, but an
    # index in geopandas may be any arbitrary dtype. so reset both indices now
    # and store references to the original indices, to be reaffixed later.
    # GH 352
    left_df, index_left, left_index_name = _stash_index(
        left_df, index_left, lsuffix)
    right_df, index_right, right_index_name = _stash_index(
        right_df, index_right, rsuffix)

    # perform join on the dataframes
    if how in ("inner", "left"):
        # Both variants follow the same recipe and differ only in the merge
        # type, so `how` is forwarded as-is ("inner" is also merge's default).
        result = result.set_index("_key_left")
        paired = left_df.merge(result,
                               left_index=True,
                               right_index=True,
                               how=how)
        right_attrs = right_df.drop(right_df.geometry.name, axis=1)
        merged = paired.merge(
            right_attrs,
            how=how,
            left_on="_key_right",
            right_index=True,
            suffixes=("_%s" % lsuffix, "_%s" % rsuffix),
        )
        joined = merged.set_index(index_left).drop(["_key_right"], axis=1)
        index_cols, original_names = index_left, left_index_name
    else:  # how == 'right':
        left_attrs = left_df.drop(left_df.geometry.name, axis=1)
        right_matches = result.merge(right_df,
                                     left_on="_key_right",
                                     right_index=True,
                                     how="right")
        merged = left_attrs.merge(
            right_matches,
            left_index=True,
            right_on="_key_left",
            how="right",
        )
        joined = merged.set_index(index_right).drop(
            ["_key_left", "_key_right"], axis=1)
        index_cols, original_names = index_right, right_index_name

    # give the index its original name(s) back
    if isinstance(index_cols, list):
        joined.index.names = original_names
    else:
        joined.index.name = original_names

    return joined
コード例 #8
0
ファイル: overlay.py プロジェクト: jdmcbr/geopandas
def overlay(df1, df2, how="intersection", keep_geom_type=None, make_valid=True):
    """Perform spatial overlay between two GeoDataFrames.

    Currently only supports data GeoDataFrames with uniform geometry types,
    i.e. containing only (Multi)Polygons, or only (Multi)Points, or a
    combination of (Multi)LineString and LinearRing shapes.
    Implements several methods that are all effectively subsets of the union.

    See the User Guide page :doc:`../../user_guide/set_operations` for details.

    Parameters
    ----------
    df1 : GeoDataFrame
    df2 : GeoDataFrame
    how : string
        Method of spatial overlay: 'intersection', 'union',
        'identity', 'symmetric_difference' or 'difference'.
    keep_geom_type : bool
        If True, return only geometries of the same geometry type as df1 has,
        if False, return all resulting geometries. Default is None,
        which will set keep_geom_type to True but warn upon dropping
        geometries.
    make_valid : bool, default True
        If True, any invalid input geometries are corrected with a call to `buffer(0)`,
        if False, a `ValueError` is raised if any input geometries are invalid.

    Returns
    -------
    df : GeoDataFrame
        GeoDataFrame with new set of polygons and attributes
        resulting from the overlay

    Raises
    ------
    ValueError
        If `how` is not one of the supported methods, or if
        ``make_valid=False`` and a polygon input contains invalid geometries.
    NotImplementedError
        If either input is a GeoSeries, or if an input mixes polygon, line
        and point geometry types.
    TypeError
        If `keep_geom_type` is in effect and the geometry type of the first
        row of `df1` is not a polygon, line or point type.

    Warns
    -----
    UserWarning
        If `keep_geom_type` was left as None and geometries of a different
        type than `df1` were dropped from the result.

    Examples
    --------
    >>> from shapely.geometry import Polygon
    >>> polys1 = geopandas.GeoSeries([Polygon([(0,0), (2,0), (2,2), (0,2)]),
    ...                               Polygon([(2,2), (4,2), (4,4), (2,4)])])
    >>> polys2 = geopandas.GeoSeries([Polygon([(1,1), (3,1), (3,3), (1,3)]),
    ...                               Polygon([(3,3), (5,3), (5,5), (3,5)])])
    >>> df1 = geopandas.GeoDataFrame({'geometry': polys1, 'df1_data':[1,2]})
    >>> df2 = geopandas.GeoDataFrame({'geometry': polys2, 'df2_data':[1,2]})

    >>> geopandas.overlay(df1, df2, how='union')
       df1_data  df2_data                                           geometry
    0       1.0       1.0  POLYGON ((2.00000 2.00000, 2.00000 1.00000, 1....
    1       2.0       1.0  POLYGON ((2.00000 2.00000, 2.00000 3.00000, 3....
    2       2.0       2.0  POLYGON ((4.00000 4.00000, 4.00000 3.00000, 3....
    3       1.0       NaN  POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....
    4       2.0       NaN  MULTIPOLYGON (((3.00000 3.00000, 4.00000 3.000...
    5       NaN       1.0  MULTIPOLYGON (((2.00000 2.00000, 3.00000 2.000...
    6       NaN       2.0  POLYGON ((3.00000 5.00000, 5.00000 5.00000, 5....

    >>> geopandas.overlay(df1, df2, how='intersection')
       df1_data  df2_data                                           geometry
    0         1         1  POLYGON ((2.00000 2.00000, 2.00000 1.00000, 1....
    1         2         1  POLYGON ((2.00000 2.00000, 2.00000 3.00000, 3....
    2         2         2  POLYGON ((4.00000 4.00000, 4.00000 3.00000, 3....

    >>> geopandas.overlay(df1, df2, how='symmetric_difference')
       df1_data  df2_data                                           geometry
    0       1.0       NaN  POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....
    1       2.0       NaN  MULTIPOLYGON (((3.00000 3.00000, 4.00000 3.000...
    2       NaN       1.0  MULTIPOLYGON (((2.00000 2.00000, 3.00000 2.000...
    3       NaN       2.0  POLYGON ((3.00000 5.00000, 5.00000 5.00000, 5....

    >>> geopandas.overlay(df1, df2, how='difference')
                                                geometry  df1_data
    0  POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....         1
    1  MULTIPOLYGON (((3.00000 3.00000, 4.00000 3.000...         2

    >>> geopandas.overlay(df1, df2, how='identity')
       df1_data  df2_data                                           geometry
    0       1.0       1.0  POLYGON ((2.00000 2.00000, 2.00000 1.00000, 1....
    1       2.0       1.0  POLYGON ((2.00000 2.00000, 2.00000 3.00000, 3....
    2       2.0       2.0  POLYGON ((4.00000 4.00000, 4.00000 3.00000, 3....
    3       1.0       NaN  POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....
    4       2.0       NaN  MULTIPOLYGON (((3.00000 3.00000, 4.00000 3.000...

    See Also
    --------
    sjoin : spatial join
    GeoDataFrame.overlay : equivalent method

    Notes
    -----
    Every operation in GeoPandas is planar, i.e. the potential third
    dimension is not taken into account.
    """
    # Allowed operations
    allowed_hows = [
        "intersection",
        "union",
        "identity",
        "symmetric_difference",
        "difference",  # aka erase
    ]
    # Error Messages
    if how not in allowed_hows:
        raise ValueError(
            "`how` was '{0}' but is expected to be in {1}".format(how, allowed_hows)
        )

    if isinstance(df1, GeoSeries) or isinstance(df2, GeoSeries):
        raise NotImplementedError(
            "overlay currently only implemented for " "GeoDataFrames"
        )

    if not _check_crs(df1, df2):
        _crs_mismatch_warn(df1, df2, stacklevel=3)

    # keep_geom_type=None means "filter like True, but warn when something
    # is actually dropped"; an explicit True/False never warns.
    if keep_geom_type is None:
        keep_geom_type = True
        keep_geom_type_warning = True
    else:
        keep_geom_type_warning = False

    polys = ["Polygon", "MultiPolygon"]
    lines = ["LineString", "MultiLineString", "LinearRing"]
    points = ["Point", "MultiPoint"]
    for i, df in enumerate([df1, df2]):
        poly_check = df.geom_type.isin(polys).any()
        lines_check = df.geom_type.isin(lines).any()
        points_check = df.geom_type.isin(points).any()
        if sum([poly_check, lines_check, points_check]) > 1:
            raise NotImplementedError(
                "df{} contains mixed geometry types.".format(i + 1)
            )

    if how == "intersection":
        # Shortcut: when the total bounding boxes of the two frames do not
        # overlap there can be no intersection, so return an empty frame
        # with the expected columns (geometry column last).
        box_gdf1 = df1.total_bounds
        box_gdf2 = df2.total_bounds

        if not (
            ((box_gdf1[0] <= box_gdf2[2]) and (box_gdf2[0] <= box_gdf1[2]))
            and ((box_gdf1[1] <= box_gdf2[3]) and (box_gdf2[1] <= box_gdf1[3]))
        ):
            result = df1.iloc[:0].merge(
                df2.iloc[:0].drop(df2.geometry.name, axis=1),
                left_index=True,
                right_index=True,
                suffixes=("_1", "_2"),
            )
            return result[
                result.columns.drop(df1.geometry.name).tolist() + [df1.geometry.name]
            ]

    # Computations
    def _make_valid(df):
        """Return a copy of `df`; for all-polygon frames repair or reject
        invalid geometries depending on `make_valid`."""
        df = df.copy()
        if df.geom_type.isin(polys).all():
            mask = ~df.geometry.is_valid
            col = df._geometry_column_name
            if make_valid:
                df.loc[mask, col] = df.loc[mask, col].buffer(0)
            elif mask.any():
                raise ValueError(
                    "You have passed make_valid=False along with "
                    f"{mask.sum()} invalid input geometries. "
                    "Use make_valid=True or make sure that all geometries "
                    "are valid before using overlay."
                )
        return df

    df1 = _make_valid(df1)
    df2 = _make_valid(df2)

    with warnings.catch_warnings():  # CRS checked above, suppress array-level warning
        warnings.filterwarnings("ignore", message="CRS mismatch between the CRS")
        if how == "difference":
            result = _overlay_difference(df1, df2)
        elif how == "intersection":
            result = _overlay_intersection(df1, df2)
        elif how == "symmetric_difference":
            result = _overlay_symmetric_diff(df1, df2)
        elif how == "union":
            result = _overlay_union(df1, df2)
        elif how == "identity":
            dfunion = _overlay_union(df1, df2)
            result = dfunion[dfunion["__idx1"].notnull()].copy()

        if how in ["intersection", "symmetric_difference", "union", "identity"]:
            result.drop(["__idx1", "__idx2"], axis=1, inplace=True)

    if keep_geom_type:
        # The type family to keep is decided by the first geometry of df1.
        geom_type = df1.geom_type.iloc[0]
        for family in (polys, lines, points):
            if geom_type in family:
                keep_types = family
                break
        else:
            raise TypeError("`keep_geom_type` does not support {}.".format(geom_type))

        # First we filter the geometry types inside GeometryCollections objects
        # (e.g. GeometryCollection([polygon, point]) -> polygon)
        # we do this separately on only the relevant rows, as this is an expensive
        # operation (an expensive no-op for geometry types other than collections)
        is_collection = result.geom_type == "GeometryCollection"
        if is_collection.any():
            geom_col = result._geometry_column_name
            collections = result[[geom_col]][is_collection]

            exploded = collections.reset_index(drop=True).explode(index_parts=True)
            exploded = exploded.reset_index(level=0)

            # Blank out every part that is not of the wanted family and count
            # exactly those parts. (The count must be the number of removed
            # parts, not the number of parts that remain.)
            unwanted = ~exploded.geom_type.isin(keep_types)
            exploded.loc[unwanted, geom_col] = None
            num_dropped_collection = int(unwanted.sum())

            # level_0 created with above reset_index operation
            # and represents the original geometry collections
            # TODO avoiding dissolve to call unary_union in this case could further
            # improve performance (we only need to collect geometries in their
            # respective Multi version)
            dissolved = exploded.dissolve(by="level_0")
            result.loc[is_collection, geom_col] = dissolved[geom_col].values
        else:
            num_dropped_collection = 0

        # Now we filter all geometries (in theory we don't need to do this
        # again for the rows handled above for GeometryCollections, but filtering
        # them out is probably more expensive as simply including them when this
        # is typically about only a few rows)
        orig_num_geoms = result.shape[0]
        result = result.loc[result.geom_type.isin(keep_types)]
        num_dropped = orig_num_geoms - result.shape[0]

        if (num_dropped > 0 or num_dropped_collection > 0) and keep_geom_type_warning:
            warnings.warn(
                "`keep_geom_type=True` in overlay resulted in {} dropped "
                "geometries of different geometry types than df1 has. "
                "Set `keep_geom_type=False` to retain all "
                "geometries".format(num_dropped + num_dropped_collection),
                UserWarning,
                stacklevel=2,
            )

    result.reset_index(drop=True, inplace=True)
    return result