Ejemplo n.º 1
0
 def test_sjoin_nearest_left(
     self,
     geo_left,
     geo_right,
     expected_left: Sequence[int],
     expected_right: Sequence[int],
     distances: Sequence[float],
     how,
 ):
     """Check ``sjoin_nearest`` with and without a distance column.

     The expected frame is built from the rows of the left frame selected
     positionally by ``expected_left``, with ``expected_right`` stored as
     ``index_right`` and (for the second check) ``distances`` stored as
     ``distance_col``.
     """
     left_gdf = geopandas.GeoDataFrame({"geometry": geo_left})
     right_gdf = geopandas.GeoDataFrame({"geometry": geo_right})

     # Inner and left joins give a different row order, so the comparison
     # is made order-insensitive for the inner join only.
     ignore_order = how == "inner"

     expected = left_gdf.iloc[expected_left].copy()
     expected["index_right"] = expected_right

     # First pass: no distance column requested.
     result = sjoin_nearest(left_gdf, right_gdf, how=how)
     assert_geodataframe_equal(expected, result, check_like=ignore_order)

     # Second pass: the join must also report the distances.
     expected["distance_col"] = np.array(distances, dtype=float)
     result = sjoin_nearest(
         left_gdf, right_gdf, how=how, distance_col="distance_col"
     )
     assert_geodataframe_equal(expected, result, check_like=ignore_order)
Ejemplo n.º 2
0
    def test_sjoin_nearest_inner(self):
        """Check that inner and left nearest joins agree on the sample data.

        Without ``max_distance`` both joins must return the same rows; with
        ``max_distance`` the inner join drops the rows beyond the limit while
        the left join keeps them with missing values.
        """
        cities = read_file(geopandas.datasets.get_path("naturalearth_cities"))
        countries = read_file(geopandas.datasets.get_path("naturalearth_lowres"))
        countries = countries[["geometry", "name"]].rename(columns={"name": "country"})
        n_cities = cities.shape[0]

        # Default `how`: one output row per city, identical to an explicit
        # inner join and (ignoring order) to a left join.
        default_join = sjoin_nearest(cities, countries, distance_col="dist")
        assert default_join.shape[0] == n_cities
        inner_join = sjoin_nearest(cities, countries, distance_col="dist", how="inner")
        assert_geodataframe_equal(inner_join, default_join)
        left_join = sjoin_nearest(cities, countries, distance_col="dist", how="left")
        assert_geodataframe_equal(left_join, default_join, check_like=True)

        # With max_distance: rows above the limit are dropped by the inner join.
        capped_inner = sjoin_nearest(
            cities, countries, distance_col="dist", max_distance=1
        )
        within_limit = default_join[default_join["dist"] < 1]
        assert_geodataframe_equal(capped_inner, within_limit, check_like=True)

        # The left join keeps every city; after dropping the unmatched rows
        # and restoring the integer dtype it must equal the inner result.
        capped_left = sjoin_nearest(
            cities, countries, distance_col="dist", max_distance=1, how="left"
        )
        assert capped_left.shape[0] == n_cities
        capped_left = capped_left.dropna()
        capped_left["index_right"] = capped_left["index_right"].astype("int64")
        assert_geodataframe_equal(capped_left, capped_inner, check_like=True)
Ejemplo n.º 3
0
def test_no_nearest_all():
    """Expect ``sjoin_nearest`` to raise when ``nearest_all`` is unsupported.

    NOTE(review): presumably this test only runs in environments without
    PyGEOS >= 0.10.0 (e.g. via a skip marker outside this block) -- confirm.
    """
    empty_left = geopandas.GeoDataFrame({"geometry": []})
    empty_right = geopandas.GeoDataFrame({"geometry": []})
    expected_message = "Currently, only PyGEOS >= 0.10.0 supports `nearest_all`"
    with pytest.raises(NotImplementedError, match=expected_message):
        sjoin_nearest(empty_left, empty_right)
Ejemplo n.º 4
0
 def test_empty_left_df(self, how, distance_col: str):
     """An empty left frame must give an empty join for the given ``how``.

     When a distance column is requested it must still be present in the
     (empty) result.
     """
     empty_left = geopandas.GeoDataFrame({"geometry": []})
     right_points = geopandas.GeoDataFrame(
         {"geometry": [Point(0, 0), Point(1, 1)]}
     )
     result = sjoin_nearest(
         empty_left, right_points, how=how, distance_col=distance_col
     )
     assert result.empty
     if distance_col is None:
         return
     assert distance_col in result
Ejemplo n.º 5
0
 def test_sjoin_nearest_left(
     self,
     geo_left,
     geo_right,
     expected_left: Sequence[int],
     expected_right: Sequence[int],
     distances: Sequence[float],
 ):
     """Check the left ``sjoin_nearest`` with and without a distance column.

     The expected frame is built from the rows of the left frame selected
     positionally by ``expected_left``, with ``expected_right`` stored as
     ``index_right`` and (for the second check) ``distances`` stored as
     ``distance_col``.
     """
     left = geopandas.GeoDataFrame({"geometry": geo_left})
     right = geopandas.GeoDataFrame({"geometry": geo_right})
     # Take an explicit copy: columns are assigned to the expected frame
     # below, and assigning to the bare ``iloc`` selection would write to a
     # derived slice of ``left`` (SettingWithCopyWarning).
     expected_gdf = left.iloc[expected_left].copy()
     expected_gdf["index_right"] = expected_right
     # without distance col
     joined = sjoin_nearest(left, right, how="left")
     assert_geodataframe_equal(expected_gdf, joined)
     # with distance col
     expected_gdf["distance_col"] = np.array(distances, dtype=float)
     joined = sjoin_nearest(left, right, how="left", distance_col="distance_col")
     assert_geodataframe_equal(expected_gdf, joined)
Ejemplo n.º 6
0
 def test_empty_right_df_how_right(self, distance_col: str):
     """A right join against an empty right frame must produce no records.

     When a distance column is requested it must still be present in the
     (empty) result.
     """
     left_points = geopandas.GeoDataFrame(
         {"geometry": [Point(0, 0), Point(1, 1)]}
     )
     empty_right = geopandas.GeoDataFrame({"geometry": []})
     result = sjoin_nearest(
         left_points, empty_right, how="right", distance_col=distance_col
     )
     assert result.empty
     if distance_col is None:
         return
     assert distance_col in result
Ejemplo n.º 7
0
 def test_empty_left_df_how_right(self, distance_col: str):
     """A right join with an empty left frame keeps every right record.

     The right geometries must come back unchanged, with ``index_left`` (and
     the distance column, when requested) entirely missing.
     """
     empty_left = geopandas.GeoDataFrame({"geometry": []})
     right_points = geopandas.GeoDataFrame(
         {"geometry": [Point(0, 0), Point(1, 1)]}
     )
     result = sjoin_nearest(
         empty_left, right_points, how="right", distance_col=distance_col
     )
     assert_geoseries_equal(result["geometry"], right_points["geometry"])
     assert result["index_left"].isna().all()
     if distance_col is None:
         return
     assert result[distance_col].isna().all()
Ejemplo n.º 8
0
 def test_empty_right_df_how_left(self, distance_col: str):
     """A left join against an empty right frame keeps every left record.

     The left geometries must come back unchanged, with ``index_right`` (and
     the distance column, when requested) entirely missing.
     """
     left_points = geopandas.GeoDataFrame(
         {"geometry": [Point(0, 0), Point(1, 1)]}
     )
     empty_right = geopandas.GeoDataFrame({"geometry": []})
     result = sjoin_nearest(
         left_points, empty_right, how="left", distance_col=distance_col
     )
     assert_geoseries_equal(result["geometry"], left_points["geometry"])
     assert result["index_right"].isna().all()
     if distance_col is None:
         return
     assert result[distance_col].isna().all()
Ejemplo n.º 9
0
 def test_max_distance_how_left(self):
     """Left join with ``max_distance=1`` keeps unmatched rows as NaN.

     Point(0, 0) has no right geometry within distance 1, so its joined
     columns are expected to be NaN; Point(1, 1) coincides with right row 0.
     """
     left_points = geopandas.GeoDataFrame(
         {"geometry": [Point(0, 0), Point(1, 1)]}
     )
     right_points = geopandas.GeoDataFrame(
         {"geometry": [Point(1, 1), Point(2, 2)]}
     )
     result = sjoin_nearest(
         left_points,
         right_points,
         how="left",
         max_distance=1,
         distance_col="distances",
     )
     expected = left_points.assign(
         index_right=[np.nan, 0],
         distances=[np.nan, 0],
     )
     assert_geodataframe_equal(result, expected)
Ejemplo n.º 10
0
 def test_empty_join_due_to_max_distance_how_left(self):
     """Left join where ``max_distance`` excludes every candidate.

     After applying ``max_distance`` nothing matches, so the single left row
     is expected back with NaN in the joined columns.
     """
     left_points = geopandas.GeoDataFrame({"geometry": [Point(0, 0)]})
     right_points = geopandas.GeoDataFrame(
         {"geometry": [Point(1, 1), Point(2, 2)]}
     )
     result = sjoin_nearest(
         left_points,
         right_points,
         how="left",
         max_distance=1,
         distance_col="distances",
     )
     expected = left_points.assign(
         index_right=[np.nan],
         distances=[np.nan],
     )
     assert_geodataframe_equal(result, expected)
Ejemplo n.º 11
0
 def test_max_distance(self, how):
     """Check ``max_distance=1`` filtering for the given ``how``.

     Point(0, 0) has no right geometry within distance 1 while Point(1, 1)
     coincides with right row 0. For an inner join the unmatched row is
     dropped and ``index_right`` is expected as ``int64``; otherwise the
     unmatched row is kept with NaN in the joined columns.
     """
     left_points = geopandas.GeoDataFrame(
         {"geometry": [Point(0, 0), Point(1, 1)]}
     )
     right_points = geopandas.GeoDataFrame(
         {"geometry": [Point(1, 1), Point(2, 2)]}
     )
     result = sjoin_nearest(
         left_points,
         right_points,
         how=how,
         max_distance=1,
         distance_col="distances",
     )
     expected = left_points.assign(
         index_right=[np.nan, 0],
         distances=[np.nan, 0],
     )
     if how == "inner":
         # Only matched rows survive, so the index column has no NaN left
         # and is compared as an integer column.
         expected = expected.dropna()
         expected["index_right"] = expected["index_right"].astype("int64")
     assert_geodataframe_equal(result, expected)
Ejemplo n.º 12
0
    def test_sjoin_nearest(self, how, max_distance, distance_col):
        """
        Basic test for availability of the GeoDataFrame method. Other
        sjoin tests are located in /tools/tests/test_sjoin.py

        The method result is compared against the module-level
        ``geopandas.sjoin_nearest`` called with the same arguments.
        """
        cities = read_file(geopandas.datasets.get_path("naturalearth_cities"))
        countries = read_file(geopandas.datasets.get_path("naturalearth_lowres"))

        # Both entry points receive exactly the same keyword arguments.
        join_kwargs = {
            "how": how,
            "max_distance": max_distance,
            "distance_col": distance_col,
        }
        expected = geopandas.sjoin_nearest(cities, countries, **join_kwargs)
        result = cities.sjoin_nearest(countries, **join_kwargs)
        assert_geodataframe_equal(result, expected)
Ejemplo n.º 13
0
 def test_empty_join_due_to_max_distance(self, how):
     """Join where ``max_distance`` excludes every candidate.

     After applying ``max_distance`` nothing matches. For an inner join the
     expected frame has its unmatched row dropped and ``index_right`` cast
     to ``int64``; otherwise the single left row is expected back with NaN
     in the joined columns.
     """
     left_points = geopandas.GeoDataFrame({"geometry": [Point(0, 0)]})
     right_points = geopandas.GeoDataFrame(
         {"geometry": [Point(1, 1), Point(2, 2)]}
     )
     result = sjoin_nearest(
         left_points,
         right_points,
         how=how,
         max_distance=1,
         distance_col="distances",
     )
     expected = left_points.assign(
         index_right=[np.nan],
         distances=[np.nan],
     )
     if how == "inner":
         expected = expected.dropna()
         expected["index_right"] = expected["index_right"].astype("int64")
     assert_geodataframe_equal(result, expected)
Ejemplo n.º 14
0
def match_nodes_edges_to_countries(nodes, edges, countries):
    """Tag nodes and edges with country codes and prefix their identifiers.

    Each node gets the ``ISO_A3`` / ``CONTINENT`` values of the country it
    lies within; nodes with no such match get them from a nearest-geometry
    join instead. The codes are then merged onto the edges for both
    endpoints, and node/edge identifiers are rewritten with the ISO codes as
    prefixes (the previous identifiers are kept in ``old_node_id`` /
    ``old_edge_id``).

    Parameters
    ----------
    nodes : frame with ``node_id`` and ``geometry`` columns.
    edges : frame with ``from_node``, ``to_node`` and ``edge_id`` columns.
    countries : frame with ``ISO_A3`` and ``CONTINENT`` columns, usable as the
        right-hand side of the spatial joins.

    Returns
    -------
    tuple
        ``(nodes, edges)`` as new frames; ``nodes`` is built with CRS
        ``EPSG:4326``.

    NOTE(review): relies on ``progress_apply`` being available on the frames
    (presumably registered via ``tqdm.pandas()`` elsewhere) -- confirm.
    """
    country_cols = ["node_id", "ISO_A3", "CONTINENT", "geometry"]
    renamed_cols = {"ISO_A3": "iso_code", "CONTINENT": "continent"}
    node_points = ["node_id", "geometry"]

    # Step 1: nodes lying within a country polygon.
    within = gpd.sjoin(
        nodes[node_points],
        countries,
        how="left",
        predicate='within',
    ).reset_index()
    within = within[within["ISO_A3"].notna()]
    within = within[country_cols].rename(columns=renamed_cols)
    within = within.drop_duplicates(subset=["node_id"], keep="first")

    # Step 2: the remaining nodes take the nearest country instead.
    matched_ids = within["node_id"].values.tolist()
    leftover = nodes[~nodes["node_id"].isin(matched_ids)]
    nearest = gpd.sjoin_nearest(
        leftover[node_points],
        countries,
        how="left",
    ).reset_index()
    nearest = nearest[country_cols].rename(columns=renamed_cols)
    nearest = nearest.drop_duplicates(subset=["node_id"], keep="first")

    combined = pd.concat([within, nearest], axis=0, ignore_index=True)
    nodes = gpd.GeoDataFrame(
        combined[["node_id", "iso_code", "continent", "geometry"]],
        geometry="geometry",
        crs="EPSG:4326",
    )

    # Step 3: attach the ISO code and continent of both endpoints to each
    # edge ("from" first, then "to", so the new columns keep that order).
    for side in ("from", "to"):
        edges = pd.merge(
            edges,
            nodes[["node_id", "iso_code", "continent"]],
            how="left",
            left_on=[f"{side}_node"],
            right_on=["node_id"],
        )
        edges = edges.rename(
            columns={"iso_code": f"{side}_iso", "continent": f"{side}_continent"}
        )
        edges = edges.drop(columns="node_id")

    # Step 4: keep the old identifiers and build the ISO-prefixed ones.
    nodes["old_node_id"] = nodes["node_id"]
    nodes["node_id"] = nodes.progress_apply(
        lambda row: f"{row.iso_code}_{row.node_id}", axis=1
    )
    edges["from_node"] = edges.progress_apply(
        lambda row: f"{row.from_iso}_{row.from_node}", axis=1
    )
    edges["to_node"] = edges.progress_apply(
        lambda row: f"{row.to_iso}_{row.to_node}", axis=1
    )
    edges["old_edge_id"] = edges["edge_id"]
    edges["edge_id"] = edges.progress_apply(
        lambda row: f"{row.from_iso}_{row.to_iso}_{row.edge_id}", axis=1
    )

    return nodes, edges
Ejemplo n.º 15
0
 def test_invalid_hows(self, how: str):
     """An unsupported ``how`` value must be rejected with a ``ValueError``."""
     empty_left, empty_right = (
         geopandas.GeoDataFrame({"geometry": []}) for _ in range(2)
     )
     with pytest.raises(ValueError, match="`how` was"):
         sjoin_nearest(empty_left, empty_right, how=how)
Ejemplo n.º 16
0
 def test_allowed_hows(self, how_kwargs):
     """Every supported ``how`` keyword set must be accepted without error."""
     empty_left, empty_right = (
         geopandas.GeoDataFrame({"geometry": []}) for _ in range(2)
     )
     # Passing is simply "no exception raised"; the result is not inspected.
     sjoin_nearest(empty_left, empty_right, **how_kwargs)