def test_sjoin_nearest_left(
    self,
    geo_left,
    geo_right,
    expected_left: Sequence[int],
    expected_right: Sequence[int],
    distances: Sequence[float],
    how,
):
    """Compare ``sjoin_nearest`` output with hand-built expectations.

    The expected frame is made of the rows of the left frame selected
    positionally by ``expected_left``, with ``expected_right`` as the
    ``index_right`` column. The join is checked once without and once with
    a distance column holding ``distances``.
    """
    left_gdf = geopandas.GeoDataFrame({"geometry": geo_left})
    right_gdf = geopandas.GeoDataFrame({"geometry": geo_right})

    # inner / left join give a different row order, so the comparison is
    # relaxed with ``check_like`` for the inner join only.
    relaxed = how == "inner"

    expected_plain = left_gdf.iloc[expected_left].copy()
    expected_plain["index_right"] = expected_right

    # without distance col
    joined_plain = sjoin_nearest(left_gdf, right_gdf, how=how)
    assert_geodataframe_equal(expected_plain, joined_plain, check_like=relaxed)

    # with distance col
    expected_with_distance = expected_plain.assign(
        distance_col=np.array(distances, dtype=float)
    )
    joined_with_distance = sjoin_nearest(
        left_gdf, right_gdf, how=how, distance_col="distance_col"
    )
    assert_geodataframe_equal(
        expected_with_distance, joined_with_distance, check_like=relaxed
    )
def test_sjoin_nearest_inner(self):
    """Check that inner and left nearest joins agree, with and without a cap."""
    # check equivalency of left and inner join
    countries = read_file(geopandas.datasets.get_path("naturalearth_lowres"))
    cities = read_file(geopandas.datasets.get_path("naturalearth_cities"))
    countries = countries[["geometry", "name"]].rename(columns={"name": "country"})
    n_cities = cities.shape[0]

    # default: inner and left give the same result
    default_join = sjoin_nearest(cities, countries, distance_col="dist")
    assert default_join.shape[0] == n_cities

    inner_join = sjoin_nearest(cities, countries, distance_col="dist", how="inner")
    assert_geodataframe_equal(inner_join, default_join)

    left_join = sjoin_nearest(cities, countries, distance_col="dist", how="left")
    assert_geodataframe_equal(left_join, default_join, check_like=True)

    # with max_distance: rows that go above are dropped in case of inner
    capped_inner = sjoin_nearest(
        cities, countries, distance_col="dist", max_distance=1
    )
    within_cap = default_join[default_join["dist"] < 1]
    assert_geodataframe_equal(capped_inner, within_cap, check_like=True)

    # The left join keeps every city; once the unmatched (NaN) rows are
    # dropped and the index column is cast back to integers it has to equal
    # the capped inner join.
    capped_left = sjoin_nearest(
        cities, countries, distance_col="dist", max_distance=1, how="left"
    )
    assert capped_left.shape[0] == n_cities
    capped_left = capped_left.dropna()
    capped_left["index_right"] = capped_left["index_right"].astype("int64")
    assert_geodataframe_equal(capped_left, capped_inner, check_like=True)
def test_no_nearest_all():
    """Expect ``sjoin_nearest`` to raise ``NotImplementedError`` here.

    The error message must match the PyGEOS ``nearest_all`` hint below.
    """
    empty_left = geopandas.GeoDataFrame({"geometry": []})
    empty_right = geopandas.GeoDataFrame({"geometry": []})
    expected_message = "Currently, only PyGEOS >= 0.10.0 supports `nearest_all`"
    with pytest.raises(NotImplementedError, match=expected_message):
        sjoin_nearest(empty_left, empty_right)
def test_empty_left_df(self, how, distance_col: str):
    """An empty left frame joins to an empty result.

    When a distance column is requested it must still be present.
    """
    empty_left = geopandas.GeoDataFrame({"geometry": []})
    populated_right = geopandas.GeoDataFrame(
        {"geometry": [Point(0, 0), Point(1, 1)]}
    )

    joined = sjoin_nearest(
        empty_left, populated_right, how=how, distance_col=distance_col
    )

    assert joined.empty
    assert distance_col is None or distance_col in joined
def test_sjoin_nearest_left(
    self,
    geo_left,
    geo_right,
    expected_left: Sequence[int],
    expected_right: Sequence[int],
    distances: Sequence[float],
):
    """Compare a left ``sjoin_nearest`` with hand-built expectations.

    The expected frame is made of the rows of the left frame selected
    positionally by ``expected_left``, with ``expected_right`` as the
    ``index_right`` column. The join is checked once without and once with
    a distance column holding ``distances``.
    """
    left = geopandas.GeoDataFrame({"geometry": geo_left})
    right = geopandas.GeoDataFrame({"geometry": geo_right})

    # Take an explicit copy: columns are added to the expected frame below,
    # and assigning into a bare ``iloc`` selection is chained assignment
    # (pandas may emit SettingWithCopyWarning for it).
    expected_gdf = left.iloc[expected_left].copy()
    expected_gdf["index_right"] = expected_right

    # without distance col
    joined = sjoin_nearest(left, right, how="left")
    assert_geodataframe_equal(expected_gdf, joined)

    # with distance col
    expected_gdf["distance_col"] = np.array(distances, dtype=float)
    joined = sjoin_nearest(left, right, how="left", distance_col="distance_col")
    assert_geodataframe_equal(expected_gdf, joined)
def test_empty_right_df_how_right(self, distance_col: str):
    """A right join against an empty right frame yields no records."""
    populated_left = geopandas.GeoDataFrame(
        {"geometry": [Point(0, 0), Point(1, 1)]}
    )
    empty_right = geopandas.GeoDataFrame({"geometry": []})

    # no records in joined
    joined = sjoin_nearest(
        populated_left, empty_right, how="right", distance_col=distance_col
    )

    assert joined.empty
    assert distance_col is None or distance_col in joined
def test_empty_left_df_how_right(self, distance_col: str):
    """A right join with an empty left frame keeps every right geometry.

    The left index column, and the distance column when requested, must be
    entirely NaN.
    """
    empty_left = geopandas.GeoDataFrame({"geometry": []})
    populated_right = geopandas.GeoDataFrame(
        {"geometry": [Point(0, 0), Point(1, 1)]}
    )

    joined = sjoin_nearest(
        empty_left, populated_right, how="right", distance_col=distance_col
    )

    assert_geoseries_equal(joined["geometry"], populated_right["geometry"])
    assert joined["index_left"].isna().all()
    assert distance_col is None or joined[distance_col].isna().all()
def test_empty_right_df_how_left(self, distance_col: str):
    """A left join against an empty right frame keeps every left geometry.

    The right index column, and the distance column when requested, must be
    entirely NaN.
    """
    populated_left = geopandas.GeoDataFrame(
        {"geometry": [Point(0, 0), Point(1, 1)]}
    )
    empty_right = geopandas.GeoDataFrame({"geometry": []})

    # all records from left and no results from right
    joined = sjoin_nearest(
        populated_left, empty_right, how="left", distance_col=distance_col
    )

    assert_geoseries_equal(joined["geometry"], populated_left["geometry"])
    assert joined["index_right"].isna().all()
    assert distance_col is None or joined[distance_col].isna().all()
def test_max_distance_how_left(self):
    """Left join with ``max_distance=1``: the first point gets NaN columns.

    The second point coincides with the first right point, so it is matched
    to right index 0 at distance 0.
    """
    left = geopandas.GeoDataFrame({"geometry": [Point(0, 0), Point(1, 1)]})
    right = geopandas.GeoDataFrame({"geometry": [Point(1, 1), Point(2, 2)]})

    join_options = dict(how="left", max_distance=1, distance_col="distances")
    joined = sjoin_nearest(left, right, **join_options)

    expected = left.assign(index_right=[np.nan, 0], distances=[np.nan, 0])
    assert_geodataframe_equal(joined, expected)
def test_empty_join_due_to_max_distance_how_left(self):
    """Left join where ``max_distance`` excludes every candidate match."""
    left = geopandas.GeoDataFrame({"geometry": [Point(0, 0)]})
    right = geopandas.GeoDataFrame({"geometry": [Point(1, 1), Point(2, 2)]})

    # after applying max_distance the join comes back empty
    # (as in NaN in the joined columns)
    join_options = dict(how="left", max_distance=1, distance_col="distances")
    joined = sjoin_nearest(left, right, **join_options)

    expected = left.assign(index_right=[np.nan], distances=[np.nan])
    assert_geodataframe_equal(joined, expected)
def test_max_distance(self, how):
    """``max_distance=1`` leaves the first left point without a match.

    For the inner join the unmatched row is expected to be dropped and the
    index column to hold integers; otherwise the row is kept with NaN.
    """
    left = geopandas.GeoDataFrame({"geometry": [Point(0, 0), Point(1, 1)]})
    right = geopandas.GeoDataFrame({"geometry": [Point(1, 1), Point(2, 2)]})

    join_options = dict(how=how, max_distance=1, distance_col="distances")
    joined = sjoin_nearest(left, right, **join_options)

    expected = left.assign(index_right=[np.nan, 0], distances=[np.nan, 0])
    if how == "inner":
        expected = expected.dropna()
        # With the NaN row gone the index column can be integer again.
        expected["index_right"] = expected["index_right"].astype("int64")

    assert_geodataframe_equal(joined, expected)
def test_sjoin_nearest(self, how, max_distance, distance_col):
    """
    Basic test for availability of the GeoDataFrame method. Other
    sjoin tests are located in /tools/tests/test_sjoin.py
    """
    cities = read_file(geopandas.datasets.get_path("naturalearth_cities"))
    countries = read_file(geopandas.datasets.get_path("naturalearth_lowres"))

    # Same keyword arguments for the module-level function and the method.
    join_options = dict(
        how=how, max_distance=max_distance, distance_col=distance_col
    )
    expected = geopandas.sjoin_nearest(cities, countries, **join_options)
    result = cities.sjoin_nearest(countries, **join_options)

    assert_geodataframe_equal(result, expected)
def test_empty_join_due_to_max_distance(self, how):
    """``max_distance`` excludes every candidate for the single left point.

    For the inner join the expected frame has its NaN row dropped and an
    integer index column; otherwise the row is kept with NaN values.
    """
    left = geopandas.GeoDataFrame({"geometry": [Point(0, 0)]})
    right = geopandas.GeoDataFrame({"geometry": [Point(1, 1), Point(2, 2)]})

    # after applying max_distance the join comes back empty
    # (as in NaN in the joined columns)
    join_options = dict(how=how, max_distance=1, distance_col="distances")
    joined = sjoin_nearest(left, right, **join_options)

    expected = left.assign(index_right=[np.nan], distances=[np.nan])
    if how == "inner":
        expected = expected.dropna()
        expected["index_right"] = expected["index_right"].astype("int64")

    assert_geodataframe_equal(joined, expected)
def match_nodes_edges_to_countries(nodes,edges,countries):
    """Tag nodes and edges with country codes and prefix their ids.

    Nodes are first joined to ``countries`` with a ``within`` spatial join.
    Nodes that get no ``ISO_A3`` value that way are joined to the nearest
    country instead. Each node keeps only its first match. Edges then
    receive the ISO code and continent of their ``from_node`` and
    ``to_node``. Finally node and edge ids are rewritten with ISO-code
    prefixes, and the previous ids are kept in ``old_node_id`` and
    ``old_edge_id``.

    Reads ``node_id``/``geometry`` from ``nodes``, ``ISO_A3``/``CONTINENT``
    from ``countries`` and ``from_node``/``to_node``/``edge_id`` from
    ``edges``. Uses ``progress_apply``, which is not a plain pandas method;
    presumably tqdm's pandas integration is registered elsewhere (confirm).

    Returns the tuple ``(nodes, edges)``. The returned nodes are a
    GeoDataFrame labelled with CRS ``EPSG:4326``.
    """
    country_columns = {"ISO_A3":"iso_code","CONTINENT":"continent"}

    def _one_row_per_node(joined):
        # Keep id, country attributes and geometry; rename; first hit wins.
        trimmed = joined[["node_id","ISO_A3","CONTINENT","geometry"]]
        trimmed = trimmed.rename(columns=country_columns)
        return trimmed.drop_duplicates(subset=["node_id"],keep="first")

    # assign iso code and continent name to each node
    within_join = gpd.sjoin(
        nodes[["node_id","geometry"]], countries, how="left", predicate='within'
    ).reset_index()
    within_join = within_join[within_join["ISO_A3"].notna()]
    nodes_matches = _one_row_per_node(within_join)

    # Nodes without a containing country fall back to the nearest one.
    matched_ids = nodes_matches["node_id"].values.tolist()
    leftover_nodes = nodes[~nodes["node_id"].isin(matched_ids)]
    nearest_join = gpd.sjoin_nearest(
        leftover_nodes[["node_id","geometry"]], countries, how="left"
    ).reset_index()
    nodes_unmatched = _one_row_per_node(nearest_join)

    nodes = pd.concat([nodes_matches,nodes_unmatched],axis=0,ignore_index=True)
    nodes = gpd.GeoDataFrame(
        nodes[["node_id","iso_code","continent","geometry"]],
        geometry="geometry",
        crs="EPSG:4326",
    )

    # assign iso code and continent name to each edge
    node_lookup = nodes[["node_id","iso_code","continent"]]
    endpoint_columns = (
        ("from_node", "from_iso", "from_continent"),
        ("to_node", "to_iso", "to_continent"),
    )
    for endpoint, iso_name, continent_name in endpoint_columns:
        edges = pd.merge(
            edges, node_lookup, how="left", left_on=[endpoint], right_on=["node_id"]
        )
        edges = edges.rename(
            columns={"iso_code": iso_name, "continent": continent_name}
        )
        edges = edges.drop("node_id",axis=1)

    # Prefix ids with ISO codes, keeping the previous ids alongside.
    nodes["old_node_id"] = nodes["node_id"]
    nodes["node_id"] = nodes.progress_apply(lambda x:f"{x.iso_code}_{x.node_id}",axis=1)
    edges["from_node"] = edges.progress_apply(lambda x:f"{x.from_iso}_{x.from_node}",axis=1)
    edges["to_node"] = edges.progress_apply(lambda x:f"{x.to_iso}_{x.to_node}",axis=1)
    edges["old_edge_id"] = edges["edge_id"]
    edges["edge_id"] = edges.progress_apply(lambda x:f"{x.from_iso}_{x.to_iso}_{x.edge_id}",axis=1)

    return nodes, edges
def test_invalid_hows(self, how: str):
    """An unsupported ``how`` must raise ``ValueError`` mentioning it."""
    empty_left = geopandas.GeoDataFrame({"geometry": []})
    empty_right = geopandas.GeoDataFrame({"geometry": []})
    expected_message = "`how` was"
    with pytest.raises(ValueError, match=expected_message):
        sjoin_nearest(empty_left, empty_right, how=how)
def test_allowed_hows(self, how_kwargs):
    """Every supplied ``how_kwargs`` must be accepted without raising."""
    empty_frames = (
        geopandas.GeoDataFrame({"geometry": []}),
        geopandas.GeoDataFrame({"geometry": []}),
    )
    # no error
    sjoin_nearest(*empty_frames, **how_kwargs)