def test_points():
    """Check that ``points_from_xy`` builds a ``GeometryArray`` of shapely points.

    Each element of the result must be a ``shapely.geometry.Point`` whose
    ``x`` / ``y`` attributes equal the corresponding input coordinates.
    """
    # ``np.float`` was only an alias for the builtin ``float``; it was
    # deprecated in NumPy 1.20 and removed in 1.24, so use ``float`` directly.
    x = np.arange(10).astype(float)
    y = np.arange(10).astype(float) ** 2

    points = points_from_xy(x, y)
    assert isinstance(points, GeometryArray)

    # Index into ``points`` explicitly so integer ``__getitem__`` is exercised.
    for i, (expected_x, expected_y) in enumerate(zip(x, y)):
        point = points[i]
        assert isinstance(point, shapely.geometry.Point)
        assert point.x == expected_x
        assert point.y == expected_y
def load_metro_areas_shapefile():
    """Load the CBSA metro-area layer and attach a "center" point to each row.

    Steps, in order:

    1. Read ``CWD / "USA_Core_Based_Statistical_Area"`` with geopandas and
       reproject it to EPSG:4326.
    2. Read ``msa_urban_centroids.csv`` from ``CWD.parent / "bin"`` and turn its
       ``msa_longitude`` / ``msa_latitude`` columns into point geometries keyed
       by ``CBSA_ID`` (cast to ``str``).
    3. Compute ``center`` with ``find_centroid``, keep a copy in ``msa_center``,
       then overwrite ``center`` wherever the CSV supplies a point for that
       ``CBSA_ID``.
    4. Keep a fixed set of columns, add ``metro_id`` (copy of ``CBSA_ID``),
       lower-case all column names, derive ``state`` as the text after the last
       ``", "`` in ``name``, and drop rows whose ``state`` is exactly
       ``AK``, ``HI`` or ``PR``.
    5. Append two extra point-only rows, ``NY_Z_J`` and ``NY_Z_K``, each with
       population ``1e6``.

    Returns:
        The concatenation of the filtered metro areas and the two extra rows,
        with a fresh integer index. Columns that the extra rows do not define
        are left missing for those rows.
    """
    # The directory itself is handed to the reader, not a ".shp" file inside it.
    boundary_path = CWD / "USA_Core_Based_Statistical_Area"
    metros = gpd.read_file(boundary_path).to_crs(epsg=4326)

    # Hand-corrected centroids, indexed by string CBSA_ID so they can be used
    # as a lookup table in ``Series.map`` below.
    overrides = pd.read_csv(CWD.parent / "bin" / "msa_urban_centroids.csv")
    overrides["CBSA_ID"] = overrides["CBSA_ID"].astype("str")
    overrides = overrides.set_index("CBSA_ID")
    override_points = points_from_xy(
        overrides["msa_longitude"],
        overrides["msa_latitude"],
    )
    overrides = gpd.GeoDataFrame(
        overrides,
        geometry=override_points,
        crs="EPSG:4326",
    )

    metros["center"] = find_centroid(metros)
    metros["corrected_center"] = metros["CBSA_ID"].map(overrides["geometry"])
    # Preserve the computed centroid before any override is applied.
    metros["msa_center"] = metros["center"]
    has_override = metros["corrected_center"].notna()
    metros.loc[has_override, "center"] = metros["corrected_center"]

    retained_columns = [
        "CBSA_ID",
        "NAME",
        "CBSA_TYPE",
        "POPULATION",
        "center",
        "msa_center",
        "geometry",
    ]
    metros = metros.loc[:, retained_columns]
    metros["metro_id"] = metros["CBSA_ID"]
    metros.columns = metros.columns.str.lower()

    # "name" is split on ", " and the last piece is taken as the state label.
    metros["state"] = metros["name"].str.split(", ").str[-1]
    excluded = metros["state"].isin(["AK", "HI", "PR"])
    metros = metros.loc[~excluded, :]

    # Two additional point-only entries, given as (longitude, latitude).
    zone_lon_lat = {
        "NY_Z_J": (-73.930488, 40.695448),
        "NY_Z_K": (-73.008906, 40.840391),
    }
    zone_rows = pd.DataFrame(
        [[zone_id, 1e6] for zone_id in zone_lon_lat],
        columns=["metro_id", "population"],
    )
    # Transpose the (lon, lat) pairs into one longitude tuple and one latitude tuple.
    zone_lons, zone_lats = zip(*zone_lon_lat.values())
    zone_rows = gpd.GeoDataFrame(
        zone_rows,
        geometry=points_from_xy(zone_lons, zone_lats),
        crs="EPSG:4326",
    )
    zone_rows["center"] = zone_rows["geometry"]

    return pd.concat([metros, zone_rows], ignore_index=True, sort=False)