def export_data(db: PostgreSQL):
    """
    Using the prepared data, extract a point shapefile to disk.

    Geometry is the intersection nodes that are near bus stops.
    Attributes include the total number of weekday boardings,
    alightings, and a list of the bus routes and stop IDs that
    contributed to the sums.
    """

    # Aggregate boardings/alightings by street node id and join to geometry
    join_query = """
        with ridership_data as (
            select
                nearest_node,
                sum(weekday_bo) as boardings,
                sum(weekday_le) as alightings,
                array_agg(route) as routes,
                array_agg(gid) as stopgid
            from septa_bus_stops_fall_2019
            where nearest_node is not null
            group by nearest_node
        )
        select
            row_number() over() as uid,
            d.*,
            n.geom
        from ridership_data d
        left join street_nodes n
            on n.streetnodeid = d.nearest_node
    """
    result_gdf = db.query_as_geo_df(join_query)

    # Shapefiles cannot store the Python lists that geopandas produces
    # from SQL array_agg() columns, so flatten each list into a single
    # comma-delimited string before writing.
    def _as_csv(values):
        return ",".join(str(v) for v in values)

    result_gdf["routes"] = result_gdf["routes"].apply(_as_csv)
    result_gdf["stopgid"] = result_gdf["stopgid"].apply(_as_csv)

    # Write to disk
    result_gdf.to_file(GIS_FOLDER / "bus_stops_with_ridership_agg.shp")
def write_query_to_geojson(filename: str, query: str, db: PostgreSQL):
    """
    Write SQL query out to a geojson file on disk.

    Parameters
    ----------
    filename : str
        Output filename WITHOUT extension; ".geojson" is appended here.
    query : str
        SQL query that returns a geometry column.
    db : PostgreSQL
        Database connection wrapper used to execute the query.
    """

    # BUGFIX: the `filename` argument was previously ignored — both
    # branches wrote to a hard-coded "(unknown).geojson" path, so every
    # call overwrote the same file. Use the caller-supplied name instead.

    # Put into Google Drive, if configured
    if FOLDER_DATA_PRODUCTS:
        output_filepath = FOLDER_DATA_PRODUCTS / f"{filename}.geojson"
    # Otherwise just drop it into the active directory
    else:
        output_filepath = f"{filename}.geojson"

    # Extract geodataframe from SQL
    df = db.query_as_geo_df(query)

    # Save to file
    df.to_file(output_filepath, driver="GeoJSON")
def import_production_sql_data(remote_db: PostgreSQL, local_db: PostgreSQL):
    """
    Copy data from DVRPC's production SQL database to a SQL
    database on localhost.

    Note - this connection will only work within the firewall.

    Parameters
    ----------
    remote_db : PostgreSQL
        Connection to the production database (read side).
    local_db : PostgreSQL
        Connection to the localhost database (write side).
    """

    # (schema, [table, ...]) pairs to copy across
    data_to_download = [
        ("transportation", [
            "pedestriannetwork_lines",
            "pedestriannetwork_points",
            "pa_centerline",
            "CircuitTrails",
        ]),
        ("structure", ["points_of_interest"]),
        ("boundaries", ["municipalboundaries"]),
    ]

    for schema, table_list in data_to_download:
        for table_name in table_list:
            # Extract the data from the remote database and rename the geom column
            query = f"SELECT * FROM {schema}.{table_name};"
            gdf = remote_db.query_as_geo_df(query, geom_col="shape")
            gdf = gdf.rename(columns={"shape": "geometry"}).set_geometry("geometry")

            # Check if we have multipart geometries.
            # If so, explode them (but keep the index).
            # The `gt and` guard skips None entries, which geom_type
            # yields for rows with null geometry (previously this would
            # raise TypeError on `"Multi" in None`).
            multipart = any(
                gt and "Multi" in gt for gt in gdf.geom_type.unique()
            )

            if multipart:
                print(f"EXPLODING {table_name}")
                gdf = gdf.explode()
                gdf["explode"] = gdf.index
                gdf = gdf.reset_index()

            # Save to the local database
            local_db.import_geodataframe(gdf, table_name.lower())