def export_data(db: PostgreSQL):
    """
    Using the prepared data, extract a point shapefile to disk.
    Geometry is the intersection nodes that are near bus stops.
    Attributes include the total number of weekday boardings, alightings,
    and a list of the bus routes and stop IDs that contributed to the sums.

    :param db: database holding the prepared bus stop and street node tables
    """

    # Sum boardings/alightings per street node, collect the contributing
    # routes/stop IDs as arrays, then attach each node's geometry
    node_ridership_sql = """
        with ridership_data as (
            select
                nearest_node,
                sum(weekday_bo) as boardings,
                sum(weekday_le) as alightings,
                array_agg(route) as routes,
                array_agg(gid) as stopgid
            from
                septa_bus_stops_fall_2019
            where
                nearest_node is not null
            group by
                nearest_node
        )
        select
            row_number() over() as uid,
            d.*,
            n.geom
        from
            ridership_data d
        left join
            street_nodes n
        on
            n.streetnodeid = d.nearest_node
    """
    gdf = db.query_as_geo_df(node_ridership_sql)

    # geopandas turns SQL array_agg() columns into Python lists, which the
    # shapefile format cannot store; flatten each list to a comma-delimited
    # string before writing
    def _as_csv(values):
        return ",".join(str(v) for v in values)

    for list_column in ("routes", "stopgid"):
        gdf[list_column] = gdf[list_column].apply(_as_csv)

    # Write to disk
    gdf.to_file(GIS_FOLDER / "bus_stops_with_ridership_agg.shp")
def write_query_to_geojson(filename: str, query: str, db: PostgreSQL):
    """
    Write SQL query out to geojson file on disk.

    :param filename: output file name, without the ``.geojson`` extension
    :param query: SQL query returning a geometry column
    :param db: database to execute the query against
    """

    # BUG FIX: the original built the literal path "(unknown).geojson" and
    # never used the `filename` parameter, so every call overwrote one file.

    # Put into Google Drive, if configured
    if FOLDER_DATA_PRODUCTS:
        output_filepath = FOLDER_DATA_PRODUCTS / f"{filename}.geojson"

    # Otherwise just drop it into the active directory
    else:
        output_filepath = f"{filename}.geojson"

    # Extract geodataframe from SQL
    df = db.query_as_geo_df(query)

    # Save to file
    df.to_file(output_filepath, driver="GeoJSON")
# Beispiel #3 (example separator from the original source listing; kept as a
# comment so the module remains valid Python)
def import_production_sql_data(remote_db: PostgreSQL, local_db: PostgreSQL):
    """
    Copy data from DVRPC's production SQL database to a SQL database on localhost.

    Note - this connection will only work within the firewall.

    :param remote_db: production database to read from
    :param local_db: localhost database that receives the copies
    """

    # (schema, [table, ...]) pairs to copy from production
    data_to_download = [
        ("transportation", ["pedestriannetwork_lines",
                            "pedestriannetwork_points",
                            "pa_centerline",
                            "CircuitTrails",
                            ]),

        ("structure", ["points_of_interest"]),

        ("boundaries", ["municipalboundaries"]),
    ]

    for schema, table_list in data_to_download:

        for table_name in table_list:

            # Extract the data from the remote database and rename the geom column
            query = f"SELECT * FROM {schema}.{table_name};"
            gdf = remote_db.query_as_geo_df(query, geom_col="shape")
            gdf = gdf.rename(columns={"shape": "geometry"}).set_geometry("geometry")

            # If any geometries are multipart, explode them to single parts,
            # keeping the pre-explosion index in an 'explode' column.
            # (idiom fix: any() replaces a flag-setting loop with no break)
            if any("Multi" in geom_type for geom_type in gdf.geom_type.unique()):
                print(f"EXPLODING {table_name}")
                gdf = gdf.explode()
                gdf['explode'] = gdf.index
                gdf = gdf.reset_index()

            # Save to the local database; table names are lowercased for postgres
            local_db.import_geodataframe(gdf, table_name.lower())