def export_shp(
    tablename: str,
    output_path: Path = GDRIVE_FOLDER,
    db: Database = _db,
    include_geoids: bool = True,
) -> None:
    """
    Export a spatial table from SQL to a shapefile.

    If include_geoids is True, geographic identifiers are added by joining
    the projects to blocks, and then to a lookup table.
    """
    output_folder = output_path / "Outputs"

    export_args = {
        "filetype": "shp",
    }

    if include_geoids:
        export_args["table_or_sql"] = f"""
            with points_and_blockids as (
                select p.*, b.geoid as block_geoid
                from {tablename} p
                left join blocks b
                    on st_within(p.geom, b.geom)
            )
            select p.*, b.*
            from points_and_blockids p
            left join blocktogeos_lookup b
                on b.block10::text = p.block_geoid
        """
        export_args["filepath"] = output_folder / f"{tablename}_with_geoids.shp"
    else:
        export_args["table_or_sql"] = tablename
        export_args["filepath"] = output_folder / f"{tablename}.shp"

    print("-" * 80)
    print(f"Exporting {tablename} to {export_args['filepath']}")

    db.export_gis(**export_args)
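# A hedged usage sketch for export_shp(). The table name below is hypothetical and is
# assumed to already exist in the database, along with the 'blocks' and
# 'blocktogeos_lookup' tables referenced in the geoid join.
def _example_export_shp_usage() -> None:
    # Joined to block geoids -> Outputs/projects_2021_03_15_with_geoids.shp
    export_shp("projects_2021_03_15")

    # Raw table only -> Outputs/projects_2021_03_15.shp
    export_shp("projects_2021_03_15", include_geoids=False)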
def import_data(db: Database = _db, folder: Path = GDRIVE_FOLDER) -> None:
    """
    Import all shapefiles and CSV files within the '/Inputs/' subfolder,
    if they have not yet been uploaded.
    """
    # Ensure that the database exists
    db.admin("create")

    print("-" * 80)

    print("Importing shapefiles:")
    for shp in folder.rglob("Inputs/*.shp"):
        sql_tablename = clean_name_for_sql(shp)
        if f"public.{sql_tablename}" not in db.tables(spatial_only=True):
            print("\t ->", sql_tablename)
            db.import_gis(
                method="shp2pgsql",
                srid=26918,
                filepath=shp,
                sql_tablename=sql_tablename,
            )

    print("Importing CSV files:")
    for csvfile in folder.rglob("Inputs/*.csv"):
        sql_tablename = clean_name_for_sql(csvfile)
        if f"public.{sql_tablename}" not in db.tables():
            print("\t ->", sql_tablename)
            db.import_file_with_pandas(csvfile, sql_tablename)
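# import_data() relies on clean_name_for_sql(), which is defined elsewhere in this repo.
# The sketch below is NOT that implementation -- it is a hypothetical stand-in that shows
# the kind of normalization such a helper typically performs (lowercase the file stem and
# replace characters that are unsafe in a Postgres table name).
def _example_clean_name_for_sql(filepath: Path) -> str:
    stem = filepath.stem.lower()
    # Keep letters, digits, and underscores; swap everything else for an underscore
    return "".join(c if c.isalnum() or c == "_" else "_" for c in stem)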
@pytest.fixture
def local_db():
    """
    Spin up a local db, use it in the test, then drop it
    """
    db = Database.from_config("pytest", "localhost")
    db.admin("CREATE")

    yield db

    db.admin("DROP")
def test_geopandas_imports_spatial_data_from_disk(local_db: Database, downloaded_shapefile):
    """
    Using geopandas: Confirm that we have a spatial table with proper projection
    in the DB after importing a local shapefile
    """
    unzipped_shp_file_name = downloaded_shapefile
    sql_tablename = "test.neighborhoods_gpd"

    local_db.import_gis(
        method="geopandas",
        filepath=str(unzipped_shp_file_name) + ".shp",
        sql_tablename=sql_tablename,
    )

    # Confirm the spatial table exists
    assert sql_tablename in local_db.tables(spatial_only=True)

    # Confirm that the EPSG is correct
    assert 2272 == local_db.projection(sql_tablename)
def _find_latest_project_table(db: Database = _db) -> str:
    """
    You may have multiple development ('project') tables.

    This function finds the one with the most recent datestamp
    and returns the name of the table
    """
    query = """
        select table_name
        from information_schema.tables
        where table_name like 'all_devprojects%%'
        order by table_name desc
        limit 1
    """
    return db.query_as_singleton(query)
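# Why 'order by table_name desc limit 1' finds the most recent table (illustrative sketch):
# the names end in a zero-padded YYYY_MM_DD stamp, so lexicographic order matches
# chronological order. The sample names below are hypothetical.
def _example_latest_table_by_name() -> str:
    names = [
        "all_devprojects_2021_03_15",
        "all_devprojects_2021_01_04",
        "all_devprojects_2020_12_31",
    ]
    # max() applies the same string ordering the SQL query relies on
    return max(names)  # -> 'all_devprojects_2021_03_15'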
@pytest.fixture
def local_db_with_spatial_data(downloaded_shapefile):
    """
    Spin up a local db, use it in the test, then drop it
    """
    db = Database.from_config("pytest", "localhost")
    db.admin("CREATE")

    sql_tablename = "test.neighborhoods_gpd"

    db.import_gis(
        method="geopandas",
        filepath=str(downloaded_shapefile) + ".shp",
        sql_tablename=sql_tablename,
    )

    yield db

    db.admin("DROP")
def test_that_spatial_table_is_inside_database(local_db_with_spatial_data: Database):
    assert "test.neighborhoods_gpd" in local_db_with_spatial_data.tables(spatial_only=True)
def test_that_existing_database_exists(localhost_postgres: Database):
    """
    This database definitely exists already
    """
    assert localhost_postgres.exists() is True
@pytest.fixture
def localhost_postgres():
    yield Database.from_config("postgres", "localhost")
import os
from pathlib import Path

from dotenv import load_dotenv, find_dotenv
from pg_data_etl import Database

load_dotenv(find_dotenv())

GDRIVE_FOLDER = Path(os.getenv("GOOGLE_DRIVE_ROOT"))

_credentials = {
    "host": os.getenv("DB_HOST"),
    "port": os.getenv("DB_PORT"),
    "un": os.getenv("DB_USER"),
    "pw": os.getenv("DB_PW"),
    "super_un": os.getenv("DB_SUPER_UN") or "postgres",
    "super_pw": os.getenv("DB_SUPER_PW") or os.getenv("DB_PW"),
}

_db_name = os.getenv("DB_NAME")

_db = Database.from_parameters(_db_name, **_credentials)
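# A minimal sketch of the .env file this module expects. The variable names come from the
# os.getenv() calls above; the values shown are placeholders, not real paths or credentials.
#
#   GOOGLE_DRIVE_ROOT=/path/to/Google Drive/Shared drives/Project Folder
#   DB_HOST=localhost
#   DB_PORT=5432
#   DB_USER=your_username
#   DB_PW=your_password
#   DB_SUPER_UN=postgres
#   DB_SUPER_PW=your_superuser_password
#   DB_NAME=project_db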
def projects_to_parcels(
    project_table: str = "all_devprojects_2021_03_15",
    parcel_table: str = "parcels",
    db: Database = _db,
    year_filter: Union[str, bool] = False,
) -> None:
    """
    If you want to limit to developments starting in a certain year, provide a
    SQL-valid year_filter argument as a string.

    For example:
        year_filter = 'start_year > 2020'
    """

    # Parse the date out of the project table name
    # Turn "all_devprojects_2021_03_15" into "2021_03_15"
    project_table_date = "_".join(project_table.split("_")[-3:])

    new_project_table = f"projects_{project_table_date}"

    # Create a new version of the project table with only the projects we want to analyze
    projects_to_map_query = f"""
        select
            project_id, name, address, parcel_id, building_type, start_year,
            residential_units, non_res_sqft, tags, duration
        from {project_table}
        where
            (tags like '%%NineCoDur%%'
             or tags like '%%bethel_concord%%'
             or tags like '%%Mercer_Affordable_Other%%')
    """

    # Add the year filter to the SQL code and the new table name
    if year_filter:
        projects_to_map_query += f"""
            AND {year_filter}
        """

        year_filter_table_name = year_filter.replace(" ", "_")
        for old_char, new_char in [("<", "lt"), (">", "gt"), ("=", "e")]:
            if old_char in year_filter_table_name:
                year_filter_table_name = year_filter_table_name.replace(old_char, new_char)

        new_project_table += f"_{year_filter_table_name}"

    print("-" * 80)
    print(f"Applying parcel geometries to {new_project_table}")

    create_table_query = f"""
        drop table if exists {new_project_table};

        create table {new_project_table} as (
            {projects_to_map_query}
        )
    """
    print(create_table_query)
    db.execute(create_table_query)

    # Add a 'geom' column to hold POINT data
    db.execute(f"select addgeometrycolumn('{new_project_table}', 'geom', 26918, 'POINT', 2)")

    # Loop over every project and assign an appropriate point location to the 'geom' column
    parcel_ids = db.query_as_list_of_singletons(f"SELECT parcel_id FROM {new_project_table}")

    for pid in tqdm(parcel_ids, total=len(parcel_ids)):

        # A single parcel id: use the centroid of that parcel
        if ";" not in pid:
            query = f"""
                update {new_project_table}
                set geom = (
                    select st_centroid(geom)
                    from {parcel_table}
                    where primary_id = {pid}
                )
                where parcel_id = '{pid}'
            """

        # Multiple ';'-delimited parcel ids: use the centroid of their unioned geometry
        else:
            query = f"""
                update {new_project_table}
                set geom = (
                    select st_centroid(st_union(geom))
                    from {parcel_table}
                    where primary_id in (select unnest(string_to_array('{pid}', ';'))::int)
                )
                where parcel_id = '{pid}'
            """

        db.execute(query)
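# A hedged usage sketch for projects_to_parcels(). The table names are the defaults from
# the function signature and are assumed to already exist in the database.
def _example_projects_to_parcels_usage() -> None:
    # No year filter: writes point geometries into 'projects_2021_03_15'
    projects_to_parcels()

    # With a SQL-valid filter: the filter text is folded into the output table name,
    # e.g. 'start_year > 2020' -> 'projects_2021_03_15_start_year_gt_2020'
    projects_to_parcels(year_filter="start_year > 2020")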
- transit link volumes
- stop-level boardings by mode
- 24hr origin-destination flows to/from the study area
"""
import os

import pandas as pd
from dotenv import load_dotenv, find_dotenv
from pg_data_etl import Database

from fy21_university_city import GDRIVE_FOLDER

load_dotenv(find_dotenv())

DATABASE_URL = os.getenv("FY21_UCITY_DB")

db = Database.from_uri(DATABASE_URL)

result_folder = GDRIVE_FOLDER / "GIS/Results_Data_for_Aaron"
output_folder = GDRIVE_FOLDER / "GIS/Results_Data_for_Aaron_Crunched"


def import_geodata():
    """
    Import all shapefiles that accompany the tabular model outputs
    """
    # Import shapefiles
    shapefiles_to_import = result_folder.rglob("*.SHP")

    for shp_path in shapefiles_to_import:
        tablename = shp_path.stem.lower()
"Bucks County", "Chester County", "Delaware County", "Montgomery County", "Philadelphia County", ] NJ_COUNTIES = [ "Burlington County", "Camden County", "Gloucester County", "Mercer County", ] if __name__ == "__main__": db = Database.from_config("aopp_grant", "localhost") db.admin("CREATE") # Read tabular data and filter to eligible tracts in our region before writing to database data_source_folder = Path( "/Volumes/GoogleDrive/Shared drives/Sidewalk Gap Analysis/GIS data/AOPP-analysis" ) df = pd.read_csv(data_source_folder / "inputs/RAISE_Persistent_Poverty.csv") # Filter to the county/state combinations we care about pa_tracts = df[(df["A. State"] == "Pennsylvania") & (df["B. County"].isin(PA_COUNTIES))] nj_tracts = df[(df["A. State"] == "New Jersey") & (df["B. County"].isin(NJ_COUNTIES))]