def export_shp(
    tablename: str,
    output_path: Path = GDRIVE_FOLDER,
    db: Database = _db,
    include_geoids: bool = True,
) -> None:
    """
    Export a spatial table from SQL to a shapefile.

    If include_geoids is True, geographic identifiers are added by joining
    the projects to blocks, and then to a lookup table.
    """
    output_folder = output_path / "Outputs"

    export_args = {
        "filetype": "shp",
    }

    if include_geoids:
        export_args["table_or_sql"] = f"""
            with points_and_blockids as (
                select p.*, b.geoid as block_geoid
                from {tablename} p
                left join blocks b
                    on st_within(p.geom, b.geom)
            )
            select p.*, b.*
            from points_and_blockids p
            left join blocktogeos_lookup b
                on b.block10::text = p.block_geoid
        """
        export_args["filepath"] = output_folder / f"{tablename}_with_geoids.shp"
    else:
        export_args["table_or_sql"] = tablename
        export_args["filepath"] = output_folder / f"{tablename}.shp"

    print("-" * 80)
    print(f"Exporting {tablename} to {export_args['filepath']}")

    db.export_gis(**export_args)
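# A hedged usage sketch for export_shp(). The table name below is hypothetical and is
# assumed to already exist in the database, along with the 'blocks' and
# 'blocktogeos_lookup' tables referenced in the geoid join.
def _example_export_shp_usage() -> None:
    # Joined to block geoids -> Outputs/projects_2021_03_15_with_geoids.shp
    export_shp("projects_2021_03_15")

    # Raw table only -> Outputs/projects_2021_03_15.shp
    export_shp("projects_2021_03_15", include_geoids=False)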
def import_data(db: Database = _db, folder: Path = GDRIVE_FOLDER) -> None:
    """
    Import all shapefiles and CSV files within the '/Inputs/' subfolder,
    if they have not yet been uploaded.
    """
    # Ensure that the database exists
    db.admin("create")

    print("-" * 80)

    print("Importing shapefiles:")
    for shp in folder.rglob("Inputs/*.shp"):
        sql_tablename = clean_name_for_sql(shp)
        if f"public.{sql_tablename}" not in db.tables(spatial_only=True):
            print("\t ->", sql_tablename)
            db.import_gis(
                method="shp2pgsql",
                srid=26918,
                filepath=shp,
                sql_tablename=sql_tablename,
            )

    print("Importing CSV files:")
    for csvfile in folder.rglob("Inputs/*.csv"):
        sql_tablename = clean_name_for_sql(csvfile)
        if f"public.{sql_tablename}" not in db.tables():
            print("\t ->", sql_tablename)
            db.import_file_with_pandas(csvfile, sql_tablename)
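# import_data() relies on clean_name_for_sql(), which is defined elsewhere in this repo.
# The sketch below is NOT that implementation -- it is a hypothetical stand-in that shows
# the kind of normalization such a helper typically performs (lowercase the file stem and
# replace characters that are unsafe in a Postgres table name).
def _example_clean_name_for_sql(filepath: Path) -> str:
    stem = filepath.stem.lower()
    # Keep letters, digits, and underscores; swap everything else for an underscore
    return "".join(c if c.isalnum() or c == "_" else "_" for c in stem)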
@pytest.fixture
def local_db():
    """
    Spin up a local db, use it in the test, then drop it
    """
    db = Database.from_config("pytest", "localhost")
    db.admin("CREATE")

    yield db

    db.admin("DROP")
def test_geopandas_imports_spatial_data_from_disk(local_db: Database, downloaded_shapefile):
    """
    Using geopandas: Confirm that we have a spatial table with proper projection
    in the DB after importing a local shapefile
    """
    unzipped_shp_file_name = downloaded_shapefile
    sql_tablename = "test.neighborhoods_gpd"

    local_db.import_gis(
        method="geopandas",
        filepath=str(unzipped_shp_file_name) + ".shp",
        sql_tablename=sql_tablename,
    )

    # Confirm the spatial table exists
    assert sql_tablename in local_db.tables(spatial_only=True)

    # Confirm that the EPSG is correct
    assert 2272 == local_db.projection(sql_tablename)
def _find_latest_project_table(db: Database = _db) -> str:
    """
    You may have multiple development ('project') tables.

    This function finds the one with the most recent datestamp
    and returns the name of the table
    """
    query = """
        select table_name
        from information_schema.tables
        where table_name like 'all_devprojects%%'
        order by table_name desc
        limit 1
    """
    return db.query_as_singleton(query)
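# Why 'order by table_name desc limit 1' finds the most recent table (illustrative sketch):
# the names end in a zero-padded YYYY_MM_DD stamp, so lexicographic order matches
# chronological order. The sample names below are hypothetical.
def _example_latest_table_by_name() -> str:
    names = [
        "all_devprojects_2021_03_15",
        "all_devprojects_2021_01_04",
        "all_devprojects_2020_12_31",
    ]
    # max() applies the same string ordering the SQL query relies on
    return max(names)  # -> 'all_devprojects_2021_03_15'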
@pytest.fixture
def local_db_with_spatial_data(downloaded_shapefile):
    """
    Spin up a local db, use it in the test, then drop it
    """
    db = Database.from_config("pytest", "localhost")
    db.admin("CREATE")

    sql_tablename = "test.neighborhoods_gpd"

    db.import_gis(
        method="geopandas",
        filepath=str(downloaded_shapefile) + ".shp",
        sql_tablename=sql_tablename,
    )

    yield db

    db.admin("DROP")
def test_that_spatial_table_is_inside_database(local_db_with_spatial_data: Database):
    assert "test.neighborhoods_gpd" in local_db_with_spatial_data.tables(spatial_only=True)
def test_that_existing_database_exists(localhost_postgres: Database):
    """
    This database definitely exists already
    """
    assert localhost_postgres.exists() is True
@pytest.fixture
def localhost_postgres():
    yield Database.from_config("postgres", "localhost")
import os
from pathlib import Path

from dotenv import load_dotenv, find_dotenv
from pg_data_etl import Database

load_dotenv(find_dotenv())

GDRIVE_FOLDER = Path(os.getenv("GOOGLE_DRIVE_ROOT"))

_credentials = {
    "host": os.getenv("DB_HOST"),
    "port": os.getenv("DB_PORT"),
    "un": os.getenv("DB_USER"),
    "pw": os.getenv("DB_PW"),
    "super_un": os.getenv("DB_SUPER_UN") or "postgres",
    "super_pw": os.getenv("DB_SUPER_PW") or os.getenv("DB_PW"),
}

_db_name = os.getenv("DB_NAME")

_db = Database.from_parameters(_db_name, **_credentials)
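# A minimal sketch of the .env file this module expects. The variable names come from the
# os.getenv() calls above; the values shown are placeholders, not real paths or credentials.
#
#   GOOGLE_DRIVE_ROOT=/path/to/Google Drive/Shared drives/Project Folder
#   DB_HOST=localhost
#   DB_PORT=5432
#   DB_USER=your_username
#   DB_PW=your_password
#   DB_SUPER_UN=postgres
#   DB_SUPER_PW=your_superuser_password
#   DB_NAME=project_db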
def projects_to_parcels(
    project_table: str = "all_devprojects_2021_03_15",
    parcel_table: str = "parcels",
    db: Database = _db,
    year_filter: Union[str, bool] = False,
) -> None:
    """
    If you want to limit to developments starting in a certain year, provide a
    SQL-valid year_filter argument as a string.

    For example:
        year_filter = 'start_year > 2020'
    """

    # Parse the date out of the project table name
    # Turn "all_devprojects_2021_03_15" into "2021_03_15"
    project_table_date = "_".join(project_table.split("_")[-3:])

    new_project_table = f"projects_{project_table_date}"

    # Create a new version of the project table with only the projects we want to analyze
    projects_to_map_query = f"""
        select
            project_id, name, address, parcel_id, building_type, start_year,
            residential_units, non_res_sqft, tags, duration
        from {project_table}
        where
            (tags like '%%NineCoDur%%'
             or tags like '%%bethel_concord%%'
             or tags like '%%Mercer_Affordable_Other%%')
    """

    # Add the year filter to the SQL code and the new table name
    if year_filter:
        projects_to_map_query += f"""
            AND {year_filter}
        """

        year_filter_table_name = year_filter.replace(" ", "_")
        for old_char, new_char in [("<", "lt"), (">", "gt"), ("=", "e")]:
            if old_char in year_filter_table_name:
                year_filter_table_name = year_filter_table_name.replace(old_char, new_char)

        new_project_table += f"_{year_filter_table_name}"

    print("-" * 80)
    print(f"Applying parcel geometries to {new_project_table}")

    create_table_query = f"""
        drop table if exists {new_project_table};

        create table {new_project_table} as (
            {projects_to_map_query}
        )
    """
    print(create_table_query)
    db.execute(create_table_query)

    # Add a 'geom' column to hold POINT data
    db.execute(f"select addgeometrycolumn('{new_project_table}', 'geom', 26918, 'POINT', 2)")

    # Loop over every project and assign an appropriate point location to the 'geom' column
    parcel_ids = db.query_as_list_of_singletons(f"SELECT parcel_id FROM {new_project_table}")

    for pid in tqdm(parcel_ids, total=len(parcel_ids)):

        # A single parcel id: use the centroid of that parcel
        if ";" not in pid:
            query = f"""
                update {new_project_table}
                set geom = (
                    select st_centroid(geom)
                    from {parcel_table}
                    where primary_id = {pid}
                )
                where parcel_id = '{pid}'
            """

        # Multiple ';'-delimited parcel ids: use the centroid of their unioned geometry
        else:
            query = f"""
                update {new_project_table}
                set geom = (
                    select st_centroid(st_union(geom))
                    from {parcel_table}
                    where primary_id in (select unnest(string_to_array('{pid}', ';'))::int)
                )
                where parcel_id = '{pid}'
            """

        db.execute(query)
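# A hedged usage sketch for projects_to_parcels(). The table names are the defaults from
# the function signature and are assumed to already exist in the database.
def _example_projects_to_parcels_usage() -> None:
    # No year filter: writes point geometries into 'projects_2021_03_15'
    projects_to_parcels()

    # With a SQL-valid filter: the filter text is folded into the output table name,
    # e.g. 'start_year > 2020' -> 'projects_2021_03_15_start_year_gt_2020'
    projects_to_parcels(year_filter="start_year > 2020")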
- transit link volumes
- stop-level boardings by mode
- 24hr origin-destination flows to/from the study area
"""
import os

import pandas as pd
from dotenv import load_dotenv, find_dotenv
from pg_data_etl import Database

from fy21_university_city import GDRIVE_FOLDER

load_dotenv(find_dotenv())

DATABASE_URL = os.getenv("FY21_UCITY_DB")

db = Database.from_uri(DATABASE_URL)

result_folder = GDRIVE_FOLDER / "GIS/Results_Data_for_Aaron"
output_folder = GDRIVE_FOLDER / "GIS/Results_Data_for_Aaron_Crunched"


def import_geodata():
    """
    Import all shapefiles that accompany the tabular model outputs
    """
    # Import shapefiles
    shapefiles_to_import = result_folder.rglob("*.SHP")

    for shp_path in shapefiles_to_import:
        tablename = shp_path.stem.lower()
"Bucks County", "Chester County", "Delaware County", "Montgomery County", "Philadelphia County", ] NJ_COUNTIES = [ "Burlington County", "Camden County", "Gloucester County", "Mercer County", ] if __name__ == "__main__": db = Database.from_config("aopp_grant", "localhost") db.admin("CREATE") # Read tabular data and filter to eligible tracts in our region before writing to database data_source_folder = Path( "/Volumes/GoogleDrive/Shared drives/Sidewalk Gap Analysis/GIS data/AOPP-analysis" ) df = pd.read_csv(data_source_folder / "inputs/RAISE_Persistent_Poverty.csv") # Filter to the county/state combinations we care about pa_tracts = df[(df["A. State"] == "Pennsylvania") & (df["B. County"].isin(PA_COUNTIES))] nj_tracts = df[(df["A. State"] == "New Jersey") & (df["B. County"].isin(NJ_COUNTIES))]